{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Extracting Data\n",
    "---"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`id_file` points to the `subjects_list.txt` file provided with the HCP data (inside the `hcp_task` folder) by NMA.\n",
    "\n",
    "It contains the subject ids of the 100 participants."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the subject-ID list on disk.\n",
    "# NOTE(review): absolute, machine-specific path; point it at your own copy of the data.\n",
    "id_file = r'/Users/rajdeep_ch/Documents/nma/project/hcp_task/subjects_list.txt'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse the subject-ID text file into a NumPy array.\n",
    "# dtype=float is np.loadtxt's default; it is spelled out here because the\n",
    "# values are converted to int and then str in a later cell.\n",
    "id_Series = np.loadtxt(id_file, dtype=float)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The values contained in `id_Series` are floating point numbers.\n",
    "\n",
    "They are first converted to `int` and then to `str` type."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# float -> int removes the trailing '.0'; int -> str gives plain ID strings.\n",
    "id_Series_str = [str(whole_id) for whole_id in map(int, id_Series)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): absolute, machine-specific path; point it at your own copy of the data.\n",
    "csv_file = r'/Users/rajdeep_ch/Documents/nma/project/alldata.csv'\n",
    "\n",
    "# Read with pandas' default header handling. The resulting frame has placeholder\n",
    "# 'Unnamed: N' column names and the real names in row 0; later cells fix that up.\n",
    "allData_df = pd.read_csv(filepath_or_buffer=csv_file)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`allData_df` contains data for all ~1200 participants across all the tasks: 1206 participant rows, plus one extra row (row 0) that holds the real column names."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1207, 383)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "allData_df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>Unnamed: 1</th>\n",
       "      <th>Unnamed: 2</th>\n",
       "      <th>Unnamed: 3</th>\n",
       "      <th>Unnamed: 4</th>\n",
       "      <th>Unnamed: 5</th>\n",
       "      <th>Unnamed: 6</th>\n",
       "      <th>Unnamed: 7</th>\n",
       "      <th>Unnamed: 8</th>\n",
       "      <th>Unnamed: 9</th>\n",
       "      <th>...</th>\n",
       "      <th>Unnamed: 373</th>\n",
       "      <th>Unnamed: 374</th>\n",
       "      <th>Unnamed: 375</th>\n",
       "      <th>Unnamed: 376</th>\n",
       "      <th>Unnamed: 377</th>\n",
       "      <th>Unnamed: 378</th>\n",
       "      <th>Unnamed: 379</th>\n",
       "      <th>Unnamed: 380</th>\n",
       "      <th>Unnamed: 381</th>\n",
       "      <th>Unnamed: 382</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Subject</td>\n",
       "      <td>Release</td>\n",
       "      <td>Acquisition</td>\n",
       "      <td>Gender</td>\n",
       "      <td>Age</td>\n",
       "      <td>3T_Full_MR_Compl</td>\n",
       "      <td>T1_Count</td>\n",
       "      <td>T2_Count</td>\n",
       "      <td>3T_RS-fMRI_Count</td>\n",
       "      <td>3T_RS-fMRI_PctCompl</td>\n",
       "      <td>...</td>\n",
       "      <td>Noise_Comp</td>\n",
       "      <td>Odor_Unadj</td>\n",
       "      <td>Odor_AgeAdj</td>\n",
       "      <td>PainIntens_RawScore</td>\n",
       "      <td>PainInterf_Tscore</td>\n",
       "      <td>Taste_Unadj</td>\n",
       "      <td>Taste_AgeAdj</td>\n",
       "      <td>Mars_Log_Score</td>\n",
       "      <td>Mars_Errs</td>\n",
       "      <td>Mars_Final</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100004</td>\n",
       "      <td>S900</td>\n",
       "      <td>Q06</td>\n",
       "      <td>M</td>\n",
       "      <td>22-25</td>\n",
       "      <td>FALSE</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>5.2</td>\n",
       "      <td>101.12</td>\n",
       "      <td>86.45</td>\n",
       "      <td>2</td>\n",
       "      <td>45.9</td>\n",
       "      <td>107.17</td>\n",
       "      <td>105.31</td>\n",
       "      <td>1.8</td>\n",
       "      <td>0</td>\n",
       "      <td>1.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100206</td>\n",
       "      <td>S900</td>\n",
       "      <td>Q11</td>\n",
       "      <td>M</td>\n",
       "      <td>26-30</td>\n",
       "      <td>TRUE</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>100.0</td>\n",
       "      <td>...</td>\n",
       "      <td>6.0</td>\n",
       "      <td>108.79</td>\n",
       "      <td>97.19</td>\n",
       "      <td>1</td>\n",
       "      <td>49.7</td>\n",
       "      <td>72.63</td>\n",
       "      <td>72.03</td>\n",
       "      <td>1.84</td>\n",
       "      <td>0</td>\n",
       "      <td>1.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>100307</td>\n",
       "      <td>Q1</td>\n",
       "      <td>Q01</td>\n",
       "      <td>F</td>\n",
       "      <td>26-30</td>\n",
       "      <td>TRUE</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>100.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.6</td>\n",
       "      <td>101.12</td>\n",
       "      <td>86.45</td>\n",
       "      <td>0</td>\n",
       "      <td>38.6</td>\n",
       "      <td>71.69</td>\n",
       "      <td>71.76</td>\n",
       "      <td>1.76</td>\n",
       "      <td>0</td>\n",
       "      <td>1.76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100408</td>\n",
       "      <td>Q3</td>\n",
       "      <td>Q03</td>\n",
       "      <td>M</td>\n",
       "      <td>31-35</td>\n",
       "      <td>TRUE</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>100.0</td>\n",
       "      <td>...</td>\n",
       "      <td>2.0</td>\n",
       "      <td>108.79</td>\n",
       "      <td>98.04</td>\n",
       "      <td>2</td>\n",
       "      <td>52.6</td>\n",
       "      <td>114.01</td>\n",
       "      <td>113.59</td>\n",
       "      <td>1.76</td>\n",
       "      <td>2</td>\n",
       "      <td>1.68</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 383 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  Unnamed: 0 Unnamed: 1   Unnamed: 2 Unnamed: 3 Unnamed: 4        Unnamed: 5  \\\n",
       "0    Subject    Release  Acquisition     Gender        Age  3T_Full_MR_Compl   \n",
       "1     100004       S900          Q06          M      22-25             FALSE   \n",
       "2     100206       S900          Q11          M      26-30              TRUE   \n",
       "3     100307         Q1          Q01          F      26-30              TRUE   \n",
       "4     100408         Q3          Q03          M      31-35              TRUE   \n",
       "\n",
       "  Unnamed: 6 Unnamed: 7        Unnamed: 8           Unnamed: 9  ...  \\\n",
       "0   T1_Count   T2_Count  3T_RS-fMRI_Count  3T_RS-fMRI_PctCompl  ...   \n",
       "1          0          0                 0                  0.0  ...   \n",
       "2          1          1                 4                100.0  ...   \n",
       "3          1          1                 4                100.0  ...   \n",
       "4          1          1                 4                100.0  ...   \n",
       "\n",
       "  Unnamed: 373 Unnamed: 374 Unnamed: 375         Unnamed: 376  \\\n",
       "0   Noise_Comp   Odor_Unadj  Odor_AgeAdj  PainIntens_RawScore   \n",
       "1          5.2       101.12        86.45                    2   \n",
       "2          6.0       108.79        97.19                    1   \n",
       "3          3.6       101.12        86.45                    0   \n",
       "4          2.0       108.79        98.04                    2   \n",
       "\n",
       "        Unnamed: 377 Unnamed: 378  Unnamed: 379    Unnamed: 380 Unnamed: 381  \\\n",
       "0  PainInterf_Tscore  Taste_Unadj  Taste_AgeAdj  Mars_Log_Score    Mars_Errs   \n",
       "1               45.9       107.17        105.31             1.8            0   \n",
       "2               49.7        72.63         72.03            1.84            0   \n",
       "3               38.6        71.69         71.76            1.76            0   \n",
       "4               52.6       114.01        113.59            1.76            2   \n",
       "\n",
       "  Unnamed: 382  \n",
       "0   Mars_Final  \n",
       "1          1.8  \n",
       "2         1.84  \n",
       "3         1.76  \n",
       "4         1.68  \n",
       "\n",
       "[5 rows x 383 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "allData_df.head()"
   ]
  },
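  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`read_csv` gave the columns placeholder names (`Unnamed: 0`, `Unnamed: 1`, ...), while the real column names sit in row 0.\n",
    "\n",
    "The dataframe is cleaned by using row 0 as the column names.\n",
    "\n",
    "Row 0 is then deleted."
   ]
  },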
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make sure the columns axis carries no name of its own.\n",
    "allData_df.columns = allData_df.columns.rename(None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Promote row 0 (the real header) to column names.\n",
    "# Guarded on the 'Unnamed' placeholder prefix so that re-running this cell after\n",
    "# row 0 has been dropped cannot overwrite the names with a participant's values.\n",
    "if str(allData_df.columns[0]).startswith('Unnamed'):\n",
    "    allData_df.columns = list(allData_df.iloc[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Subject</th>\n",
       "      <th>Release</th>\n",
       "      <th>Acquisition</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Age</th>\n",
       "      <th>3T_Full_MR_Compl</th>\n",
       "      <th>T1_Count</th>\n",
       "      <th>T2_Count</th>\n",
       "      <th>3T_RS-fMRI_Count</th>\n",
       "      <th>3T_RS-fMRI_PctCompl</th>\n",
       "      <th>...</th>\n",
       "      <th>Noise_Comp</th>\n",
       "      <th>Odor_Unadj</th>\n",
       "      <th>Odor_AgeAdj</th>\n",
       "      <th>PainIntens_RawScore</th>\n",
       "      <th>PainInterf_Tscore</th>\n",
       "      <th>Taste_Unadj</th>\n",
       "      <th>Taste_AgeAdj</th>\n",
       "      <th>Mars_Log_Score</th>\n",
       "      <th>Mars_Errs</th>\n",
       "      <th>Mars_Final</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Subject</td>\n",
       "      <td>Release</td>\n",
       "      <td>Acquisition</td>\n",
       "      <td>Gender</td>\n",
       "      <td>Age</td>\n",
       "      <td>3T_Full_MR_Compl</td>\n",
       "      <td>T1_Count</td>\n",
       "      <td>T2_Count</td>\n",
       "      <td>3T_RS-fMRI_Count</td>\n",
       "      <td>3T_RS-fMRI_PctCompl</td>\n",
       "      <td>...</td>\n",
       "      <td>Noise_Comp</td>\n",
       "      <td>Odor_Unadj</td>\n",
       "      <td>Odor_AgeAdj</td>\n",
       "      <td>PainIntens_RawScore</td>\n",
       "      <td>PainInterf_Tscore</td>\n",
       "      <td>Taste_Unadj</td>\n",
       "      <td>Taste_AgeAdj</td>\n",
       "      <td>Mars_Log_Score</td>\n",
       "      <td>Mars_Errs</td>\n",
       "      <td>Mars_Final</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100004</td>\n",
       "      <td>S900</td>\n",
       "      <td>Q06</td>\n",
       "      <td>M</td>\n",
       "      <td>22-25</td>\n",
       "      <td>FALSE</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>5.2</td>\n",
       "      <td>101.12</td>\n",
       "      <td>86.45</td>\n",
       "      <td>2</td>\n",
       "      <td>45.9</td>\n",
       "      <td>107.17</td>\n",
       "      <td>105.31</td>\n",
       "      <td>1.8</td>\n",
       "      <td>0</td>\n",
       "      <td>1.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100206</td>\n",
       "      <td>S900</td>\n",
       "      <td>Q11</td>\n",
       "      <td>M</td>\n",
       "      <td>26-30</td>\n",
       "      <td>TRUE</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>100.0</td>\n",
       "      <td>...</td>\n",
       "      <td>6.0</td>\n",
       "      <td>108.79</td>\n",
       "      <td>97.19</td>\n",
       "      <td>1</td>\n",
       "      <td>49.7</td>\n",
       "      <td>72.63</td>\n",
       "      <td>72.03</td>\n",
       "      <td>1.84</td>\n",
       "      <td>0</td>\n",
       "      <td>1.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>100307</td>\n",
       "      <td>Q1</td>\n",
       "      <td>Q01</td>\n",
       "      <td>F</td>\n",
       "      <td>26-30</td>\n",
       "      <td>TRUE</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>100.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.6</td>\n",
       "      <td>101.12</td>\n",
       "      <td>86.45</td>\n",
       "      <td>0</td>\n",
       "      <td>38.6</td>\n",
       "      <td>71.69</td>\n",
       "      <td>71.76</td>\n",
       "      <td>1.76</td>\n",
       "      <td>0</td>\n",
       "      <td>1.76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100408</td>\n",
       "      <td>Q3</td>\n",
       "      <td>Q03</td>\n",
       "      <td>M</td>\n",
       "      <td>31-35</td>\n",
       "      <td>TRUE</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>100.0</td>\n",
       "      <td>...</td>\n",
       "      <td>2.0</td>\n",
       "      <td>108.79</td>\n",
       "      <td>98.04</td>\n",
       "      <td>2</td>\n",
       "      <td>52.6</td>\n",
       "      <td>114.01</td>\n",
       "      <td>113.59</td>\n",
       "      <td>1.76</td>\n",
       "      <td>2</td>\n",
       "      <td>1.68</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 383 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Subject  Release  Acquisition  Gender    Age  3T_Full_MR_Compl  T1_Count  \\\n",
       "0  Subject  Release  Acquisition  Gender    Age  3T_Full_MR_Compl  T1_Count   \n",
       "1   100004     S900          Q06       M  22-25             FALSE         0   \n",
       "2   100206     S900          Q11       M  26-30              TRUE         1   \n",
       "3   100307       Q1          Q01       F  26-30              TRUE         1   \n",
       "4   100408       Q3          Q03       M  31-35              TRUE         1   \n",
       "\n",
       "   T2_Count  3T_RS-fMRI_Count  3T_RS-fMRI_PctCompl  ...  Noise_Comp  \\\n",
       "0  T2_Count  3T_RS-fMRI_Count  3T_RS-fMRI_PctCompl  ...  Noise_Comp   \n",
       "1         0                 0                  0.0  ...         5.2   \n",
       "2         1                 4                100.0  ...         6.0   \n",
       "3         1                 4                100.0  ...         3.6   \n",
       "4         1                 4                100.0  ...         2.0   \n",
       "\n",
       "   Odor_Unadj  Odor_AgeAdj  PainIntens_RawScore  PainInterf_Tscore  \\\n",
       "0  Odor_Unadj  Odor_AgeAdj  PainIntens_RawScore  PainInterf_Tscore   \n",
       "1      101.12        86.45                    2               45.9   \n",
       "2      108.79        97.19                    1               49.7   \n",
       "3      101.12        86.45                    0               38.6   \n",
       "4      108.79        98.04                    2               52.6   \n",
       "\n",
       "   Taste_Unadj  Taste_AgeAdj  Mars_Log_Score  Mars_Errs  Mars_Final  \n",
       "0  Taste_Unadj  Taste_AgeAdj  Mars_Log_Score  Mars_Errs  Mars_Final  \n",
       "1       107.17        105.31             1.8          0         1.8  \n",
       "2        72.63         72.03            1.84          0        1.84  \n",
       "3        71.69         71.76            1.76          0        1.76  \n",
       "4       114.01        113.59            1.76          2        1.68  \n",
       "\n",
       "[5 rows x 383 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "allData_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Row 0 only repeats the column names; remove it by label.\n",
    "# The index is not reset, so the remaining rows keep their original labels.\n",
    "allData_df = allData_df.drop(index=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Subject</th>\n",
       "      <th>Release</th>\n",
       "      <th>Acquisition</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Age</th>\n",
       "      <th>3T_Full_MR_Compl</th>\n",
       "      <th>T1_Count</th>\n",
       "      <th>T2_Count</th>\n",
       "      <th>3T_RS-fMRI_Count</th>\n",
       "      <th>3T_RS-fMRI_PctCompl</th>\n",
       "      <th>...</th>\n",
       "      <th>Noise_Comp</th>\n",
       "      <th>Odor_Unadj</th>\n",
       "      <th>Odor_AgeAdj</th>\n",
       "      <th>PainIntens_RawScore</th>\n",
       "      <th>PainInterf_Tscore</th>\n",
       "      <th>Taste_Unadj</th>\n",
       "      <th>Taste_AgeAdj</th>\n",
       "      <th>Mars_Log_Score</th>\n",
       "      <th>Mars_Errs</th>\n",
       "      <th>Mars_Final</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100004</td>\n",
       "      <td>S900</td>\n",
       "      <td>Q06</td>\n",
       "      <td>M</td>\n",
       "      <td>22-25</td>\n",
       "      <td>FALSE</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>5.2</td>\n",
       "      <td>101.12</td>\n",
       "      <td>86.45</td>\n",
       "      <td>2</td>\n",
       "      <td>45.9</td>\n",
       "      <td>107.17</td>\n",
       "      <td>105.31</td>\n",
       "      <td>1.8</td>\n",
       "      <td>0</td>\n",
       "      <td>1.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100206</td>\n",
       "      <td>S900</td>\n",
       "      <td>Q11</td>\n",
       "      <td>M</td>\n",
       "      <td>26-30</td>\n",
       "      <td>TRUE</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>100.0</td>\n",
       "      <td>...</td>\n",
       "      <td>6.0</td>\n",
       "      <td>108.79</td>\n",
       "      <td>97.19</td>\n",
       "      <td>1</td>\n",
       "      <td>49.7</td>\n",
       "      <td>72.63</td>\n",
       "      <td>72.03</td>\n",
       "      <td>1.84</td>\n",
       "      <td>0</td>\n",
       "      <td>1.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>100307</td>\n",
       "      <td>Q1</td>\n",
       "      <td>Q01</td>\n",
       "      <td>F</td>\n",
       "      <td>26-30</td>\n",
       "      <td>TRUE</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>100.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.6</td>\n",
       "      <td>101.12</td>\n",
       "      <td>86.45</td>\n",
       "      <td>0</td>\n",
       "      <td>38.6</td>\n",
       "      <td>71.69</td>\n",
       "      <td>71.76</td>\n",
       "      <td>1.76</td>\n",
       "      <td>0</td>\n",
       "      <td>1.76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100408</td>\n",
       "      <td>Q3</td>\n",
       "      <td>Q03</td>\n",
       "      <td>M</td>\n",
       "      <td>31-35</td>\n",
       "      <td>TRUE</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>100.0</td>\n",
       "      <td>...</td>\n",
       "      <td>2.0</td>\n",
       "      <td>108.79</td>\n",
       "      <td>98.04</td>\n",
       "      <td>2</td>\n",
       "      <td>52.6</td>\n",
       "      <td>114.01</td>\n",
       "      <td>113.59</td>\n",
       "      <td>1.76</td>\n",
       "      <td>2</td>\n",
       "      <td>1.68</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>100610</td>\n",
       "      <td>S900</td>\n",
       "      <td>Q08</td>\n",
       "      <td>M</td>\n",
       "      <td>26-30</td>\n",
       "      <td>TRUE</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>100.0</td>\n",
       "      <td>...</td>\n",
       "      <td>2.0</td>\n",
       "      <td>122.25</td>\n",
       "      <td>110.45</td>\n",
       "      <td>0</td>\n",
       "      <td>38.6</td>\n",
       "      <td>84.84</td>\n",
       "      <td>85.31</td>\n",
       "      <td>1.92</td>\n",
       "      <td>1</td>\n",
       "      <td>1.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1202</th>\n",
       "      <td>992774</td>\n",
       "      <td>Q2</td>\n",
       "      <td>Q02</td>\n",
       "      <td>M</td>\n",
       "      <td>31-35</td>\n",
       "      <td>TRUE</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>100.0</td>\n",
       "      <td>...</td>\n",
       "      <td>8.4</td>\n",
       "      <td>122.25</td>\n",
       "      <td>111.41</td>\n",
       "      <td>4</td>\n",
       "      <td>50.1</td>\n",
       "      <td>107.17</td>\n",
       "      <td>103.55</td>\n",
       "      <td>1.76</td>\n",
       "      <td>0</td>\n",
       "      <td>1.76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1203</th>\n",
       "      <td>993675</td>\n",
       "      <td>S900</td>\n",
       "      <td>Q09</td>\n",
       "      <td>F</td>\n",
       "      <td>26-30</td>\n",
       "      <td>TRUE</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>100.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.4</td>\n",
       "      <td>122.25</td>\n",
       "      <td>110.45</td>\n",
       "      <td>0</td>\n",
       "      <td>38.6</td>\n",
       "      <td>84.07</td>\n",
       "      <td>84.25</td>\n",
       "      <td>1.8</td>\n",
       "      <td>1</td>\n",
       "      <td>1.76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1204</th>\n",
       "      <td>994273</td>\n",
       "      <td>S500</td>\n",
       "      <td>Q06</td>\n",
       "      <td>M</td>\n",
       "      <td>26-30</td>\n",
       "      <td>TRUE</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>100.0</td>\n",
       "      <td>...</td>\n",
       "      <td>6.0</td>\n",
       "      <td>122.25</td>\n",
       "      <td>111.41</td>\n",
       "      <td>7</td>\n",
       "      <td>63.8</td>\n",
       "      <td>110.65</td>\n",
       "      <td>109.73</td>\n",
       "      <td>1.8</td>\n",
       "      <td>1</td>\n",
       "      <td>1.76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1205</th>\n",
       "      <td>995174</td>\n",
       "      <td>S1200</td>\n",
       "      <td>Q13</td>\n",
       "      <td>M</td>\n",
       "      <td>22-25</td>\n",
       "      <td>FALSE</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.6</td>\n",
       "      <td>88.61</td>\n",
       "      <td>64.58</td>\n",
       "      <td>3</td>\n",
       "      <td>50.1</td>\n",
       "      <td>117.16</td>\n",
       "      <td>117.4</td>\n",
       "      <td>1.8</td>\n",
       "      <td>0</td>\n",
       "      <td>1.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1206</th>\n",
       "      <td>996782</td>\n",
       "      <td>S900</td>\n",
       "      <td>Q08</td>\n",
       "      <td>F</td>\n",
       "      <td>26-30</td>\n",
       "      <td>TRUE</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>100.0</td>\n",
       "      <td>...</td>\n",
       "      <td>6.0</td>\n",
       "      <td>108.79</td>\n",
       "      <td>97.19</td>\n",
       "      <td>0</td>\n",
       "      <td>38.6</td>\n",
       "      <td>75.43</td>\n",
       "      <td>73.72</td>\n",
       "      <td>1.84</td>\n",
       "      <td>0</td>\n",
       "      <td>1.84</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1206 rows × 383 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Subject Release Acquisition Gender    Age 3T_Full_MR_Compl T1_Count  \\\n",
       "1     100004    S900         Q06      M  22-25            FALSE        0   \n",
       "2     100206    S900         Q11      M  26-30             TRUE        1   \n",
       "3     100307      Q1         Q01      F  26-30             TRUE        1   \n",
       "4     100408      Q3         Q03      M  31-35             TRUE        1   \n",
       "5     100610    S900         Q08      M  26-30             TRUE        2   \n",
       "...      ...     ...         ...    ...    ...              ...      ...   \n",
       "1202  992774      Q2         Q02      M  31-35             TRUE        2   \n",
       "1203  993675    S900         Q09      F  26-30             TRUE        2   \n",
       "1204  994273    S500         Q06      M  26-30             TRUE        1   \n",
       "1205  995174   S1200         Q13      M  22-25            FALSE        1   \n",
       "1206  996782    S900         Q08      F  26-30             TRUE        2   \n",
       "\n",
       "     T2_Count 3T_RS-fMRI_Count 3T_RS-fMRI_PctCompl  ... Noise_Comp Odor_Unadj  \\\n",
       "1           0                0                 0.0  ...        5.2     101.12   \n",
       "2           1                4               100.0  ...        6.0     108.79   \n",
       "3           1                4               100.0  ...        3.6     101.12   \n",
       "4           1                4               100.0  ...        2.0     108.79   \n",
       "5           1                4               100.0  ...        2.0     122.25   \n",
       "...       ...              ...                 ...  ...        ...        ...   \n",
       "1202        2                4               100.0  ...        8.4     122.25   \n",
       "1203        2                4               100.0  ...        0.4     122.25   \n",
       "1204        1                4               100.0  ...        6.0     122.25   \n",
       "1205        1                2                 0.0  ...        3.6      88.61   \n",
       "1206        2                4               100.0  ...        6.0     108.79   \n",
       "\n",
       "     Odor_AgeAdj PainIntens_RawScore PainInterf_Tscore Taste_Unadj  \\\n",
       "1          86.45                   2              45.9      107.17   \n",
       "2          97.19                   1              49.7       72.63   \n",
       "3          86.45                   0              38.6       71.69   \n",
       "4          98.04                   2              52.6      114.01   \n",
       "5         110.45                   0              38.6       84.84   \n",
       "...          ...                 ...               ...         ...   \n",
       "1202      111.41                   4              50.1      107.17   \n",
       "1203      110.45                   0              38.6       84.07   \n",
       "1204      111.41                   7              63.8      110.65   \n",
       "1205       64.58                   3              50.1      117.16   \n",
       "1206       97.19                   0              38.6       75.43   \n",
       "\n",
       "     Taste_AgeAdj Mars_Log_Score Mars_Errs Mars_Final  \n",
       "1          105.31            1.8         0        1.8  \n",
       "2           72.03           1.84         0       1.84  \n",
       "3           71.76           1.76         0       1.76  \n",
       "4          113.59           1.76         2       1.68  \n",
       "5           85.31           1.92         1       1.88  \n",
       "...           ...            ...       ...        ...  \n",
       "1202       103.55           1.76         0       1.76  \n",
       "1203        84.25            1.8         1       1.76  \n",
       "1204       109.73            1.8         1       1.76  \n",
       "1205        117.4            1.8         0        1.8  \n",
       "1206        73.72           1.84         0       1.84  \n",
       "\n",
       "[1206 rows x 383 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "allData_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is how the dataframe looks now.\n",
    "\n",
    "From this dataframe, personality data is extracted.\n",
    "\n",
    "The personality data columns occupy positions 308 through 372, i.e. the end-exclusive slice `308:373` (65 columns)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NEOFAC_A</th>\n",
       "      <th>NEOFAC_O</th>\n",
       "      <th>NEOFAC_C</th>\n",
       "      <th>NEOFAC_N</th>\n",
       "      <th>NEOFAC_E</th>\n",
       "      <th>NEORAW_01</th>\n",
       "      <th>NEORAW_02</th>\n",
       "      <th>NEORAW_03</th>\n",
       "      <th>NEORAW_04</th>\n",
       "      <th>NEORAW_05</th>\n",
       "      <th>...</th>\n",
       "      <th>NEORAW_51</th>\n",
       "      <th>NEORAW_52</th>\n",
       "      <th>NEORAW_53</th>\n",
       "      <th>NEORAW_54</th>\n",
       "      <th>NEORAW_55</th>\n",
       "      <th>NEORAW_56</th>\n",
       "      <th>NEORAW_57</th>\n",
       "      <th>NEORAW_58</th>\n",
       "      <th>NEORAW_59</th>\n",
       "      <th>NEORAW_60</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>27</td>\n",
       "      <td>15</td>\n",
       "      <td>28</td>\n",
       "      <td>24</td>\n",
       "      <td>25</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>SA</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>N</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>29</td>\n",
       "      <td>23</td>\n",
       "      <td>26</td>\n",
       "      <td>21</td>\n",
       "      <td>32</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>SA</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>N</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>37</td>\n",
       "      <td>24</td>\n",
       "      <td>35</td>\n",
       "      <td>15</td>\n",
       "      <td>37</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>SA</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>A</td>\n",
       "      <td>SD</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>SD</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>33</td>\n",
       "      <td>29</td>\n",
       "      <td>34</td>\n",
       "      <td>15</td>\n",
       "      <td>33</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>A</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>31</td>\n",
       "      <td>33</td>\n",
       "      <td>36</td>\n",
       "      <td>7</td>\n",
       "      <td>15</td>\n",
       "      <td>SA</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>SD</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>SD</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>SA</td>\n",
       "      <td>SA</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1202</th>\n",
       "      <td>36</td>\n",
       "      <td>24</td>\n",
       "      <td>39</td>\n",
       "      <td>16</td>\n",
       "      <td>32</td>\n",
       "      <td>N</td>\n",
       "      <td>N</td>\n",
       "      <td>A</td>\n",
       "      <td>SA</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>SD</td>\n",
       "      <td>SD</td>\n",
       "      <td>A</td>\n",
       "      <td>SD</td>\n",
       "      <td>SA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1203</th>\n",
       "      <td>42</td>\n",
       "      <td>31</td>\n",
       "      <td>37</td>\n",
       "      <td>10</td>\n",
       "      <td>24</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>SD</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>SD</td>\n",
       "      <td>SA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1204</th>\n",
       "      <td>28</td>\n",
       "      <td>32</td>\n",
       "      <td>29</td>\n",
       "      <td>22</td>\n",
       "      <td>27</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>N</td>\n",
       "      <td>N</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1205</th>\n",
       "      <td>27</td>\n",
       "      <td>30</td>\n",
       "      <td>41</td>\n",
       "      <td>18</td>\n",
       "      <td>36</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>A</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>SA</td>\n",
       "      <td>SA</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1206</th>\n",
       "      <td>34</td>\n",
       "      <td>32</td>\n",
       "      <td>35</td>\n",
       "      <td>20</td>\n",
       "      <td>22</td>\n",
       "      <td>SD</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1206 rows × 65 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     NEOFAC_A NEOFAC_O NEOFAC_C NEOFAC_N NEOFAC_E NEORAW_01 NEORAW_02  \\\n",
       "1          27       15       28       24       25         A         D   \n",
       "2          29       23       26       21       32         D         A   \n",
       "3          37       24       35       15       37         N        SA   \n",
       "4          33       29       34       15       33         D         A   \n",
       "5          31       33       36        7       15        SA         D   \n",
       "...       ...      ...      ...      ...      ...       ...       ...   \n",
       "1202       36       24       39       16       32         N         N   \n",
       "1203       42       31       37       10       24         A         D   \n",
       "1204       28       32       29       22       27         D         D   \n",
       "1205       27       30       41       18       36         A         A   \n",
       "1206       34       32       35       20       22        SD         D   \n",
       "\n",
       "     NEORAW_03 NEORAW_04 NEORAW_05  ... NEORAW_51 NEORAW_52 NEORAW_53  \\\n",
       "1           SA        SA         N  ...         N         A         A   \n",
       "2            A        SA         N  ...         N         A         A   \n",
       "3            A         A        SA  ...         D        SA         A   \n",
       "4            N        SA         A  ...         D         A         A   \n",
       "5            D        SA         N  ...        SD         N        SA   \n",
       "...        ...       ...       ...  ...       ...       ...       ...   \n",
       "1202         A        SA         N  ...         D        SA         A   \n",
       "1203         N         A         N  ...         D         N         A   \n",
       "1204         N        SA         N  ...         D         A         A   \n",
       "1205         N        SA         A  ...         D        SA        SA   \n",
       "1206         D         A         N  ...         D         N         N   \n",
       "\n",
       "     NEORAW_54 NEORAW_55 NEORAW_56 NEORAW_57 NEORAW_58 NEORAW_59 NEORAW_60  \n",
       "1            A         A         D         N         N         D         A  \n",
       "2            D         N         D         N         D         A         N  \n",
       "3           SD         D         D         D         D        SD         A  \n",
       "4            N         D         N         D         A         A         A  \n",
       "5           SD         D         A        SA        SA         D         A  \n",
       "...        ...       ...       ...       ...       ...       ...       ...  \n",
       "1202         D         D        SD        SD         A        SD        SA  \n",
       "1203         D         D        SD         D         D        SD        SA  \n",
       "1204         N         N         D         N         N         N         A  \n",
       "1205        SA         D         A         D        SA         D        SA  \n",
       "1206         D         A         N         A         D         D         A  \n",
       "\n",
       "[1206 rows x 65 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Select the personality block by column label (label slices include both ends)\n",
    "# rather than by the hard-coded positions 308:373, so extra or missing columns\n",
    "# earlier in the CSV cannot silently shift the selection.\n",
    "allData_df.loc[:, 'NEOFAC_A':'NEORAW_60']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The personality data for all 1200 participants is stored in the variable `testing_personality`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label-based slice (inclusive at both ends) covering the same 65 personality\n",
    "# columns as positions 308:373, without depending on how many columns precede them.\n",
    "testing_personality = allData_df.loc[:, 'NEOFAC_A':'NEORAW_60']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NEOFAC_A</th>\n",
       "      <th>NEOFAC_O</th>\n",
       "      <th>NEOFAC_C</th>\n",
       "      <th>NEOFAC_N</th>\n",
       "      <th>NEOFAC_E</th>\n",
       "      <th>NEORAW_01</th>\n",
       "      <th>NEORAW_02</th>\n",
       "      <th>NEORAW_03</th>\n",
       "      <th>NEORAW_04</th>\n",
       "      <th>NEORAW_05</th>\n",
       "      <th>...</th>\n",
       "      <th>NEORAW_51</th>\n",
       "      <th>NEORAW_52</th>\n",
       "      <th>NEORAW_53</th>\n",
       "      <th>NEORAW_54</th>\n",
       "      <th>NEORAW_55</th>\n",
       "      <th>NEORAW_56</th>\n",
       "      <th>NEORAW_57</th>\n",
       "      <th>NEORAW_58</th>\n",
       "      <th>NEORAW_59</th>\n",
       "      <th>NEORAW_60</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>27</td>\n",
       "      <td>15</td>\n",
       "      <td>28</td>\n",
       "      <td>24</td>\n",
       "      <td>25</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>SA</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>N</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>29</td>\n",
       "      <td>23</td>\n",
       "      <td>26</td>\n",
       "      <td>21</td>\n",
       "      <td>32</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>SA</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>N</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>37</td>\n",
       "      <td>24</td>\n",
       "      <td>35</td>\n",
       "      <td>15</td>\n",
       "      <td>37</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>SA</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>A</td>\n",
       "      <td>SD</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>SD</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>33</td>\n",
       "      <td>29</td>\n",
       "      <td>34</td>\n",
       "      <td>15</td>\n",
       "      <td>33</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>A</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>31</td>\n",
       "      <td>33</td>\n",
       "      <td>36</td>\n",
       "      <td>7</td>\n",
       "      <td>15</td>\n",
       "      <td>SA</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>N</td>\n",
       "      <td>...</td>\n",
       "      <td>SD</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>SD</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>SA</td>\n",
       "      <td>SA</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 65 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  NEOFAC_A NEOFAC_O NEOFAC_C NEOFAC_N NEOFAC_E NEORAW_01 NEORAW_02 NEORAW_03  \\\n",
       "1       27       15       28       24       25         A         D        SA   \n",
       "2       29       23       26       21       32         D         A         A   \n",
       "3       37       24       35       15       37         N        SA         A   \n",
       "4       33       29       34       15       33         D         A         N   \n",
       "5       31       33       36        7       15        SA         D         D   \n",
       "\n",
       "  NEORAW_04 NEORAW_05  ... NEORAW_51 NEORAW_52 NEORAW_53 NEORAW_54 NEORAW_55  \\\n",
       "1        SA         N  ...         N         A         A         A         A   \n",
       "2        SA         N  ...         N         A         A         D         N   \n",
       "3         A        SA  ...         D        SA         A        SD         D   \n",
       "4        SA         A  ...         D         A         A         N         D   \n",
       "5        SA         N  ...        SD         N        SA        SD         D   \n",
       "\n",
       "  NEORAW_56 NEORAW_57 NEORAW_58 NEORAW_59 NEORAW_60  \n",
       "1         D         N         N         D         A  \n",
       "2         D         N         D         A         N  \n",
       "3         D         D         D        SD         A  \n",
       "4         N         D         A         A         A  \n",
       "5         A        SA        SA         D         A  \n",
       "\n",
       "[5 rows x 65 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "testing_personality.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The subject ids for all 1200 participants are extracted into the variable `all_id`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pick the id column by its label rather than by position 0; the same label is\n",
    "# used later when filtering personality_df by subject.\n",
    "all_id = allData_df['Subject']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1       100004\n",
       "2       100206\n",
       "3       100307\n",
       "4       100408\n",
       "5       100610\n",
       "         ...  \n",
       "1202    992774\n",
       "1203    993675\n",
       "1204    994273\n",
       "1205    995174\n",
       "1206    996782\n",
       "Name: Subject, Length: 1206, dtype: object"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_id"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A new dataframe, named `personality_df`, is created by extracting personality data from `allData_df` and combining it with `all_id`.\n",
    "\n",
    "It contains the personality data for all 1200 participants."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Put the subject ids to the left of the personality columns; the two pieces are\n",
    "# joined side by side on their shared row index.\n",
    "personality_df = pd.concat(objs=[all_id, testing_personality], axis='columns')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Subject</th>\n",
       "      <th>NEOFAC_A</th>\n",
       "      <th>NEOFAC_O</th>\n",
       "      <th>NEOFAC_C</th>\n",
       "      <th>NEOFAC_N</th>\n",
       "      <th>NEOFAC_E</th>\n",
       "      <th>NEORAW_01</th>\n",
       "      <th>NEORAW_02</th>\n",
       "      <th>NEORAW_03</th>\n",
       "      <th>NEORAW_04</th>\n",
       "      <th>...</th>\n",
       "      <th>NEORAW_51</th>\n",
       "      <th>NEORAW_52</th>\n",
       "      <th>NEORAW_53</th>\n",
       "      <th>NEORAW_54</th>\n",
       "      <th>NEORAW_55</th>\n",
       "      <th>NEORAW_56</th>\n",
       "      <th>NEORAW_57</th>\n",
       "      <th>NEORAW_58</th>\n",
       "      <th>NEORAW_59</th>\n",
       "      <th>NEORAW_60</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100004</td>\n",
       "      <td>27</td>\n",
       "      <td>15</td>\n",
       "      <td>28</td>\n",
       "      <td>24</td>\n",
       "      <td>25</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>SA</td>\n",
       "      <td>...</td>\n",
       "      <td>N</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100206</td>\n",
       "      <td>29</td>\n",
       "      <td>23</td>\n",
       "      <td>26</td>\n",
       "      <td>21</td>\n",
       "      <td>32</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>SA</td>\n",
       "      <td>...</td>\n",
       "      <td>N</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>100307</td>\n",
       "      <td>37</td>\n",
       "      <td>24</td>\n",
       "      <td>35</td>\n",
       "      <td>15</td>\n",
       "      <td>37</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>A</td>\n",
       "      <td>SD</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>SD</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100408</td>\n",
       "      <td>33</td>\n",
       "      <td>29</td>\n",
       "      <td>34</td>\n",
       "      <td>15</td>\n",
       "      <td>33</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>100610</td>\n",
       "      <td>31</td>\n",
       "      <td>33</td>\n",
       "      <td>36</td>\n",
       "      <td>7</td>\n",
       "      <td>15</td>\n",
       "      <td>SA</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>...</td>\n",
       "      <td>SD</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>SD</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>SA</td>\n",
       "      <td>SA</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 66 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  Subject NEOFAC_A NEOFAC_O NEOFAC_C NEOFAC_N NEOFAC_E NEORAW_01 NEORAW_02  \\\n",
       "1  100004       27       15       28       24       25         A         D   \n",
       "2  100206       29       23       26       21       32         D         A   \n",
       "3  100307       37       24       35       15       37         N        SA   \n",
       "4  100408       33       29       34       15       33         D         A   \n",
       "5  100610       31       33       36        7       15        SA         D   \n",
       "\n",
       "  NEORAW_03 NEORAW_04  ... NEORAW_51 NEORAW_52 NEORAW_53 NEORAW_54 NEORAW_55  \\\n",
       "1        SA        SA  ...         N         A         A         A         A   \n",
       "2         A        SA  ...         N         A         A         D         N   \n",
       "3         A         A  ...         D        SA         A        SD         D   \n",
       "4         N        SA  ...         D         A         A         N         D   \n",
       "5         D        SA  ...        SD         N        SA        SD         D   \n",
       "\n",
       "  NEORAW_56 NEORAW_57 NEORAW_58 NEORAW_59 NEORAW_60  \n",
       "1         D         N         N         D         A  \n",
       "2         D         N         D         A         N  \n",
       "3         D         D         D        SD         A  \n",
       "4         N         D         A         A         A  \n",
       "5         A        SA        SA         D         A  \n",
       "\n",
       "[5 rows x 66 columns]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "personality_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
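    "The `personality_subset` dataframe is meant to contain the personality data for the 100 participants whose ids are listed in `id_Series_str`. It starts out empty, with the same columns as `personality_df`."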
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start from an empty frame that already carries personality_df's column layout.\n",
    "personality_subset = pd.DataFrame(data=None, columns=personality_df.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Subject</th>\n",
       "      <th>NEOFAC_A</th>\n",
       "      <th>NEOFAC_O</th>\n",
       "      <th>NEOFAC_C</th>\n",
       "      <th>NEOFAC_N</th>\n",
       "      <th>NEOFAC_E</th>\n",
       "      <th>NEORAW_01</th>\n",
       "      <th>NEORAW_02</th>\n",
       "      <th>NEORAW_03</th>\n",
       "      <th>NEORAW_04</th>\n",
       "      <th>...</th>\n",
       "      <th>NEORAW_51</th>\n",
       "      <th>NEORAW_52</th>\n",
       "      <th>NEORAW_53</th>\n",
       "      <th>NEORAW_54</th>\n",
       "      <th>NEORAW_55</th>\n",
       "      <th>NEORAW_56</th>\n",
       "      <th>NEORAW_57</th>\n",
       "      <th>NEORAW_58</th>\n",
       "      <th>NEORAW_59</th>\n",
       "      <th>NEORAW_60</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>0 rows × 66 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [Subject, NEOFAC_A, NEOFAC_O, NEOFAC_C, NEOFAC_N, NEOFAC_E, NEORAW_01, NEORAW_02, NEORAW_03, NEORAW_04, NEORAW_05, NEORAW_06, NEORAW_07, NEORAW_08, NEORAW_09, NEORAW_10, NEORAW_11, NEORAW_12, NEORAW_13, NEORAW_14, NEORAW_15, NEORAW_16, NEORAW_17, NEORAW_18, NEORAW_19, NEORAW_20, NEORAW_21, NEORAW_22, NEORAW_23, NEORAW_24, NEORAW_25, NEORAW_26, NEORAW_27, NEORAW_28, NEORAW_29, NEORAW_30, NEORAW_31, NEORAW_32, NEORAW_33, NEORAW_34, NEORAW_35, NEORAW_36, NEORAW_37, NEORAW_38, NEORAW_39, NEORAW_40, NEORAW_41, NEORAW_42, NEORAW_43, NEORAW_44, NEORAW_45, NEORAW_46, NEORAW_47, NEORAW_48, NEORAW_49, NEORAW_50, NEORAW_51, NEORAW_52, NEORAW_53, NEORAW_54, NEORAW_55, NEORAW_56, NEORAW_57, NEORAW_58, NEORAW_59, NEORAW_60]\n",
       "Index: []\n",
       "\n",
       "[0 rows x 66 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "personality_subset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For each subject id `subset_id` in `id_Series_str` (the ids of the 100 participants we want), the matching personality data is extracted from `personality_df`, which holds the personality data of all 1200 participants.\n",
    "\n",
    "The extracted value is added to `personality_subset`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the matching row(s) for every requested subject, keeping the order of\n",
    "# `id_Series_str`, and concatenate once instead of re-copying the frame per id.\n",
    "subject_rows = [\n",
    "    personality_df[personality_df['Subject'] == subset_id]\n",
    "    for subset_id in id_Series_str\n",
    "]\n",
    "\n",
    "# `.iloc[0:0]` keeps only the column layout of `personality_subset`, so re-running\n",
    "# this cell rebuilds the subset instead of appending the same rows a second time.\n",
    "personality_subset = pd.concat([personality_subset.iloc[0:0], *subject_rows], axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Subject</th>\n",
       "      <th>NEOFAC_A</th>\n",
       "      <th>NEOFAC_O</th>\n",
       "      <th>NEOFAC_C</th>\n",
       "      <th>NEOFAC_N</th>\n",
       "      <th>NEOFAC_E</th>\n",
       "      <th>NEORAW_01</th>\n",
       "      <th>NEORAW_02</th>\n",
       "      <th>NEORAW_03</th>\n",
       "      <th>NEORAW_04</th>\n",
       "      <th>...</th>\n",
       "      <th>NEORAW_51</th>\n",
       "      <th>NEORAW_52</th>\n",
       "      <th>NEORAW_53</th>\n",
       "      <th>NEORAW_54</th>\n",
       "      <th>NEORAW_55</th>\n",
       "      <th>NEORAW_56</th>\n",
       "      <th>NEORAW_57</th>\n",
       "      <th>NEORAW_58</th>\n",
       "      <th>NEORAW_59</th>\n",
       "      <th>NEORAW_60</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>100307</td>\n",
       "      <td>37</td>\n",
       "      <td>24</td>\n",
       "      <td>35</td>\n",
       "      <td>15</td>\n",
       "      <td>37</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>A</td>\n",
       "      <td>SD</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>SD</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100408</td>\n",
       "      <td>33</td>\n",
       "      <td>29</td>\n",
       "      <td>34</td>\n",
       "      <td>15</td>\n",
       "      <td>33</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>101915</td>\n",
       "      <td>35</td>\n",
       "      <td>30</td>\n",
       "      <td>45</td>\n",
       "      <td>8</td>\n",
       "      <td>31</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>SA</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>SA</td>\n",
       "      <td>SD</td>\n",
       "      <td>SD</td>\n",
       "      <td>SD</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>102816</td>\n",
       "      <td>36</td>\n",
       "      <td>27</td>\n",
       "      <td>32</td>\n",
       "      <td>10</td>\n",
       "      <td>31</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>...</td>\n",
       "      <td>SD</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>103414</td>\n",
       "      <td>27</td>\n",
       "      <td>30</td>\n",
       "      <td>31</td>\n",
       "      <td>20</td>\n",
       "      <td>34</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>SA</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 66 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Subject NEOFAC_A NEOFAC_O NEOFAC_C NEOFAC_N NEOFAC_E NEORAW_01 NEORAW_02  \\\n",
       "3   100307       37       24       35       15       37         N        SA   \n",
       "4   100408       33       29       34       15       33         D         A   \n",
       "12  101915       35       30       45        8       31         N         D   \n",
       "19  102816       36       27       32       10       31         A         D   \n",
       "23  103414       27       30       31       20       34         D         A   \n",
       "\n",
       "   NEORAW_03 NEORAW_04  ... NEORAW_51 NEORAW_52 NEORAW_53 NEORAW_54 NEORAW_55  \\\n",
       "3          A         A  ...         D        SA         A        SD         D   \n",
       "4          N        SA  ...         D         A         A         N         D   \n",
       "12         A        SA  ...         D        SA        SA        SD        SD   \n",
       "19         N        SA  ...        SD         A         N         N         D   \n",
       "23         A        SA  ...         D        SA         A         D         D   \n",
       "\n",
       "   NEORAW_56 NEORAW_57 NEORAW_58 NEORAW_59 NEORAW_60  \n",
       "3          D         D         D        SD         A  \n",
       "4          N         D         A         A         A  \n",
       "12        SD         D         D         D        SA  \n",
       "19         D         D         N         D        SA  \n",
       "23         D         N         N         D         A  \n",
       "\n",
       "[5 rows x 66 columns]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "personality_subset.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(100, 66)"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "personality_subset.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The dataframe is then converted to a csv file and saved.\n",
    "\n",
    "Note that the `index` argument of the `to_csv` method is set to `False`, so the row index is not written to the file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 299,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the 100-subject personality table to disk, leaving out the row index.\n",
    "personality_subset.to_csv(\n",
    "    path_or_buf=r'/Users/rajdeep_ch/Documents/nma/project/personality_data.csv',\n",
    "    index=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 298,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Subject</th>\n",
       "      <th>NEOFAC_A</th>\n",
       "      <th>NEOFAC_O</th>\n",
       "      <th>NEOFAC_C</th>\n",
       "      <th>NEOFAC_N</th>\n",
       "      <th>NEOFAC_E</th>\n",
       "      <th>NEORAW_01</th>\n",
       "      <th>NEORAW_02</th>\n",
       "      <th>NEORAW_03</th>\n",
       "      <th>NEORAW_04</th>\n",
       "      <th>...</th>\n",
       "      <th>NEORAW_51</th>\n",
       "      <th>NEORAW_52</th>\n",
       "      <th>NEORAW_53</th>\n",
       "      <th>NEORAW_54</th>\n",
       "      <th>NEORAW_55</th>\n",
       "      <th>NEORAW_56</th>\n",
       "      <th>NEORAW_57</th>\n",
       "      <th>NEORAW_58</th>\n",
       "      <th>NEORAW_59</th>\n",
       "      <th>NEORAW_60</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100004</td>\n",
       "      <td>27</td>\n",
       "      <td>15</td>\n",
       "      <td>28</td>\n",
       "      <td>24</td>\n",
       "      <td>25</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>SA</td>\n",
       "      <td>...</td>\n",
       "      <td>N</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100206</td>\n",
       "      <td>29</td>\n",
       "      <td>23</td>\n",
       "      <td>26</td>\n",
       "      <td>21</td>\n",
       "      <td>32</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>SA</td>\n",
       "      <td>...</td>\n",
       "      <td>N</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>100307</td>\n",
       "      <td>37</td>\n",
       "      <td>24</td>\n",
       "      <td>35</td>\n",
       "      <td>15</td>\n",
       "      <td>37</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>A</td>\n",
       "      <td>SD</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>SD</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100408</td>\n",
       "      <td>33</td>\n",
       "      <td>29</td>\n",
       "      <td>34</td>\n",
       "      <td>15</td>\n",
       "      <td>33</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>100610</td>\n",
       "      <td>31</td>\n",
       "      <td>33</td>\n",
       "      <td>36</td>\n",
       "      <td>7</td>\n",
       "      <td>15</td>\n",
       "      <td>SA</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>...</td>\n",
       "      <td>SD</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>SD</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>SA</td>\n",
       "      <td>SA</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 66 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  Subject NEOFAC_A NEOFAC_O NEOFAC_C NEOFAC_N NEOFAC_E NEORAW_01 NEORAW_02  \\\n",
       "1  100004       27       15       28       24       25         A         D   \n",
       "2  100206       29       23       26       21       32         D         A   \n",
       "3  100307       37       24       35       15       37         N        SA   \n",
       "4  100408       33       29       34       15       33         D         A   \n",
       "5  100610       31       33       36        7       15        SA         D   \n",
       "\n",
       "  NEORAW_03 NEORAW_04  ... NEORAW_51 NEORAW_52 NEORAW_53 NEORAW_54 NEORAW_55  \\\n",
       "1        SA        SA  ...         N         A         A         A         A   \n",
       "2         A        SA  ...         N         A         A         D         N   \n",
       "3         A         A  ...         D        SA         A        SD         D   \n",
       "4         N        SA  ...         D         A         A         N         D   \n",
       "5         D        SA  ...        SD         N        SA        SD         D   \n",
       "\n",
       "  NEORAW_56 NEORAW_57 NEORAW_58 NEORAW_59 NEORAW_60  \n",
       "1         D         N         N         D         A  \n",
       "2         D         N         D         A         N  \n",
       "3         D         D         D        SD         A  \n",
       "4         N         D         A         A         A  \n",
       "5         A        SA        SA         D         A  \n",
       "\n",
       "[5 rows x 66 columns]"
      ]
     },
     "execution_count": 298,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "personality_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note that the subject `131924` does not have data for the Big Five Inventory questions but does have scores for the five personality traits."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Subject</th>\n",
       "      <th>NEOFAC_A</th>\n",
       "      <th>NEOFAC_O</th>\n",
       "      <th>NEOFAC_C</th>\n",
       "      <th>NEOFAC_N</th>\n",
       "      <th>NEOFAC_E</th>\n",
       "      <th>NEORAW_01</th>\n",
       "      <th>NEORAW_02</th>\n",
       "      <th>NEORAW_03</th>\n",
       "      <th>NEORAW_04</th>\n",
       "      <th>...</th>\n",
       "      <th>NEORAW_51</th>\n",
       "      <th>NEORAW_52</th>\n",
       "      <th>NEORAW_53</th>\n",
       "      <th>NEORAW_54</th>\n",
       "      <th>NEORAW_55</th>\n",
       "      <th>NEORAW_56</th>\n",
       "      <th>NEORAW_57</th>\n",
       "      <th>NEORAW_58</th>\n",
       "      <th>NEORAW_59</th>\n",
       "      <th>NEORAW_60</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>178</th>\n",
       "      <td>131924</td>\n",
       "      <td>30</td>\n",
       "      <td>30</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1 rows × 66 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    Subject NEOFAC_A NEOFAC_O NEOFAC_C NEOFAC_N NEOFAC_E NEORAW_01 NEORAW_02  \\\n",
       "178  131924       30       30       12       12       12       NaN       NaN   \n",
       "\n",
       "    NEORAW_03 NEORAW_04  ... NEORAW_51 NEORAW_52 NEORAW_53 NEORAW_54  \\\n",
       "178       NaN       NaN  ...       NaN       NaN       NaN       NaN   \n",
       "\n",
       "    NEORAW_55 NEORAW_56 NEORAW_57 NEORAW_58 NEORAW_59 NEORAW_60  \n",
       "178       NaN       NaN       NaN       NaN       NaN       NaN  \n",
       "\n",
       "[1 rows x 66 columns]"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the row(s) of the full personality table belonging to subject '131924'.\n",
    "personality_df.loc[personality_df['Subject'] == '131924']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Subject</th>\n",
       "      <th>NEOFAC_A</th>\n",
       "      <th>NEOFAC_O</th>\n",
       "      <th>NEOFAC_C</th>\n",
       "      <th>NEOFAC_N</th>\n",
       "      <th>NEOFAC_E</th>\n",
       "      <th>NEORAW_01</th>\n",
       "      <th>NEORAW_02</th>\n",
       "      <th>NEORAW_03</th>\n",
       "      <th>NEORAW_04</th>\n",
       "      <th>...</th>\n",
       "      <th>NEORAW_51</th>\n",
       "      <th>NEORAW_52</th>\n",
       "      <th>NEORAW_53</th>\n",
       "      <th>NEORAW_54</th>\n",
       "      <th>NEORAW_55</th>\n",
       "      <th>NEORAW_56</th>\n",
       "      <th>NEORAW_57</th>\n",
       "      <th>NEORAW_58</th>\n",
       "      <th>NEORAW_59</th>\n",
       "      <th>NEORAW_60</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>100307</td>\n",
       "      <td>37</td>\n",
       "      <td>24</td>\n",
       "      <td>35</td>\n",
       "      <td>15</td>\n",
       "      <td>37</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>A</td>\n",
       "      <td>SD</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>SD</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100408</td>\n",
       "      <td>33</td>\n",
       "      <td>29</td>\n",
       "      <td>34</td>\n",
       "      <td>15</td>\n",
       "      <td>33</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>101915</td>\n",
       "      <td>35</td>\n",
       "      <td>30</td>\n",
       "      <td>45</td>\n",
       "      <td>8</td>\n",
       "      <td>31</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>SA</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>SA</td>\n",
       "      <td>SD</td>\n",
       "      <td>SD</td>\n",
       "      <td>SD</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>102816</td>\n",
       "      <td>36</td>\n",
       "      <td>27</td>\n",
       "      <td>32</td>\n",
       "      <td>10</td>\n",
       "      <td>31</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>SA</td>\n",
       "      <td>...</td>\n",
       "      <td>SD</td>\n",
       "      <td>A</td>\n",
       "      <td>N</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>103414</td>\n",
       "      <td>27</td>\n",
       "      <td>30</td>\n",
       "      <td>31</td>\n",
       "      <td>20</td>\n",
       "      <td>34</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>SA</td>\n",
       "      <td>...</td>\n",
       "      <td>D</td>\n",
       "      <td>SA</td>\n",
       "      <td>A</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>N</td>\n",
       "      <td>N</td>\n",
       "      <td>D</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 66 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Subject NEOFAC_A NEOFAC_O NEOFAC_C NEOFAC_N NEOFAC_E NEORAW_01 NEORAW_02  \\\n",
       "3   100307       37       24       35       15       37         N        SA   \n",
       "4   100408       33       29       34       15       33         D         A   \n",
       "12  101915       35       30       45        8       31         N         D   \n",
       "19  102816       36       27       32       10       31         A         D   \n",
       "23  103414       27       30       31       20       34         D         A   \n",
       "\n",
       "   NEORAW_03 NEORAW_04  ... NEORAW_51 NEORAW_52 NEORAW_53 NEORAW_54 NEORAW_55  \\\n",
       "3          A         A  ...         D        SA         A        SD         D   \n",
       "4          N        SA  ...         D         A         A         N         D   \n",
       "12         A        SA  ...         D        SA        SA        SD        SD   \n",
       "19         N        SA  ...        SD         A         N         N         D   \n",
       "23         A        SA  ...         D        SA         A         D         D   \n",
       "\n",
       "   NEORAW_56 NEORAW_57 NEORAW_58 NEORAW_59 NEORAW_60  \n",
       "3          D         D         D        SD         A  \n",
       "4          N         D         A         A         A  \n",
       "12        SD         D         D         D        SA  \n",
       "19         D         D         N         D        SA  \n",
       "23         D         N         N         D         A  \n",
       "\n",
       "[5 rows x 66 columns]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "personality_subset.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "\n",
    "A preliminary pass over the personality trait scores to check their max and min."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "46"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NEOFAC_A is the column at position 1; select it by label so the lookup does not\n",
    "# depend on the column order. Scores are converted to int before taking the max.\n",
    "neofac_a = [int(score) for score in personality_subset['NEOFAC_A']]\n",
    "np.max(neofac_a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "44"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE: despite its name, `neofac_b` holds the NEOFAC_O scores (the column at\n",
    "# position 2). The column is selected by label so the lookup does not depend on\n",
    "# the column order. Scores are converted to int before taking the max.\n",
    "neofac_b = [int(score) for score in personality_subset['NEOFAC_O']]\n",
    "np.max(neofac_b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "48"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NEOFAC_C is the column at position 3; select it by label so the lookup does not\n",
    "# depend on the column order. Scores are converted to int before taking the max.\n",
    "neofac_c = [int(score) for score in personality_subset['NEOFAC_C']]\n",
    "np.max(neofac_c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "36"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE: despite its name, `neofac_d` holds the NEOFAC_N scores (the column at\n",
    "# position 4). The column is selected by label so the lookup does not depend on\n",
    "# the column order. Scores are converted to int before taking the max.\n",
    "neofac_d = [int(score) for score in personality_subset['NEOFAC_N']]\n",
    "np.max(neofac_d)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "45"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NEOFAC_E is the column at position 5; select it by label so the lookup does not\n",
    "# depend on the column order. Scores are converted to int before taking the max.\n",
    "neofac_e = [int(score) for score in personality_subset['NEOFAC_E']]\n",
    "np.max(neofac_e)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "47"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Maximum NEOFAC_E score over all participants in `personality_df`.\n",
    "# Missing scores are dropped explicitly with `dropna()` rather than by checking\n",
    "# `type(...) != float`, and the column is selected by label instead of position 5.\n",
    "np.max([int(score_val) for score_val in personality_df['NEOFAC_E'].dropna()])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}