{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Extracting Data\n", "---" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "`id_file` is the path to the `subjects_list.txt` file provided with the HCP data (inside the `hcp_task` folder) by NMA.\n", "\n", "It contains the subject ids of the 100 participants." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "id_file = r'/Users/rajdeep_ch/Documents/nma/project/hcp_task/subjects_list.txt' " ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# using .loadtxt() method from numpy to load the text file\n", "id_Series = np.loadtxt(id_file)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The values contained in `id_Series` are floating point numbers.\n", "\n", "They are first converted to `int` and then to `str` type." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "id_Series_str = [str(int(sub_id)) for sub_id in id_Series]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "csv_file = r'/Users/rajdeep_ch/Documents/nma/project/alldata.csv' \n", "\n", "allData_df = pd.read_csv(csv_file)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "`allData_df` contains data for all of the roughly 1200 participants across all the tasks (1207 rows as loaded: 1206 participants plus one row that holds the column names)." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1207, 383)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "allData_df.shape" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0Unnamed: 1Unnamed: 2Unnamed: 3Unnamed: 4Unnamed: 5Unnamed: 6Unnamed: 7Unnamed: 8Unnamed: 9...Unnamed: 373Unnamed: 374Unnamed: 375Unnamed: 376Unnamed: 377Unnamed: 378Unnamed: 379Unnamed: 380Unnamed: 381Unnamed: 382
0SubjectReleaseAcquisitionGenderAge3T_Full_MR_ComplT1_CountT2_Count3T_RS-fMRI_Count3T_RS-fMRI_PctCompl...Noise_CompOdor_UnadjOdor_AgeAdjPainIntens_RawScorePainInterf_TscoreTaste_UnadjTaste_AgeAdjMars_Log_ScoreMars_ErrsMars_Final
1100004S900Q06M22-25FALSE0000.0...5.2101.1286.45245.9107.17105.311.801.8
2100206S900Q11M26-30TRUE114100.0...6.0108.7997.19149.772.6372.031.8401.84
3100307Q1Q01F26-30TRUE114100.0...3.6101.1286.45038.671.6971.761.7601.76
4100408Q3Q03M31-35TRUE114100.0...2.0108.7998.04252.6114.01113.591.7621.68
\n", "

5 rows × 383 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 Unnamed: 1 Unnamed: 2 Unnamed: 3 Unnamed: 4 Unnamed: 5 \\\n", "0 Subject Release Acquisition Gender Age 3T_Full_MR_Compl \n", "1 100004 S900 Q06 M 22-25 FALSE \n", "2 100206 S900 Q11 M 26-30 TRUE \n", "3 100307 Q1 Q01 F 26-30 TRUE \n", "4 100408 Q3 Q03 M 31-35 TRUE \n", "\n", " Unnamed: 6 Unnamed: 7 Unnamed: 8 Unnamed: 9 ... \\\n", "0 T1_Count T2_Count 3T_RS-fMRI_Count 3T_RS-fMRI_PctCompl ... \n", "1 0 0 0 0.0 ... \n", "2 1 1 4 100.0 ... \n", "3 1 1 4 100.0 ... \n", "4 1 1 4 100.0 ... \n", "\n", " Unnamed: 373 Unnamed: 374 Unnamed: 375 Unnamed: 376 \\\n", "0 Noise_Comp Odor_Unadj Odor_AgeAdj PainIntens_RawScore \n", "1 5.2 101.12 86.45 2 \n", "2 6.0 108.79 97.19 1 \n", "3 3.6 101.12 86.45 0 \n", "4 2.0 108.79 98.04 2 \n", "\n", " Unnamed: 377 Unnamed: 378 Unnamed: 379 Unnamed: 380 Unnamed: 381 \\\n", "0 PainInterf_Tscore Taste_Unadj Taste_AgeAdj Mars_Log_Score Mars_Errs \n", "1 45.9 107.17 105.31 1.8 0 \n", "2 49.7 72.63 72.03 1.84 0 \n", "3 38.6 71.69 71.76 1.76 0 \n", "4 52.6 114.01 113.59 1.76 2 \n", "\n", " Unnamed: 382 \n", "0 Mars_Final \n", "1 1.8 \n", "2 1.84 \n", "3 1.76 \n", "4 1.68 \n", "\n", "[5 rows x 383 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "allData_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Cleaned the dataframe by using row 0 as the column names of the dataframe.\n", "\n", "The 0th row is then deleted." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "allData_df.columns.name = None" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "allData_df.columns = list(allData_df.iloc[0])" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SubjectReleaseAcquisitionGenderAge3T_Full_MR_ComplT1_CountT2_Count3T_RS-fMRI_Count3T_RS-fMRI_PctCompl...Noise_CompOdor_UnadjOdor_AgeAdjPainIntens_RawScorePainInterf_TscoreTaste_UnadjTaste_AgeAdjMars_Log_ScoreMars_ErrsMars_Final
0SubjectReleaseAcquisitionGenderAge3T_Full_MR_ComplT1_CountT2_Count3T_RS-fMRI_Count3T_RS-fMRI_PctCompl...Noise_CompOdor_UnadjOdor_AgeAdjPainIntens_RawScorePainInterf_TscoreTaste_UnadjTaste_AgeAdjMars_Log_ScoreMars_ErrsMars_Final
1100004S900Q06M22-25FALSE0000.0...5.2101.1286.45245.9107.17105.311.801.8
2100206S900Q11M26-30TRUE114100.0...6.0108.7997.19149.772.6372.031.8401.84
3100307Q1Q01F26-30TRUE114100.0...3.6101.1286.45038.671.6971.761.7601.76
4100408Q3Q03M31-35TRUE114100.0...2.0108.7998.04252.6114.01113.591.7621.68
\n", "

5 rows × 383 columns

\n", "
" ], "text/plain": [ " Subject Release Acquisition Gender Age 3T_Full_MR_Compl T1_Count \\\n", "0 Subject Release Acquisition Gender Age 3T_Full_MR_Compl T1_Count \n", "1 100004 S900 Q06 M 22-25 FALSE 0 \n", "2 100206 S900 Q11 M 26-30 TRUE 1 \n", "3 100307 Q1 Q01 F 26-30 TRUE 1 \n", "4 100408 Q3 Q03 M 31-35 TRUE 1 \n", "\n", " T2_Count 3T_RS-fMRI_Count 3T_RS-fMRI_PctCompl ... Noise_Comp \\\n", "0 T2_Count 3T_RS-fMRI_Count 3T_RS-fMRI_PctCompl ... Noise_Comp \n", "1 0 0 0.0 ... 5.2 \n", "2 1 4 100.0 ... 6.0 \n", "3 1 4 100.0 ... 3.6 \n", "4 1 4 100.0 ... 2.0 \n", "\n", " Odor_Unadj Odor_AgeAdj PainIntens_RawScore PainInterf_Tscore \\\n", "0 Odor_Unadj Odor_AgeAdj PainIntens_RawScore PainInterf_Tscore \n", "1 101.12 86.45 2 45.9 \n", "2 108.79 97.19 1 49.7 \n", "3 101.12 86.45 0 38.6 \n", "4 108.79 98.04 2 52.6 \n", "\n", " Taste_Unadj Taste_AgeAdj Mars_Log_Score Mars_Errs Mars_Final \n", "0 Taste_Unadj Taste_AgeAdj Mars_Log_Score Mars_Errs Mars_Final \n", "1 107.17 105.31 1.8 0 1.8 \n", "2 72.63 72.03 1.84 0 1.84 \n", "3 71.69 71.76 1.76 0 1.76 \n", "4 114.01 113.59 1.76 2 1.68 \n", "\n", "[5 rows x 383 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "allData_df.head()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# Remove the row that duplicates the column names (index label 0).\n", "# errors='ignore' keeps this cell safe to re-run once that row is already gone;\n", "# a plain drop(0) raises KeyError on the second execution.\n", "allData_df = allData_df.drop(index=0, errors='ignore')" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SubjectReleaseAcquisitionGenderAge3T_Full_MR_ComplT1_CountT2_Count3T_RS-fMRI_Count3T_RS-fMRI_PctCompl...Noise_CompOdor_UnadjOdor_AgeAdjPainIntens_RawScorePainInterf_TscoreTaste_UnadjTaste_AgeAdjMars_Log_ScoreMars_ErrsMars_Final
1100004S900Q06M22-25FALSE0000.0...5.2101.1286.45245.9107.17105.311.801.8
2100206S900Q11M26-30TRUE114100.0...6.0108.7997.19149.772.6372.031.8401.84
3100307Q1Q01F26-30TRUE114100.0...3.6101.1286.45038.671.6971.761.7601.76
4100408Q3Q03M31-35TRUE114100.0...2.0108.7998.04252.6114.01113.591.7621.68
5100610S900Q08M26-30TRUE214100.0...2.0122.25110.45038.684.8485.311.9211.88
..................................................................
1202992774Q2Q02M31-35TRUE224100.0...8.4122.25111.41450.1107.17103.551.7601.76
1203993675S900Q09F26-30TRUE224100.0...0.4122.25110.45038.684.0784.251.811.76
1204994273S500Q06M26-30TRUE114100.0...6.0122.25111.41763.8110.65109.731.811.76
1205995174S1200Q13M22-25FALSE1120.0...3.688.6164.58350.1117.16117.41.801.8
1206996782S900Q08F26-30TRUE224100.0...6.0108.7997.19038.675.4373.721.8401.84
\n", "

1206 rows × 383 columns

\n", "
" ], "text/plain": [ " Subject Release Acquisition Gender Age 3T_Full_MR_Compl T1_Count \\\n", "1 100004 S900 Q06 M 22-25 FALSE 0 \n", "2 100206 S900 Q11 M 26-30 TRUE 1 \n", "3 100307 Q1 Q01 F 26-30 TRUE 1 \n", "4 100408 Q3 Q03 M 31-35 TRUE 1 \n", "5 100610 S900 Q08 M 26-30 TRUE 2 \n", "... ... ... ... ... ... ... ... \n", "1202 992774 Q2 Q02 M 31-35 TRUE 2 \n", "1203 993675 S900 Q09 F 26-30 TRUE 2 \n", "1204 994273 S500 Q06 M 26-30 TRUE 1 \n", "1205 995174 S1200 Q13 M 22-25 FALSE 1 \n", "1206 996782 S900 Q08 F 26-30 TRUE 2 \n", "\n", " T2_Count 3T_RS-fMRI_Count 3T_RS-fMRI_PctCompl ... Noise_Comp Odor_Unadj \\\n", "1 0 0 0.0 ... 5.2 101.12 \n", "2 1 4 100.0 ... 6.0 108.79 \n", "3 1 4 100.0 ... 3.6 101.12 \n", "4 1 4 100.0 ... 2.0 108.79 \n", "5 1 4 100.0 ... 2.0 122.25 \n", "... ... ... ... ... ... ... \n", "1202 2 4 100.0 ... 8.4 122.25 \n", "1203 2 4 100.0 ... 0.4 122.25 \n", "1204 1 4 100.0 ... 6.0 122.25 \n", "1205 1 2 0.0 ... 3.6 88.61 \n", "1206 2 4 100.0 ... 6.0 108.79 \n", "\n", " Odor_AgeAdj PainIntens_RawScore PainInterf_Tscore Taste_Unadj \\\n", "1 86.45 2 45.9 107.17 \n", "2 97.19 1 49.7 72.63 \n", "3 86.45 0 38.6 71.69 \n", "4 98.04 2 52.6 114.01 \n", "5 110.45 0 38.6 84.84 \n", "... ... ... ... ... \n", "1202 111.41 4 50.1 107.17 \n", "1203 110.45 0 38.6 84.07 \n", "1204 111.41 7 63.8 110.65 \n", "1205 64.58 3 50.1 117.16 \n", "1206 97.19 0 38.6 75.43 \n", "\n", " Taste_AgeAdj Mars_Log_Score Mars_Errs Mars_Final \n", "1 105.31 1.8 0 1.8 \n", "2 72.03 1.84 0 1.84 \n", "3 71.76 1.76 0 1.76 \n", "4 113.59 1.76 2 1.68 \n", "5 85.31 1.92 1 1.88 \n", "... ... ... ... ... 
\n", "1202 103.55 1.76 0 1.76 \n", "1203 84.25 1.8 1 1.76 \n", "1204 109.73 1.8 1 1.76 \n", "1205 117.4 1.8 0 1.8 \n", "1206 73.72 1.84 0 1.84 \n", "\n", "[1206 rows x 383 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "allData_df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This is how the dataframe looks now.\n", "\n", "From this dataframe, personality data is extracted.\n", "\n", "The personality data columns (`NEOFAC_A` through `NEORAW_60`) sit at column positions 308 to 372, i.e. the half-open range with start index 308 and end index 373 (the end index itself is not included)." ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NEOFAC_ANEOFAC_ONEOFAC_CNEOFAC_NNEOFAC_ENEORAW_01NEORAW_02NEORAW_03NEORAW_04NEORAW_05...NEORAW_51NEORAW_52NEORAW_53NEORAW_54NEORAW_55NEORAW_56NEORAW_57NEORAW_58NEORAW_59NEORAW_60
12715282425ADSASAN...NAAAADNNDA
22923262132DAASAN...NAADNDNDAN
33724351537NSAAASA...DSAASDDDDDSDA
43329341533DANSAA...DAANDNDAAA
5313336715SADDSAN...SDNSASDDASASADA
..................................................................
12023624391632NNASAN...DSAADDSDSDASDSA
12034231371024ADNAN...DNADDSDDDSDSA
12042832292227DDNSAN...DAANNDNNNA
12052730411836AANSAA...DSASASADADSADSA
12063432352022SDDDAN...DNNDANADDA
\n", "

1206 rows × 65 columns

\n", "
" ], "text/plain": [ " NEOFAC_A NEOFAC_O NEOFAC_C NEOFAC_N NEOFAC_E NEORAW_01 NEORAW_02 \\\n", "1 27 15 28 24 25 A D \n", "2 29 23 26 21 32 D A \n", "3 37 24 35 15 37 N SA \n", "4 33 29 34 15 33 D A \n", "5 31 33 36 7 15 SA D \n", "... ... ... ... ... ... ... ... \n", "1202 36 24 39 16 32 N N \n", "1203 42 31 37 10 24 A D \n", "1204 28 32 29 22 27 D D \n", "1205 27 30 41 18 36 A A \n", "1206 34 32 35 20 22 SD D \n", "\n", " NEORAW_03 NEORAW_04 NEORAW_05 ... NEORAW_51 NEORAW_52 NEORAW_53 \\\n", "1 SA SA N ... N A A \n", "2 A SA N ... N A A \n", "3 A A SA ... D SA A \n", "4 N SA A ... D A A \n", "5 D SA N ... SD N SA \n", "... ... ... ... ... ... ... ... \n", "1202 A SA N ... D SA A \n", "1203 N A N ... D N A \n", "1204 N SA N ... D A A \n", "1205 N SA A ... D SA SA \n", "1206 D A N ... D N N \n", "\n", " NEORAW_54 NEORAW_55 NEORAW_56 NEORAW_57 NEORAW_58 NEORAW_59 NEORAW_60 \n", "1 A A D N N D A \n", "2 D N D N D A N \n", "3 SD D D D D SD A \n", "4 N D N D A A A \n", "5 SD D A SA SA D A \n", "... ... ... ... ... ... ... ... \n", "1202 D D SD SD A SD SA \n", "1203 D D SD D D SD SA \n", "1204 N N D N N N A \n", "1205 SA D A D SA D SA \n", "1206 D A N A D D A \n", "\n", "[1206 rows x 65 columns]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Label-based slice (both end labels are included); these are the same 65 columns\n", "# as positions 308:373.\n", "allData_df.loc[:, 'NEOFAC_A':'NEORAW_60']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The personality data for all 1200 participants is stored in the variable `testing_personality`." ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# Select the personality block by column label instead of hard-coded positions\n", "# (308:373), so the selection does not silently shift if the CSV layout changes.\n", "testing_personality = allData_df.loc[:, 'NEOFAC_A':'NEORAW_60']" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NEOFAC_ANEOFAC_ONEOFAC_CNEOFAC_NNEOFAC_ENEORAW_01NEORAW_02NEORAW_03NEORAW_04NEORAW_05...NEORAW_51NEORAW_52NEORAW_53NEORAW_54NEORAW_55NEORAW_56NEORAW_57NEORAW_58NEORAW_59NEORAW_60
12715282425ADSASAN...NAAAADNNDA
22923262132DAASAN...NAADNDNDAN
33724351537NSAAASA...DSAASDDDDDSDA
43329341533DANSAA...DAANDNDAAA
5313336715SADDSAN...SDNSASDDASASADA
\n", "

5 rows × 65 columns

\n", "
" ], "text/plain": [ " NEOFAC_A NEOFAC_O NEOFAC_C NEOFAC_N NEOFAC_E NEORAW_01 NEORAW_02 NEORAW_03 \\\n", "1 27 15 28 24 25 A D SA \n", "2 29 23 26 21 32 D A A \n", "3 37 24 35 15 37 N SA A \n", "4 33 29 34 15 33 D A N \n", "5 31 33 36 7 15 SA D D \n", "\n", " NEORAW_04 NEORAW_05 ... NEORAW_51 NEORAW_52 NEORAW_53 NEORAW_54 NEORAW_55 \\\n", "1 SA N ... N A A A A \n", "2 SA N ... N A A D N \n", "3 A SA ... D SA A SD D \n", "4 SA A ... D A A N D \n", "5 SA N ... SD N SA SD D \n", "\n", " NEORAW_56 NEORAW_57 NEORAW_58 NEORAW_59 NEORAW_60 \n", "1 D N N D A \n", "2 D N D A N \n", "3 D D D SD A \n", "4 N D A A A \n", "5 A SA SA D A \n", "\n", "[5 rows x 65 columns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "testing_personality.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Extracting the subject ids for all 1200 participants in the variable `all_id`" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "all_id = allData_df.iloc[:,0]" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1 100004\n", "2 100206\n", "3 100307\n", "4 100408\n", "5 100610\n", " ... \n", "1202 992774\n", "1203 993675\n", "1204 994273\n", "1205 995174\n", "1206 996782\n", "Name: Subject, Length: 1206, dtype: object" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "all_id" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A new dataframe, named `personality_df` is created by extracting personality data from `allData_df` and combining it with `all_id`.\n", "\n", "It contains the personality data for all 1200 participants." 
] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "personality_df = pd.concat([all_id,testing_personality],axis=1)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SubjectNEOFAC_ANEOFAC_ONEOFAC_CNEOFAC_NNEOFAC_ENEORAW_01NEORAW_02NEORAW_03NEORAW_04...NEORAW_51NEORAW_52NEORAW_53NEORAW_54NEORAW_55NEORAW_56NEORAW_57NEORAW_58NEORAW_59NEORAW_60
11000042715282425ADSASA...NAAAADNNDA
21002062923262132DAASA...NAADNDNDAN
31003073724351537NSAAA...DSAASDDDDDSDA
41004083329341533DANSA...DAANDNDAAA
5100610313336715SADDSA...SDNSASDDASASADA
\n", "

5 rows × 66 columns

\n", "
" ], "text/plain": [ " Subject NEOFAC_A NEOFAC_O NEOFAC_C NEOFAC_N NEOFAC_E NEORAW_01 NEORAW_02 \\\n", "1 100004 27 15 28 24 25 A D \n", "2 100206 29 23 26 21 32 D A \n", "3 100307 37 24 35 15 37 N SA \n", "4 100408 33 29 34 15 33 D A \n", "5 100610 31 33 36 7 15 SA D \n", "\n", " NEORAW_03 NEORAW_04 ... NEORAW_51 NEORAW_52 NEORAW_53 NEORAW_54 NEORAW_55 \\\n", "1 SA SA ... N A A A A \n", "2 A SA ... N A A D N \n", "3 A A ... D SA A SD D \n", "4 N SA ... D A A N D \n", "5 D SA ... SD N SA SD D \n", "\n", " NEORAW_56 NEORAW_57 NEORAW_58 NEORAW_59 NEORAW_60 \n", "1 D N N D A \n", "2 D N D A N \n", "3 D D D SD A \n", "4 N D A A A \n", "5 A SA SA D A \n", "\n", "[5 rows x 66 columns]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "personality_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The `personality_subset` dataframe is meant to contain the personality data for the 100 participants." ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "personality_subset = pd.DataFrame(columns=personality_df.columns)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SubjectNEOFAC_ANEOFAC_ONEOFAC_CNEOFAC_NNEOFAC_ENEORAW_01NEORAW_02NEORAW_03NEORAW_04...NEORAW_51NEORAW_52NEORAW_53NEORAW_54NEORAW_55NEORAW_56NEORAW_57NEORAW_58NEORAW_59NEORAW_60
\n", "

0 rows × 66 columns

\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [Subject, NEOFAC_A, NEOFAC_O, NEOFAC_C, NEOFAC_N, NEOFAC_E, NEORAW_01, NEORAW_02, NEORAW_03, NEORAW_04, NEORAW_05, NEORAW_06, NEORAW_07, NEORAW_08, NEORAW_09, NEORAW_10, NEORAW_11, NEORAW_12, NEORAW_13, NEORAW_14, NEORAW_15, NEORAW_16, NEORAW_17, NEORAW_18, NEORAW_19, NEORAW_20, NEORAW_21, NEORAW_22, NEORAW_23, NEORAW_24, NEORAW_25, NEORAW_26, NEORAW_27, NEORAW_28, NEORAW_29, NEORAW_30, NEORAW_31, NEORAW_32, NEORAW_33, NEORAW_34, NEORAW_35, NEORAW_36, NEORAW_37, NEORAW_38, NEORAW_39, NEORAW_40, NEORAW_41, NEORAW_42, NEORAW_43, NEORAW_44, NEORAW_45, NEORAW_46, NEORAW_47, NEORAW_48, NEORAW_49, NEORAW_50, NEORAW_51, NEORAW_52, NEORAW_53, NEORAW_54, NEORAW_55, NEORAW_56, NEORAW_57, NEORAW_58, NEORAW_59, NEORAW_60]\n", "Index: []\n", "\n", "[0 rows x 66 columns]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "personality_subset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For each subject id `subset_id` in the list `id_Series_str` (the subject ids of the 100 participants we want), the personality data of that participant is selected from `personality_df`, the personality data of all 1200 participants.\n", "\n", "The selected rows are then concatenated, in the order of `id_Series_str`, to form `personality_subset`." ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# Select each requested subject's row(s), keeping the order of `id_Series_str`.\n", "subject_frames = [\n", "    personality_df[personality_df['Subject'] == subset_id]\n", "    for subset_id in id_Series_str\n", "]\n", "\n", "# Concatenate once instead of growing the frame inside a loop, and start from a\n", "# fresh empty frame so that re-running this cell does not duplicate rows.\n", "personality_subset = pd.concat(\n", "    [pd.DataFrame(columns=personality_df.columns), *subject_frames],\n", "    axis=0,\n", ")" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SubjectNEOFAC_ANEOFAC_ONEOFAC_CNEOFAC_NNEOFAC_ENEORAW_01NEORAW_02NEORAW_03NEORAW_04...NEORAW_51NEORAW_52NEORAW_53NEORAW_54NEORAW_55NEORAW_56NEORAW_57NEORAW_58NEORAW_59NEORAW_60
31003073724351537NSAAA...DSAASDDDDDSDA
41004083329341533DANSA...DAANDNDAAA
12101915353045831NDASA...DSASASDSDSDDDDSA
191028163627321031ADNSA...SDANNDDDNDSA
231034142730312034DAASA...DSAADDDNNDA
\n", "

5 rows × 66 columns

\n", "
" ], "text/plain": [ " Subject NEOFAC_A NEOFAC_O NEOFAC_C NEOFAC_N NEOFAC_E NEORAW_01 NEORAW_02 \\\n", "3 100307 37 24 35 15 37 N SA \n", "4 100408 33 29 34 15 33 D A \n", "12 101915 35 30 45 8 31 N D \n", "19 102816 36 27 32 10 31 A D \n", "23 103414 27 30 31 20 34 D A \n", "\n", " NEORAW_03 NEORAW_04 ... NEORAW_51 NEORAW_52 NEORAW_53 NEORAW_54 NEORAW_55 \\\n", "3 A A ... D SA A SD D \n", "4 N SA ... D A A N D \n", "12 A SA ... D SA SA SD SD \n", "19 N SA ... SD A N N D \n", "23 A SA ... D SA A D D \n", "\n", " NEORAW_56 NEORAW_57 NEORAW_58 NEORAW_59 NEORAW_60 \n", "3 D D D SD A \n", "4 N D A A A \n", "12 SD D D D SA \n", "19 D D N D SA \n", "23 D N N D A \n", "\n", "[5 rows x 66 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "personality_subset.head()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(100, 66)" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "personality_subset.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The dataframe is then converted to a csv file and saved.\n", "\n", "Note that `index` argument is set to False for `to_csv` method." ] }, { "cell_type": "code", "execution_count": 299, "metadata": {}, "outputs": [], "source": [ "personality_subset.to_csv(r'/Users/rajdeep_ch/Documents/nma/project/personality_data.csv',index=False)" ] }, { "cell_type": "code", "execution_count": 298, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SubjectNEOFAC_ANEOFAC_ONEOFAC_CNEOFAC_NNEOFAC_ENEORAW_01NEORAW_02NEORAW_03NEORAW_04...NEORAW_51NEORAW_52NEORAW_53NEORAW_54NEORAW_55NEORAW_56NEORAW_57NEORAW_58NEORAW_59NEORAW_60
11000042715282425ADSASA...NAAAADNNDA
21002062923262132DAASA...NAADNDNDAN
31003073724351537NSAAA...DSAASDDDDDSDA
41004083329341533DANSA...DAANDNDAAA
5100610313336715SADDSA...SDNSASDDASASADA
\n", "

5 rows × 66 columns

\n", "
" ], "text/plain": [ " Subject NEOFAC_A NEOFAC_O NEOFAC_C NEOFAC_N NEOFAC_E NEORAW_01 NEORAW_02 \\\n", "1 100004 27 15 28 24 25 A D \n", "2 100206 29 23 26 21 32 D A \n", "3 100307 37 24 35 15 37 N SA \n", "4 100408 33 29 34 15 33 D A \n", "5 100610 31 33 36 7 15 SA D \n", "\n", " NEORAW_03 NEORAW_04 ... NEORAW_51 NEORAW_52 NEORAW_53 NEORAW_54 NEORAW_55 \\\n", "1 SA SA ... N A A A A \n", "2 A SA ... N A A D N \n", "3 A A ... D SA A SD D \n", "4 N SA ... D A A N D \n", "5 D SA ... SD N SA SD D \n", "\n", " NEORAW_56 NEORAW_57 NEORAW_58 NEORAW_59 NEORAW_60 \n", "1 D N N D A \n", "2 D N D A N \n", "3 D D D SD A \n", "4 N D A A A \n", "5 A SA SA D A \n", "\n", "[5 rows x 66 columns]" ] }, "execution_count": 298, "metadata": {}, "output_type": "execute_result" } ], "source": [ "personality_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that the subject `131924` does not have data for the individual Big Five questionnaire questions (its `NEORAW_*` columns are `NaN` in the output below) but does have scores for the five personality traits (the `NEOFAC_*` columns)." ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SubjectNEOFAC_ANEOFAC_ONEOFAC_CNEOFAC_NNEOFAC_ENEORAW_01NEORAW_02NEORAW_03NEORAW_04...NEORAW_51NEORAW_52NEORAW_53NEORAW_54NEORAW_55NEORAW_56NEORAW_57NEORAW_58NEORAW_59NEORAW_60
1781319243030121212NaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

1 rows × 66 columns

\n", "
" ], "text/plain": [ " Subject NEOFAC_A NEOFAC_O NEOFAC_C NEOFAC_N NEOFAC_E NEORAW_01 NEORAW_02 \\\n", "178 131924 30 30 12 12 12 NaN NaN \n", "\n", " NEORAW_03 NEORAW_04 ... NEORAW_51 NEORAW_52 NEORAW_53 NEORAW_54 \\\n", "178 NaN NaN ... NaN NaN NaN NaN \n", "\n", " NEORAW_55 NEORAW_56 NEORAW_57 NEORAW_58 NEORAW_59 NEORAW_60 \n", "178 NaN NaN NaN NaN NaN NaN \n", "\n", "[1 rows x 66 columns]" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "personality_df[personality_df['Subject'] == '131924']" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SubjectNEOFAC_ANEOFAC_ONEOFAC_CNEOFAC_NNEOFAC_ENEORAW_01NEORAW_02NEORAW_03NEORAW_04...NEORAW_51NEORAW_52NEORAW_53NEORAW_54NEORAW_55NEORAW_56NEORAW_57NEORAW_58NEORAW_59NEORAW_60
31003073724351537NSAAA...DSAASDDDDDSDA
41004083329341533DANSA...DAANDNDAAA
12101915353045831NDASA...DSASASDSDSDDDDSA
191028163627321031ADNSA...SDANNDDDNDSA
231034142730312034DAASA...DSAADDDNNDA
\n", "

5 rows × 66 columns

\n", "
" ], "text/plain": [ " Subject NEOFAC_A NEOFAC_O NEOFAC_C NEOFAC_N NEOFAC_E NEORAW_01 NEORAW_02 \\\n", "3 100307 37 24 35 15 37 N SA \n", "4 100408 33 29 34 15 33 D A \n", "12 101915 35 30 45 8 31 N D \n", "19 102816 36 27 32 10 31 A D \n", "23 103414 27 30 31 20 34 D A \n", "\n", " NEORAW_03 NEORAW_04 ... NEORAW_51 NEORAW_52 NEORAW_53 NEORAW_54 NEORAW_55 \\\n", "3 A A ... D SA A SD D \n", "4 N SA ... D A A N D \n", "12 A SA ... D SA SA SD SD \n", "19 N SA ... SD A N N D \n", "23 A SA ... D SA A D D \n", "\n", " NEORAW_56 NEORAW_57 NEORAW_58 NEORAW_59 NEORAW_60 \n", "3 D D D SD A \n", "4 N D A A A \n", "12 SD D D D SA \n", "19 D D N D SA \n", "23 D N N D A \n", "\n", "[5 rows x 66 columns]" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "personality_subset.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "\n", "Preliminarly going through the personality trait scores to check max, min." ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "46" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "neofac_a = [int(score1) for score1 in personality_subset.iloc[:,1]]\n", "np.max(neofac_a)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "44" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "neofac_b = [int(score2) for score2 in personality_subset.iloc[:,2]]\n", "np.max(neofac_b)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "48" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "neofac_c = [int(score3) for score3 in personality_subset.iloc[:,3]]\n", "np.max(neofac_c)" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "36" ] }, 
"execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "neofac_d = [int(score4) for score4 in personality_subset.iloc[:,4]]\n", "np.max(neofac_d)" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "45" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "neofac_e = [int(score5) for score5 in personality_subset.iloc[:,5]]\n", "np.max(neofac_e)" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "47" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.max([int(score_val) for score_val in personality_df.iloc[:,5] if type(score_val) != float])" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }