Model Fitting#


# import relevant libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import f1_score
%matplotlib inline
from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

From the tutorial: https://www.section.io/engineering-education/how-to-implement-k-fold-cross-validation/

It includes some functions I will reuse later with our own data.

dataset = pd.read_csv('/content/drive/MyDrive/NMA/breast_cancer_data.csv')
# Separate features and target variable
X = dataset.iloc[:, 2:-1].values  # everything except the id and diagnosis columns
y = dataset.iloc[:, 1].values     # the diagnosis label
print("Matrix of features", X, sep='\n')
print("--------------------------------------------------")
print("Target Variable", y, sep='\n')
Matrix of features
[[1.799e+01 1.038e+01 1.228e+02 ... 2.654e-01 4.601e-01 1.189e-01]
 [2.057e+01 1.777e+01 1.329e+02 ... 1.860e-01 2.750e-01 8.902e-02]
 [1.969e+01 2.125e+01 1.300e+02 ... 2.430e-01 3.613e-01 8.758e-02]
 ...
 [1.660e+01 2.808e+01 1.083e+02 ... 1.418e-01 2.218e-01 7.820e-02]
 [2.060e+01 2.933e+01 1.401e+02 ... 2.650e-01 4.087e-01 1.240e-01]
 [7.760e+00 2.454e+01 4.792e+01 ... 0.000e+00 2.871e-01 7.039e-02]]
--------------------------------------------------
Target Variable
['M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M'
 'M' 'B' 'B' 'B' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M'
 'M' 'B' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'B' 'M' 'B' 'B' 'B' 'B' 'B' 'M'
 'M' 'B' 'M' 'M' 'B' 'B' 'B' 'B' 'M' 'B' 'M' 'M' 'B' 'B' 'B' 'B' 'M' 'B'
 'M' 'M' 'B' 'M' 'B' 'M' 'M' 'B' 'B' 'B' 'M' 'M' 'B' 'M' 'M' 'M' 'B' 'B'
 'B' 'M' 'B' 'B' 'M' 'M' 'B' 'B' 'B' 'M' 'M' 'B' 'B' 'B' 'B' 'M' 'B' 'B'
 'M' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'M' 'M' 'M' 'B' 'M' 'M' 'B' 'B' 'B'
 'M' 'M' 'B' 'M' 'B' 'M' 'M' 'B' 'M' 'M' 'B' 'B' 'M' 'B' 'B' 'M' 'B' 'B'
 'B' 'B' 'M' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'M' 'B' 'B' 'B' 'B' 'M'
 'M' 'B' 'M' 'B' 'B' 'M' 'M' 'B' 'B' 'M' 'M' 'B' 'B' 'B' 'B' 'M' 'B' 'B'
 'M' 'M' 'M' 'B' 'M' 'B' 'M' 'B' 'B' 'B' 'M' 'B' 'B' 'M' 'M' 'B' 'M' 'M'
 'M' 'M' 'B' 'M' 'M' 'M' 'B' 'M' 'B' 'M' 'B' 'B' 'M' 'B' 'M' 'M' 'M' 'M'
 'B' 'B' 'M' 'M' 'B' 'B' 'B' 'M' 'B' 'B' 'B' 'B' 'B' 'M' 'M' 'B' 'B' 'M'
 'B' 'B' 'M' 'M' 'B' 'M' 'B' 'B' 'B' 'B' 'M' 'B' 'B' 'B' 'B' 'B' 'M' 'B'
 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'B' 'B' 'B' 'B'
 'B' 'B' 'M' 'B' 'M' 'B' 'B' 'M' 'B' 'B' 'M' 'B' 'M' 'M' 'B' 'B' 'B' 'B'
 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'M' 'B' 'B' 'M' 'B' 'M' 'B' 'B' 'B'
 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'M' 'B' 'B' 'B' 'M' 'B' 'M'
 'B' 'B' 'B' 'B' 'M' 'M' 'M' 'B' 'B' 'B' 'B' 'M' 'B' 'M' 'B' 'M' 'B' 'B'
 'B' 'M' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'M' 'M' 'M' 'B' 'B' 'B' 'B' 'B' 'B'
 'B' 'B' 'B' 'B' 'B' 'M' 'M' 'B' 'M' 'M' 'M' 'B' 'M' 'M' 'B' 'B' 'B' 'B'
 'B' 'M' 'B' 'B' 'B' 'B' 'B' 'M' 'B' 'B' 'B' 'M' 'B' 'B' 'M' 'M' 'B' 'B'
 'B' 'B' 'B' 'B' 'M' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'M' 'B' 'B' 'B' 'B' 'B'
 'M' 'B' 'B' 'M' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'M' 'B'
 'M' 'M' 'B' 'M' 'B' 'B' 'B' 'B' 'B' 'M' 'B' 'B' 'M' 'B' 'M' 'B' 'B' 'M'
 'B' 'M' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'M' 'M' 'B' 'B' 'B' 'B' 'B' 'B'
 'M' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'M' 'B' 'B' 'B' 'B' 'B' 'B'
 'B' 'M' 'B' 'M' 'B' 'B' 'M' 'B' 'B' 'B' 'B' 'B' 'M' 'M' 'B' 'M' 'B' 'M'
 'B' 'B' 'B' 'B' 'B' 'M' 'B' 'B' 'M' 'B' 'M' 'B' 'M' 'M' 'B' 'B' 'B' 'M'
 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'M' 'B' 'M' 'M' 'B' 'B' 'B'
 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B'
 'B' 'B' 'B' 'B' 'M' 'M' 'M' 'M' 'M' 'M' 'B']
# Label encode the target variable
# Change the target variable to numerical values:
# 0 represents benign (B), 1 represents malignant (M)
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
encoded_y = label_encoder.fit_transform(y)
label_encoder_name_mapping = dict(zip(label_encoder.classes_,
                                      label_encoder.transform(label_encoder.classes_)))
print("Mapping of Label Encoded Classes", label_encoder_name_mapping, sep="\n")
print("Label Encoded Target Variable", encoded_y, sep="\n")
Mapping of Label Encoded Classes
{'B': 0, 'M': 1}
Label Encoded Target Variable
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 0 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0 1 1 0 1 1 0 0 0 0 1 0 1 1 0 0 0 0 1 0 1 1
 0 1 0 1 1 0 0 0 1 1 0 1 1 1 0 0 0 1 0 0 1 1 0 0 0 1 1 0 0 0 0 1 0 0 1 0 0
 0 0 0 0 0 0 1 1 1 0 1 1 0 0 0 1 1 0 1 0 1 1 0 1 1 0 0 1 0 0 1 0 0 0 0 1 0
 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 1 1 0 0 1 1 0 0 0 0 1 0 0 1 1 1 0 1
 0 1 0 0 0 1 0 0 1 1 0 1 1 1 1 0 1 1 1 0 1 0 1 0 0 1 0 1 1 1 1 0 0 1 1 0 0
 0 1 0 0 0 0 0 1 1 0 0 1 0 0 1 1 0 1 0 0 0 0 1 0 0 0 0 0 1 0 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 0 0 0 0 0 0 1 0 1 0 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0
 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1
 1 0 1 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 1 0 0 0 0 0 1 0 0
 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 1 0 1 0 1 0 0 0 0 0 1 0 0 1 0 1 0 1 1
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 1 1 1 1 1 0]
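The encoding can be reversed when needed; a minimal sketch using the same encoder's inverse_transform (the five-element slice is just for illustration):

# Recover the original string labels from the encoded values
print(label_encoder.inverse_transform(encoded_y[:5]))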
# from sklearn.tree import DecisionTreeClassifier
# #define the model
# decision_tree_model = DecisionTreeClassifier(criterion="entropy",
#                                      random_state=0)

# #perform cross validation with model
# decision_tree_result = cross_validation(decision_tree_model, X, encoded_y, 10)
# print(decision_tree_result)
# # Plot Accuracy Result
# model_name = "Decision Tree"
# plot_result(model_name,
#             "Accuracy",
#             "Accuracy scores in 10 Folds",
#             decision_tree_result["Training Accuracy scores"],
#             decision_tree_result["Validation Accuracy scores"])

fMRI Model#


# load dataset
data_BOLD = pd.read_csv('/content/drive/MyDrive/NMA/BOLD_contrast_network_98.csv')
data_IES = pd.read_csv('/content/drive/MyDrive/NMA/ies_scores_98.csv')
data_personality = pd.read_csv('/content/drive/MyDrive/NMA/median_personality _98.csv')
data_IES
    SUBJECTS  EMOTION T1 IES  EMOTION T2 IES  SOCIAL T1 IES  SOCIAL T2 IES  GAMBLING IES
0     100307        6.994464        6.238000       4.790000          5.750      3.609062
1     100408        7.119444        6.708667      10.593333         12.675      4.811875
2     101915        9.155087        8.345333      11.930000         13.990      5.093125
3     102816        7.111111        6.612000      11.890000         14.195      4.874375
4     103414        7.847222        8.561333       7.386667          7.955      4.704063
..       ...             ...             ...            ...            ...           ...
93    199655        7.082907        6.660000       6.656667          9.225      3.010625
94    200614        7.932778        8.359333       6.043333          8.830      4.212500
95    201111       11.040556        9.281333      10.126667         10.270      5.563750
96    201414        8.061667        8.048000       9.090000         11.725      3.100312
97    205119        8.516055        7.414000       7.970000         21.400      5.100937

98 rows × 6 columns

data_BOLD
subject_id Emotion_Auditory_L Emotion_Auditory_R Emotion_Cingulo-Oper_L Emotion_Cingulo-Oper_R Emotion_Default_L Emotion_Default_R Emotion_Dorsal-atten_L Emotion_Dorsal-atten_R Emotion_Frontopariet_L ... Gambling_Posterior-Mu_L Gambling_Posterior-Mu_R Gambling_Somatomotor_L Gambling_Somatomotor_R Gambling_Ventral-Mult_L Gambling_Ventral-Mult_R Gambling_Visual1_L Gambling_Visual1_R Gambling_Visual2_L Gambling_Visual2_L.1
0 100307 -2.804921 -2.433804 -0.777985 -0.353353 10.053771 5.986613 11.420777 8.673832 4.628537 ... -7.973004 2.530932 -3.179724 -1.810594 -7.124124 1.524933 -3.339643 -3.025932 8.826484 7.199637
1 100408 3.516186 7.529671 -9.521039 -5.284645 -8.731886 7.884268 11.791724 1.867515 -8.937923 ... -24.315753 -18.506888 -6.057032 4.568156 -48.646206 -11.067792 -16.796800 -5.983401 -8.297101 6.657371
2 101915 9.980518 5.266869 9.546204 11.319655 8.908589 20.103598 0.907064 12.911264 2.576694 ... -1.944254 -0.950986 0.150241 3.160913 -8.325856 -9.110250 -7.625908 -1.689586 -1.384789 1.175621
3 102816 20.833271 -16.487274 1.886792 -0.946059 10.643481 3.247931 13.517639 13.682730 -7.682030 ... -3.673315 -2.800902 12.236763 6.242775 -9.369718 -12.720637 8.636415 20.621203 9.931208 8.350895
4 103414 9.885700 5.633930 9.734162 9.824339 13.819462 21.356125 21.945681 7.596248 13.637148 ... 9.498112 7.589702 4.858379 14.831399 21.459354 -7.949689 18.820264 10.545901 12.616854 12.111169
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
93 199655 -3.796242 -3.248363 -12.931731 -7.968932 -1.838899 -2.248285 -0.109514 1.404556 -8.534893 ... -4.010796 1.193511 0.801685 -0.544744 -0.791156 22.397713 -2.309791 9.854299 -1.874123 0.729667
94 200614 -2.340933 -0.460370 -6.904875 -13.213405 -0.929012 -7.979797 11.367749 18.012441 -1.613193 ... 11.388163 11.611773 -12.670486 4.437090 1.793672 -0.470018 -8.310730 9.031029 -6.830466 -0.281131
95 201111 -0.313696 -3.006372 7.014663 -1.106524 7.094315 1.009450 9.850395 6.223135 2.587723 ... -1.079248 10.458447 -10.125810 -8.503565 6.909634 26.248419 15.927485 13.441762 6.393254 4.603233
96 201414 9.047621 3.032789 -4.672088 -2.014852 0.149621 22.042231 1.823159 22.238250 -1.161439 ... 4.350870 2.288127 -0.454990 7.512785 10.580950 7.709618 0.995246 0.200687 3.556047 14.316392
97 205119 18.881525 12.401503 7.964594 -0.326323 24.005220 15.183962 16.718064 9.807230 15.608610 ... -15.698724 -16.134373 -19.458602 -16.554394 -16.920817 -41.737430 14.800047 19.709679 -5.829851 -10.290764

98 rows × 73 columns


data_personality
    Subject  NEOFAC_A  NEOFAC_O  NEOFAC_C  NEOFAC_N  NEOFAC_E
0    100307         1         0         1         0         1
1    100408         0         1         0         0         1
2    101915         1         1         1         0         1
3    102816         1         0         0         0         1
4    103414         0         1         0         1         1
..      ...       ...       ...       ...       ...       ...
93   199655         0         1         0         0         0
94   200614         1         0         1         0         1
95   201111         1         1         1         1         0
96   201414         1         1         1         0         1
97   205119         1         1         1         0         1

98 rows × 6 columns

# Format features and target variables
X_IES = data_IES.iloc[:, 1:].values
X_BOLD = data_BOLD.iloc[:, 1:].values
# personality labels
y_A = data_personality.iloc[:, 1].values
y_O = data_personality.iloc[:, 2].values
y_C = data_personality.iloc[:, 3].values
y_N = data_personality.iloc[:, 4].values
y_E = data_personality.iloc[:, 5].values
print("Matrix of features", X_IES, sep='\n')  # everything except the subject ID
print("--------------------------------------------------")
print("Matrix of features BOLD", X_BOLD, sep='\n')  # everything except the subject ID
print("--------------------------------------------------")
print("Target Variable", y_A, sep='\n')  # Agreeableness labels
Matrix of features
[[ 6.99446367  6.238       4.79        5.75        3.6090625 ]
 [ 7.11944444  6.70866667 10.59333333 12.675       4.811875  ]
 [ 9.1550865   8.34533333 11.93       13.99        5.093125  ]
 [ 7.11111111  6.612      11.89       14.195       4.874375  ]
 [ 7.84722222  8.56133333  7.38666667  7.955       4.7040625 ]
 [ 7.71722222  8.13520408 10.8075     21.38        3.329375  ]
 [ 9.17555556  7.97066667  9.14       19.8         3.678125  ]
 [ 7.55944444  7.17        9.14       15.1         3.555     ]
 [ 6.64945312  7.00533333  7.12666667  6.49        3.0796875 ]
 [ 7.855       7.23333333  6.80333333  7.095       3.3903125 ]
 [ 7.65404844  8.12933333 11.81666667 12.22        8.1771875 ]
 [ 8.25444444 10.50532544  8.62        7.605       3.0190625 ]
 [ 6.98777778  8.168      11.48666667 10.125       3.63625   ]
 [ 8.356609    6.99933333  5.24666667 33.92        3.7440625 ]
 [ 7.38222222  7.73133333  7.76666667  8.095       2.980625  ]
 [ 7.86833333  8.65866667  6.83       18.53        5.8440625 ]
 [ 5.87647059  5.23266667 23.91        9.3         3.91225807]
 [ 9.01722222  9.51666667  9.39666667 11.51        4.42006244]
 [ 7.38722222  7.872      10.575       6.955       3.9453125 ]
 [ 8.4         8.974       5.91        7.23        6.2940625 ]
 [ 7.97888889  8.86533333  8.27333333  8.44        5.06305931]
 [ 8.61611111  9.432       7.07        9.84        5.6909375 ]
 [ 7.77888889  8.34030612  8.56666667  6.88        3.14      ]
 [ 7.10555556  6.52466667  6.58666667 10.725       5.16795005]
 [10.22695312  7.456       8.65666667 15.24        5.880625  ]
 [ 7.51833333  8.72133333  9.84       11.975       6.27080125]
 [ 7.51666667  6.96        8.49333333  6.425       5.4365625 ]
 [10.88666667 10.89413265 10.58        8.57        5.219375  ]
 [ 7.19333333  9.21733333  4.57666667  4.665       2.9871875 ]
 [ 8.195       5.778       8.33        7.76        4.95983351]
 [ 8.735       9.04733333 37.65       18.02        5.125625  ]
 [ 6.09        6.14533333  8.06        9.36        2.6371875 ]
 [ 8.40456747  8.47066667  7.67666667 22.64        4.2840625 ]
 [ 6.41777778  6.60266667  7.62333333 12.02        6.8284375 ]
 [ 9.34111111  8.94066667  8.56333333 15.03        5.77125   ]
 [ 8.95722222  8.86666667  7.89666667  7.27        2.46875   ]
 [ 7.07979239  5.518       7.07333333 10.445       2.4490625 ]
 [ 5.46        6.12333333  6.05        6.195       3.4141103 ]
 [ 7.49722222  6.256       6.47        6.235       4.2884375 ]
 [ 9.08333333  8.956       7.97666667  6.385       3.6946875 ]
 [ 8.38611111  8.17266667  9.88333333  4.585       6.61125   ]
 [ 9.00351563  9.26709184  9.91       15.18        2.85502601]
 [ 9.58833333 11.01333333  8.1975      5.765       2.980625  ]
 [ 8.60388889  7.69533333  9.66333333 11.425       3.8365625 ]
 [ 7.28888889  7.11333333  5.78666667  6.445       3.7728125 ]
 [ 8.77944444  8.74438775 10.9575     11.23        4.10375   ]
 [ 7.52013841  7.438       5.57333333 10.04        3.84375   ]
 [ 7.94722222  7.25466667  7.42        6.54        3.84099896]
 [ 8.73716263 10.753125    7.02666667 14.4         3.721875  ]
 [ 7.00888889  7.46933333  9.12        7.135       3.68316337]
 [ 9.95666667 10.5408284  10.22       29.34        5.5071875 ]
 [ 7.85522491  6.63266667  7.44333333 10.12        3.68375   ]
 [ 7.56666667  7.95933333 10.86       14.12        3.7296875 ]
 [ 7.01388889  7.472      12.04333333 12.745       4.6921875 ]
 [ 5.91010381  5.73466667 21.41666667 26.76        5.94125   ]
 [ 8.935       8.378      14.58333333 21.96        5.76899063]
 [ 6.95        7.96530612  7.59333333  8.915       3.7725    ]
 [ 7.60944444 10.75255102 10.31333333 10.2         3.7203125 ]
 [ 6.78277778  6.03866667  5.49666667  6.905       3.2825    ]
 [ 6.00444444  6.81733333  6.47        5.43        4.596875  ]
 [ 6.30611111  6.89133333  9.26333333  6.96        4.6284375 ]
 [ 8.025       7.47466667  8.41333333  7.205       3.79338189]
 [ 9.17555556  8.728      11.3475      8.37        4.7103125 ]
 [ 9.30269896 10.14266667 13.67666667 24.94        4.845625  ]
 [ 7.13055556  7.752       5.55333333  5.56        3.1428125 ]
 [ 8.3983391   6.92866667  5.73333333  5.995       3.633125  ]
 [ 9.705       9.826      10.01666667  6.005       5.4121875 ]
 [10.5032      8.246       7.06666667  8.72        4.22625   ]
 [10.22222222  8.90066667  5.43333333  6.82        3.4578125 ]
 [ 7.98555556  8.29066667  8.565       7.28        3.6975    ]
 [ 7.12588235  6.42866667  6.62        6.1         4.195625  ]
 [ 9.32222222  9.08666667  7.59666667  5.81        4.4125    ]
 [ 8.04444444  8.57       13.13       12.23        4.7103125 ]
 [ 5.94055556  5.76533333  6.94        6.16        4.3484375 ]
 [ 6.85277778  6.24533333  8.18333333  8.515       4.226875  ]
 [ 8.24055556  7.894      10.02333333 11.74        4.76906667]
 [ 9.53833333  8.56333333  9.40666667  7.685       4.43125   ]
 [ 9.36277778 10.12866667  6.20666667  6.25        5.1765625 ]
 [ 7.83965398  6.996      16.3575     11.815       5.125     ]
 [ 7.79222222 10.72333333  9.18666667 10.67        4.8859375 ]
 [ 8.155       8.492       7.87       25.7         3.4203125 ]
 [ 7.58242214  7.282       8.19666667  9.025       4.8871875 ]
 [ 6.75111111  7.84438775 13.65        7.93        4.51629553]
 [ 7.97888889  8.686       7.38333333  6.115       3.34875   ]
 [ 7.77722222  6.66733333  7.54       13.4         4.825     ]
 [ 9.79555556  9.48333333  7.86666667  8.865       5.075     ]
 [ 9.11444444  8.40133333  6.18333333 13.85        5.2553125 ]
 [ 6.94888889  6.344       8.36       13.88        4.35      ]
 [ 7.83333333  8.22666667 21.5175     20.92        3.8226847 ]
 [ 6.59777778  7.18010204 11.73        5.8         3.8321875 ]
 [ 7.09055556  7.49        7.06666667 28.98        3.1084375 ]
 [ 7.72567474  7.476       8.02333333  8.58        4.6753125 ]
 [ 8.06166667  7.386       5.05333333  5.33        3.450625  ]
 [ 7.08290657  6.66        6.65666667  9.225       3.010625  ]
 [ 7.93277778  8.35933333  6.04333333  8.83        4.2125    ]
 [11.04055556  9.28133333 10.12666667 10.27        5.56375   ]
 [ 8.06166667  8.048       9.09       11.725       3.1003125 ]
 [ 8.51605536  7.414       7.97       21.4         5.1009375 ]]
--------------------------------------------------
Matrix of features BOLD
[[ -2.80492079  -2.43380419  -0.77798455 ...  -3.0259323    8.82648362
    7.19963736]
 [  3.5161865    7.52967116  -9.52103931 ...  -5.98340103  -8.29710147
    6.65737091]
 [  9.9805183    5.266869     9.54620418 ...  -1.68958553  -1.38478885
    1.17562126]
 ...
 [ -0.31369563  -3.00637191   7.01466318 ...  13.44176231   6.39325368
    4.60323275]
 [  9.04762062   3.0327889   -4.67208768 ...   0.20068705   3.5560471
   14.31639195]
 [ 18.88152454  12.40150292   7.9645944  ...  19.7096792   -5.82985081
  -10.29076429]]
--------------------------------------------------
Target Variable
[1 0 1 1 0 0 1 1 0 1 1 0 1 0 0 0 1 1 0 1 0 1 0 0 0 1 0 0 0 0 1 1 0 1 1 0 0
 1 0 0 1 0 1 1 1 0 1 1 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 1 1 1 1 1 0 1 1 1 1
 1 0 1 0 0 0 1 1 0 0 1 1 1 1 0 0 1 0 1 0 1 1 1 1]

Let’s analyze the data first

from collections import Counter
print(dict(Counter(y_A))) #Agreeableness
print(dict(Counter(y_O))) #Openness
print(dict(Counter(y_C))) #Conscientiousness
print(dict(Counter(y_N))) #Neuroticism
print(dict(Counter(y_E))) #Extraversion
{1: 50, 0: 48}
{0: 48, 1: 50}
{1: 52, 0: 46}
{0: 46, 1: 52}
{1: 51, 0: 47}

In an earlier version of the labels, y_A, y_O, and y_C each had only one element in one of the classes, which made it impossible to train a model on that class and also evaluate it on that label. As the counts above show, the data is now well balanced between the two classes.
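If a label column ever ends up with a near-empty class again, one guard is to drop classes below a minimum count before modeling. A minimal sketch (drop_rare_classes and the min_count threshold are hypothetical, not part of the pipeline above):

# Keep only the classes that have at least `min_count` members (hypothetical helper)
def drop_rare_classes(df, label_col, min_count=5):
    counts = df[label_col].value_counts()
    keep = counts[counts >= min_count].index
    return df[df[label_col].isin(keep)]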

Functions to train the model#


Preparing y_E (please ignore)

# make a copy to keep the original data without labels
data_BOLD_ye = data_BOLD.copy()
data_BOLD_ye['y_E'] = y_E
# data_BOLD_ye_clean = data_BOLD_ye[data_BOLD_ye['y_E'] != 0]
X_BOLD_ye = data_BOLD_ye.iloc[:,1:-1].values # does not include subject_id and y_E
bold_y_E = data_BOLD_ye['y_E'].values
# make a copy to keep the original data without labels
data_IES_ye = data_IES.copy()
data_IES_ye['y_E'] = y_E
data_IES_ye_clean = data_IES_ye[data_IES_ye['y_E'] != 0]
X_IES_ye = data_IES_ye_clean.iloc[:,1:-1].values # does not include subject_id and y_E
ies_y_E = data_IES_ye_clean['y_E'].values
# Since the values of y_E were 1 and 2, subtract 1 from each element to map them to 0 and 1
ies_y_E = ies_y_E - 1

y_N

# make a copy to keep the original data without labels
data_BOLD_yn = data_BOLD.copy()
data_BOLD_yn['y_N'] = y_N
data_BOLD_yn_clean = data_BOLD_yn[data_BOLD_yn['y_N'] != 2]
X_BOLD_yn = data_BOLD_yn_clean.iloc[:,1:-1].values # does not include subject_id and y_N
bold_y_N = data_BOLD_yn_clean['y_N'].values
# make a copy to keep the original data without labels
data_IES_yn = data_IES.copy()
data_IES_yn['y_N'] = y_N
data_IES_yn_clean = data_IES_yn[data_IES_yn['y_N'] != 2] #outlier class is 2
X_IES_yn = data_IES_yn_clean.iloc[:,1:-1].values # does not include subject_id and y_N
ies_y_N = data_IES_yn_clean['y_N'].values

y_A

dict(Counter(y_A))
{1: 50, 0: 48}
# make a copy to keep the original data without labels
data_BOLD_ya = data_BOLD.copy()
data_BOLD_ya['y_A'] = y_A
# NOTE: this filter assumes the old 1/2 labels; with the current 0/1 labels
# it drops every class-0 subject, leaving a single class (see the count below)
data_BOLD_ya_clean = data_BOLD_ya[data_BOLD_ya['y_A'] != 0]
data_BOLD_ya1_clean = data_BOLD_ya_clean[data_BOLD_ya_clean['y_A'] == 1]
# add repeated data points with label 1 to oversample this class and balance the dataset
data_BOLD_ya_clean = pd.concat([data_BOLD_ya_clean, data_BOLD_ya1_clean])
data_BOLD_ya_clean = data_BOLD_ya_clean.sample(frac=1, random_state=42)
X_BOLD_ya = data_BOLD_ya_clean.iloc[:, 1:-1].values  # excludes subject_id and y_A
bold_y_A = data_BOLD_ya_clean['y_A'].values
# Since the values of y_A were 1 and 2, subtract 1 from each element to map them to 0 and 1
bold_y_A = bold_y_A - 1
dict(Counter(bold_y_A))
{0: 100}
# make a copy to keep the original data without labels
data_IES_ya = data_IES.copy()
data_IES_ya['y_A'] = y_A
data_IES_ya_clean = data_IES_ya[data_IES_ya['y_A'] != 0]
data_IES_ya1_clean = data_IES_ya[data_IES_ya['y_A'] == 1]
# add repeated data points with label 1 to oversample this class and make the dataset balanced
data_IES_ya_clean = pd.concat([data_IES_ya_clean, data_IES_ya1_clean])
X_IES_ya = data_IES_ya_clean.iloc[:,1:-1].values # does not include subject_id and y_A
ies_y_A = data_IES_ya_clean['y_A'].values
# Since the values of y_A were 1 and 2, subtract 1 from each element to map them to 0 and 1
ies_y_A = ies_y_A - 1
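As the {0: 100} count above shows, the filter-and-concat route can silently collapse everything into a single class. A safer pattern is to upsample the minority class explicitly; a minimal sketch using sklearn.utils.resample (the df/majority/minority names are illustrative):

# Upsample the minority class until it matches the majority class size
from sklearn.utils import resample

df = data_BOLD_ya  # any frame with a 'y_A' label column
majority_label = df['y_A'].value_counts().idxmax()
majority = df[df['y_A'] == majority_label]
minority = df[df['y_A'] != majority_label]
minority_upsampled = resample(minority, replace=True,
                              n_samples=len(majority), random_state=42)
df_balanced = pd.concat([majority, minority_upsampled])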

Extract relevant network#


# Extract relevant networks
# Social Cognition: Dorsal-atten, Cingulo-Oper, Default
# Gambling: Cingulo-Oper, Somatomotor
# Emotion: Posterior-Mu, Orbito-Affec
list_rel_networks = ["Emotion_Orbito-Affec_L", "Emotion_Orbito-Affec_R",
                     "Emotion_Posterior-Mu_L", "Emotion_Posterior-Mu_R",
                     "Social_Default_L", "Social_Default_R",
                     "Social_Dorsal-atten_L", "Social_Dorsal-atten_R",
                     "Social_Cingulo-Oper_L", "Social_Cingulo-Oper_R",
                     "Gambling_Cingulo-Oper_L", "Gambling_Cingulo-Oper_R",
                     "Gambling_Somatomotor_L", "Gambling_Somatomotor_R"]

data_BOLD_rel_networks = data_BOLD.loc[:, list_rel_networks]
X_BOLD_rel_networks = data_BOLD.loc[:, list_rel_networks].values

print("X_BOLD_rel",X_BOLD_rel_networks)


data_BOLD_rel_networks
X_BOLD_rel [[ 28.43122907   0.45423503   5.57731995 ...  -2.72906399  -3.17972421
   -1.81059444]
 [ -2.28557407  10.50433129   3.48441709 ...   1.55804715  -6.05703247
    4.56815643]
 [  2.37558422  20.70941068  10.91275624 ...   7.90590405   0.1502409
    3.16091272]
 ...
 [  6.09024801   7.16814804   2.71281271 ... -16.94888299 -10.12580988
   -8.50356458]
 [ 20.06966877  -2.22965354   1.99569376 ...  11.16623543  -0.45498965
    7.51278519]
 [ 20.63729181  21.44628432  20.10109145 ... -13.64763183 -19.45860156
  -16.55439367]]
Emotion_Orbito-Affec_L Emotion_Orbito-Affec_R Emotion_Posterior-Mu_L Emotion_Posterior-Mu_R Social_Default_L Social_Default_R Social_Dorsal-atten_L Social_Dorsal-atten_R Social_Cingulo-Oper_L Social_Cingulo-Oper_R Gambling_Cingulo-Oper_L Gambling_Cingulo-Oper_R Gambling_Somatomotor_L Gambling_Somatomotor_R
0 28.431229 0.454235 5.577320 -2.292669 7.947511 9.692712 7.256165 7.648978 2.103778 -4.409611 0.201830 -2.729064 -3.179724 -1.810594
1 -2.285574 10.504331 3.484417 6.491784 1.251033 23.132281 17.014608 13.279836 3.523961 11.183352 -10.916530 1.558047 -6.057032 4.568156
2 2.375584 20.709411 10.912756 14.266931 3.360379 26.191055 11.821487 4.827629 -6.936404 2.386275 6.541618 7.905904 0.150241 3.160913
3 -6.299961 11.344551 -16.024109 -6.153889 31.087654 43.157114 33.487436 9.643869 39.793509 21.317714 15.593201 4.404389 12.236763 6.242775
4 21.917341 -9.262694 10.743872 6.427080 24.837916 16.653878 16.779754 0.681534 13.613428 0.102399 1.520076 8.167790 4.858379 14.831399
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
93 2.353674 -5.315302 -2.678669 -3.597959 6.386875 15.979174 9.778716 16.583812 1.405442 1.693786 3.011257 -0.583554 0.801685 -0.544744
94 13.488223 -2.153168 -0.915092 -11.025394 6.462592 20.029918 12.159982 28.898447 0.610042 2.912552 -3.312991 5.908460 -12.670486 4.437090
95 6.090248 7.168148 2.712813 1.874720 0.794011 13.211028 5.884091 9.997345 -16.163660 -13.464351 -15.076135 -16.948883 -10.125810 -8.503565
96 20.069669 -2.229654 1.995694 12.395959 -14.147689 21.109725 16.930378 0.463004 -4.729206 -11.472062 11.540953 11.166235 -0.454990 7.512785
97 20.637292 21.446284 20.101091 18.260701 12.243844 -18.868113 13.676068 -25.399274 15.839693 -18.173780 -24.940111 -13.647632 -19.458602 -16.554394

98 rows × 14 columns

Merging data for model 1.2 and 2.2#


# Merge X_IES with X_BOLD_rel_networks column-wise so each subject row
# gains the relevant-network BOLD features
# NOTE: data_BOLD_rel_networks has no subject_id column, so iloc[:, 1:]
# also drops its first network column (Emotion_Orbito-Affec_L)
df_all = pd.concat([data_IES, data_BOLD_rel_networks.iloc[:, 1:]], axis=1)
X_combined = df_all.iloc[:, 1:].values

# print(df_all)
print(X_combined)
[[  6.99446367   6.238        4.79       ...  -2.72906399  -3.17972421
   -1.81059444]
 [  7.11944444   6.70866667  10.59333333 ...   1.55804715  -6.05703247
    4.56815643]
 [  9.1550865    8.34533333  11.93       ...   7.90590405   0.1502409
    3.16091272]
 ...
 [ 11.04055556   9.28133333  10.12666667 ... -16.94888299 -10.12580988
   -8.50356458]
 [  8.06166667   8.048        9.09       ...  11.16623543  -0.45498965
    7.51278519]
 [  8.51605536   7.414        7.97       ... -13.64763183 -19.45860156
  -16.55439367]]

df_allnets = pd.concat([data_IES, data_BOLD.iloc[:, 1:]], axis=1)
X_combined_all = df_allnets.iloc[:, 1:].values

print(df_allnets)
print(X_combined_all)
    SUBJECTS  EMOTION T1 IES  EMOTION T2 IES  SOCIAL T1 IES  SOCIAL T2 IES  \
0     100307        6.994464        6.238000       4.790000          5.750   
1     100408        7.119444        6.708667      10.593333         12.675   
2     101915        9.155087        8.345333      11.930000         13.990   
3     102816        7.111111        6.612000      11.890000         14.195   
4     103414        7.847222        8.561333       7.386667          7.955   
..       ...             ...             ...            ...            ...   
93    199655        7.082907        6.660000       6.656667          9.225   
94    200614        7.932778        8.359333       6.043333          8.830   
95    201111       11.040556        9.281333      10.126667         10.270   
96    201414        8.061667        8.048000       9.090000         11.725   
97    205119        8.516055        7.414000       7.970000         21.400   

    GAMBLING IES  Emotion_Auditory_L  Emotion_Auditory_R  \
0       3.609062           -2.804921           -2.433804   
1       4.811875            3.516186            7.529671   
2       5.093125            9.980518            5.266869   
3       4.874375           20.833271          -16.487274   
4       4.704063            9.885700            5.633930   
..           ...                 ...                 ...   
93      3.010625           -3.796242           -3.248363   
94      4.212500           -2.340933           -0.460370   
95      5.563750           -0.313696           -3.006372   
96      3.100312            9.047621            3.032789   
97      5.100937           18.881525           12.401503   

    Emotion_Cingulo-Oper_L  Emotion_Cingulo-Oper_R  ...  \
0                -0.777985               -0.353353  ...   
1                -9.521039               -5.284645  ...   
2                 9.546204               11.319655  ...   
3                 1.886792               -0.946059  ...   
4                 9.734162                9.824339  ...   
..                     ...                     ...  ...   
93              -12.931731               -7.968932  ...   
94               -6.904875              -13.213405  ...   
95                7.014663               -1.106524  ...   
96               -4.672088               -2.014852  ...   
97                7.964594               -0.326323  ...   

    Gambling_Posterior-Mu_L  Gambling_Posterior-Mu_R  Gambling_Somatomotor_L  \
0                 -7.973004                 2.530932               -3.179724   
1                -24.315753               -18.506888               -6.057032   
2                 -1.944254                -0.950986                0.150241   
3                 -3.673315                -2.800902               12.236763   
4                  9.498112                 7.589702                4.858379   
..                      ...                      ...                     ...   
93                -4.010796                 1.193511                0.801685   
94                11.388163                11.611773              -12.670486   
95                -1.079248                10.458447              -10.125810   
96                 4.350870                 2.288127               -0.454990   
97               -15.698724               -16.134373              -19.458602   

    Gambling_Somatomotor_R  Gambling_Ventral-Mult_L  Gambling_Ventral-Mult_R  \
0                -1.810594                -7.124124                 1.524933   
1                 4.568156               -48.646206               -11.067792   
2                 3.160913                -8.325856                -9.110250   
3                 6.242775                -9.369718               -12.720637   
4                14.831399                21.459354                -7.949689   
..                     ...                      ...                      ...   
93               -0.544744                -0.791156                22.397713   
94                4.437090                 1.793672                -0.470018   
95               -8.503565                 6.909634                26.248419   
96                7.512785                10.580950                 7.709618   
97              -16.554394               -16.920817               -41.737430   

    Gambling_Visual1_L  Gambling_Visual1_R  Gambling_Visual2_L  \
0            -3.339643           -3.025932            8.826484   
1           -16.796800           -5.983401           -8.297101   
2            -7.625908           -1.689586           -1.384789   
3             8.636415           20.621203            9.931208   
4            18.820264           10.545901           12.616854   
..                 ...                 ...                 ...   
93           -2.309791            9.854299           -1.874123   
94           -8.310730            9.031029           -6.830466   
95           15.927485           13.441762            6.393254   
96            0.995246            0.200687            3.556047   
97           14.800047           19.709679           -5.829851   

    Gambling_Visual2_L.1  
0               7.199637  
1               6.657371  
2               1.175621  
3               8.350895  
4              12.111169  
..                   ...  
93              0.729667  
94             -0.281131  
95              4.603233  
96             14.316392  
97            -10.290764  

[98 rows x 78 columns]
[[  6.99446367   6.238        4.79       ...  -3.0259323    8.82648362
    7.19963736]
 [  7.11944444   6.70866667  10.59333333 ...  -5.98340103  -8.29710147
    6.65737091]
 [  9.1550865    8.34533333  11.93       ...  -1.68958553  -1.38478885
    1.17562126]
 ...
 [ 11.04055556   9.28133333  10.12666667 ...  13.44176231   6.39325368
    4.60323275]
 [  8.06166667   8.048        9.09       ...   0.20068705   3.5560471
   14.31639195]
 [  8.51605536   7.414        7.97       ...  19.7096792   -5.82985081
  -10.29076429]]

Shuffle the data#

X_BOLD_rel_net_random = X_BOLD_rel_networks.copy()
# print(X_BOLD_rel_net_random)
np.random.shuffle(X_BOLD_rel_net_random)  # shuffles the rows in place
# print("\n after shuffling: ", X_BOLD_rel_net_random)

# check the shuffle: find where the original first row ended up
for i, elem in enumerate(X_BOLD_rel_net_random):
  if (elem == X_BOLD_rel_networks[0]).all():
    print(elem, "index: ", i)

X_IES_random = X_IES.copy()
# print("\n", X_IES_random)
np.random.shuffle(X_IES_random)
# print("\n after shuffling: ", X_IES_random)

# check the shuffle: find where the original first row ended up
for i, elem in enumerate(X_IES_random):
  if (elem == X_IES[0]).all():
    print(elem, "index: ", i)
 after shuffling:  [[ 16.45218713  11.38416407  -3.60465966 ...  15.48796208   7.96992576
   -2.74994526]
 [ -0.34926509  -6.22595071 -11.57751518 ...   7.69581071  -8.43524908
    7.1706866 ]
 [ 14.92651966  -3.73512732   7.23280184 ...  -9.79579736   7.20520023
    4.20299898]
 ...
 [ 14.4729777   -0.07256222  10.83421641 ...  -2.13700959   2.33014651
    9.49200178]
 [-14.01305881 -11.66171774 -14.09056564 ...  -0.79217031  15.67001269
   14.88915906]
 [ -9.10286363   3.51498535 -16.55086295 ... -11.4951203  -13.24858253
    2.69033476]]
[28.43122907  0.45423503  5.57731995 -2.29266949  7.9475113   9.69271182
  7.25616452  7.64897799  2.10377849 -4.40961087  0.20183027 -2.72906399
 -3.17972421 -1.81059444] index:  94
[6.99446367 6.238      4.79       5.75       3.6090625 ] index:  63
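An alternative that makes the verification loops unnecessary is to draw an explicit permutation, so the new position of every row is known up front; a minimal sketch with np.random.permutation (perm is an illustrative name):

# Shuffle via an explicit permutation so row provenance is tracked
perm = np.random.permutation(len(X_IES))
X_IES_random = X_IES[perm]
print("original row 0 is now at index", np.argmax(perm == 0))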
# IES + shuffled BOLD, merged together
df_X_BOLD_rel_networks = data_BOLD.loc[:, list_rel_networks]
df_X_BOLD_rel_networks_shuffled = df_X_BOLD_rel_networks.sample(frac=1, random_state=42)
# NOTE: the intent was to give each subject BOLD features from a different subject,
# but pd.concat aligns on the index, so the sampled rows snap back to their
# original order and the result below is identical to the unshuffled X_combined
df_all_shuf = pd.concat([data_IES, df_X_BOLD_rel_networks_shuffled.iloc[:, 1:]], axis=1)
X_IES_BOLD_shuf = df_all_shuf.iloc[:, 1:].values

X_IES_BOLD_shuf
array([[  6.99446367,   6.238     ,   4.79      , ...,  -2.72906399,
         -3.17972421,  -1.81059444],
       [  7.11944444,   6.70866667,  10.59333333, ...,   1.55804715,
         -6.05703247,   4.56815643],
       [  9.1550865 ,   8.34533333,  11.93      , ...,   7.90590405,
          0.1502409 ,   3.16091272],
       ...,
       [ 11.04055556,   9.28133333,  10.12666667, ..., -16.94888299,
        -10.12580988,  -8.50356458],
       [  8.06166667,   8.048     ,   9.09      , ...,  11.16623543,
         -0.45498965,   7.51278519],
       [  8.51605536,   7.414     ,   7.97      , ..., -13.64763183,
        -19.45860156, -16.55439367]])
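To genuinely mismatch subjects, the shuffled frame's index has to be discarded before concatenating, so pandas pairs rows by position instead of by label. A minimal sketch using the frames from the cells above:

# reset_index(drop=True) prevents concat from re-aligning the shuffled rows
bold_shuffled = df_X_BOLD_rel_networks.sample(frac=1, random_state=42).reset_index(drop=True)
df_truly_shuf = pd.concat([data_IES.reset_index(drop=True), bold_shuffled], axis=1)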
df_all = pd.concat([data_IES, data_BOLD_rel_networks.iloc[:, 1:]], axis=1)
df_all = df_all.sample(frac=1, random_state=42)
df_all
SUBJECTS EMOTION T1 IES EMOTION T2 IES SOCIAL T1 IES SOCIAL T2 IES GAMBLING IES Emotion_Orbito-Affec_R Emotion_Posterior-Mu_L Emotion_Posterior-Mu_R Social_Default_L Social_Default_R Social_Dorsal-atten_L Social_Dorsal-atten_R Social_Cingulo-Oper_L Social_Cingulo-Oper_R Gambling_Cingulo-Oper_L Gambling_Cingulo-Oper_R Gambling_Somatomotor_L Gambling_Somatomotor_R
62 156233 9.175556 8.728000 11.347500 8.370 4.710312 47.444448 -10.106824 16.675076 56.434866 28.182136 31.546946 42.600209 55.595711 25.529516 -11.152869 2.489001 -40.084649 45.352231
40 133928 8.386111 8.172667 9.883333 4.585 6.611250 -2.774743 -4.332258 -4.875565 6.707974 21.594923 4.315081 5.944338 2.674365 -0.610924 11.541634 -8.039645 2.023116 -7.822105
94 200614 7.932778 8.359333 6.043333 8.830 4.212500 -2.153168 -0.915092 -11.025394 6.462592 20.029918 12.159982 28.898447 0.610042 2.912552 -3.312991 5.908460 -12.670486 4.437090
18 114924 7.387222 7.872000 10.575000 6.955 3.945312 1.288621 18.902996 19.232304 9.561598 22.492452 -17.725834 9.353869 36.199382 22.825000 -1.481654 11.656833 -7.408190 1.623945
81 185139 7.582422 7.282000 8.196667 9.025 4.887187 -2.815439 17.874201 5.387013 44.268219 46.451894 27.331754 29.353116 25.461625 17.506039 20.236217 12.966751 7.593207 6.259435
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
60 153429 6.306111 6.891333 9.263333 6.960 4.628438 -9.412777 -16.422605 -11.352146 33.459260 34.156747 22.302166 6.948978 19.700245 20.156487 -2.216582 3.437919 -1.432907 1.421361
71 162329 9.322222 9.086667 7.596667 5.810 4.412500 -10.584116 1.107784 0.419456 16.108770 12.717068 0.852195 3.726035 3.553585 -8.032073 -11.537315 -9.315438 -2.816885 -2.717131
14 111514 7.382222 7.731333 7.766667 8.095 2.980625 -8.531749 13.911354 8.411415 13.149241 16.950228 12.188672 12.345472 3.525659 7.314959 -1.239870 -1.260985 -5.801990 -1.970751
92 199150 8.061667 7.386000 5.053333 5.330 3.450625 -4.016565 5.786369 12.045751 57.057203 81.129574 49.524420 61.291128 54.722875 69.305258 3.976578 -0.860398 0.308576 4.036385
51 148032 7.855225 6.632667 7.443333 10.120 3.683750 2.668375 22.794715 12.120931 9.295083 46.248595 5.619867 36.289513 -5.692058 17.192820 -4.886173 -4.202818 -8.246974 -13.320883

98 rows × 19 columns

Cross Validation#


# K-Fold Cross-Validation
from sklearn.model_selection import cross_validate

def cross_validation(model, _X, _y, _cv=10):
    '''Perform K-fold cross-validation (10 folds by default).

    Parameters
    ----------
    model: estimator object
        The machine learning algorithm to be used for training.
    _X: array
        The matrix of features.
    _y: array
        The target variable.
    _cv: int, default=10
        The number of folds for cross-validation.

    Returns
    -------
    A dictionary containing the metrics 'accuracy', 'precision',
    'recall', and 'f1' for both the training and validation sets.
    '''
    _scoring = ['accuracy', 'precision', 'recall', 'f1']
    results = cross_validate(estimator=model,
                             X=_X,
                             y=_y,
                             cv=_cv,
                             scoring=_scoring,
                             return_train_score=True)
    return {"Training Accuracy scores": results['train_accuracy'],
            "Mean Training Accuracy": results['train_accuracy'].mean()*100,
            "Training Precision scores": results['train_precision'],
            "Mean Training Precision": results['train_precision'].mean(),
            "Training Recall scores": results['train_recall'],
            "Mean Training Recall": results['train_recall'].mean(),
            "Training F1 scores": results['train_f1'],
            "Mean Training F1 Score": results['train_f1'].mean(),
            "Validation Accuracy scores": results['test_accuracy'],
            "Mean Validation Accuracy": results['test_accuracy'].mean()*100,
            "Validation Precision scores": results['test_precision'],
            "Mean Validation Precision": results['test_precision'].mean(),
            "Validation Recall scores": results['test_recall'],
            "Mean Validation Recall": results['test_recall'].mean(),
            "Validation F1 scores": results['test_f1'],
            "Mean Validation F1 Score": results['test_f1'].mean()
            }
# Grouped bar chart for both training and validation data
def plot_result(x_label, y_label, plot_title, train_data, val_data, num_folds=10):
    '''Plot a grouped bar chart showing the training and validation
    results of the ML model in each fold of K-fold cross-validation.

    Parameters
    ----------
    x_label: str
        Name of the algorithm used for training, e.g. 'Decision Tree'.
    y_label: str
        Name of the metric being visualized, e.g. 'Accuracy'.
    plot_title: str
        Title of the plot, e.g. 'Accuracy Plot'.
    train_data: list or array
        Per-fold training precision, accuracy, or F1 scores.
    val_data: list or array
        Per-fold validation precision, accuracy, or F1 scores.

    Returns
    -------
    A grouped bar chart showing the training and validation result
    in each fold.
    '''
    plt.figure(figsize=(12, 6))
    labels = ["1st Fold", "2nd Fold", "3rd Fold", "4th Fold", "5th Fold",
              "6th Fold", "7th Fold", "8th Fold", "9th Fold", "10th Fold"][:num_folds]
    X_axis = np.arange(len(labels))
    plt.ylim(0.4, 1)
    plt.bar(X_axis - 0.2, train_data, 0.4, color='blue', label='Training')
    plt.bar(X_axis + 0.2, val_data, 0.4, color='red', label='Validation')
    plt.title(plot_title, fontsize=30)
    plt.xticks(X_axis, labels)
    plt.xlabel(x_label, fontsize=14)
    plt.ylabel(y_label, fontsize=14)
    plt.legend()
    plt.grid(True)
    plt.show()
num_folds = 10

SVM#


1.1 IES -> personality#

from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC

#define the model
svm = LinearSVC(max_iter=100000)
#perform cross validation with model


#y_A
svm_result = cross_validation(svm, X_IES, y_A, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")
svm_result
The main results are 
 Mean Validation F1: 0.6056099456099455 
 Mean Validation Accuracy 57.333333333333336
{'Training Accuracy scores': array([0.63636364, 0.67045455, 0.70454545, 0.64772727, 0.65909091,
        0.69318182, 0.67045455, 0.67045455, 0.59550562, 0.65168539]),
 'Mean Training Accuracy': 65.99463738508683,
 'Training Precision scores': array([0.63265306, 0.66      , 0.70212766, 0.65217391, 0.66666667,
        0.6875    , 0.66666667, 0.68181818, 0.59574468, 0.64583333]),
 'Mean Training Precision': 0.6591184163178349,
 'Training Recall scores': array([0.68888889, 0.73333333, 0.73333333, 0.66666667, 0.66666667,
        0.73333333, 0.71111111, 0.66666667, 0.62222222, 0.68888889]),
 'Mean Training Recall': 0.691111111111111,
 'Training F1 scores': array([0.65957447, 0.69473684, 0.7173913 , 0.65934066, 0.66666667,
        0.70967742, 0.68817204, 0.6741573 , 0.60869565, 0.66666667]),
 'Mean Training F1 Score': 0.6745079025122479,
 'Validation Accuracy scores': array([0.6       , 0.5       , 0.3       , 0.5       , 0.7       ,
        0.6       , 0.6       , 0.6       , 0.77777778, 0.55555556]),
 'Mean Validation Accuracy': 57.333333333333336,
 'Validation Precision scores': array([0.6       , 0.5       , 0.33333333, 0.5       , 0.625     ,
        0.6       , 0.57142857, 0.57142857, 0.8       , 0.6       ]),
 'Mean Validation Precision': 0.5701190476190476,
 'Validation Recall scores': array([0.6, 0.4, 0.4, 0.6, 1. , 0.6, 0.8, 0.8, 0.8, 0.6]),
 'Mean Validation Recall': 0.6599999999999999,
 'Validation F1 scores': array([0.6       , 0.44444444, 0.36363636, 0.54545455, 0.76923077,
        0.6       , 0.66666667, 0.66666667, 0.8       , 0.6       ]),
 'Mean Validation F1 Score': 0.6056099456099455}
model_name = "SVM"
plot_result(model_name,
            "F1",
            "F1 scores in 10 Folds",
            svm_result["Training F1 scores"],
            svm_result["Validation F1 scores"])
[Figure: grouped bar chart of training vs validation F1 scores across 10 folds]
#y_O

#define the model
svm = LinearSVC(max_iter=100000)

svm_result = cross_validation(svm, X_IES, y_O, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")

svm_result

The main results are 
 Mean Validation F1: 0.5646176046176047 
 Mean Validation Accuracy 53.333333333333336
{'Training Accuracy scores': array([0.57954545, 0.61363636, 0.54545455, 0.63636364, 0.60227273,
        0.57954545, 0.59090909, 0.51136364, 0.5505618 , 0.53932584]),
 'Mean Training Accuracy': 57.48978549540347,
 'Training Precision scores': array([0.58333333, 0.60377358, 0.54901961, 0.62264151, 0.61904762,
        0.58695652, 0.60465116, 0.52      , 0.55319149, 0.54      ]),
 'Mean Training Precision': 0.5782614828455243,
 'Training Recall scores': array([0.62222222, 0.71111111, 0.62222222, 0.73333333, 0.57777778,
        0.6       , 0.57777778, 0.57777778, 0.57777778, 0.6       ]),
 'Mean Training Recall': 0.6199999999999999,
 'Training F1 scores': array([0.60215054, 0.65306122, 0.58333333, 0.67346939, 0.59770115,
        0.59340659, 0.59090909, 0.54736842, 0.56521739, 0.56842105]),
 'Mean Training F1 Score': 0.5975038181942168,
 'Validation Accuracy scores': array([0.4       , 0.6       , 0.8       , 0.5       , 0.3       ,
        0.4       , 0.4       , 0.6       , 0.77777778, 0.55555556]),
 'Mean Validation Accuracy': 53.333333333333336,
 'Validation Precision scores': array([0.44444444, 0.66666667, 0.8       , 0.5       , 0.33333333,
        0.42857143, 0.4       , 0.6       , 0.8       , 0.57142857]),
 'Mean Validation Precision': 0.5544444444444444,
 'Validation Recall scores': array([0.8, 0.4, 0.8, 0.4, 0.4, 0.6, 0.4, 0.6, 0.8, 0.8]),
 'Mean Validation Recall': 0.6,
 'Validation F1 scores': array([0.57142857, 0.5       , 0.8       , 0.44444444, 0.36363636,
        0.5       , 0.4       , 0.6       , 0.8       , 0.66666667]),
 'Mean Validation F1 Score': 0.5646176046176047}
#y_C
#define the model
svm = LinearSVC(max_iter=100000)

svm_result = cross_validation(svm, X_IES, y_C, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")

svm_result

The main results are 
 Mean Validation F1: 0.5350305250305251 
 Mean Validation Accuracy 42.77777777777778
{'Training Accuracy scores': array([0.56818182, 0.55681818, 0.60227273, 0.52272727, 0.55681818,
        0.52272727, 0.57954545, 0.59090909, 0.57303371, 0.61797753]),
 'Mean Training Accuracy': 56.91011235955056,
 'Training Precision scores': array([0.56666667, 0.56603774, 0.59677419, 0.53731343, 0.56451613,
        0.54385965, 0.59615385, 0.59322034, 0.57377049, 0.61403509]),
 'Mean Training Precision': 0.575234757171447,
 'Training Recall scores': array([0.73913043, 0.65217391, 0.78723404, 0.76595745, 0.74468085,
        0.65957447, 0.65957447, 0.74468085, 0.74468085, 0.74468085]),
 'Mean Training Recall': 0.7242368177613321,
 'Training F1 scores': array([0.64150943, 0.60606061, 0.67889908, 0.63157895, 0.64220183,
        0.59615385, 0.62626263, 0.66037736, 0.64814815, 0.67307692]),
 'Mean Training F1 Score': 0.6404268806954594,
 'Validation Accuracy scores': array([0.6       , 0.2       , 0.4       , 0.6       , 0.4       ,
        0.5       , 0.4       , 0.4       , 0.44444444, 0.33333333]),
 'Mean Validation Accuracy': 42.77777777777778,
 'Validation Precision scores': array([0.75      , 0.33333333, 0.42857143, 0.55555556, 0.44444444,
        0.5       , 0.42857143, 0.44444444, 0.5       , 0.42857143]),
 'Mean Validation Precision': 0.4813492063492063,
 'Validation Recall scores': array([0.5       , 0.33333333, 0.6       , 1.        , 0.8       ,
        0.8       , 0.6       , 0.8       , 0.4       , 0.6       ]),
 'Mean Validation Recall': 0.6433333333333333,
 'Validation F1 scores': array([0.6       , 0.33333333, 0.5       , 0.71428571, 0.57142857,
        0.61538462, 0.5       , 0.57142857, 0.44444444, 0.5       ]),
 'Mean Validation F1 Score': 0.5350305250305251}
#y_N
#define the model
svm = LinearSVC(max_iter=100000)

svm_result = cross_validation(svm, X_IES, y_N, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")
svm_result
The main results are 
 Mean Validation F1: 0.514032634032634 
 Mean Validation Accuracy 49.222222222222214
{'Training Accuracy scores': array([0.61363636, 0.61363636, 0.60227273, 0.61363636, 0.56818182,
        0.61363636, 0.625     , 0.61363636, 0.62921348, 0.57303371]),
 'Mean Training Accuracy': 60.658835546476,
 'Training Precision scores': array([0.63265306, 0.62745098, 0.62      , 0.63265306, 0.59574468,
        0.63829787, 0.63829787, 0.63043478, 0.64583333, 0.58823529]),
 'Mean Training Precision': 0.6249600938432727,
 'Training Recall scores': array([0.65957447, 0.68085106, 0.65957447, 0.65957447, 0.59574468,
        0.63829787, 0.65217391, 0.63043478, 0.65957447, 0.63829787]),
 'Mean Training Recall': 0.64740980573543,
 'Training F1 scores': array([0.64583333, 0.65306122, 0.63917526, 0.64583333, 0.59574468,
        0.63829787, 0.64516129, 0.63043478, 0.65263158, 0.6122449 ]),
 'Mean Training F1 Score': 0.6358418251917739,
 'Validation Accuracy scores': array([0.5       , 0.4       , 0.3       , 0.6       , 0.6       ,
        0.3       , 0.6       , 0.4       , 0.44444444, 0.77777778]),
 'Mean Validation Accuracy': 49.222222222222214,
 'Validation Precision scores': array([0.5       , 0.42857143, 0.25      , 0.57142857, 0.66666667,
        0.375     , 1.        , 0.5       , 0.5       , 0.8       ]),
 'Mean Validation Precision': 0.5591666666666666,
 'Validation Recall scores': array([0.6       , 0.6       , 0.2       , 0.8       , 0.4       ,
        0.6       , 0.33333333, 0.5       , 0.4       , 0.8       ]),
 'Mean Validation Recall': 0.5233333333333333,
 'Validation F1 scores': array([0.54545455, 0.5       , 0.22222222, 0.66666667, 0.5       ,
        0.46153846, 0.5       , 0.5       , 0.44444444, 0.8       ]),
 'Mean Validation F1 Score': 0.514032634032634}
#y_E

#define the model
svm = LinearSVC(max_iter=100000)

svm_result = cross_validation(svm, X_IES, y_E, num_folds)
print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")

svm_result
The main results are 
 Mean Validation F1: 0.631042291042291 
 Mean Validation Accuracy 58.33333333333333
{'Training Accuracy scores': array([0.61363636, 0.60227273, 0.625     , 0.63636364, 0.64772727,
        0.59090909, 0.56818182, 0.61363636, 0.60674157, 0.61797753]),
 'Mean Training Accuracy': 61.22446373850868,
 'Training Precision scores': array([0.6       , 0.59016393, 0.61016949, 0.61290323, 0.63157895,
        0.58928571, 0.5625    , 0.60714286, 0.59649123, 0.60714286]),
 'Mean Training Precision': 0.600737825576813,
 'Training Recall scores': array([0.73333333, 0.7826087 , 0.7826087 , 0.82608696, 0.7826087 ,
        0.7173913 , 0.7826087 , 0.73913043, 0.73913043, 0.73913043]),
 'Mean Training Recall': 0.7624637681159421,
 'Training F1 scores': array([0.66      , 0.6728972 , 0.68571429, 0.7037037 , 0.69902913,
        0.64705882, 0.65454545, 0.66666667, 0.66019417, 0.66666667]),
 'Mean Training F1 Score': 0.6716476098058746,
 'Validation Accuracy scores': array([0.6       , 0.7       , 0.3       , 0.4       , 0.5       ,
        0.7       , 0.7       , 0.6       , 0.77777778, 0.55555556]),
 'Mean Validation Accuracy': 58.33333333333333,
 'Validation Precision scores': array([0.75      , 0.66666667, 0.375     , 0.4       , 0.5       ,
        1.        , 0.625     , 0.55555556, 0.8       , 0.6       ]),
 'Mean Validation Precision': 0.6272222222222221,
 'Validation Recall scores': array([0.5, 0.8, 0.6, 0.4, 1. , 0.4, 1. , 1. , 0.8, 0.6]),
 'Mean Validation Recall': 0.7099999999999999,
 'Validation F1 scores': array([0.6       , 0.72727273, 0.46153846, 0.4       , 0.66666667,
        0.57142857, 0.76923077, 0.71428571, 0.8       , 0.6       ]),
 'Mean Validation F1 Score': 0.631042291042291}

1.2 IES + relevant BOLD -> personality#

#y_A: try BOLD features only
#define the model
svm = LinearSVC(max_iter=10000)  # fewer iterations than above; triggers the convergence warnings below
#perform cross validation with model

svm_result = cross_validation(svm, X_BOLD_rel_networks, y_A, num_folds)



print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")

svm_result
/usr/local/lib/python3.10/dist-packages/sklearn/svm/_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
(the same ConvergenceWarning was emitted for each of the 10 folds)
The main results are 
 Mean Validation F1: 0.538811188811189 
 Mean Validation Accuracy 54.22222222222223
{'Training Accuracy scores': array([0.61363636, 0.72727273, 0.63636364, 0.65909091, 0.70454545,
        0.64772727, 0.67045455, 0.69318182, 0.62921348, 0.59550562]),
 'Mean Training Accuracy': 65.76991828396321,
 'Training Precision scores': array([0.63414634, 0.68421053, 0.76      , 0.68292683, 0.69387755,
        0.625     , 0.68181818, 0.66666667, 0.61538462, 0.59183673]),
 'Mean Training Precision': 0.6635867446631247,
 'Training Recall scores': array([0.57777778, 0.86666667, 0.42222222, 0.62222222, 0.75555556,
        0.77777778, 0.66666667, 0.8       , 0.71111111, 0.64444444]),
 'Mean Training Recall': 0.6844444444444445,
 'Training F1 scores': array([0.60465116, 0.76470588, 0.54285714, 0.65116279, 0.72340426,
        0.69306931, 0.6741573 , 0.72727273, 0.65979381, 0.61702128]),
 'Mean Training F1 Score': 0.6658095662620547,
 'Validation Accuracy scores': array([0.8       , 0.4       , 0.6       , 0.5       , 0.6       ,
        0.4       , 0.4       , 0.5       , 0.66666667, 0.55555556]),
 'Mean Validation Accuracy': 54.22222222222223,
 'Validation Precision scores': array([1.        , 0.33333333, 0.66666667, 0.5       , 0.6       ,
        0.42857143, 0.4       , 0.5       , 0.66666667, 0.66666667]),
 'Mean Validation Precision': 0.5761904761904763,
 'Validation Recall scores': array([0.6, 0.2, 0.4, 0.6, 0.6, 0.6, 0.4, 0.8, 0.8, 0.4]),
 'Mean Validation Recall': 0.54,
 'Validation F1 scores': array([0.75      , 0.25      , 0.5       , 0.54545455, 0.6       ,
        0.5       , 0.4       , 0.61538462, 0.72727273, 0.5       ]),
 'Mean Validation F1 Score': 0.538811188811189}
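The convergence warnings above typically mean liblinear is struggling with unscaled features rather than genuinely needing more iterations. A common remedy is to standardize inside a pipeline so the scaler is refit on each training fold; a minimal sketch (this is not what was run above):

# Standardize features before the linear SVM so liblinear converges faster
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

svm_scaled = make_pipeline(StandardScaler(), LinearSVC(max_iter=10000))
# svm_result = cross_validation(svm_scaled, X_BOLD_rel_networks, y_A, num_folds)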
#y_A combined
#define the model
svm = LinearSVC(max_iter=1000000)
#perform cross validation with model

svm_result = cross_validation(svm, X_combined, y_A, num_folds)



print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")

svm_result
The main results are 
 Mean Validation F1: 0.566868686868687 
 Mean Validation Accuracy 55.33333333333334
{'Training Accuracy scores': array([0.75      , 0.77272727, 0.75      , 0.84090909, 0.72727273,
        0.69318182, 0.73863636, 0.76136364, 0.73033708, 0.7752809 ]),
 'Mean Training Accuracy': 75.39708886618999,
 'Training Precision scores': array([0.74468085, 0.77777778, 0.75555556, 0.87804878, 0.75609756,
        0.71428571, 0.775     , 0.77272727, 0.73333333, 0.79069767]),
 'Mean Training Precision': 0.7698204520625502,
 'Training Recall scores': array([0.77777778, 0.77777778, 0.75555556, 0.8       , 0.68888889,
        0.66666667, 0.68888889, 0.75555556, 0.73333333, 0.75555556]),
 'Mean Training Recall': 0.74,
 'Training F1 scores': array([0.76086957, 0.77777778, 0.75555556, 0.8372093 , 0.72093023,
        0.68965517, 0.72941176, 0.76404494, 0.73333333, 0.77272727]),
 'Mean Training F1 Score': 0.7541514920434951,
 'Validation Accuracy scores': array([0.9       , 0.3       , 0.5       , 0.5       , 0.7       ,
        0.4       , 0.4       , 0.5       , 0.66666667, 0.66666667]),
 'Mean Validation Accuracy': 55.33333333333334,
 'Validation Precision scores': array([1.        , 0.25      , 0.5       , 0.5       , 0.66666667,
        0.4       , 0.4       , 0.5       , 0.66666667, 0.75      ]),
 'Mean Validation Precision': 0.5633333333333334,
 'Validation Recall scores': array([0.8, 0.2, 0.6, 0.6, 0.8, 0.4, 0.4, 0.6, 0.8, 0.6]),
 'Mean Validation Recall': 0.58,
 'Validation F1 scores': array([0.88888889, 0.22222222, 0.54545455, 0.54545455, 0.72727273,
        0.4       , 0.4       , 0.54545455, 0.72727273, 0.66666667]),
 'Mean Validation F1 Score': 0.566868686868687}
model_name = "SVM"
plot_result(model_name,
            "F1",
            "F1 scores in 10 Folds",
            svm_result["Training F1 scores"],
            svm_result["Validation F1 scores"])
_images/test_model_NMA_58_0.png
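plot_result is the other helper reused from that tutorial. A sketch of what it plausibly looks like (grouped bars of training vs. validation scores per fold, relying on the numpy/matplotlib imports at the top of the notebook):

# Sketch only -- the notebook's actual plot_result was defined earlier.
def plot_result(x_label, y_label, plot_title, train_data, val_data):
    # One pair of bars per fold: training score next to validation score.
    labels = [f"Fold {i + 1}" for i in range(len(train_data))]
    X_axis = np.arange(len(labels))
    plt.figure(figsize=(12, 6))
    plt.ylim(0.0, 1.0)
    plt.bar(X_axis - 0.2, train_data, 0.4, label='Training')
    plt.bar(X_axis + 0.2, val_data, 0.4, label='Validation')
    plt.title(plot_title)
    plt.xticks(X_axis, labels)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.legend()
    plt.grid(True)
    plt.show()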
#y_O combined
#define the model
svm = LinearSVC(max_iter=1000000)
#perform cross validation with model

svm_result = cross_validation(svm, X_combined, y_O, num_folds)
print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")

svm_result
The main results are 
 Mean Validation F1: 0.45732323232323235 
 Mean Validation Accuracy 49.11111111111111
{'Training Accuracy scores': array([0.67045455, 0.70454545, 0.68181818, 0.67045455, 0.69318182,
        0.72727273, 0.70454545, 0.63636364, 0.68539326, 0.66292135]),
 'Mean Training Accuracy': 68.36950970377936,
 'Training Precision scores': array([0.66      , 0.73170732, 0.68888889, 0.68181818, 0.71428571,
        0.73333333, 0.74358974, 0.63265306, 0.72972973, 0.6744186 ]),
 'Mean Training Precision': 0.6990424574594416,
 'Training Recall scores': array([0.73333333, 0.66666667, 0.68888889, 0.66666667, 0.66666667,
        0.73333333, 0.64444444, 0.68888889, 0.6       , 0.64444444]),
 'Mean Training Recall': 0.6733333333333333,
 'Training F1 scores': array([0.69473684, 0.69767442, 0.68888889, 0.6741573 , 0.68965517,
        0.73333333, 0.69047619, 0.65957447, 0.65853659, 0.65909091]),
 'Mean Training F1 Score': 0.6846124111734776,
 'Validation Accuracy scores': array([0.6       , 0.6       , 0.2       , 0.5       , 0.2       ,
        0.5       , 0.4       , 0.8       , 0.55555556, 0.55555556]),
 'Mean Validation Accuracy': 49.11111111111111,
 'Validation Precision scores': array([0.57142857, 0.57142857, 0.        , 0.5       , 0.        ,
        0.5       , 0.33333333, 0.8       , 0.6       , 0.6       ]),
 'Mean Validation Precision': 0.44761904761904764,
 'Validation Recall scores': array([0.8, 0.8, 0. , 0.6, 0. , 0.4, 0.2, 0.8, 0.6, 0.6]),
 'Mean Validation Recall': 0.48,
 'Validation F1 scores': array([0.66666667, 0.66666667, 0.        , 0.54545455, 0.        ,
        0.44444444, 0.25      , 0.8       , 0.6       , 0.6       ]),
 'Mean Validation F1 Score': 0.45732323232323235}
#y_C combined
#define the model
svm = LinearSVC(max_iter=1000000)
#perform cross validation with model

svm_result = cross_validation(svm, X_combined, y_C, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")

svm_result
The main results are 
 Mean Validation F1: 0.5745687645687647 
 Mean Validation Accuracy 51.888888888888886
{'Training Accuracy scores': array([0.68181818, 0.69318182, 0.70454545, 0.70454545, 0.68181818,
        0.65909091, 0.63636364, 0.71590909, 0.71910112, 0.74157303]),
 'Mean Training Accuracy': 69.37946884576098,
 'Training Precision scores': array([0.6875    , 0.71111111, 0.71428571, 0.71428571, 0.69387755,
        0.68085106, 0.64705882, 0.69642857, 0.72916667, 0.75      ]),
 'Mean Training Precision': 0.7024565216157386,
 'Training Recall scores': array([0.7173913 , 0.69565217, 0.74468085, 0.74468085, 0.72340426,
        0.68085106, 0.70212766, 0.82978723, 0.74468085, 0.76595745]),
 'Mean Training Recall': 0.7349213691026827,
 'Training F1 scores': array([0.70212766, 0.7032967 , 0.72916667, 0.72916667, 0.70833333,
        0.68085106, 0.67346939, 0.75728155, 0.73684211, 0.75789474]),
 'Mean Training F1 Score': 0.7178429876626047,
 'Validation Accuracy scores': array([0.9       , 0.3       , 0.5       , 0.8       , 0.4       ,
        0.5       , 0.4       , 0.5       , 0.55555556, 0.33333333]),
 'Mean Validation Accuracy': 51.888888888888886,
 'Validation Precision scores': array([1.        , 0.4       , 0.5       , 0.8       , 0.4       ,
        0.5       , 0.4       , 0.5       , 0.57142857, 0.42857143]),
 'Mean Validation Precision': 0.55,
 'Validation Recall scores': array([0.83333333, 0.33333333, 0.6       , 0.8       , 0.4       ,
        0.8       , 0.4       , 0.6       , 0.8       , 0.6       ]),
 'Mean Validation Recall': 0.6166666666666666,
 'Validation F1 scores': array([0.90909091, 0.36363636, 0.54545455, 0.8       , 0.4       ,
        0.61538462, 0.4       , 0.54545455, 0.66666667, 0.5       ]),
 'Mean Validation F1 Score': 0.5745687645687647}
#y_N combined
#define the model
svm = LinearSVC(max_iter=1000000)
#perform cross validation with model

svm_result = cross_validation(svm, X_combined, y_N, num_folds)
print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")

svm_result
The main results are 
 Mean Validation F1: 0.5548701298701297 
 Mean Validation Accuracy 54.0
{'Training Accuracy scores': array([0.69318182, 0.67045455, 0.70454545, 0.76136364, 0.75      ,
        0.70454545, 0.67045455, 0.73863636, 0.71910112, 0.73033708]),
 'Mean Training Accuracy': 71.4262002042901,
 'Training Precision scores': array([0.70833333, 0.6875    , 0.71428571, 0.76      , 0.75510204,
        0.71428571, 0.68085106, 0.74468085, 0.73913043, 0.75555556]),
 'Mean Training Precision': 0.725972470795287,
 'Training Recall scores': array([0.72340426, 0.70212766, 0.74468085, 0.80851064, 0.78723404,
        0.74468085, 0.69565217, 0.76086957, 0.72340426, 0.72340426]),
 'Mean Training Recall': 0.7413968547641074,
 'Training F1 scores': array([0.71578947, 0.69473684, 0.72916667, 0.78350515, 0.77083333,
        0.72916667, 0.68817204, 0.75268817, 0.7311828 , 0.73913043]),
 'Mean Training F1 Score': 0.7334371582630613,
 'Validation Accuracy scores': array([0.4       , 0.7       , 0.5       , 0.5       , 0.5       ,
        0.6       , 0.8       , 0.4       , 0.66666667, 0.33333333]),
 'Mean Validation Accuracy': 54.0,
 'Validation Precision scores': array([0.42857143, 0.66666667, 0.5       , 0.5       , 0.5       ,
        0.57142857, 0.83333333, 0.5       , 0.75      , 0.33333333]),
 'Mean Validation Precision': 0.5583333333333333,
 'Validation Recall scores': array([0.6       , 0.8       , 0.4       , 0.4       , 0.4       ,
        0.8       , 0.83333333, 0.66666667, 0.6       , 0.2       ]),
 'Mean Validation Recall': 0.5700000000000001,
 'Validation F1 scores': array([0.5       , 0.72727273, 0.44444444, 0.44444444, 0.44444444,
        0.66666667, 0.83333333, 0.57142857, 0.66666667, 0.25      ]),
 'Mean Validation F1 Score': 0.5548701298701297}
#y_E combined
#define the model
svm = LinearSVC(max_iter=1000000)
#perform cross validation with model

svm_result = cross_validation(svm, X_combined, y_E, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")

svm_result
The main results are 
 Mean Validation F1: 0.5316300366300366 
 Mean Validation Accuracy 51.22222222222222
{'Training Accuracy scores': array([0.68181818, 0.68181818, 0.68181818, 0.69318182, 0.69318182,
        0.69318182, 0.67045455, 0.75      , 0.68539326, 0.65168539]),
 'Mean Training Accuracy': 68.82533197139938,
 'Training Precision scores': array([0.68888889, 0.69565217, 0.67307692, 0.67924528, 0.69387755,
        0.68627451, 0.66666667, 0.75      , 0.68      , 0.65306122]),
 'Mean Training Precision': 0.6866743220878515,
 'Training Recall scores': array([0.68888889, 0.69565217, 0.76086957, 0.7826087 , 0.73913043,
        0.76086957, 0.73913043, 0.7826087 , 0.73913043, 0.69565217]),
 'Mean Training Recall': 0.7384541062801933,
 'Training F1 scores': array([0.68888889, 0.69565217, 0.71428571, 0.72727273, 0.71578947,
        0.72164948, 0.70103093, 0.76595745, 0.70833333, 0.67368421]),
 'Mean Training F1 Score': 0.7112544381083878,
 'Validation Accuracy scores': array([0.5       , 0.6       , 0.4       , 0.3       , 0.3       ,
        0.8       , 0.4       , 0.6       , 0.66666667, 0.55555556]),
 'Mean Validation Accuracy': 51.22222222222222,
 'Validation Precision scores': array([0.66666667, 0.6       , 0.42857143, 0.25      , 0.375     ,
        1.        , 0.44444444, 0.6       , 0.75      , 0.66666667]),
 'Mean Validation Precision': 0.5781349206349207,
 'Validation Recall scores': array([0.33333333, 0.6       , 0.6       , 0.2       , 0.6       ,
        0.6       , 0.8       , 0.6       , 0.6       , 0.4       ]),
 'Mean Validation Recall': 0.5333333333333333,
 'Validation F1 scores': array([0.44444444, 0.6       , 0.5       , 0.22222222, 0.46153846,
        0.75      , 0.57142857, 0.6       , 0.66666667, 0.5       ]),
 'Mean Validation F1 Score': 0.5316300366300366}

1.3 IES + all networks -> personality#

# (X_combined_all)

#y_A combined
#define the model
svm = LinearSVC(max_iter=900000)
#perform cross validation with model

svm_result = cross_validation(svm, X_combined_all, y_A, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")

svm_result
The main results are 
 Mean Validation F1: 0.4688888888888889 
 Mean Validation Accuracy 47.111111111111114
{'Training Accuracy scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Accuracy': 100.0,
 'Training Precision scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Precision': 1.0,
 'Training Recall scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Recall': 1.0,
 'Training F1 scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training F1 Score': 1.0,
 'Validation Accuracy scores': array([0.4       , 0.5       , 0.6       , 0.4       , 0.5       ,
        0.4       , 0.4       , 0.4       , 0.55555556, 0.55555556]),
 'Mean Validation Accuracy': 47.111111111111114,
 'Validation Precision scores': array([0.4       , 0.5       , 0.6       , 0.42857143, 0.5       ,
        0.4       , 0.4       , 0.4       , 0.66666667, 0.6       ]),
 'Mean Validation Precision': 0.48952380952380947,
 'Validation Recall scores': array([0.4, 0.4, 0.6, 0.6, 0.4, 0.4, 0.4, 0.4, 0.4, 0.6]),
 'Mean Validation Recall': 0.45999999999999996,
 'Validation F1 scores': array([0.4       , 0.44444444, 0.6       , 0.5       , 0.44444444,
        0.4       , 0.4       , 0.4       , 0.5       , 0.6       ]),
 'Mean Validation F1 Score': 0.4688888888888889}
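The training scores above are a perfect 1.0 in every fold while validation accuracy sits near the ~50% chance level, which points to heavy overfitting on the much wider X_combined_all feature set. One option, not tried in these runs, would be stronger regularization via a smaller C (C=1.0 is the LinearSVC default):

# Hypothetical variant: shrink C to regularize more aggressively when the
# feature dimension is large relative to the number of subjects.
svm_reg = LinearSVC(C=0.01, max_iter=900000)
# svm_result = cross_validation(svm_reg, X_combined_all, y_A, num_folds)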
#y_O
svm = LinearSVC(max_iter=900000)
#perform cross validation with model

svm_result = cross_validation(svm, X_combined_all, y_O, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")

svm_result
The main results are 
 Mean Validation F1: 0.5775757575757575 
 Mean Validation Accuracy 57.111111111111114
{'Training Accuracy scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Accuracy': 100.0,
 'Training Precision scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Precision': 1.0,
 'Training Recall scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Recall': 1.0,
 'Training F1 scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training F1 Score': 1.0,
 'Validation Accuracy scores': array([0.7       , 0.5       , 0.6       , 0.6       , 0.3       ,
        0.3       , 0.8       , 0.8       , 0.44444444, 0.66666667]),
 'Mean Validation Accuracy': 57.111111111111114,
 'Validation Precision scores': array([0.75      , 0.5       , 0.57142857, 0.6       , 0.25      ,
        0.33333333, 0.8       , 0.8       , 0.5       , 0.75      ]),
 'Mean Validation Precision': 0.5854761904761905,
 'Validation Recall scores': array([0.6, 0.6, 0.8, 0.6, 0.2, 0.4, 0.8, 0.8, 0.4, 0.6]),
 'Mean Validation Recall': 0.58,
 'Validation F1 scores': array([0.66666667, 0.54545455, 0.66666667, 0.6       , 0.22222222,
        0.36363636, 0.8       , 0.8       , 0.44444444, 0.66666667]),
 'Mean Validation F1 Score': 0.5775757575757575}
#y_C
svm = LinearSVC(max_iter=900000)
#perform cross validation with model

svm_result = cross_validation(svm, X_combined_all, y_C, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")

svm_result
The main results are 
 Mean Validation F1: 0.5012987012987014 
 Mean Validation Accuracy 52.666666666666664
{'Training Accuracy scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Accuracy': 100.0,
 'Training Precision scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Precision': 1.0,
 'Training Recall scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Recall': 1.0,
 'Training F1 scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training F1 Score': 1.0,
 'Validation Accuracy scores': array([0.6       , 0.6       , 0.4       , 0.4       , 0.8       ,
        0.7       , 0.6       , 0.5       , 0.55555556, 0.11111111]),
 'Mean Validation Accuracy': 52.666666666666664,
 'Validation Precision scores': array([1.        , 0.75      , 0.4       , 0.42857143, 0.71428571,
        0.66666667, 0.57142857, 0.5       , 0.66666667, 0.        ]),
 'Mean Validation Precision': 0.5697619047619048,
 'Validation Recall scores': array([0.33333333, 0.5       , 0.4       , 0.6       , 1.        ,
        0.8       , 0.8       , 0.2       , 0.4       , 0.        ]),
 'Mean Validation Recall': 0.5033333333333333,
 'Validation F1 scores': array([0.5       , 0.6       , 0.4       , 0.5       , 0.83333333,
        0.72727273, 0.66666667, 0.28571429, 0.5       , 0.        ]),
 'Mean Validation F1 Score': 0.5012987012987014}
#y_N
svm = LinearSVC(max_iter=900000)
#perform cross validation with model

svm_result = cross_validation(svm, X_combined_all, y_N, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")

svm_result
The main results are 
 Mean Validation F1: 0.5911111111111111 
 Mean Validation Accuracy 58.22222222222222
{'Training Accuracy scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Accuracy': 100.0,
 'Training Precision scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Precision': 1.0,
 'Training Recall scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Recall': 1.0,
 'Training F1 scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training F1 Score': 1.0,
 'Validation Accuracy scores': array([0.9       , 0.5       , 0.8       , 0.5       , 0.6       ,
        0.4       , 0.4       , 0.5       , 0.77777778, 0.44444444]),
 'Mean Validation Accuracy': 58.22222222222222,
 'Validation Precision scores': array([0.83333333, 0.5       , 0.8       , 0.5       , 0.6       ,
        0.4       , 0.5       , 0.6       , 1.        , 0.5       ]),
 'Mean Validation Precision': 0.6233333333333334,
 'Validation Recall scores': array([1.        , 0.4       , 0.8       , 1.        , 0.6       ,
        0.4       , 0.16666667, 0.5       , 0.6       , 0.6       ]),
 'Mean Validation Recall': 0.6066666666666667,
 'Validation F1 scores': array([0.90909091, 0.44444444, 0.8       , 0.66666667, 0.6       ,
        0.4       , 0.25      , 0.54545455, 0.75      , 0.54545455]),
 'Mean Validation F1 Score': 0.5911111111111111}
#y_E
svm = LinearSVC(max_iter=900000)
#perform cross validation with model

svm_result = cross_validation(svm, X_combined_all, y_E, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")

svm_result
The main results are 
 Mean Validation F1: 0.5813919413919414 
 Mean Validation Accuracy 57.44444444444443
{'Training Accuracy scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Accuracy': 100.0,
 'Training Precision scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Precision': 1.0,
 'Training Recall scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Recall': 1.0,
 'Training F1 scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training F1 Score': 1.0,
 'Validation Accuracy scores': array([0.6       , 0.3       , 0.6       , 0.7       , 0.7       ,
        0.6       , 0.5       , 0.3       , 0.66666667, 0.77777778]),
 'Mean Validation Accuracy': 57.44444444444443,
 'Validation Precision scores': array([0.75      , 0.375     , 0.57142857, 0.66666667, 0.66666667,
        0.55555556, 0.5       , 0.        , 1.        , 0.8       ]),
 'Mean Validation Precision': 0.5885317460317461,
 'Validation Recall scores': array([0.5, 0.6, 0.8, 0.8, 0.8, 1. , 0.6, 0. , 0.4, 0.8]),
 'Mean Validation Recall': 0.63,
 'Validation F1 scores': array([0.6       , 0.46153846, 0.66666667, 0.72727273, 0.72727273,
        0.71428571, 0.54545455, 0.        , 0.57142857, 0.8       ]),
 'Mean Validation F1 Score': 0.5813919413919414}
model_name = "SVM"
plot_result(model_name,
            "F1",
            "F1 scores in 10 Folds",
            svm_result["Training F1 scores"],
            svm_result["Validation F1 scores"])
_images/test_model_NMA_69_0.png

2.1 shuffled IES -> personality#

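X_IES_random was built earlier in the notebook as a permutation control: presumably the IES rows are shuffled relative to the fixed labels so that any genuine feature-label relationship is destroyed. A minimal sketch of one way such a control could be constructed (illustrative only, assuming X_IES is a NumPy array; the actual construction happens in an earlier cell):

# Illustrative shuffle control: permute rows of X_IES independently of y.
rng = np.random.default_rng(0)
X_IES_random = X_IES[rng.permutation(X_IES.shape[0])]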
#define the model, using the shuffled features X_IES_random
svm = LinearSVC(max_iter=100000)
#perform cross validation with model


#y_A
svm_result = cross_validation(svm, X_IES_random, y_A, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")
svm_result
The main results are 
 Mean Validation F1: 0.534978354978355 
 Mean Validation Accuracy 56.33333333333334
{'Training Accuracy scores': array([0.61363636, 0.61363636, 0.60227273, 0.61363636, 0.65909091,
        0.60227273, 0.60227273, 0.59090909, 0.61797753, 0.57303371]),
 'Mean Training Accuracy': 60.8873850868233,
 'Training Precision scores': array([0.62790698, 0.65714286, 0.61904762, 0.63414634, 0.7027027 ,
        0.61904762, 0.63157895, 0.61538462, 0.64102564, 0.58974359]),
 'Mean Training Precision': 0.6337726909670667,
 'Training Recall scores': array([0.6       , 0.51111111, 0.57777778, 0.57777778, 0.57777778,
        0.57777778, 0.53333333, 0.53333333, 0.55555556, 0.51111111]),
 'Mean Training Recall': 0.5555555555555556,
 'Training F1 scores': array([0.61363636, 0.575     , 0.59770115, 0.60465116, 0.63414634,
        0.59770115, 0.57831325, 0.57142857, 0.5952381 , 0.54761905]),
 'Mean Training F1 Score': 0.5915435134038812,
 'Validation Accuracy scores': array([0.8       , 0.5       , 0.4       , 0.4       , 0.4       ,
        0.4       , 0.7       , 0.7       , 0.44444444, 0.88888889]),
 'Mean Validation Accuracy': 56.33333333333334,
 'Validation Precision scores': array([0.8       , 0.5       , 0.42857143, 0.44444444, 0.33333333,
        0.33333333, 1.        , 0.75      , 0.5       , 0.83333333]),
 'Mean Validation Precision': 0.5923015873015873,
 'Validation Recall scores': array([0.8, 0.6, 0.6, 0.8, 0.2, 0.2, 0.4, 0.6, 0.2, 1. ]),
 'Mean Validation Recall': 0.5399999999999999,
 'Validation F1 scores': array([0.8       , 0.54545455, 0.5       , 0.57142857, 0.25      ,
        0.25      , 0.57142857, 0.66666667, 0.28571429, 0.90909091]),
 'Mean Validation F1 Score': 0.534978354978355}
#y_O
svm = LinearSVC(max_iter=100000)
#perform cross validation with model


svm_result = cross_validation(svm, X_IES_random, y_O, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")
svm_result
The main results are 
 Mean Validation F1: 0.557965367965368 
 Mean Validation Accuracy 59.77777777777777
{'Training Accuracy scores': array([0.63636364, 0.65909091, 0.71590909, 0.625     , 0.68181818,
        0.64772727, 0.72727273, 0.70454545, 0.68539326, 0.66292135]),
 'Mean Training Accuracy': 67.46041879468847,
 'Training Precision scores': array([0.66666667, 0.7027027 , 0.76315789, 0.65      , 0.69767442,
        0.68421053, 0.76923077, 0.74358974, 0.72972973, 0.7027027 ]),
 'Mean Training Precision': 0.7109665154279597,
 'Training Recall scores': array([0.57777778, 0.57777778, 0.64444444, 0.57777778, 0.66666667,
        0.57777778, 0.66666667, 0.64444444, 0.6       , 0.57777778]),
 'Mean Training Recall': 0.611111111111111,
 'Training F1 scores': array([0.61904762, 0.63414634, 0.69879518, 0.61176471, 0.68181818,
        0.62650602, 0.71428571, 0.69047619, 0.65853659, 0.63414634]),
 'Mean Training F1 Score': 0.6569522884622018,
 'Validation Accuracy scores': array([0.7       , 0.7       , 0.6       , 0.8       , 0.4       ,
        0.8       , 0.4       , 0.8       , 0.44444444, 0.33333333]),
 'Mean Validation Accuracy': 59.77777777777777,
 'Validation Precision scores': array([0.75      , 0.66666667, 0.6       , 0.8       , 0.33333333,
        0.8       , 0.33333333, 0.8       , 0.5       , 0.4       ]),
 'Mean Validation Precision': 0.5983333333333334,
 'Validation Recall scores': array([0.6, 0.8, 0.6, 0.8, 0.2, 0.8, 0.2, 0.8, 0.2, 0.4]),
 'Mean Validation Recall': 0.54,
 'Validation F1 scores': array([0.66666667, 0.72727273, 0.6       , 0.8       , 0.25      ,
        0.8       , 0.25      , 0.8       , 0.28571429, 0.4       ]),
 'Mean Validation F1 Score': 0.557965367965368}
#y_C
svm = LinearSVC(max_iter=100000)
#perform cross validation with model


svm_result = cross_validation(svm, X_IES_random, y_C, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")
svm_result
The main results are 
 Mean Validation F1: 0.5677489177489178 
 Mean Validation Accuracy 50.777777777777786
{'Training Accuracy scores': array([0.61363636, 0.625     , 0.61363636, 0.59090909, 0.625     ,
        0.625     , 0.59090909, 0.57954545, 0.66292135, 0.65168539]),
 'Mean Training Accuracy': 61.78243105209397,
 'Training Precision scores': array([0.61538462, 0.62264151, 0.61016949, 0.59322034, 0.625     ,
        0.625     , 0.59322034, 0.59615385, 0.64912281, 0.63793103]),
 'Mean Training Precision': 0.6167843981964252,
 'Training Recall scores': array([0.69565217, 0.7173913 , 0.76595745, 0.74468085, 0.74468085,
        0.74468085, 0.74468085, 0.65957447, 0.78723404, 0.78723404]),
 'Mean Training Recall': 0.7391766882516189,
 'Training F1 scores': array([0.65306122, 0.66666667, 0.67924528, 0.66037736, 0.67961165,
        0.67961165, 0.66037736, 0.62626263, 0.71153846, 0.7047619 ]),
 'Mean Training F1 Score': 0.6721514184690329,
 'Validation Accuracy scores': array([0.6       , 0.4       , 0.7       , 0.4       , 0.6       ,
        0.5       , 0.5       , 0.6       , 0.22222222, 0.55555556]),
 'Mean Validation Accuracy': 50.777777777777786,
 'Validation Precision scores': array([0.66666667, 0.5       , 0.66666667, 0.44444444, 0.57142857,
        0.5       , 0.5       , 0.57142857, 0.25      , 0.57142857]),
 'Mean Validation Precision': 0.5242063492063492,
 'Validation Recall scores': array([0.66666667, 0.5       , 0.8       , 0.8       , 0.8       ,
        0.6       , 0.4       , 0.8       , 0.2       , 0.8       ]),
 'Mean Validation Recall': 0.6366666666666667,
 'Validation F1 scores': array([0.66666667, 0.5       , 0.72727273, 0.57142857, 0.66666667,
        0.54545455, 0.44444444, 0.66666667, 0.22222222, 0.66666667]),
 'Mean Validation F1 Score': 0.5677489177489178}
#y_N
svm = LinearSVC(max_iter=90000)
#perform cross validation with model


svm_result = cross_validation(svm, X_IES_random, y_N, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")
svm_result
The main results are 
 Mean Validation F1: 0.5483605283605283 
 Mean Validation Accuracy 50.22222222222222
{'Training Accuracy scores': array([0.61363636, 0.57954545, 0.60227273, 0.53409091, 0.59090909,
        0.56818182, 0.51136364, 0.55681818, 0.58426966, 0.56179775]),
 'Mean Training Accuracy': 57.02885597548518,
 'Training Precision scores': array([0.63265306, 0.58928571, 0.60344828, 0.55172414, 0.59322034,
        0.58181818, 0.52727273, 0.56140351, 0.59259259, 0.57142857]),
 'Mean Training Precision': 0.5804847110170361,
 'Training Recall scores': array([0.65957447, 0.70212766, 0.74468085, 0.68085106, 0.74468085,
        0.68085106, 0.63043478, 0.69565217, 0.68085106, 0.68085106]),
 'Mean Training Recall': 0.6900555041628122,
 'Training F1 scores': array([0.64583333, 0.6407767 , 0.66666667, 0.60952381, 0.66037736,
        0.62745098, 0.57425743, 0.62135922, 0.63366337, 0.62135922]),
 'Mean Training F1 Score': 0.6301268086116807,
 'Validation Accuracy scores': array([0.4       , 0.4       , 0.2       , 0.7       , 0.4       ,
        0.7       , 0.5       , 0.5       , 0.55555556, 0.66666667]),
 'Mean Validation Accuracy': 50.22222222222222,
 'Validation Precision scores': array([0.42857143, 0.42857143, 0.2       , 0.66666667, 0.4       ,
        0.625     , 0.66666667, 0.57142857, 0.6       , 0.66666667]),
 'Mean Validation Precision': 0.5253571428571429,
 'Validation Recall scores': array([0.6       , 0.6       , 0.2       , 0.8       , 0.4       ,
        1.        , 0.33333333, 0.66666667, 0.6       , 0.8       ]),
 'Mean Validation Recall': 0.5999999999999999,
 'Validation F1 scores': array([0.5       , 0.5       , 0.2       , 0.72727273, 0.4       ,
        0.76923077, 0.44444444, 0.61538462, 0.6       , 0.72727273]),
 'Mean Validation F1 Score': 0.5483605283605283}
#y_E
svm = LinearSVC(max_iter=90000)
#perform cross validation with model


svm_result = cross_validation(svm, X_IES_random, y_E, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")
svm_result
The main results are 
 Mean Validation F1: 0.5953535353535353 
 Mean Validation Accuracy 60.44444444444444
{'Training Accuracy scores': array([0.68181818, 0.67045455, 0.68181818, 0.63636364, 0.75      ,
        0.64772727, 0.63636364, 0.64772727, 0.69662921, 0.65168539]),
 'Mean Training Accuracy': 67.00587334014301,
 'Training Precision scores': array([0.70731707, 0.68085106, 0.69565217, 0.66666667, 0.76086957,
        0.65306122, 0.68421053, 0.65957447, 0.72093023, 0.64705882]),
 'Mean Training Precision': 0.6876191817775863,
 'Training Recall scores': array([0.64444444, 0.69565217, 0.69565217, 0.60869565, 0.76086957,
        0.69565217, 0.56521739, 0.67391304, 0.67391304, 0.7173913 ]),
 'Mean Training Recall': 0.6731400966183575,
 'Training F1 scores': array([0.6744186 , 0.68817204, 0.69565217, 0.63636364, 0.76086957,
        0.67368421, 0.61904762, 0.66666667, 0.69662921, 0.68041237]),
 'Mean Training F1 Score': 0.6791916104013757,
 'Validation Accuracy scores': array([0.6       , 0.6       , 0.6       , 0.6       , 0.4       ,
        0.6       , 0.5       , 0.7       , 0.55555556, 0.88888889]),
 'Mean Validation Accuracy': 60.44444444444444,
 'Validation Precision scores': array([0.66666667, 0.6       , 0.6       , 0.57142857, 0.4       ,
        0.66666667, 0.5       , 0.75      , 0.66666667, 0.83333333]),
 'Mean Validation Precision': 0.6254761904761905,
 'Validation Recall scores': array([0.66666667, 0.6       , 0.6       , 0.8       , 0.4       ,
        0.4       , 0.4       , 0.6       , 0.4       , 1.        ]),
 'Mean Validation Recall': 0.5866666666666667,
 'Validation F1 scores': array([0.66666667, 0.6       , 0.6       , 0.66666667, 0.4       ,
        0.5       , 0.44444444, 0.66666667, 0.5       , 0.90909091]),
 'Mean Validation F1 Score': 0.5953535353535353}

2.2 IES + shuffled BOLD -> personality#

#y_A
svm = LinearSVC(max_iter=90000)
#perform cross validation with model


svm_result = cross_validation(svm, X_IES_BOLD_shuf, y_A, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")
svm_result
/usr/local/lib/python3.10/dist-packages/sklearn/svm/_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
(the same ConvergenceWarning was raised in every one of the 10 folds; repeats omitted)
The main results are 
 Mean Validation F1: 0.5468181818181819 
 Mean Validation Accuracy 53.222222222222214
{'Training Accuracy scores': array([0.71590909, 0.78409091, 0.76136364, 0.80681818, 0.72727273,
        0.71590909, 0.72727273, 0.77272727, 0.73033708, 0.7752809 ]),
 'Mean Training Accuracy': 75.16981613891727,
 'Training Precision scores': array([0.73809524, 0.7826087 , 0.77272727, 0.81818182, 0.75609756,
        0.72727273, 0.75609756, 0.79069767, 0.73333333, 0.79069767]),
 'Mean Training Precision': 0.7665809556050992,
 'Training Recall scores': array([0.68888889, 0.8       , 0.75555556, 0.8       , 0.68888889,
        0.71111111, 0.68888889, 0.75555556, 0.73333333, 0.75555556]),
 'Mean Training Recall': 0.7377777777777778,
 'Training F1 scores': array([0.71264368, 0.79120879, 0.76404494, 0.80898876, 0.72093023,
        0.71910112, 0.72093023, 0.77272727, 0.73333333, 0.77272727]),
 'Mean Training F1 Score': 0.7516635644734542,
 'Validation Accuracy scores': array([0.8       , 0.5       , 0.5       , 0.5       , 0.7       ,
        0.4       , 0.3       , 0.4       , 0.55555556, 0.66666667]),
 'Mean Validation Accuracy': 53.222222222222214,
 'Validation Precision scores': array([1.        , 0.5       , 0.5       , 0.5       , 0.66666667,
        0.4       , 0.25      , 0.42857143, 0.57142857, 0.75      ]),
 'Mean Validation Precision': 0.5566666666666666,
 'Validation Recall scores': array([0.6, 0.4, 0.6, 0.6, 0.8, 0.4, 0.2, 0.6, 0.8, 0.6]),
 'Mean Validation Recall': 0.5599999999999999,
 'Validation F1 scores': array([0.75      , 0.44444444, 0.54545455, 0.54545455, 0.72727273,
        0.4       , 0.22222222, 0.5       , 0.66666667, 0.66666667]),
 'Mean Validation F1 Score': 0.5468181818181819}
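The ConvergenceWarning above means liblinear still hits the iteration cap even at max_iter=90000. Rather than raising the cap further, standardizing the features usually lets the solver converge quickly; a sketch, not part of the original runs:

# Sketch: standardize features before LinearSVC so liblinear converges
# without an extreme iteration budget.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

svm_scaled = make_pipeline(StandardScaler(), LinearSVC(max_iter=10000))
# svm_result = cross_validation(svm_scaled, X_IES_BOLD_shuf, y_A, num_folds)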
#y_O
svm = LinearSVC(max_iter=90000)
#perform cross validation with model


svm_result = cross_validation(svm, X_IES_BOLD_shuf, y_O, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")
svm_result
/usr/local/lib/python3.10/dist-packages/sklearn/svm/_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
(the same ConvergenceWarning was raised in every one of the 10 folds; repeats omitted)
The main results are 
 Mean Validation F1: 0.4716666666666667 
 Mean Validation Accuracy 50.22222222222222
{'Training Accuracy scores': array([0.67045455, 0.68181818, 0.67045455, 0.64772727, 0.70454545,
        0.70454545, 0.70454545, 0.61363636, 0.68539326, 0.66292135]),
 'Mean Training Accuracy': 67.46041879468845,
 'Training Precision scores': array([0.67391304, 0.70731707, 0.67391304, 0.66666667, 0.74358974,
        0.72093023, 0.74358974, 0.62222222, 0.71794872, 0.69230769]),
 'Mean Training Precision': 0.696239817901018,
 'Training Recall scores': array([0.68888889, 0.64444444, 0.68888889, 0.62222222, 0.64444444,
        0.68888889, 0.64444444, 0.62222222, 0.62222222, 0.6       ]),
 'Mean Training Recall': 0.6466666666666667,
 'Training F1 scores': array([0.68131868, 0.6744186 , 0.68131868, 0.64367816, 0.69047619,
        0.70454545, 0.69047619, 0.62222222, 0.66666667, 0.64285714]),
 'Mean Training F1 Score': 0.6697977995451934,
 'Validation Accuracy scores': array([0.6       , 0.6       , 0.4       , 0.4       , 0.2       ,
        0.5       , 0.3       , 0.8       , 0.55555556, 0.66666667]),
 'Mean Validation Accuracy': 50.22222222222222,
 'Validation Precision scores': array([0.57142857, 0.57142857, 0.33333333, 0.4       , 0.        ,
        0.5       , 0.25      , 0.8       , 0.6       , 0.75      ]),
 'Mean Validation Precision': 0.4776190476190476,
 'Validation Recall scores': array([0.8, 0.8, 0.2, 0.4, 0. , 0.4, 0.2, 0.8, 0.6, 0.6]),
 'Mean Validation Recall': 0.48,
 'Validation F1 scores': array([0.66666667, 0.66666667, 0.25      , 0.4       , 0.        ,
        0.44444444, 0.22222222, 0.8       , 0.6       , 0.66666667]),
 'Mean Validation F1 Score': 0.4716666666666667}
#y_C
svm = LinearSVC(max_iter=90000)
#perform cross validation with model


svm_result = cross_validation(svm, X_IES_BOLD_shuf, y_C, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")
svm_result
/usr/local/lib/python3.10/dist-packages/sklearn/svm/_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
(the same ConvergenceWarning was raised in every one of the 10 folds; repeats omitted)
The main results are 
 Mean Validation F1: 0.5663869463869464 
 Mean Validation Accuracy 51.77777777777778
{'Training Accuracy scores': array([0.68181818, 0.70454545, 0.69318182, 0.70454545, 0.70454545,
        0.63636364, 0.64772727, 0.72727273, 0.70786517, 0.76404494]),
 'Mean Training Accuracy': 69.71910112359551,
 'Training Precision scores': array([0.6875    , 0.7173913 , 0.7       , 0.71428571, 0.71428571,
        0.65957447, 0.65384615, 0.70909091, 0.72340426, 0.77083333]),
 'Mean Training Precision': 0.7050211852593906,
 'Training Recall scores': array([0.7173913 , 0.7173913 , 0.74468085, 0.74468085, 0.74468085,
        0.65957447, 0.72340426, 0.82978723, 0.72340426, 0.78723404]),
 'Mean Training Recall': 0.7392229417206291,
 'Training F1 scores': array([0.70212766, 0.7173913 , 0.72164948, 0.72916667, 0.72916667,
        0.65957447, 0.68686869, 0.76470588, 0.72340426, 0.77894737]),
 'Mean Training F1 Score': 0.7213002442838645,
 'Validation Accuracy scores': array([0.9       , 0.3       , 0.5       , 0.8       , 0.4       ,
        0.5       , 0.4       , 0.6       , 0.55555556, 0.22222222]),
 'Mean Validation Accuracy': 51.77777777777778,
 'Validation Precision scores': array([1.        , 0.4       , 0.5       , 0.8       , 0.4       ,
        0.5       , 0.4       , 0.6       , 0.57142857, 0.33333333]),
 'Mean Validation Precision': 0.5504761904761903,
 'Validation Recall scores': array([0.83333333, 0.33333333, 0.6       , 0.8       , 0.4       ,
        0.8       , 0.4       , 0.6       , 0.8       , 0.4       ]),
 'Mean Validation Recall': 0.5966666666666667,
 'Validation F1 scores': array([0.90909091, 0.36363636, 0.54545455, 0.8       , 0.4       ,
        0.61538462, 0.4       , 0.6       , 0.66666667, 0.36363636]),
 'Mean Validation F1 Score': 0.5663869463869464}
#y_N
svm = LinearSVC(max_iter=90000)
#perform cross validation with model


svm_result = cross_validation(svm, X_IES_BOLD_shuf, y_N, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")
svm_result
/usr/local/lib/python3.10/dist-packages/sklearn/svm/_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
(the same ConvergenceWarning was raised in every one of the 10 folds; repeats omitted)
The main results are 
 Mean Validation F1: 0.5371212121212121 
 Mean Validation Accuracy 53.0
{'Training Accuracy scores': array([0.71590909, 0.67045455, 0.71590909, 0.75      , 0.75      ,
        0.70454545, 0.65909091, 0.70454545, 0.71910112, 0.70786517]),
 'Mean Training Accuracy': 70.97420837589377,
 'Training Precision scores': array([0.73913043, 0.6875    , 0.72916667, 0.74509804, 0.76595745,
        0.73333333, 0.66666667, 0.72727273, 0.73913043, 0.72340426]),
 'Mean Training Precision': 0.7256660004847958,
 'Training Recall scores': array([0.72340426, 0.70212766, 0.74468085, 0.80851064, 0.76595745,
        0.70212766, 0.69565217, 0.69565217, 0.72340426, 0.72340426]),
 'Mean Training Recall': 0.7284921369102684,
 'Training F1 scores': array([0.7311828 , 0.69473684, 0.73684211, 0.7755102 , 0.76595745,
        0.7173913 , 0.68085106, 0.71111111, 0.7311828 , 0.72340426]),
 'Mean Training F1 Score': 0.7268169924264287,
 'Validation Accuracy scores': array([0.4       , 0.7       , 0.5       , 0.5       , 0.5       ,
        0.6       , 0.7       , 0.4       , 0.66666667, 0.33333333]),
 'Mean Validation Accuracy': 53.0,
 'Validation Precision scores': array([0.42857143, 0.66666667, 0.5       , 0.5       , 0.5       ,
        0.57142857, 0.8       , 0.5       , 0.75      , 0.33333333]),
 'Mean Validation Precision': 0.5549999999999999,
 'Validation Recall scores': array([0.6       , 0.8       , 0.4       , 0.4       , 0.4       ,
        0.8       , 0.66666667, 0.5       , 0.6       , 0.2       ]),
 'Mean Validation Recall': 0.5366666666666666,
 'Validation F1 scores': array([0.5       , 0.72727273, 0.44444444, 0.44444444, 0.44444444,
        0.66666667, 0.72727273, 0.5       , 0.66666667, 0.25      ]),
 'Mean Validation F1 Score': 0.5371212121212121}
#y_E
svm = LinearSVC(max_iter=90000)
#perform cross validation with model


svm_result = cross_validation(svm, X_IES_BOLD_shuf, y_E, num_folds)

print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")
svm_result
/usr/local/lib/python3.10/dist-packages/sklearn/svm/_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
(the same ConvergenceWarning was raised in every one of the 10 folds; repeats omitted)
The main results are 
 Mean Validation F1: 0.5428421578421577 
 Mean Validation Accuracy 51.33333333333334
{'Training Accuracy scores': array([0.65909091, 0.67045455, 0.69318182, 0.69318182, 0.70454545,
        0.69318182, 0.68181818, 0.76136364, 0.68539326, 0.66292135]),
 'Mean Training Accuracy': 69.05132788559754,
 'Training Precision scores': array([0.65957447, 0.68085106, 0.69387755, 0.67924528, 0.69230769,
        0.68627451, 0.68      , 0.75510204, 0.68      , 0.66666667]),
 'Mean Training Precision': 0.6873899275548776,
 'Training Recall scores': array([0.68888889, 0.69565217, 0.73913043, 0.7826087 , 0.7826087 ,
        0.76086957, 0.73913043, 0.80434783, 0.73913043, 0.69565217]),
 'Mean Training Recall': 0.7428019323671498,
 'Training F1 scores': array([0.67391304, 0.68817204, 0.71578947, 0.72727273, 0.73469388,
        0.72164948, 0.70833333, 0.77894737, 0.70833333, 0.68085106]),
 'Mean Training F1 Score': 0.713795574845056,
 'Validation Accuracy scores': array([0.5       , 0.6       , 0.4       , 0.3       , 0.3       ,
        0.8       , 0.4       , 0.5       , 0.66666667, 0.66666667]),
 'Mean Validation Accuracy': 51.33333333333334,
 'Validation Precision scores': array([0.66666667, 0.6       , 0.42857143, 0.25      , 0.375     ,
        1.        , 0.44444444, 0.5       , 0.75      , 0.75      ]),
 'Mean Validation Precision': 0.576468253968254,
 'Validation Recall scores': array([0.33333333, 0.6       , 0.6       , 0.2       , 0.6       ,
        0.6       , 0.8       , 0.6       , 0.6       , 0.6       ]),
 'Mean Validation Recall': 0.5533333333333332,
 'Validation F1 scores': array([0.44444444, 0.6       , 0.5       , 0.22222222, 0.46153846,
        0.75      , 0.57142857, 0.54545455, 0.66666667, 0.66666667]),
 'Mean Validation F1 Score': 0.5428421578421577}

Logistic Regression#


1.1 IES -> personality#

from sklearn.linear_model import LogisticRegression
#define the model
logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
#perform cross validation with model

#y_A
logis_result = cross_validation(logis_reg_model, X_IES, y_A, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")
logis_result
The main results are 
 Mean Validation F1: 0.6001554001554001 
 Mean Validation Accuracy 56.33333333333332
{'Training Accuracy scores': array([0.64772727, 0.67045455, 0.70454545, 0.64772727, 0.67045455,
        0.69318182, 0.68181818, 0.65909091, 0.60674157, 0.64044944]),
 'Mean Training Accuracy': 66.22191011235955,
 'Training Precision scores': array([0.64583333, 0.65384615, 0.70212766, 0.65217391, 0.68181818,
        0.6875    , 0.66666667, 0.66666667, 0.60416667, 0.63829787]),
 'Mean Training Precision': 0.6599097113956041,
 'Training Recall scores': array([0.68888889, 0.75555556, 0.73333333, 0.66666667, 0.66666667,
        0.73333333, 0.75555556, 0.66666667, 0.64444444, 0.66666667]),
 'Mean Training Recall': 0.6977777777777778,
 'Training F1 scores': array([0.66666667, 0.70103093, 0.7173913 , 0.65934066, 0.6741573 ,
        0.70967742, 0.70833333, 0.66666667, 0.62365591, 0.65217391]),
 'Mean Training F1 Score': 0.6779094107937802,
 'Validation Accuracy scores': array([0.6       , 0.5       , 0.3       , 0.5       , 0.7       ,
        0.5       , 0.6       , 0.6       , 0.77777778, 0.55555556]),
 'Mean Validation Accuracy': 56.33333333333332,
 'Validation Precision scores': array([0.6       , 0.5       , 0.33333333, 0.5       , 0.625     ,
        0.5       , 0.57142857, 0.57142857, 0.8       , 0.6       ]),
 'Mean Validation Precision': 0.5601190476190475,
 'Validation Recall scores': array([0.6, 0.4, 0.4, 0.6, 1. , 0.6, 0.8, 0.8, 0.8, 0.6]),
 'Mean Validation Recall': 0.6599999999999999,
 'Validation F1 scores': array([0.6       , 0.44444444, 0.36363636, 0.54545455, 0.76923077,
        0.54545455, 0.66666667, 0.66666667, 0.8       , 0.6       ]),
 'Mean Validation F1 Score': 0.6001554001554001}
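These logistic regression cells all use the default regularization strength (C=1.0) with the L2 penalty. If one wanted to tune it, nesting a grid search inside the cross-validation would be the standard approach; a sketch with hypothetical C values, not used in the runs below:

# Hypothetical tuning sketch: GridSearchCV picks C on each training fold.
from sklearn.model_selection import GridSearchCV

param_grid = {'C': [0.01, 0.1, 1.0, 10.0]}
logis_cv = GridSearchCV(LogisticRegression(penalty='l2', max_iter=10000),
                        param_grid, scoring='f1', cv=5)
# logis_result = cross_validation(logis_cv, X_IES, y_A, num_folds)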
#y_O

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
#perform cross validation with model


logis_result = cross_validation(logis_reg_model, X_IES, y_O, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")
logis_result
The main results are 
 Mean Validation F1: 0.5536285936285936 
 Mean Validation Accuracy 52.33333333333333
{'Training Accuracy scores': array([0.57954545, 0.59090909, 0.59090909, 0.625     , 0.61363636,
        0.56818182, 0.60227273, 0.55681818, 0.56179775, 0.5505618 ]),
 'Mean Training Accuracy': 58.396322778345244,
 'Training Precision scores': array([0.58695652, 0.58823529, 0.58823529, 0.61538462, 0.62790698,
        0.58139535, 0.61363636, 0.55769231, 0.56521739, 0.55102041]),
 'Mean Training Precision': 0.5875680521736719,
 'Training Recall scores': array([0.6       , 0.66666667, 0.66666667, 0.71111111, 0.6       ,
        0.55555556, 0.6       , 0.64444444, 0.57777778, 0.6       ]),
 'Mean Training Recall': 0.6222222222222221,
 'Training F1 scores': array([0.59340659, 0.625     , 0.625     , 0.65979381, 0.61363636,
        0.56818182, 0.60674157, 0.59793814, 0.57142857, 0.57446809]),
 'Mean Training F1 Score': 0.6035594963556323,
 'Validation Accuracy scores': array([0.3       , 0.6       , 0.8       , 0.5       , 0.3       ,
        0.4       , 0.4       , 0.6       , 0.77777778, 0.55555556]),
 'Mean Validation Accuracy': 52.33333333333333,
 'Validation Precision scores': array([0.375     , 0.66666667, 0.8       , 0.5       , 0.33333333,
        0.42857143, 0.4       , 0.6       , 0.8       , 0.57142857]),
 'Mean Validation Precision': 0.5475,
 'Validation Recall scores': array([0.6, 0.4, 0.8, 0.4, 0.4, 0.6, 0.4, 0.6, 0.8, 0.8]),
 'Mean Validation Recall': 0.58,
 'Validation F1 scores': array([0.46153846, 0.5       , 0.8       , 0.44444444, 0.36363636,
        0.5       , 0.4       , 0.6       , 0.8       , 0.66666667]),
 'Mean Validation F1 Score': 0.5536285936285936}
#y_C

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
#perform cross validation with model


logis_result = cross_validation(logis_reg_model, X_IES, y_C, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")
logis_result
The main results are 
 Mean Validation F1: 0.5348140748140748 
 Mean Validation Accuracy 43.88888888888889
{'Training Accuracy scores': array([0.57954545, 0.57954545, 0.61363636, 0.52272727, 0.54545455,
        0.52272727, 0.59090909, 0.59090909, 0.58426966, 0.62921348]),
 'Mean Training Accuracy': 57.5893769152196,
 'Training Precision scores': array([0.57377049, 0.58181818, 0.60655738, 0.53731343, 0.55737705,
        0.54385965, 0.60377358, 0.59322034, 0.58064516, 0.62068966]),
 'Mean Training Precision': 0.5799024922161045,
 'Training Recall scores': array([0.76086957, 0.69565217, 0.78723404, 0.76595745, 0.72340426,
        0.65957447, 0.68085106, 0.74468085, 0.76595745, 0.76595745]),
 'Mean Training Recall': 0.735013876040703,
 'Training F1 scores': array([0.65420561, 0.63366337, 0.68518519, 0.63157895, 0.62962963,
        0.59615385, 0.64      , 0.66037736, 0.66055046, 0.68571429]),
 'Mean Training F1 Score': 0.6477058685070799,
 'Validation Accuracy scores': array([0.6       , 0.2       , 0.4       , 0.6       , 0.4       ,
        0.5       , 0.4       , 0.4       , 0.44444444, 0.44444444]),
 'Mean Validation Accuracy': 43.88888888888889,
 'Validation Precision scores': array([0.75      , 0.33333333, 0.42857143, 0.57142857, 0.44444444,
        0.5       , 0.42857143, 0.44444444, 0.5       , 0.5       ]),
 'Mean Validation Precision': 0.490079365079365,
 'Validation Recall scores': array([0.5       , 0.33333333, 0.6       , 0.8       , 0.8       ,
        0.8       , 0.6       , 0.8       , 0.4       , 0.6       ]),
 'Mean Validation Recall': 0.6233333333333333,
 'Validation F1 scores': array([0.6       , 0.33333333, 0.5       , 0.66666667, 0.57142857,
        0.61538462, 0.5       , 0.57142857, 0.44444444, 0.54545455]),
 'Mean Validation F1 Score': 0.5348140748140748}
#y_N

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
#perform cross validation with model


logis_result = cross_validation(logis_reg_model, X_IES, y_N, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")
logis_result
The main results are 
 Mean Validation F1: 0.5056277056277056 
 Mean Validation Accuracy 48.111111111111114
{'Training Accuracy scores': array([0.60227273, 0.60227273, 0.57954545, 0.61363636, 0.59090909,
        0.60227273, 0.61363636, 0.61363636, 0.60674157, 0.58426966]),
 'Mean Training Accuracy': 60.09193054136874,
 'Training Precision scores': array([0.625     , 0.62      , 0.59615385, 0.63265306, 0.61702128,
        0.63043478, 0.63043478, 0.63043478, 0.625     , 0.59615385]),
 'Mean Training Precision': 0.6203286377954014,
 'Training Recall scores': array([0.63829787, 0.65957447, 0.65957447, 0.65957447, 0.61702128,
        0.61702128, 0.63043478, 0.63043478, 0.63829787, 0.65957447]),
 'Mean Training Recall': 0.6409805735430156,
 'Training F1 scores': array([0.63157895, 0.63917526, 0.62626263, 0.64583333, 0.61702128,
        0.62365591, 0.63043478, 0.63043478, 0.63157895, 0.62626263]),
 'Mean Training F1 Score': 0.6302238494119017,
 'Validation Accuracy scores': array([0.5       , 0.4       , 0.3       , 0.5       , 0.6       ,
        0.4       , 0.6       , 0.4       , 0.44444444, 0.66666667]),
 'Mean Validation Accuracy': 48.111111111111114,
 'Validation Precision scores': array([0.5       , 0.42857143, 0.25      , 0.5       , 0.66666667,
        0.44444444, 1.        , 0.5       , 0.5       , 0.66666667]),
 'Mean Validation Precision': 0.5456349206349207,
 'Validation Recall scores': array([0.6       , 0.6       , 0.2       , 0.6       , 0.4       ,
        0.8       , 0.33333333, 0.5       , 0.4       , 0.8       ]),
 'Mean Validation Recall': 0.5233333333333333,
 'Validation F1 scores': array([0.54545455, 0.5       , 0.22222222, 0.54545455, 0.5       ,
        0.57142857, 0.5       , 0.5       , 0.44444444, 0.72727273]),
 'Mean Validation F1 Score': 0.5056277056277056}
#y_E

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
#perform cross validation with model


logis_result = cross_validation(logis_reg_model, X_IES, y_E, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")
logis_result
The main results are 
 Mean Validation F1: 0.5864657564657565 
 Mean Validation Accuracy 52.22222222222223
{'Training Accuracy scores': array([0.60227273, 0.57954545, 0.625     , 0.60227273, 0.63636364,
        0.59090909, 0.55681818, 0.625     , 0.59550562, 0.62921348]),
 'Mean Training Accuracy': 60.42900919305414,
 'Training Precision scores': array([0.59259259, 0.57627119, 0.61016949, 0.59322034, 0.625     ,
        0.59615385, 0.55737705, 0.62264151, 0.58928571, 0.61403509]),
 'Mean Training Precision': 0.5976746816314893,
 'Training Recall scores': array([0.71111111, 0.73913043, 0.7826087 , 0.76086957, 0.76086957,
        0.67391304, 0.73913043, 0.7173913 , 0.7173913 , 0.76086957]),
 'Mean Training Recall': 0.736328502415459,
 'Training F1 scores': array([0.64646465, 0.64761905, 0.68571429, 0.66666667, 0.68627451,
        0.63265306, 0.63551402, 0.66666667, 0.64705882, 0.67961165]),
 'Mean Training F1 Score': 0.6594243376866162,
 'Validation Accuracy scores': array([0.7       , 0.6       , 0.3       , 0.3       , 0.4       ,
        0.6       , 0.6       , 0.5       , 0.77777778, 0.44444444]),
 'Mean Validation Accuracy': 52.22222222222223,
 'Validation Precision scores': array([0.8       , 0.57142857, 0.375     , 0.33333333, 0.44444444,
        0.66666667, 0.55555556, 0.5       , 0.8       , 0.5       ]),
 'Mean Validation Precision': 0.5546428571428572,
 'Validation Recall scores': array([0.66666667, 0.8       , 0.6       , 0.4       , 0.8       ,
        0.4       , 1.        , 0.8       , 0.8       , 0.4       ]),
 'Mean Validation Recall': 0.6666666666666667,
 'Validation F1 scores': array([0.72727273, 0.66666667, 0.46153846, 0.36363636, 0.57142857,
        0.5       , 0.71428571, 0.61538462, 0.8       , 0.44444444]),
 'Mean Validation F1 Score': 0.5864657564657565}
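These per-trait cells differ only in the target variable, so the whole comparison can be written as a loop. A minimal sketch (not run here), assuming the cross_validation helper and the y_A through y_E targets defined earlier:

targets = {'y_A': y_A, 'y_O': y_O, 'y_C': y_C, 'y_N': y_N, 'y_E': y_E}
for name, y in targets.items():
    # a fresh model per target, matching the cells above
    model = LogisticRegression(penalty='l2', max_iter=10000)
    result = cross_validation(model, X_IES, y, num_folds)
    print(f"{name}: Mean Validation F1 = {result['Mean Validation F1 Score']:.3f}, "
          f"Mean Validation Accuracy = {result['Mean Validation Accuracy']:.1f}")

The same loop works for X_combined, X_IES_random, and X_IES_BOLD_shuf below, and avoids the copy-paste slips that come with duplicating cells.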

1.2 IES + relevant BOLD -> personality#

#y_A

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
# perform cross-validation with the model
logis_result = cross_validation(logis_reg_model, X_combined, y_A, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")

logis_result
The main results are 
 Mean Validation F1: 0.5623232323232324 
 Mean Validation Accuracy 54.33333333333334
{'Training Accuracy scores': array([0.72727273, 0.76136364, 0.76136364, 0.81818182, 0.72727273,
        0.70454545, 0.71590909, 0.77272727, 0.73033708, 0.76404494]),
 'Mean Training Accuracy': 74.83018386108274,
 'Training Precision scores': array([0.73333333, 0.76086957, 0.75      , 0.8372093 , 0.75609756,
        0.72093023, 0.73809524, 0.77777778, 0.73333333, 0.78571429]),
 'Mean Training Precision': 0.759336062933069,
 'Training Recall scores': array([0.73333333, 0.77777778, 0.8       , 0.8       , 0.68888889,
        0.68888889, 0.68888889, 0.77777778, 0.73333333, 0.73333333]),
 'Mean Training Recall': 0.7422222222222222,
 'Training F1 scores': array([0.73333333, 0.76923077, 0.77419355, 0.81818182, 0.72093023,
        0.70454545, 0.71264368, 0.77777778, 0.73333333, 0.75862069]),
 'Mean Training F1 Score': 0.7502790635163815,
 'Validation Accuracy scores': array([0.9       , 0.3       , 0.5       , 0.5       , 0.7       ,
        0.4       , 0.4       , 0.4       , 0.66666667, 0.66666667]),
 'Mean Validation Accuracy': 54.33333333333334,
 'Validation Precision scores': array([1.        , 0.25      , 0.5       , 0.5       , 0.66666667,
        0.4       , 0.4       , 0.42857143, 0.66666667, 0.75      ]),
 'Mean Validation Precision': 0.5561904761904762,
 'Validation Recall scores': array([0.8, 0.2, 0.6, 0.6, 0.8, 0.4, 0.4, 0.6, 0.8, 0.6]),
 'Mean Validation Recall': 0.58,
 'Validation F1 scores': array([0.88888889, 0.22222222, 0.54545455, 0.54545455, 0.72727273,
        0.4       , 0.4       , 0.5       , 0.72727273, 0.66666667]),
 'Mean Validation F1 Score': 0.5623232323232324}
#y_O

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
# perform cross-validation with the model
logis_result = cross_validation(logis_reg_model, X_combined, y_O, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")

logis_result
The main results are 
 Mean Validation F1: 0.4307575757575758 
 Mean Validation Accuracy 47.0
{'Training Accuracy scores': array([0.63636364, 0.68181818, 0.69318182, 0.67045455, 0.71590909,
        0.71590909, 0.73863636, 0.65909091, 0.66292135, 0.66292135]),
 'Mean Training Accuracy': 68.3720633299285,
 'Training Precision scores': array([0.63265306, 0.70731707, 0.69565217, 0.67391304, 0.73809524,
        0.72727273, 0.76190476, 0.65957447, 0.6744186 , 0.6744186 ]),
 'Mean Training Precision': 0.6945219756446686,
 'Training Recall scores': array([0.68888889, 0.64444444, 0.71111111, 0.68888889, 0.68888889,
        0.71111111, 0.71111111, 0.68888889, 0.64444444, 0.64444444]),
 'Mean Training Recall': 0.6822222222222223,
 'Training F1 scores': array([0.65957447, 0.6744186 , 0.7032967 , 0.68131868, 0.71264368,
        0.71910112, 0.73563218, 0.67391304, 0.65909091, 0.65909091]),
 'Mean Training F1 Score': 0.6878080304676203,
 'Validation Accuracy scores': array([0.5       , 0.6       , 0.2       , 0.4       , 0.2       ,
        0.6       , 0.4       , 0.8       , 0.44444444, 0.55555556]),
 'Mean Validation Accuracy': 47.0,
 'Validation Precision scores': array([0.5       , 0.57142857, 0.        , 0.4       , 0.        ,
        0.66666667, 0.33333333, 0.8       , 0.5       , 0.6       ]),
 'Mean Validation Precision': 0.4371428571428571,
 'Validation Recall scores': array([0.6, 0.8, 0. , 0.4, 0. , 0.4, 0.2, 0.8, 0.6, 0.6]),
 'Mean Validation Recall': 0.43999999999999995,
 'Validation F1 scores': array([0.54545455, 0.66666667, 0.        , 0.4       , 0.        ,
        0.5       , 0.25      , 0.8       , 0.54545455, 0.6       ]),
 'Mean Validation F1 Score': 0.4307575757575758}
#y_C

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
# perform cross-validation with the model
logis_result = cross_validation(logis_reg_model, X_combined, y_C, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")

logis_result
The main results are 
 Mean Validation F1: 0.5800233100233101 
 Mean Validation Accuracy 52.888888888888886
{'Training Accuracy scores': array([0.65909091, 0.69318182, 0.70454545, 0.70454545, 0.70454545,
        0.64772727, 0.68181818, 0.71590909, 0.70786517, 0.74157303]),
 'Mean Training Accuracy': 69.60801838610827,
 'Training Precision scores': array([0.66666667, 0.71111111, 0.71428571, 0.71428571, 0.71428571,
        0.66666667, 0.68627451, 0.69642857, 0.72340426, 0.75      ]),
 'Mean Training Precision': 0.7043408923853229,
 'Training Recall scores': array([0.69565217, 0.69565217, 0.74468085, 0.74468085, 0.74468085,
        0.68085106, 0.74468085, 0.82978723, 0.72340426, 0.76595745]),
 'Mean Training Recall': 0.7370027752081406,
 'Training F1 scores': array([0.68085106, 0.7032967 , 0.72916667, 0.72916667, 0.72916667,
        0.67368421, 0.71428571, 0.75728155, 0.72340426, 0.75789474]),
 'Mean Training F1 Score': 0.7198198237497834,
 'Validation Accuracy scores': array([0.9       , 0.3       , 0.5       , 0.8       , 0.4       ,
        0.5       , 0.4       , 0.6       , 0.55555556, 0.33333333]),
 'Mean Validation Accuracy': 52.888888888888886,
 'Validation Precision scores': array([1.        , 0.4       , 0.5       , 0.8       , 0.4       ,
        0.5       , 0.4       , 0.6       , 0.57142857, 0.42857143]),
 'Mean Validation Precision': 0.5599999999999999,
 'Validation Recall scores': array([0.83333333, 0.33333333, 0.6       , 0.8       , 0.4       ,
        0.8       , 0.4       , 0.6       , 0.8       , 0.6       ]),
 'Mean Validation Recall': 0.6166666666666666,
 'Validation F1 scores': array([0.90909091, 0.36363636, 0.54545455, 0.8       , 0.4       ,
        0.61538462, 0.4       , 0.6       , 0.66666667, 0.5       ]),
 'Mean Validation F1 Score': 0.5800233100233101}
#y_N

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
# perform cross-validation with the model
logis_result = cross_validation(logis_reg_model, X_combined, y_N, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")

logis_result
The main results are 
 Mean Validation F1: 0.5598484848484848 
 Mean Validation Accuracy 55.00000000000001
{'Training Accuracy scores': array([0.69318182, 0.67045455, 0.70454545, 0.76136364, 0.73863636,
        0.70454545, 0.67045455, 0.73863636, 0.68539326, 0.73033708]),
 'Mean Training Accuracy': 70.97548518896834,
 'Training Precision scores': array([0.7       , 0.6875    , 0.71428571, 0.76      , 0.74      ,
        0.71428571, 0.68085106, 0.74468085, 0.71111111, 0.74468085]),
 'Mean Training Precision': 0.7197395305639986,
 'Training Recall scores': array([0.74468085, 0.70212766, 0.74468085, 0.80851064, 0.78723404,
        0.74468085, 0.69565217, 0.76086957, 0.68085106, 0.74468085]),
 'Mean Training Recall': 0.7413968547641073,
 'Training F1 scores': array([0.72164948, 0.69473684, 0.72916667, 0.78350515, 0.7628866 ,
        0.72916667, 0.68817204, 0.75268817, 0.69565217, 0.74468085]),
 'Mean Training F1 Score': 0.7302304652582635,
 'Validation Accuracy scores': array([0.4       , 0.7       , 0.5       , 0.5       , 0.5       ,
        0.5       , 0.9       , 0.5       , 0.66666667, 0.33333333]),
 'Mean Validation Accuracy': 55.00000000000001,
 'Validation Precision scores': array([0.42857143, 0.66666667, 0.5       , 0.5       , 0.5       ,
        0.5       , 1.        , 0.55555556, 0.75      , 0.33333333]),
 'Mean Validation Precision': 0.5734126984126984,
 'Validation Recall scores': array([0.6       , 0.8       , 0.4       , 0.4       , 0.4       ,
        0.6       , 0.83333333, 0.83333333, 0.6       , 0.2       ]),
 'Mean Validation Recall': 0.5666666666666667,
 'Validation F1 scores': array([0.5       , 0.72727273, 0.44444444, 0.44444444, 0.44444444,
        0.54545455, 0.90909091, 0.66666667, 0.66666667, 0.25      ]),
 'Mean Validation F1 Score': 0.5598484848484848}
#y_E

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
# perform cross-validation with the model
logis_result = cross_validation(logis_reg_model, X_combined, y_E, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")

logis_result
The main results are 
 Mean Validation F1: 0.5271855921855921 
 Mean Validation Accuracy 50.22222222222222
{'Training Accuracy scores': array([0.64772727, 0.65909091, 0.69318182, 0.70454545, 0.67045455,
        0.67045455, 0.68181818, 0.76136364, 0.66292135, 0.6741573 ]),
 'Mean Training Accuracy': 68.25715015321757,
 'Training Precision scores': array([0.65217391, 0.67391304, 0.68627451, 0.69230769, 0.68085106,
        0.67346939, 0.68      , 0.76595745, 0.66      , 0.66666667]),
 'Mean Training Precision': 0.6831613723693419,
 'Training Recall scores': array([0.66666667, 0.67391304, 0.76086957, 0.7826087 , 0.69565217,
        0.7173913 , 0.73913043, 0.7826087 , 0.7173913 , 0.73913043]),
 'Mean Training Recall': 0.727536231884058,
 'Training F1 scores': array([0.65934066, 0.67391304, 0.72164948, 0.73469388, 0.68817204,
        0.69473684, 0.70833333, 0.77419355, 0.6875    , 0.70103093]),
 'Mean Training F1 Score': 0.704356375957752,
 'Validation Accuracy scores': array([0.4       , 0.6       , 0.4       , 0.3       , 0.3       ,
        0.8       , 0.4       , 0.6       , 0.66666667, 0.55555556]),
 'Mean Validation Accuracy': 50.22222222222222,
 'Validation Precision scores': array([0.5       , 0.6       , 0.42857143, 0.25      , 0.375     ,
        1.        , 0.44444444, 0.6       , 0.75      , 0.66666667]),
 'Mean Validation Precision': 0.561468253968254,
 'Validation Recall scores': array([0.33333333, 0.6       , 0.6       , 0.2       , 0.6       ,
        0.6       , 0.8       , 0.6       , 0.6       , 0.4       ]),
 'Mean Validation Recall': 0.5333333333333333,
 'Validation F1 scores': array([0.4       , 0.6       , 0.5       , 0.22222222, 0.46153846,
        0.75      , 0.57142857, 0.6       , 0.66666667, 0.5       ]),
 'Mean Validation F1 Score': 0.5271855921855921}

2.1 shuffled IES -> personality#

#y_A

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
# perform cross-validation with the model
logis_result = cross_validation(logis_reg_model, X_IES_random, y_A, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")

logis_result
The main results are 
 Mean Validation F1: 0.5549728049728049 
 Mean Validation Accuracy 52.77777777777778
{'Training Accuracy scores': array([0.56818182, 0.57954545, 0.61363636, 0.55681818, 0.56818182,
        0.57954545, 0.57954545, 0.55681818, 0.59550562, 0.57303371]),
 'Mean Training Accuracy': 57.708120531154236,
 'Training Precision scores': array([0.56603774, 0.57692308, 0.61702128, 0.55172414, 0.57142857,
        0.57142857, 0.58      , 0.55769231, 0.59574468, 0.56862745]),
 'Mean Training Precision': 0.5756627809679818,
 'Training Recall scores': array([0.66666667, 0.66666667, 0.64444444, 0.71111111, 0.62222222,
        0.71111111, 0.64444444, 0.64444444, 0.62222222, 0.64444444]),
 'Mean Training Recall': 0.6577777777777778,
 'Training F1 scores': array([0.6122449 , 0.6185567 , 0.63043478, 0.62135922, 0.59574468,
        0.63366337, 0.61052632, 0.59793814, 0.60869565, 0.60416667]),
 'Mean Training F1 Score': 0.6133330431047426,
 'Validation Accuracy scores': array([0.5       , 0.6       , 0.5       , 0.6       , 0.6       ,
        0.6       , 0.5       , 0.6       , 0.44444444, 0.33333333]),
 'Mean Validation Accuracy': 52.77777777777778,
 'Validation Precision scores': array([0.5       , 0.57142857, 0.5       , 0.6       , 0.57142857,
        0.66666667, 0.5       , 0.57142857, 0.5       , 0.4       ]),
 'Mean Validation Precision': 0.5380952380952382,
 'Validation Recall scores': array([0.4, 0.8, 0.4, 0.6, 0.8, 0.4, 0.8, 0.8, 0.6, 0.4]),
 'Mean Validation Recall': 0.6,
 'Validation F1 scores': array([0.44444444, 0.66666667, 0.44444444, 0.6       , 0.66666667,
        0.5       , 0.61538462, 0.66666667, 0.54545455, 0.4       ]),
 'Mean Validation F1 Score': 0.5549728049728049}
#y_O

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
# perform cross-validation with the model
logis_result = cross_validation(logis_reg_model, X_IES_random, y_O, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")

logis_result
The main results are 
 Mean Validation F1: 0.5517782217782219 
 Mean Validation Accuracy 50.0
{'Training Accuracy scores': array([0.64772727, 0.59090909, 0.56818182, 0.60227273, 0.59090909,
        0.61363636, 0.625     , 0.64772727, 0.5505618 , 0.58426966]),
 'Mean Training Accuracy': 60.211950970377934,
 'Training Precision scores': array([0.64      , 0.58490566, 0.56363636, 0.59259259, 0.58823529,
        0.61702128, 0.61538462, 0.63461538, 0.54901961, 0.58333333]),
 'Mean Training Precision': 0.5968744128496176,
 'Training Recall scores': array([0.71111111, 0.68888889, 0.68888889, 0.71111111, 0.66666667,
        0.64444444, 0.71111111, 0.73333333, 0.62222222, 0.62222222]),
 'Mean Training Recall': 0.6799999999999999,
 'Training F1 scores': array([0.67368421, 0.63265306, 0.62      , 0.64646465, 0.625     ,
        0.63043478, 0.65979381, 0.68041237, 0.58333333, 0.60215054]),
 'Mean Training F1 Score': 0.6353926757358901,
 'Validation Accuracy scores': array([0.5       , 0.6       , 0.5       , 0.3       , 0.6       ,
        0.4       , 0.8       , 0.3       , 0.55555556, 0.44444444]),
 'Mean Validation Accuracy': 50.0,
 'Validation Precision scores': array([0.5       , 0.6       , 0.5       , 0.33333333, 0.66666667,
        0.4       , 0.8       , 0.33333333, 0.55555556, 0.5       ]),
 'Mean Validation Precision': 0.518888888888889,
 'Validation Recall scores': array([0.8, 0.6, 0.6, 0.4, 0.4, 0.4, 0.8, 0.4, 1. , 0.8]),
 'Mean Validation Recall': 0.62,
 'Validation F1 scores': array([0.61538462, 0.6       , 0.54545455, 0.36363636, 0.5       ,
        0.4       , 0.8       , 0.36363636, 0.71428571, 0.61538462]),
 'Mean Validation F1 Score': 0.5517782217782219}
#y_C

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
# perform cross-validation with the model
logis_result = cross_validation(logis_reg_model, X_IES_random, y_C, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")

logis_result
The main results are 
 Mean Validation F1: 0.47019813519813525 
 Mean Validation Accuracy 42.8888888888889
{'Training Accuracy scores': array([0.59090909, 0.55681818, 0.55681818, 0.53409091, 0.48863636,
        0.59090909, 0.57954545, 0.54545455, 0.56179775, 0.56179775]),
 'Mean Training Accuracy': 55.66777323799796,
 'Training Precision scores': array([0.59259259, 0.57142857, 0.57407407, 0.55769231, 0.51785714,
        0.60377358, 0.59615385, 0.56140351, 0.57407407, 0.57692308]),
 'Mean Training Precision': 0.5725972779473276,
 'Training Recall scores': array([0.69565217, 0.60869565, 0.65957447, 0.61702128, 0.61702128,
        0.68085106, 0.65957447, 0.68085106, 0.65957447, 0.63829787]),
 'Mean Training Recall': 0.6517113783533764,
 'Training F1 scores': array([0.64      , 0.58947368, 0.61386139, 0.58585859, 0.5631068 ,
        0.64      , 0.62626263, 0.61538462, 0.61386139, 0.60606061]),
 'Mean Training F1 Score': 0.6093869686170692,
 'Validation Accuracy scores': array([0.2       , 0.5       , 0.5       , 0.5       , 0.5       ,
        0.3       , 0.5       , 0.4       , 0.33333333, 0.55555556]),
 'Mean Validation Accuracy': 42.8888888888889,
 'Validation Precision scores': array([0.25      , 0.57142857, 0.5       , 0.5       , 0.5       ,
        0.33333333, 0.5       , 0.33333333, 0.4       , 0.66666667]),
 'Mean Validation Precision': 0.4554761904761905,
 'Validation Recall scores': array([0.16666667, 0.66666667, 0.6       , 0.6       , 1.        ,
        0.4       , 0.8       , 0.2       , 0.4       , 0.4       ]),
 'Mean Validation Recall': 0.5233333333333334,
 'Validation F1 scores': array([0.2       , 0.61538462, 0.54545455, 0.54545455, 0.66666667,
        0.36363636, 0.61538462, 0.25      , 0.4       , 0.5       ]),
 'Mean Validation F1 Score': 0.47019813519813525}
#y_N

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
# perform cross-validation with the model
logis_result = cross_validation(logis_reg_model, X_IES_random, y_N, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")

logis_result
The main results are 
 Mean Validation F1: 0.5247585747585748 
 Mean Validation Accuracy 44.666666666666664
{'Training Accuracy scores': array([0.61363636, 0.54545455, 0.59090909, 0.55681818, 0.64772727,
        0.54545455, 0.56818182, 0.57954545, 0.70786517, 0.60674157]),
 'Mean Training Accuracy': 59.623340143003055,
 'Training Precision scores': array([0.61818182, 0.56140351, 0.59649123, 0.56896552, 0.62903226,
        0.55555556, 0.57142857, 0.57894737, 0.67213115, 0.59677419]),
 'Mean Training Precision': 0.5948911166824369,
 'Training Recall scores': array([0.72340426, 0.68085106, 0.72340426, 0.70212766, 0.82978723,
        0.74468085, 0.69565217, 0.7173913 , 0.87234043, 0.78723404]),
 'Mean Training Recall': 0.7476873265494912,
 'Training F1 scores': array([0.66666667, 0.61538462, 0.65384615, 0.62857143, 0.71559633,
        0.63636364, 0.62745098, 0.6407767 , 0.75925926, 0.67889908]),
 'Mean Training F1 Score': 0.662281485235708,
 'Validation Accuracy scores': array([0.4       , 0.6       , 0.3       , 0.5       , 0.5       ,
        0.4       , 0.7       , 0.4       , 0.11111111, 0.55555556]),
 'Mean Validation Accuracy': 44.666666666666664,
 'Validation Precision scores': array([0.44444444, 0.57142857, 0.33333333, 0.5       , 0.5       ,
        0.42857143, 0.71428571, 0.5       , 0.        , 0.55555556]),
 'Mean Validation Precision': 0.45476190476190476,
 'Validation Recall scores': array([0.8       , 0.8       , 0.4       , 0.6       , 0.6       ,
        0.6       , 0.83333333, 0.66666667, 0.        , 1.        ]),
 'Mean Validation Recall': 0.6300000000000001,
 'Validation F1 scores': array([0.57142857, 0.66666667, 0.36363636, 0.54545455, 0.54545455,
        0.5       , 0.76923077, 0.57142857, 0.        , 0.71428571]),
 'Mean Validation F1 Score': 0.5247585747585748}
#y_E

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
# perform cross-validation with the model
logis_result = cross_validation(logis_reg_model, X_IES_random, y_E, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")

logis_result
The main results are 
 Mean Validation F1: 0.5758008658008658 
 Mean Validation Accuracy 54.22222222222223
{'Training Accuracy scores': array([0.625     , 0.625     , 0.63636364, 0.57954545, 0.63636364,
        0.61363636, 0.67045455, 0.65909091, 0.59550562, 0.61797753]),
 'Mean Training Accuracy': 62.58937691521961,
 'Training Precision scores': array([0.63043478, 0.63265306, 0.64      , 0.58823529, 0.65217391,
        0.625     , 0.67346939, 0.67391304, 0.6       , 0.63043478]),
 'Mean Training Precision': 0.6346314264836368,
 'Training Recall scores': array([0.64444444, 0.67391304, 0.69565217, 0.65217391, 0.65217391,
        0.65217391, 0.7173913 , 0.67391304, 0.65217391, 0.63043478]),
 'Mean Training Recall': 0.6644444444444444,
 'Training F1 scores': array([0.63736264, 0.65263158, 0.66666667, 0.6185567 , 0.65217391,
        0.63829787, 0.69473684, 0.67391304, 0.625     , 0.63043478]),
 'Mean Training F1 Score': 0.6489774037583723,
 'Validation Accuracy scores': array([0.5       , 0.6       , 0.6       , 0.7       , 0.4       ,
        0.6       , 0.3       , 0.5       , 0.55555556, 0.66666667]),
 'Mean Validation Accuracy': 54.22222222222223,
 'Validation Precision scores': array([0.6       , 0.6       , 0.57142857, 0.66666667, 0.44444444,
        0.57142857, 0.33333333, 0.5       , 0.66666667, 1.        ]),
 'Mean Validation Precision': 0.5953968253968254,
 'Validation Recall scores': array([0.5, 0.6, 0.8, 0.8, 0.8, 0.8, 0.4, 0.6, 0.4, 0.4]),
 'Mean Validation Recall': 0.6100000000000001,
 'Validation F1 scores': array([0.54545455, 0.6       , 0.66666667, 0.72727273, 0.57142857,
        0.66666667, 0.36363636, 0.54545455, 0.5       , 0.57142857]),
 'Mean Validation F1 Score': 0.5758008658008658}

2.2 IES + shuffled BOLD -> personality#

#y_A

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
# perform cross-validation with the model
logis_result = cross_validation(logis_reg_model, X_IES_BOLD_shuf, y_A, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")

logis_result
The main results are 
 Mean Validation F1: 0.5623232323232324 
 Mean Validation Accuracy 54.33333333333334
{'Training Accuracy scores': array([0.72727273, 0.76136364, 0.76136364, 0.81818182, 0.72727273,
        0.70454545, 0.71590909, 0.77272727, 0.73033708, 0.76404494]),
 'Mean Training Accuracy': 74.83018386108274,
 'Training Precision scores': array([0.73333333, 0.76086957, 0.75      , 0.8372093 , 0.75609756,
        0.72093023, 0.73809524, 0.77777778, 0.73333333, 0.78571429]),
 'Mean Training Precision': 0.759336062933069,
 'Training Recall scores': array([0.73333333, 0.77777778, 0.8       , 0.8       , 0.68888889,
        0.68888889, 0.68888889, 0.77777778, 0.73333333, 0.73333333]),
 'Mean Training Recall': 0.7422222222222222,
 'Training F1 scores': array([0.73333333, 0.76923077, 0.77419355, 0.81818182, 0.72093023,
        0.70454545, 0.71264368, 0.77777778, 0.73333333, 0.75862069]),
 'Mean Training F1 Score': 0.7502790635163815,
 'Validation Accuracy scores': array([0.9       , 0.3       , 0.5       , 0.5       , 0.7       ,
        0.4       , 0.4       , 0.4       , 0.66666667, 0.66666667]),
 'Mean Validation Accuracy': 54.33333333333334,
 'Validation Precision scores': array([1.        , 0.25      , 0.5       , 0.5       , 0.66666667,
        0.4       , 0.4       , 0.42857143, 0.66666667, 0.75      ]),
 'Mean Validation Precision': 0.5561904761904762,
 'Validation Recall scores': array([0.8, 0.2, 0.6, 0.6, 0.8, 0.4, 0.4, 0.6, 0.8, 0.6]),
 'Mean Validation Recall': 0.58,
 'Validation F1 scores': array([0.88888889, 0.22222222, 0.54545455, 0.54545455, 0.72727273,
        0.4       , 0.4       , 0.5       , 0.72727273, 0.66666667]),
 'Mean Validation F1 Score': 0.5623232323232324}
#y_O

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
# perform cross-validation with the model
logis_result = cross_validation(logis_reg_model, X_IES_BOLD_shuf, y_O, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")

logis_result
The main results are 
 Mean Validation F1: 0.4307575757575758 
 Mean Validation Accuracy 47.0
{'Training Accuracy scores': array([0.63636364, 0.68181818, 0.69318182, 0.67045455, 0.71590909,
        0.71590909, 0.73863636, 0.65909091, 0.66292135, 0.66292135]),
 'Mean Training Accuracy': 68.3720633299285,
 'Training Precision scores': array([0.63265306, 0.70731707, 0.69565217, 0.67391304, 0.73809524,
        0.72727273, 0.76190476, 0.65957447, 0.6744186 , 0.6744186 ]),
 'Mean Training Precision': 0.6945219756446686,
 'Training Recall scores': array([0.68888889, 0.64444444, 0.71111111, 0.68888889, 0.68888889,
        0.71111111, 0.71111111, 0.68888889, 0.64444444, 0.64444444]),
 'Mean Training Recall': 0.6822222222222223,
 'Training F1 scores': array([0.65957447, 0.6744186 , 0.7032967 , 0.68131868, 0.71264368,
        0.71910112, 0.73563218, 0.67391304, 0.65909091, 0.65909091]),
 'Mean Training F1 Score': 0.6878080304676203,
 'Validation Accuracy scores': array([0.5       , 0.6       , 0.2       , 0.4       , 0.2       ,
        0.6       , 0.4       , 0.8       , 0.44444444, 0.55555556]),
 'Mean Validation Accuracy': 47.0,
 'Validation Precision scores': array([0.5       , 0.57142857, 0.        , 0.4       , 0.        ,
        0.66666667, 0.33333333, 0.8       , 0.5       , 0.6       ]),
 'Mean Validation Precision': 0.4371428571428571,
 'Validation Recall scores': array([0.6, 0.8, 0. , 0.4, 0. , 0.4, 0.2, 0.8, 0.6, 0.6]),
 'Mean Validation Recall': 0.43999999999999995,
 'Validation F1 scores': array([0.54545455, 0.66666667, 0.        , 0.4       , 0.        ,
        0.5       , 0.25      , 0.8       , 0.54545455, 0.6       ]),
 'Mean Validation F1 Score': 0.4307575757575758}
#y_C

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
# perform cross-validation with the model
logis_result = cross_validation(logis_reg_model, X_IES_BOLD_shuf, y_C, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")

logis_result
The main results are 
 Mean Validation F1: 0.4307575757575758 
 Mean Validation Accuracy 47.0
{'Training Accuracy scores': array([0.63636364, 0.68181818, 0.69318182, 0.67045455, 0.71590909,
        0.71590909, 0.73863636, 0.65909091, 0.66292135, 0.66292135]),
 'Mean Training Accuracy': 68.3720633299285,
 'Training Precision scores': array([0.63265306, 0.70731707, 0.69565217, 0.67391304, 0.73809524,
        0.72727273, 0.76190476, 0.65957447, 0.6744186 , 0.6744186 ]),
 'Mean Training Precision': 0.6945219756446686,
 'Training Recall scores': array([0.68888889, 0.64444444, 0.71111111, 0.68888889, 0.68888889,
        0.71111111, 0.71111111, 0.68888889, 0.64444444, 0.64444444]),
 'Mean Training Recall': 0.6822222222222223,
 'Training F1 scores': array([0.65957447, 0.6744186 , 0.7032967 , 0.68131868, 0.71264368,
        0.71910112, 0.73563218, 0.67391304, 0.65909091, 0.65909091]),
 'Mean Training F1 Score': 0.6878080304676203,
 'Validation Accuracy scores': array([0.5       , 0.6       , 0.2       , 0.4       , 0.2       ,
        0.6       , 0.4       , 0.8       , 0.44444444, 0.55555556]),
 'Mean Validation Accuracy': 47.0,
 'Validation Precision scores': array([0.5       , 0.57142857, 0.        , 0.4       , 0.        ,
        0.66666667, 0.33333333, 0.8       , 0.5       , 0.6       ]),
 'Mean Validation Precision': 0.4371428571428571,
 'Validation Recall scores': array([0.6, 0.8, 0. , 0.4, 0. , 0.4, 0.2, 0.8, 0.6, 0.6]),
 'Mean Validation Recall': 0.43999999999999995,
 'Validation F1 scores': array([0.54545455, 0.66666667, 0.        , 0.4       , 0.        ,
        0.5       , 0.25      , 0.8       , 0.54545455, 0.6       ]),
 'Mean Validation F1 Score': 0.4307575757575758}
#y_N

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
# perform cross-validation with the model
logis_result = cross_validation(logis_reg_model, X_IES_BOLD_shuf, y_N, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")

logis_result
The main results are 
 Mean Validation F1: 0.5598484848484848 
 Mean Validation Accuracy 55.00000000000001
{'Training Accuracy scores': array([0.69318182, 0.67045455, 0.70454545, 0.76136364, 0.73863636,
        0.70454545, 0.67045455, 0.73863636, 0.68539326, 0.73033708]),
 'Mean Training Accuracy': 70.97548518896834,
 'Training Precision scores': array([0.7       , 0.6875    , 0.71428571, 0.76      , 0.74      ,
        0.71428571, 0.68085106, 0.74468085, 0.71111111, 0.74468085]),
 'Mean Training Precision': 0.7197395305639986,
 'Training Recall scores': array([0.74468085, 0.70212766, 0.74468085, 0.80851064, 0.78723404,
        0.74468085, 0.69565217, 0.76086957, 0.68085106, 0.74468085]),
 'Mean Training Recall': 0.7413968547641073,
 'Training F1 scores': array([0.72164948, 0.69473684, 0.72916667, 0.78350515, 0.7628866 ,
        0.72916667, 0.68817204, 0.75268817, 0.69565217, 0.74468085]),
 'Mean Training F1 Score': 0.7302304652582635,
 'Validation Accuracy scores': array([0.4       , 0.7       , 0.5       , 0.5       , 0.5       ,
        0.5       , 0.9       , 0.5       , 0.66666667, 0.33333333]),
 'Mean Validation Accuracy': 55.00000000000001,
 'Validation Precision scores': array([0.42857143, 0.66666667, 0.5       , 0.5       , 0.5       ,
        0.5       , 1.        , 0.55555556, 0.75      , 0.33333333]),
 'Mean Validation Precision': 0.5734126984126984,
 'Validation Recall scores': array([0.6       , 0.8       , 0.4       , 0.4       , 0.4       ,
        0.6       , 0.83333333, 0.83333333, 0.6       , 0.2       ]),
 'Mean Validation Recall': 0.5666666666666667,
 'Validation F1 scores': array([0.5       , 0.72727273, 0.44444444, 0.44444444, 0.44444444,
        0.54545455, 0.90909091, 0.66666667, 0.66666667, 0.25      ]),
 'Mean Validation F1 Score': 0.5598484848484848}
#y_E

logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
# perform cross-validation with the model
logis_result = cross_validation(logis_reg_model, X_IES_BOLD_shuf, y_E, num_folds)

print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")

logis_result
The main results are 
 Mean Validation F1: 0.5271855921855921 
 Mean Validation Accuracy 50.22222222222222
{'Training Accuracy scores': array([0.64772727, 0.65909091, 0.69318182, 0.70454545, 0.67045455,
        0.67045455, 0.68181818, 0.76136364, 0.66292135, 0.6741573 ]),
 'Mean Training Accuracy': 68.25715015321757,
 'Training Precision scores': array([0.65217391, 0.67391304, 0.68627451, 0.69230769, 0.68085106,
        0.67346939, 0.68      , 0.76595745, 0.66      , 0.66666667]),
 'Mean Training Precision': 0.6831613723693419,
 'Training Recall scores': array([0.66666667, 0.67391304, 0.76086957, 0.7826087 , 0.69565217,
        0.7173913 , 0.73913043, 0.7826087 , 0.7173913 , 0.73913043]),
 'Mean Training Recall': 0.727536231884058,
 'Training F1 scores': array([0.65934066, 0.67391304, 0.72164948, 0.73469388, 0.68817204,
        0.69473684, 0.70833333, 0.77419355, 0.6875    , 0.70103093]),
 'Mean Training F1 Score': 0.704356375957752,
 'Validation Accuracy scores': array([0.4       , 0.6       , 0.4       , 0.3       , 0.3       ,
        0.8       , 0.4       , 0.6       , 0.66666667, 0.55555556]),
 'Mean Validation Accuracy': 50.22222222222222,
 'Validation Precision scores': array([0.5       , 0.6       , 0.42857143, 0.25      , 0.375     ,
        1.        , 0.44444444, 0.6       , 0.75      , 0.66666667]),
 'Mean Validation Precision': 0.561468253968254,
 'Validation Recall scores': array([0.33333333, 0.6       , 0.6       , 0.2       , 0.6       ,
        0.6       , 0.8       , 0.6       , 0.6       , 0.4       ]),
 'Mean Validation Recall': 0.5333333333333333,
 'Validation F1 scores': array([0.4       , 0.6       , 0.5       , 0.22222222, 0.46153846,
        0.75      , 0.57142857, 0.6       , 0.66666667, 0.5       ]),
 'Mean Validation F1 Score': 0.5271855921855921}
model_name = "SVM"
plot_result(model_name,
            "Accuracy",
            "Accuracy scores in 10 Folds",
            svm_result["Training Accuracy scores"],
            svm_result["Validation Accuracy scores"],
            num_folds=num_folds)
[Figure: SVM training vs. validation accuracy across the 10 folds]
model_name = "SVM"
plot_result(model_name,
            "F1",
            "F1 scores in 10 Folds",
            svm_result["Training F1 scores"],
            svm_result["Validation F1 scores"],
            num_folds=num_folds)
[Figure: SVM training vs. validation F1 across the 10 folds]

IES Data#


from sklearn.svm import LinearSVC

# define the model
svm = LinearSVC(max_iter=50000)
# perform cross-validation with the model
svm_result_ies = cross_validation(svm, X_IES_ye, ies_y_E, num_folds)
svm_result_ies
{'Training Accuracy scores': array([0.62352941, 0.62352941, 0.61176471, 0.68235294, 0.57647059,
        0.61627907, 0.63953488, 0.65116279, 0.60465116, 0.60465116]),
 'Mean Training Accuracy': 62.33926128590971,
 'Training Precision scores': array([0.60655738, 0.61290323, 0.61016949, 0.6557377 , 0.58064516,
        0.6031746 , 0.62711864, 0.63333333, 0.59677419, 0.60344828]),
 'Mean Training Precision': 0.61298620105756,
 'Training Recall scores': array([0.82222222, 0.82608696, 0.7826087 , 0.86956522, 0.7826087 ,
        0.82608696, 0.80434783, 0.82608696, 0.80434783, 0.76086957]),
 'Mean Training Recall': 0.8104830917874397,
 'Training F1 scores': array([0.69811321, 0.7037037 , 0.68571429, 0.74766355, 0.66666667,
        0.69724771, 0.7047619 , 0.71698113, 0.68518519, 0.67307692]),
 'Mean Training F1 Score': 0.6979114266555199,
 'Validation Accuracy scores': array([0.6       , 0.6       , 0.3       , 0.3       , 0.7       ,
        0.88888889, 0.66666667, 0.66666667, 0.88888889, 0.44444444]),
 'Mean Validation Accuracy': 60.55555555555555,
 'Validation Precision scores': array([0.75      , 0.57142857, 0.375     , 0.375     , 0.625     ,
        1.        , 0.625     , 0.625     , 0.83333333, 0.5       ]),
 'Mean Validation Precision': 0.6279761904761905,
 'Validation Recall scores': array([0.5, 0.8, 0.6, 0.6, 1. , 0.8, 1. , 1. , 1. , 0.6]),
 'Mean Validation Recall': 0.7899999999999999,
 'Validation F1 scores': array([0.6       , 0.66666667, 0.46153846, 0.46153846, 0.76923077,
        0.88888889, 0.76923077, 0.76923077, 0.90909091, 0.54545455]),
 'Mean Validation F1 Score': 0.6840870240870242}
print(f"The main results are \n Mean Validation F1: {svm_result_ies['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result_ies['Mean Validation Accuracy']}")
The main results are 
 Mean Validation F1: 0.6840870240870242 
 Mean Validation Accuracy 60.55555555555555
model_name = "SVM"
plot_result(model_name,
            "F1",
            "F1 scores in 10 Folds",
            svm_result_ies["Training F1 scores"],
            svm_result_ies["Validation F1 scores"],
            num_folds=num_folds)
[Figure: SVM (IES data) training vs. validation F1 across the 10 folds]
model_name = "SVM"
plot_result(model_name,
            "Accuracy",
            "Accuracy scores in 10 Folds",
            svm_result_ies["Training Accuracy scores"],
            svm_result_ies["Validation Accuracy scores"],
            num_folds=num_folds)
[Figure: SVM (IES data) training vs. validation accuracy across the 10 folds]

Let’s train an SVM on the first 70 samples and look at its predictions on the remaining 25

svm.fit(X_IES_ye[:70], ies_y_E[:70])
LinearSVC(max_iter=50000)
preds = svm.predict(X_IES_ye[70:])
preds
array([1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 0])
ies_y_E[70:]
array([1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1,
       0, 1, 1])
np.mean(preds == ies_y_E[70:])
0.64
from sklearn.metrics import f1_score
f1_score(ies_y_E[70:], preds)  # f1_score expects y_true first, then the predictions
0.7096774193548386
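Slicing at index 70 assumes the subject order carries no structure; a shuffled, stratified split is safer. A minimal sketch of the same check using train_test_split (the random_state is arbitrary):

from sklearn.model_selection import train_test_split

# a sketch: shuffled, stratified split instead of slicing at a fixed index
X_tr, X_te, y_tr, y_te = train_test_split(
    X_IES_ye, ies_y_E, test_size=25, stratify=ies_y_E, random_state=0)
svm.fit(X_tr, y_tr)
print(f1_score(y_te, svm.predict(X_te)))  # y_true first, then the predictions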

We can see that using IES data we get noticeably better results than using BOLD data (a held-out F1 of about 0.71 here)

Logistic Regression#


BOLD Data#

from sklearn.linear_model import LogisticRegression
#define the model
logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
#perform cross validation with model
logis_result = cross_validation(logis_reg_model, X_BOLD_ye, bold_y_E, num_folds)
logis_result
{'Training Accuracy scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Accuracy': 100.0,
 'Training Precision scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Precision': 1.0,
 'Training Recall scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Recall': 1.0,
 'Training F1 scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training F1 Score': 1.0,
 'Validation Accuracy scores': array([0.3       , 0.2       , 0.6       , 0.6       , 0.5       ,
        0.55555556, 0.33333333, 0.55555556, 0.44444444, 0.55555556]),
 'Mean Validation Accuracy': 46.444444444444436,
 'Validation Precision scores': array([0.4       , 0.28571429, 0.57142857, 0.6       , 0.5       ,
        0.57142857, 0.42857143, 0.66666667, 0.5       , 0.6       ]),
 'Mean Validation Precision': 0.5123809523809524,
 'Validation Recall scores': array([0.33333333, 0.4       , 0.8       , 0.6       , 0.6       ,
        0.8       , 0.6       , 0.4       , 0.4       , 0.6       ]),
 'Mean Validation Recall': 0.5533333333333333,
 'Validation F1 scores': array([0.36363636, 0.33333333, 0.66666667, 0.6       , 0.54545455,
        0.66666667, 0.5       , 0.5       , 0.44444444, 0.6       ]),
 'Mean Validation F1 Score': 0.522020202020202}
print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")
The main results are 
 Mean Validation F1: 0.522020202020202 
 Mean Validation Accuracy 46.444444444444436
model_name = "Logistic Regression"
plot_result(model_name,
            "F1",
            "F1 scores in 10 Folds",
            logis_result["Training F1 scores"],
            logis_result["Validation F1 scores"])
[Figure: Logistic regression (BOLD data) training vs. validation F1 across the 10 folds]
model_name = "Logistic Regression"
plot_result(model_name,
            "Accuracy",
            "Accuracy scores in 10 Folds",
            logis_result["Training Accuracy scores"],
            logis_result["Validation Accuracy scores"])
[Figure: Logistic regression (BOLD data) training vs. validation accuracy across the 10 folds]

IES Data#

from sklearn.linear_model import LogisticRegression
#define the model
logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
#perform cross validation with model
logis_result = cross_validation(logis_reg_model, X_IES_ye, ies_y_E, num_folds)
logis_result
{'Training Accuracy scores': array([0.61176471, 0.61176471, 0.61176471, 0.69411765, 0.6       ,
        0.61627907, 0.62790698, 0.61627907, 0.59302326, 0.62790698]),
 'Mean Training Accuracy': 62.10807113543091,
 'Training Precision scores': array([0.59677419, 0.61016949, 0.61016949, 0.66666667, 0.6       ,
        0.61016949, 0.62068966, 0.61818182, 0.59016393, 0.62068966]),
 'Mean Training Precision': 0.61436743977442,
 'Training Recall scores': array([0.82222222, 0.7826087 , 0.7826087 , 0.86956522, 0.7826087 ,
        0.7826087 , 0.7826087 , 0.73913043, 0.7826087 , 0.7826087 ]),
 'Mean Training Recall': 0.7909178743961353,
 'Training F1 scores': array([0.69158879, 0.68571429, 0.68571429, 0.75471698, 0.67924528,
        0.68571429, 0.69230769, 0.67326733, 0.6728972 , 0.69230769]),
 'Mean Training F1 Score': 0.691347381395027,
 'Validation Accuracy scores': array([0.7       , 0.6       , 0.3       , 0.3       , 0.7       ,
        0.77777778, 0.66666667, 0.55555556, 0.77777778, 0.55555556]),
 'Mean Validation Accuracy': 59.33333333333333,
 'Validation Precision scores': array([0.8       , 0.57142857, 0.375     , 0.375     , 0.625     ,
        1.        , 0.625     , 0.55555556, 0.8       , 0.6       ]),
 'Mean Validation Precision': 0.6326984126984126,
 'Validation Recall scores': array([0.66666667, 0.8       , 0.6       , 0.6       , 1.        ,
        0.6       , 1.        , 1.        , 0.8       , 0.6       ]),
 'Mean Validation Recall': 0.7666666666666667,
 'Validation F1 scores': array([0.72727273, 0.66666667, 0.46153846, 0.46153846, 0.76923077,
        0.75      , 0.76923077, 0.71428571, 0.8       , 0.6       ]),
 'Mean Validation F1 Score': 0.6719763569763569}
print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")
The main results are 
 Mean Validation F1: 0.6719763569763569 
 Mean Validation Accuracy 59.33333333333333
model_name = "Logistic Regression"
plot_result(model_name,
            "Accuracy",
            "Accuracy scores in 10 Folds",
            logis_result["Training Accuracy scores"],
            logis_result["Validation Accuracy scores"])
[Figure: Logistic regression (IES data) training vs. validation accuracy across the 10 folds]

Multi-layer Perceptron (MLP)#


BOLD Data#

from sklearn.neural_network import MLPClassifier
# the input layer of the network has size 72 (the number of features);
# the hidden layers reduce this to 50 and then 30, and the output layer returns the 2 classes
mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=[50, 30])  # lbfgs tends to work better than the default solver (adam) on small datasets
mlp_result = cross_validation(mlp, X_BOLD_ye, bold_y_E, num_folds)
mlp_result
{'Training Accuracy scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Accuracy': 100.0,
 'Training Precision scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Precision': 1.0,
 'Training Recall scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Recall': 1.0,
 'Training F1 scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training F1 Score': 1.0,
 'Validation Accuracy scores': array([0.6       , 0.3       , 0.5       , 0.2       , 0.3       ,
        0.66666667, 0.55555556, 0.55555556, 0.66666667, 0.66666667]),
 'Mean Validation Accuracy': 50.11111111111111,
 'Validation Precision scores': array([0.66666667, 0.375     , 0.5       , 0.28571429, 0.25      ,
        0.75      , 0.66666667, 0.6       , 0.625     , 0.66666667]),
 'Mean Validation Precision': 0.5385714285714285,
 'Validation Recall scores': array([0.66666667, 0.6       , 1.        , 0.4       , 0.2       ,
        0.6       , 0.4       , 0.6       , 1.        , 0.8       ]),
 'Mean Validation Recall': 0.6266666666666667,
 'Validation F1 scores': array([0.66666667, 0.46153846, 0.66666667, 0.33333333, 0.22222222,
        0.66666667, 0.5       , 0.6       , 0.76923077, 0.72727273]),
 'Mean Validation F1 Score': 0.5613597513597514}
print(f"The main results are \n Mean Validation F1: {mlp_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {mlp_result['Mean Validation Accuracy']}")
The main results are 
 Mean Validation F1: 0.5613597513597514 
 Mean Validation Accuracy 50.11111111111111
model_name = "MLP"
plot_result(model_name,
            "F1",
            "F1 scores in 10 Folds",
            mlp_result["Training F1 scores"],
            mlp_result["Validation F1 scores"])
[Figure: MLP (BOLD data) training vs. validation F1 across the 10 folds]
model_name = "MLP"
plot_result(model_name,
            "Accuracy",
            "Accuracy scores in 10 Folds",
            mlp_result["Training Accuracy scores"],
            mlp_result["Validation Accuracy scores"])
[Figure: MLP (BOLD data) training vs. validation accuracy across the 10 folds]

IES Data#

from sklearn.neural_network import MLPClassifier
# the input layer of the network has size 72 (the number of features);
# the hidden layers reduce this to 50 and then 30, and the output layer returns the 2 classes
mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=[50, 30], max_iter=5000)  # lbfgs tends to work better than the default solver (adam) on small datasets
mlp_result = cross_validation(mlp, X_IES_ye, ies_y_E, num_folds)
mlp_result
{'Training Accuracy scores': array([0.90588235, 0.96470588, 1.        , 0.98823529, 0.96470588,
        1.        , 0.97674419, 1.        , 0.81395349, 0.97674419]),
 'Mean Training Accuracy': 95.90971272229822,
 'Training Precision scores': array([0.91111111, 0.95744681, 1.        , 0.9787234 , 0.95744681,
        1.        , 0.95833333, 1.        , 0.8       , 0.97826087]),
 'Mean Training Precision': 0.9541322335286259,
 'Training Recall scores': array([0.91111111, 0.97826087, 1.        , 1.        , 0.97826087,
        1.        , 1.        , 1.        , 0.86956522, 0.97826087]),
 'Mean Training Recall': 0.9715458937198067,
 'Training F1 scores': array([0.91111111, 0.96774194, 1.        , 0.98924731, 0.96774194,
        1.        , 0.9787234 , 1.        , 0.83333333, 0.97826087]),
 'Mean Training F1 Score': 0.962615990106068,
 'Validation Accuracy scores': array([0.4       , 0.4       , 0.4       , 0.5       , 0.2       ,
        0.66666667, 0.55555556, 0.55555556, 0.44444444, 0.55555556]),
 'Mean Validation Accuracy': 46.77777777777778,
 'Validation Precision scores': array([0.5       , 0.42857143, 0.44444444, 0.5       , 0.2       ,
        0.75      , 0.57142857, 0.6       , 0.5       , 0.57142857]),
 'Mean Validation Precision': 0.5065873015873015,
 'Validation Recall scores': array([0.16666667, 0.6       , 0.8       , 0.2       , 0.2       ,
        0.6       , 0.8       , 0.6       , 0.4       , 0.8       ]),
 'Mean Validation Recall': 0.5166666666666667,
 'Validation F1 scores': array([0.25      , 0.5       , 0.57142857, 0.28571429, 0.2       ,
        0.66666667, 0.66666667, 0.6       , 0.44444444, 0.66666667]),
 'Mean Validation F1 Score': 0.48515873015873023}
print(f"The main results are \n Mean Validation F1: {mlp_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {mlp_result['Mean Validation Accuracy']}")
The main results are 
 Mean Validation F1: 0.48515873015873023 
 Mean Validation Accuracy 46.77777777777778

Conclusions from training to classify y_E#


For the SVM and logistic regression, IES data yields clearly better validation performance than BOLD data (e.g., mean validation F1 of 0.67 vs. 0.52 for logistic regression). The MLP is the exception, scoring slightly higher on BOLD data (0.56 vs. 0.49).
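For a side-by-side view, the mean validation F1 scores for the two models run on both feature sets in this section can be collected into a small table (values copied from the outputs above):

import pandas as pd

# mean validation F1 for classifying y_E, from the cross-validation outputs above
f1_summary = pd.DataFrame({'BOLD': [0.522, 0.561], 'IES': [0.672, 0.485]},
                          index=['Logistic Regression', 'MLP'])
print(f1_summary)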

Training y_N#


SVM#

from sklearn.svm import LinearSVC

# define the model
svm = LinearSVC(max_iter=100000)
# perform cross-validation with the model
svm_result = cross_validation(svm, X_BOLD_yn, bold_y_N, num_folds)
svm_result
{'Training Accuracy scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Accuracy': 100.0,
 'Training Precision scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Precision': 1.0,
 'Training Recall scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Recall': 1.0,
 'Training F1 scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training F1 Score': 1.0,
 'Validation Accuracy scores': array([0.7       , 0.5       , 0.8       , 0.5       , 0.6       ,
        0.44444444, 0.66666667, 0.44444444, 0.77777778, 0.22222222]),
 'Mean Validation Accuracy': 56.55555555555555,
 'Validation Precision scores': array([0.75      , 0.5       , 0.71428571, 0.5       , 0.57142857,
        0.33333333, 1.        , 0.5       , 1.        , 0.25      ]),
 'Mean Validation Precision': 0.6119047619047618,
 'Validation Recall scores': array([0.6 , 0.4 , 1.  , 0.6 , 0.8 , 0.25, 0.4 , 0.4 , 0.6 , 0.2 ]),
 'Mean Validation Recall': 0.525,
 'Validation F1 scores': array([0.66666667, 0.44444444, 0.83333333, 0.54545455, 0.66666667,
        0.28571429, 0.57142857, 0.44444444, 0.75      , 0.22222222]),
 'Mean Validation F1 Score': 0.543037518037518}
print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")
The main results are 
 Mean Validation F1: 0.543037518037518 
 Mean Validation Accuracy 56.55555555555555
ies_y_N
array([0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0,
       0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0,
       0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1,
       1, 1, 0, 0, 0, 0, 0])
from sklearn.svm import LinearSVC

# define the model
svm = LinearSVC(max_iter=50000)
# perform cross-validation with the model
svm_result_ies = cross_validation(svm, X_IES_yn, ies_y_N, num_folds)
svm_result_ies
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
{'Training Accuracy scores': array([0.61176471, 0.61176471, 0.58823529, 0.62352941, 0.57647059,
        0.61627907, 0.60465116, 0.61627907, 0.6627907 , 0.60465116]),
 'Mean Training Accuracy': 61.16415868673051,
 'Training Precision scores': array([0.62790698, 0.62790698, 0.62162162, 0.63636364, 0.59090909,
        0.64285714, 0.61904762, 0.62790698, 0.66666667, 0.60869565]),
 'Mean Training Precision': 0.626988235987225,
 'Training Recall scores': array([0.61363636, 0.61363636, 0.52272727, 0.63636364, 0.59090909,
        0.6       , 0.59090909, 0.61363636, 0.68181818, 0.63636364]),
 'Mean Training Recall': 0.6100000000000001,
 'Training F1 scores': array([0.62068966, 0.62068966, 0.56790123, 0.63636364, 0.59090909,
        0.62068966, 0.60465116, 0.62068966, 0.6741573 , 0.62222222]),
 'Mean Training F1 Score': 0.6178963270913991,
 'Validation Accuracy scores': array([0.4       , 0.6       , 0.4       , 0.6       , 0.5       ,
        0.33333333, 0.66666667, 0.44444444, 0.33333333, 0.77777778]),
 'Mean Validation Accuracy': 50.55555555555556,
 'Validation Precision scores': array([0.4       , 0.6       , 0.33333333, 0.57142857, 0.        ,
        0.375     , 1.        , 0.5       , 0.33333333, 0.8       ]),
 'Mean Validation Precision': 0.4913095238095238,
 'Validation Recall scores': array([0.4 , 0.6 , 0.2 , 0.8 , 0.  , 0.75, 0.4 , 0.6 , 0.2 , 0.8 ]),
 'Mean Validation Recall': 0.475,
 'Validation F1 scores': array([0.4       , 0.6       , 0.25      , 0.66666667, 0.        ,
        0.5       , 0.57142857, 0.54545455, 0.25      , 0.8       ]),
 'Mean Validation F1 Score': 0.4583549783549784}
print(f"The main results are \n Mean Validation F1: {svm_result_ies['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result_ies['Mean Validation Accuracy']}")
The main results are 
 Mean Validation F1: 0.4583549783549784 
 Mean Validation Accuracy 50.55555555555556
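The UndefinedMetricWarning printed above is raised when a fold's classifier predicts no positive samples at all, so precision has a zero denominator and sklearn substitutes 0.0. A standalone illustration of the zero_division parameter that controls this behavior:

from sklearn.metrics import precision_score

y_true = [1, 0, 1, 1]
y_pred = [0, 0, 0, 0]  # no positive predictions, so precision is undefined
print(precision_score(y_true, y_pred, zero_division=0))  # prints 0.0 without a warning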

Logistic Regression#

from sklearn.linear_model import LogisticRegression
#define the model
logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
#perform cross validation with model
logis_result = cross_validation(logis_reg_model, X_BOLD_yn, bold_y_N, num_folds)
logis_result
{'Training Accuracy scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Accuracy': 100.0,
 'Training Precision scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Precision': 1.0,
 'Training Recall scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Recall': 1.0,
 'Training F1 scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training F1 Score': 1.0,
 'Validation Accuracy scores': array([0.7       , 0.5       , 0.8       , 0.5       , 0.6       ,
        0.44444444, 0.88888889, 0.44444444, 0.88888889, 0.33333333]),
 'Mean Validation Accuracy': 60.999999999999986,
 'Validation Precision scores': array([0.75      , 0.5       , 0.71428571, 0.5       , 0.57142857,
        0.        , 1.        , 0.5       , 1.        , 0.4       ]),
 'Mean Validation Precision': 0.5935714285714286,
 'Validation Recall scores': array([0.6, 0.4, 1. , 0.6, 0.8, 0. , 0.8, 0.4, 0.8, 0.4]),
 'Mean Validation Recall': 0.58,
 'Validation F1 scores': array([0.66666667, 0.44444444, 0.83333333, 0.54545455, 0.66666667,
        0.        , 0.88888889, 0.44444444, 0.88888889, 0.4       ]),
 'Mean Validation F1 Score': 0.5778787878787879}
print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")
The main results are 
 Mean Validation F1: 0.5778787878787879 
 Mean Validation Accuracy 60.999999999999986
from sklearn.linear_model import LogisticRegression
# define the model
logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
# perform cross-validation with the model (target: ies_y_N)
logis_result = cross_validation(logis_reg_model, X_IES_yn, ies_y_N, num_folds)
logis_result
{'Training Accuracy scores': array([0.61176471, 0.61176471, 0.61176471, 0.69411765, 0.6       ,
        0.61627907, 0.62790698, 0.61627907, 0.59302326, 0.62790698]),
 'Mean Training Accuracy': 62.10807113543091,
 'Training Precision scores': array([0.59677419, 0.61016949, 0.61016949, 0.66666667, 0.6       ,
        0.61016949, 0.62068966, 0.61818182, 0.59016393, 0.62068966]),
 'Mean Training Precision': 0.61436743977442,
 'Training Recall scores': array([0.82222222, 0.7826087 , 0.7826087 , 0.86956522, 0.7826087 ,
        0.7826087 , 0.7826087 , 0.73913043, 0.7826087 , 0.7826087 ]),
 'Mean Training Recall': 0.7909178743961353,
 'Training F1 scores': array([0.69158879, 0.68571429, 0.68571429, 0.75471698, 0.67924528,
        0.68571429, 0.69230769, 0.67326733, 0.6728972 , 0.69230769]),
 'Mean Training F1 Score': 0.691347381395027,
 'Validation Accuracy scores': array([0.7       , 0.6       , 0.3       , 0.3       , 0.7       ,
        0.77777778, 0.66666667, 0.55555556, 0.77777778, 0.55555556]),
 'Mean Validation Accuracy': 59.33333333333333,
 'Validation Precision scores': array([0.8       , 0.57142857, 0.375     , 0.375     , 0.625     ,
        1.        , 0.625     , 0.55555556, 0.8       , 0.6       ]),
 'Mean Validation Precision': 0.6326984126984126,
 'Validation Recall scores': array([0.66666667, 0.8       , 0.6       , 0.6       , 1.        ,
        0.6       , 1.        , 1.        , 0.8       , 0.6       ]),
 'Mean Validation Recall': 0.7666666666666667,
 'Validation F1 scores': array([0.72727273, 0.66666667, 0.46153846, 0.46153846, 0.76923077,
        0.75      , 0.76923077, 0.71428571, 0.8       , 0.6       ]),
 'Mean Validation F1 Score': 0.6719763569763569}
print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")
The main results are 
 Mean Validation F1: 0.6719763569763569 
 Mean Validation Accuracy 59.33333333333333

MLP#

from sklearn.neural_network import MLPClassifier
# the input layer of the network has size 72 (the number of features);
# the hidden layers reduce this to 50 and then 30, and the output layer returns the 2 classes
mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=[50, 30])  # lbfgs tends to work better than the default solver (adam) on small datasets
mlp_result = cross_validation(mlp, X_BOLD_yn, bold_y_N, num_folds)
mlp_result
{'Training Accuracy scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Accuracy': 100.0,
 'Training Precision scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Precision': 1.0,
 'Training Recall scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Recall': 1.0,
 'Training F1 scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training F1 Score': 1.0,
 'Validation Accuracy scores': array([0.6       , 0.6       , 0.6       , 0.6       , 0.5       ,
        0.44444444, 0.66666667, 0.66666667, 0.33333333, 0.22222222]),
 'Mean Validation Accuracy': 52.33333333333332,
 'Validation Precision scores': array([0.66666667, 0.57142857, 0.6       , 0.57142857, 0.5       ,
        0.        , 1.        , 0.66666667, 0.33333333, 0.33333333]),
 'Mean Validation Precision': 0.5242857142857142,
 'Validation Recall scores': array([0.4, 0.8, 0.6, 0.8, 0.4, 0. , 0.4, 0.8, 0.2, 0.4]),
 'Mean Validation Recall': 0.4800000000000001,
 'Validation F1 scores': array([0.5       , 0.66666667, 0.6       , 0.66666667, 0.44444444,
        0.        , 0.57142857, 0.72727273, 0.25      , 0.36363636]),
 'Mean Validation F1 Score': 0.47901154401154394}
print(f"The main results are \n Mean Validation F1: {mlp_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {mlp_result['Mean Validation Accuracy']}")
The main results are 
 Mean Validation F1: 0.47901154401154394 
 Mean Validation Accuracy 52.33333333333332
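The perfect training scores with the BOLD features are not surprising given the model size. A back-of-the-envelope count (a sketch; sklearn uses a single logistic output unit for a binary target) shows the network has far more weights than training samples per fold (roughly 85, judging from the fold sizes in the IES runs), so it can simply memorize the training set:

# rough parameter count for a 72 -> 50 -> 30 -> 1 MLP, biases included
n_in, h1, h2, n_out = 72, 50, 30, 1
n_weights = (n_in * h1 + h1) + (h1 * h2 + h2) + (h2 * n_out + n_out)
print(n_weights)  # 5211 weights vs. roughly 85 training samples per fold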
from sklearn.neural_network import MLPClassifier
# same architecture as above (72 -> 50 -> 30 -> class prediction), but with
# max_iter raised to 5000 to help lbfgs converge
mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=[50, 30], max_iter=5000)
mlp_result_ies = cross_validation(mlp, X_IES_yn, ies_y_N, num_folds)
mlp_result_ies
{'Training Accuracy scores': array([0.90588235, 1.        , 0.82352941, 0.92941176, 1.        ,
        0.89534884, 0.90697674, 0.93023256, 0.98837209, 0.98837209]),
 'Mean Training Accuracy': 93.6812585499316,
 'Training Precision scores': array([0.89130435, 1.        , 0.82222222, 0.93181818, 1.        ,
        0.84615385, 0.90909091, 0.93181818, 1.        , 1.        ]),
 'Mean Training Precision': 0.9332407688929429,
 'Training Recall scores': array([0.93181818, 1.        , 0.84090909, 0.93181818, 1.        ,
        0.97777778, 0.90909091, 0.93181818, 0.97727273, 0.97727273]),
 'Mean Training Recall': 0.9477777777777776,
 'Training F1 scores': array([0.91111111, 1.        , 0.83146067, 0.93181818, 1.        ,
        0.90721649, 0.90909091, 0.93181818, 0.98850575, 0.98850575]),
 'Mean Training F1 Score': 0.9399527047093923,
 'Validation Accuracy scores': array([0.6       , 0.6       , 0.6       , 0.7       , 0.3       ,
        0.55555556, 0.44444444, 0.44444444, 0.22222222, 0.55555556]),
 'Mean Validation Accuracy': 50.22222222222222,
 'Validation Precision scores': array([0.55555556, 0.6       , 0.57142857, 0.66666667, 0.25      ,
        0.5       , 0.5       , 0.5       , 0.25      , 0.66666667]),
 'Mean Validation Precision': 0.506031746031746,
 'Validation Recall scores': array([1. , 0.6, 0.8, 0.8, 0.2, 0.5, 0.4, 0.6, 0.2, 0.4]),
 'Mean Validation Recall': 0.55,
 'Validation F1 scores': array([0.71428571, 0.6       , 0.66666667, 0.72727273, 0.22222222,
        0.5       , 0.44444444, 0.54545455, 0.22222222, 0.5       ]),
 'Mean Validation F1 Score': 0.5142568542568543}
print(f"The main results are \n Mean Validation F1: {mlp_result_ies['Mean Validation F1 Score']} \n Mean Validation Accuracy {mlp_result_ies['Mean Validation Accuracy']}")
The main results are 
 Mean Validation F1: 0.5142568542568543 
 Mean Validation Accuracy 50.22222222222222

Conclusion#

BOLD data seems slightly better than IES data for predicting y_N.

Training Y_A#


SVM#

from sklearn.svm import LinearSVC

# define the model
svm = LinearSVC(max_iter=1000000)
# perform cross-validation with the model

svm_result = cross_validation(svm, X_BOLD_ya, bold_y_A, num_folds)
svm_result
{'Training Accuracy scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Accuracy': 100.0,
 'Training Precision scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Precision': 1.0,
 'Training Recall scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Recall': 1.0,
 'Training F1 scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training F1 Score': 1.0,
 'Validation Accuracy scores': array([0.84615385, 0.53846154, 0.76923077, 1.        , 0.46153846,
        0.76923077, 0.61538462, 0.91666667, 0.75      , 0.91666667]),
 'Mean Validation Accuracy': 75.83333333333333,
 'Validation Precision scores': array([0.77777778, 1.        , 0.75      , 1.        , 0.5       ,
        1.        , 1.        , 1.        , 0.71428571, 1.        ]),
 'Mean Validation Precision': 0.8742063492063492,
 'Validation Recall scores': array([1.        , 0.14285714, 0.85714286, 1.        , 0.28571429,
        0.57142857, 0.28571429, 0.83333333, 0.83333333, 0.83333333]),
 'Mean Validation Recall': 0.6642857142857143,
 'Validation F1 scores': array([0.875     , 0.25      , 0.8       , 1.        , 0.36363636,
        0.72727273, 0.44444444, 0.90909091, 0.76923077, 0.90909091]),
 'Mean Validation F1 Score': 0.7047766122766123}
print(f"The main results are \n Mean Validation F1: {svm_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result['Mean Validation Accuracy']}")
The main results are 
 Mean Validation F1: 0.7047766122766123 
 Mean Validation Accuracy 75.83333333333333
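One caveat worth noting: LinearSVC is sensitive to the scale of the features, which can be one reason such a large max_iter is needed. A possible variant (a sketch, not what was run above) standardizes the features inside a pipeline, so that each cross-validation fold is scaled using only its own training split:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# scaling happens inside the pipeline, so cross_validation fits the scaler
# on each training split only (no information leaks into the validation fold)
svm_scaled = make_pipeline(StandardScaler(), LinearSVC(max_iter=10000))
# svm_scaled_result = cross_validation(svm_scaled, X_BOLD_ya, bold_y_A, num_folds)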
from sklearn.svm import LinearSVC

# define the model
svm = LinearSVC(max_iter=1000000)
# perform cross-validation with the model

svm_result_ies = cross_validation(svm, X_IES_ya, ies_y_A, num_folds)
svm_result_ies
{'Training Accuracy scores': array([0.61403509, 0.68421053, 0.62280702, 0.61403509, 0.65789474,
        0.6754386 , 0.65789474, 0.70434783, 0.66086957, 0.68695652]),
 'Mean Training Accuracy': 65.78489702517162,
 'Training Precision scores': array([0.6       , 0.69354839, 0.62686567, 0.61111111, 0.65671642,
        0.65333333, 0.65217391, 0.67532468, 0.6375    , 0.67123288]),
 'Mean Training Precision': 0.647780638617394,
 'Training Recall scores': array([0.8       , 0.71666667, 0.7       , 0.73333333, 0.73333333,
        0.81666667, 0.75      , 0.85245902, 0.83606557, 0.80327869]),
 'Mean Training Recall': 0.7741803278688525,
 'Training F1 scores': array([0.68571429, 0.70491803, 0.66141732, 0.66666667, 0.69291339,
        0.72592593, 0.69767442, 0.75362319, 0.72340426, 0.73134328]),
 'Mean Training F1 Score': 0.7043600765666869,
 'Validation Accuracy scores': array([0.84615385, 0.23076923, 0.38461538, 0.76923077, 0.61538462,
        0.84615385, 0.69230769, 0.58333333, 0.66666667, 0.41666667]),
 'Mean Validation Accuracy': 60.51282051282052,
 'Validation Precision scores': array([0.77777778, 0.28571429, 0.4       , 0.75      , 0.625     ,
        1.        , 0.63636364, 0.57142857, 0.625     , 0.42857143]),
 'Mean Validation Precision': 0.6099855699855701,
 'Validation Recall scores': array([1.        , 0.28571429, 0.28571429, 0.85714286, 0.71428571,
        0.71428571, 1.        , 0.66666667, 0.83333333, 0.5       ]),
 'Mean Validation Recall': 0.6857142857142857,
 'Validation F1 scores': array([0.875     , 0.28571429, 0.33333333, 0.8       , 0.66666667,
        0.83333333, 0.77777778, 0.61538462, 0.71428571, 0.46153846]),
 'Mean Validation F1 Score': 0.6363034188034189}
print(f"The main results are \n Mean Validation F1: {svm_result_ies['Mean Validation F1 Score']} \n Mean Validation Accuracy {svm_result_ies['Mean Validation Accuracy']}")
The main results are 
 Mean Validation F1: 0.6363034188034189 
 Mean Validation Accuracy 60.51282051282052

Logistic Regression#

from sklearn.linear_model import LogisticRegression
# define the model
logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
# perform cross-validation with the model
logis_result = cross_validation(logis_reg_model, X_BOLD_ya, bold_y_A, num_folds)
logis_result
{'Training Accuracy scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Accuracy': 100.0,
 'Training Precision scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Precision': 1.0,
 'Training Recall scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Recall': 1.0,
 'Training F1 scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training F1 Score': 1.0,
 'Validation Accuracy scores': array([0.84615385, 0.53846154, 0.69230769, 0.92307692, 0.69230769,
        0.84615385, 0.69230769, 0.75      , 0.75      , 0.91666667]),
 'Mean Validation Accuracy': 76.47435897435898,
 'Validation Precision scores': array([0.77777778, 1.        , 0.71428571, 1.        , 0.71428571,
        1.        , 1.        , 1.        , 0.71428571, 1.        ]),
 'Mean Validation Precision': 0.8920634920634921,
 'Validation Recall scores': array([1.        , 0.14285714, 0.71428571, 0.85714286, 0.71428571,
        0.71428571, 0.42857143, 0.5       , 0.83333333, 0.83333333]),
 'Mean Validation Recall': 0.6738095238095237,
 'Validation F1 scores': array([0.875     , 0.25      , 0.71428571, 0.92307692, 0.71428571,
        0.83333333, 0.6       , 0.66666667, 0.76923077, 0.90909091]),
 'Mean Validation F1 Score': 0.725497002997003}
print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result['Mean Validation Accuracy']}")
The main results are 
 Mean Validation F1: 0.725497002997003 
 Mean Validation Accuracy 76.47435897435898
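As with the SVM, the training scores are all 1.0, so the model separates the BOLD training data perfectly. One way to push back on this over-fitting (a sketch, not run here) is to strengthen the L2 penalty by searching over smaller values of C, sklearn's inverse regularization strength:

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# hypothetical grid; smaller C means stronger L2 regularization
grid = GridSearchCV(LogisticRegression(penalty='l2', max_iter=10000),
                    param_grid={'C': [0.001, 0.01, 0.1, 1.0]},
                    scoring='f1', cv=5)
# grid.fit(X_BOLD_ya, bold_y_A); print(grid.best_params_)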
from sklearn.linear_model import LogisticRegression
# define the model
logis_reg_model = LogisticRegression(penalty='l2', max_iter=10000)
# perform cross-validation with the model
logis_result_ies = cross_validation(logis_reg_model, X_IES_ye, ies_y_E, num_folds)
logis_result_ies
{'Training Accuracy scores': array([0.61176471, 0.61176471, 0.61176471, 0.69411765, 0.6       ,
        0.61627907, 0.62790698, 0.61627907, 0.59302326, 0.62790698]),
 'Mean Training Accuracy': 62.10807113543091,
 'Training Precision scores': array([0.59677419, 0.61016949, 0.61016949, 0.66666667, 0.6       ,
        0.61016949, 0.62068966, 0.61818182, 0.59016393, 0.62068966]),
 'Mean Training Precision': 0.61436743977442,
 'Training Recall scores': array([0.82222222, 0.7826087 , 0.7826087 , 0.86956522, 0.7826087 ,
        0.7826087 , 0.7826087 , 0.73913043, 0.7826087 , 0.7826087 ]),
 'Mean Training Recall': 0.7909178743961353,
 'Training F1 scores': array([0.69158879, 0.68571429, 0.68571429, 0.75471698, 0.67924528,
        0.68571429, 0.69230769, 0.67326733, 0.6728972 , 0.69230769]),
 'Mean Training F1 Score': 0.691347381395027,
 'Validation Accuracy scores': array([0.7       , 0.6       , 0.3       , 0.3       , 0.7       ,
        0.77777778, 0.66666667, 0.55555556, 0.77777778, 0.55555556]),
 'Mean Validation Accuracy': 59.33333333333333,
 'Validation Precision scores': array([0.8       , 0.57142857, 0.375     , 0.375     , 0.625     ,
        1.        , 0.625     , 0.55555556, 0.8       , 0.6       ]),
 'Mean Validation Precision': 0.6326984126984126,
 'Validation Recall scores': array([0.66666667, 0.8       , 0.6       , 0.6       , 1.        ,
        0.6       , 1.        , 1.        , 0.8       , 0.6       ]),
 'Mean Validation Recall': 0.7666666666666667,
 'Validation F1 scores': array([0.72727273, 0.66666667, 0.46153846, 0.46153846, 0.76923077,
        0.75      , 0.76923077, 0.71428571, 0.8       , 0.6       ]),
 'Mean Validation F1 Score': 0.6719763569763569}
print(f"The main results are \n Mean Validation F1: {logis_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {logis_result_ies['Mean Validation Accuracy']}")
The main results are 
 Mean Validation F1: 0.6719763569763569 
 Mean Validation Accuracy 59.33333333333333

MLP#

from sklearn.neural_network import MLPClassifier
# same MLP architecture as before: the 72 input features are reduced to 50
# and then 30 before the output layer produces the class prediction
mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=[50, 30]) # lbfgs tends to work better than adam (the default) on small datasets
mlp_result = cross_validation(mlp, X_BOLD_ya, bold_y_A, num_folds)
mlp_result
{'Training Accuracy scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Accuracy': 100.0,
 'Training Precision scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Precision': 1.0,
 'Training Recall scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training Recall': 1.0,
 'Training F1 scores': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 'Mean Training F1 Score': 1.0,
 'Validation Accuracy scores': array([0.69230769, 1.        , 0.61538462, 0.84615385, 0.53846154,
        0.84615385, 0.84615385, 0.83333333, 0.75      , 1.        ]),
 'Mean Validation Accuracy': 79.67948717948718,
 'Validation Precision scores': array([0.71428571, 1.        , 0.6       , 1.        , 0.6       ,
        1.        , 1.        , 1.        , 0.71428571, 1.        ]),
 'Mean Validation Precision': 0.862857142857143,
 'Validation Recall scores': array([0.71428571, 1.        , 0.85714286, 0.71428571, 0.42857143,
        0.71428571, 0.71428571, 0.66666667, 0.83333333, 1.        ]),
 'Mean Validation Recall': 0.7642857142857142,
 'Validation F1 scores': array([0.71428571, 1.        , 0.70588235, 0.83333333, 0.5       ,
        0.83333333, 0.83333333, 0.8       , 0.76923077, 1.        ]),
 'Mean Validation F1 Score': 0.798939883645766}
print(f"The main results are \n Mean Validation F1: {mlp_result['Mean Validation F1 Score']} \n Mean Validation Accuracy {mlp_result['Mean Validation Accuracy']}")
The main results are 
 Mean Validation F1: 0.798939883645766 
 Mean Validation Accuracy 79.67948717948718
from sklearn.neural_network import MLPClassifier
# same architecture again, with max_iter raised to 5000 to help lbfgs converge
mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=[50, 30], max_iter=5000)
mlp_result_ies = cross_validation(mlp, X_IES_ya, ies_y_A, num_folds)
mlp_result_ies
{'Training Accuracy scores': array([1.        , 1.        , 0.96491228, 0.87719298, 0.92982456,
        1.        , 1.        , 0.9826087 , 1.        , 1.        ]),
 'Mean Training Accuracy': 97.54538520213578,
 'Training Precision scores': array([1.        , 1.        , 0.98275862, 0.91071429, 0.94827586,
        1.        , 1.        , 0.98360656, 1.        , 1.        ]),
 'Mean Training Precision': 0.9825355325849955,
 'Training Recall scores': array([1.        , 1.        , 0.95      , 0.85      , 0.91666667,
        1.        , 1.        , 0.98360656, 1.        , 1.        ]),
 'Mean Training Recall': 0.9700273224043716,
 'Training F1 scores': array([1.        , 1.        , 0.96610169, 0.87931034, 0.93220339,
        1.        , 1.        , 0.98360656, 1.        , 1.        ]),
 'Mean Training F1 Score': 0.9761221986950398,
 'Validation Accuracy scores': array([0.92307692, 0.92307692, 0.84615385, 0.69230769, 0.69230769,
        0.76923077, 0.76923077, 0.83333333, 0.91666667, 0.91666667]),
 'Mean Validation Accuracy': 82.82051282051283,
 'Validation Precision scores': array([1.        , 1.        , 0.85714286, 0.8       , 0.8       ,
        1.        , 1.        , 0.83333333, 1.        , 1.        ]),
 'Mean Validation Precision': 0.9290476190476191,
 'Validation Recall scores': array([0.85714286, 0.85714286, 0.85714286, 0.57142857, 0.57142857,
        0.57142857, 0.57142857, 0.83333333, 0.83333333, 0.83333333]),
 'Mean Validation Recall': 0.7357142857142855,
 'Validation F1 scores': array([0.92307692, 0.92307692, 0.85714286, 0.66666667, 0.66666667,
        0.72727273, 0.72727273, 0.83333333, 0.90909091, 0.90909091]),
 'Mean Validation F1 Score': 0.8142690642690642}
print(f"The main results are \n Mean Validation F1: {mlp_result_ies['Mean Validation F1 Score']} \n Mean Validation Accuracy {mlp_result_ies['Mean Validation Accuracy']}")
The main results are 
 Mean Validation F1: 0.8142690642690642 
 Mean Validation Accuracy 82.82051282051283

Conclusion#

With SVM and logistic regression, BOLD data predicts Y_A better than IES data. With the MLP, however, IES gives better results than BOLD.
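
Comparisons like this are easier to read when the mean validation scores are collected into one table. A small summary along these lines (hypothetical; it reuses the result dictionaries computed in the Y_A runs above) would do:

import pandas as pd

# build a summary table from the result dicts of the Y_A runs above
summary = pd.DataFrame({
    'SVM (BOLD)': svm_result,
    'SVM (IES)': svm_result_ies,
    'LogReg (BOLD)': logis_result,
    'MLP (BOLD)': mlp_result,
    'MLP (IES)': mlp_result_ies,
}).loc[['Mean Validation Accuracy', 'Mean Validation F1 Score']].T
print(summary)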


Old Code#

# Plot F1-Score Result
plot_result(model_name,
            "F1",
            "F1 Scores in 10 Folds",
            decision_tree_result["Training F1 scores"],
            decision_tree_result["Validation F1 scores"])
[figure: F1 Scores in 10 Folds, training vs. validation]
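
For reference, plot_result is the grouped-bar helper defined earlier in the notebook (from the same tutorial). A minimal sketch of its shape, assuming it mirrors the tutorial's version:

import numpy as np
import matplotlib.pyplot as plt

def plot_result(x_label, y_label, plot_title, train_data, val_data):
    # grouped bar chart: one pair of bars (training vs. validation) per fold
    plt.figure(figsize=(12, 6))
    labels = [f"{i + 1}. Fold" for i in range(len(train_data))]
    X_axis = np.arange(len(labels))
    plt.ylim(0.4, 1)
    plt.bar(X_axis - 0.2, train_data, 0.4, color='blue', label='Training')
    plt.bar(X_axis + 0.2, val_data, 0.4, color='red', label='Validation')
    plt.title(plot_title, fontsize=30)
    plt.xticks(X_axis, labels)
    plt.xlabel(x_label, fontsize=14)
    plt.ylabel(y_label, fontsize=14)
    plt.legend()
    plt.grid(True)
    plt.show()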

The visualizations show that the training accuracy, precision, recall, and F1 scores are 100% in every fold, while the validation scores are noticeably lower. This is over-fitting: the model performs admirably on the training data but much less well on the validation set.

Increase min_samples_split to Combat Over-fitting#

Visualizing the results like this makes over-fitting easy to spot. To combat it, we adjust the min_samples_split hyper-parameter of the decision tree: requiring more samples before a node can be split constrains how finely the tree can carve up the training data. The default value of min_samples_split is 2; we increase it to 5.

decision_tree_model_2 = DecisionTreeClassifier(criterion="entropy",
                                               min_samples_split=5,
                                               random_state=0)
decision_tree_result_2 = cross_validation(decision_tree_model_2, X, encoded_y, 10) # 10-fold cross-validation
decision_tree_result_2
{'Training Accuracy scores': array([0.99804688, 0.99804688, 0.99609375, 0.99609375, 0.99414062,
        0.9921875 , 0.99414062, 0.99609375, 0.99414062, 0.99220273]),
 'Mean Training Accuracy': 99.51187104044834,
 'Training Precision scores': array([0.9947644 , 1.        , 1.        , 0.98963731, 0.99473684,
        0.99470899, 1.        , 1.        , 0.99473684, 0.99470899]),
 'Mean Training Precision': 0.9963293377233757,
 'Training Recall scores': array([1.        , 0.99473684, 0.9895288 , 1.        , 0.9895288 ,
        0.98429319, 0.98429319, 0.9895288 , 0.9895288 , 0.98429319]),
 'Mean Training Recall': 0.9905731606503169,
 'Training F1 scores': array([0.99737533, 0.99736148, 0.99473684, 0.99479167, 0.99212598,
        0.98947368, 0.99208443, 0.99473684, 0.99212598, 0.98947368]),
 'Mean Training F1 Score': 0.9934285926176409,
 'Validation Accuracy scores': array([0.92982456, 0.84210526, 0.96491228, 0.9122807 , 0.98245614,
        0.94736842, 0.9122807 , 0.94736842, 0.96491228, 0.96428571]),
 'Mean Validation Accuracy': 93.6779448621554,
 'Validation Precision scores': array([0.875     , 0.84210526, 0.95238095, 0.94444444, 1.        ,
        0.90909091, 0.9       , 0.90909091, 0.91304348, 0.91304348]),
 'Mean Validation Precision': 0.9158199434686848,
 'Validation Recall scores': array([0.95454545, 0.72727273, 0.95238095, 0.80952381, 0.95238095,
        0.95238095, 0.85714286, 0.95238095, 1.        , 1.        ]),
 'Mean Validation Recall': 0.9158008658008658,
 'Validation F1 scores': array([0.91304348, 0.7804878 , 0.95238095, 0.87179487, 0.97560976,
        0.93023256, 0.87804878, 0.93023256, 0.95454545, 0.95454545]),
 'Mean Validation F1 Score': 0.9140921669270087}
# Plot Accuracy Result
plot_result(model_name,
            "Accuracy",
            "Accuracy scores in 10 Folds",
            decision_tree_result_2["Training Accuracy scores"],
            decision_tree_result_2["Validation Accuracy scores"])
[figure: Accuracy scores in 10 Folds, training vs. validation]
# Plot Precision Result
plot_result(model_name,
            "Precision",
            "Precision scores in 10 Folds",
            decision_tree_result_2["Training Precision scores"],
            decision_tree_result_2["Validation Precision scores"])
[figure: Precision scores in 10 Folds, training vs. validation]
# Plot Recall Result
plot_result(model_name,
            "Recall",
            "Recall scores in 10 Folds",
            decision_tree_result_2["Training Recall scores"],
            decision_tree_result_2["Validation Recall scores"])
[figure: Recall scores in 10 Folds, training vs. validation]
# Plot F1-Score Result
plot_result(model_name,
            "F1",
            "F1 Scores in 10 Folds",
            decision_tree_result_2["Training F1 scores"],
            decision_tree_result_2["Validation F1 scores"])
[figure: F1 Scores in 10 Folds, training vs. validation]