# Rating scale(s) whose n-back history is handed to the feature builder.
name_for_scale = ['awareness']
# Participant codes listed in the original script:
# ['ah', 'av', 'bj', 'cm', 'db', 'ddb', 'fcm', 'kf', 'kk', 'ml', 'qa', 'sk', 'yv']

# Restrict the analysis to a single participant's rows.
participant = 'db'
df_sub = df[df['participant'] == participant]

# Repeat the whole analysis for 1-back through 4-back.
for n_back in np.arange(1, 5):
    X, y, groups = utils.get_features_targets_groups(
        df_sub.dropna(),
        n_back=n_back,
        names=name_for_scale,
        independent_variables=feature_names,
        dependent_variable=[target_name, 'correctness'],
    )
    X, y, groups = shuffle(X, y, groups)
    # Two dependent variables were requested; presumably column 0 is
    # `target_name` and column 1 is 'correctness' -- verify against the helper.
    correctness = y[:, 1]
    y = y[:, 0]

    for model_name, model in utils.make_clfs().items():
        cv = LeaveOneOut()
        print('{}-back,{}'.format(n_back, model_name))
        # One out-of-sample prediction per trial.
        preds = cross_val_predict(
            model,
            X,
            y,
            groups=groups,
            cv=cv,
            method='predict',
            verbose=2,
            n_jobs=4,
        )
        df_pred_ = pd.DataFrame(
            np.vstack([preds, correctness]).T,
            columns=['preds', 'correct'],
        )

        # Boolean masks for the two outcomes and the two predicted states.
        is_correct = correctness == 1
        is_incorrect = correctness == 0
        is_aware = preds == 1
        is_unaware = preds == 0

        # `preds`, `correctness` and `df_pred_` all have one entry per trial
        # (np.vstack above requires equal lengths), so one denominator serves
        # every estimate below.
        # NOTE(review): counts and the denominator are both incremented by 1;
        # if the labels are strictly 0/1 the complementary estimates sum to
        # slightly more than 1 -- confirm this smoothing is intended.
        smoothed_total = len(correctness) + 1

        # Marginal proportions.
        p_correct = float(np.sum(is_correct) + 1) / smoothed_total
        p_incorrect = float(np.sum(is_incorrect) + 1) / smoothed_total
        p_aware = float(np.sum(is_aware) + 1) / smoothed_total
        p_unaware = float(np.sum(is_unaware) + 1) / smoothed_total

        # Joint proportions of (outcome, predicted state).
        p_correct_aware = float(
            np.sum(np.logical_and(is_correct, is_aware)) + 1) / smoothed_total
        p_correct_unaware = float(
            np.sum(np.logical_and(is_correct, is_unaware)) + 1) / smoothed_total
        p_incorrect_aware = float(
            np.sum(np.logical_and(is_incorrect, is_aware)) + 1) / smoothed_total
        p_incorrect_unaware = float(
            np.sum(np.logical_and(is_incorrect, is_unaware)) + 1) / smoothed_total

        # Rank correlation between the predictions and trial correctness.
        correlation, pval = stats.spearmanr(preds, correctness)

        results['sub'].append(participant)
        results['model'].append(model_name)
# Proportion of each data set drawn on every cross-validation repetition.
pr = 0.7

# Draw random subsets of the training data and of the test data so that the
# variance of the cross validation can be estimated.
n_train_pick = int(pr * len(X_pos))
n_test_pick = int(pr * len(X_att))

# All training index sets are drawn first, then all test index sets; this
# keeps the sequence of random draws the same as in the original script.
idxs_train = [
    np.random.choice(len(X_pos), size=n_train_pick, replace=False)
    for _ in range(n_cv)
]
idxs_test = [
    np.random.choice(len(X_att), size=n_test_pick, replace=False)
    for _ in range(n_cv)
]

# Cross-experiment validation, once per model returned by make_clfs().
for model_name in make_clfs():
    scores = []
    permutation_scores = []
    n_permutations = 2000

    fold_pairs = enumerate(zip(idxs_train, idxs_test))
    for fold, (idx_train, idx_test) in tqdm(
            fold_pairs, desc='cv-{}'.format(model_name)):
        # Build a new classifier for every fold (the original comment names
        # the candidates as LG or RF).
        clf = make_clfs()[model_name]

        # Training features/targets come from the X_pos / y_pos arrays ...
        X_train = X_pos[idx_train]
        y_train = y_pos[idx_train]
        # ... and testing features/targets from the X_att / y_att arrays.
        X_test = X_att[idx_test]
        y_test = y_att[idx_test]

        clf.fit(X_train, y_train)
        preds = clf.predict_proba(X_test)
# Put both tables in the same row order so that columns can be copied across
# positionally (the copy below uses `.values`, which ignores the index).
sort_keys = ['sub', 'model', 'window', 'experiment']
df = df.sort_values(sort_keys)
df_decode = df_decode.sort_values(sort_keys)

# Bring over every column that exists only in df_decode.
for col_name in df_decode.columns:
    if col_name in df.columns:
        continue
    df[col_name] = df_decode[col_name].values

# One row of panels per conditional probability.
col_to_plot = [
    'p(correct|awareness)',
    'p(correct|unawareness)',
    'p(incorrect|awareness)',
    'p(incorrect|unawareness)',
]

fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(20, 25), sharey=True)

# Rows follow `col_to_plot`; within a row, panels are paired with the model
# names in the order utils.make_clfs() yields them.
for col_name, ax in zip(col_to_plot, axes):
    for model, a in zip(utils.make_clfs().keys(), ax):
        rows_for_model = df['model'] == model
        df_work = df[rows_for_model]
        sns.barplot(
            data=df_work,
            x='window',
            y=col_name,
            hue='experiment',
            ax=a,
        )
names=[ 'success', # need to normalize to 0 and 1 'awareness', # need to normalize to 0 and 1 'confidence' ], # need to normalize to 0 and 1 independent_variables=['correct', 'awareness', 'confidence'], dependent_variable='success') ################################################################################## ################## train on one experiment and test on the individual subjects in ################## the other experiment ########################################## # train on POS first experiment_train = 'POS' # define the source data experiment_test = 'ATT' # define the target data for participant, df_sub in att.groupby( 'participant'): # loop through the subjects in ATT as test data for model_name in make_clfs().keys(): # loop through the 2 models cv = StratifiedShuffleSplit(n_splits=n_cv, test_size=pr, random_state=12345) # in each fold of the cross-validation for fold, (train, _) in enumerate(cv.split(X_pos, y_pos)): X_train = X_pos[ train] # pick a proportion of the training/source data y_train = y_pos[ train] # pick a proportion of the training/source lables clf = make_clfs()[model_name] # initialize the model clf.fit(X_train, y_train) # fit the model # prepare the test for a give subject X_test, y_test, _ = get_features_targets_groups( df_sub,