w = windows(data, subj, 500, 250, 500) # fs = subj.sfreq num_folds = 4 skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=10) out_fold_num = 0 # outer-fold number trainsetlist, testsetlist = [],[] subj_results = Results(subject, num_folds, class_names=["apple","orange","car","bus"]) subj_results.get_acc_loss_df(hyp_params, 'Fold') for inner_ind, outer_index in skf.split(data, labels): inner_fold, outer_fold = data[inner_ind], data[outer_index] inner_labels, outer_labels = labels[inner_ind], labels[outer_index] subj_results.concat_y_true(outer_labels) out_fold_num += 1 in_fold_num = 0 trainsetlist.append(SignalAndTarget(inner_fold, inner_labels)) #used for outer-fold train/test testsetlist.append(SignalAndTarget(outer_fold, outer_labels)) for train_idx, valid_idx in skf.split(inner_fold, inner_labels): X_Train, X_val = inner_fold[train_idx], inner_fold[valid_idx] y_train, y_val = inner_labels[train_idx], inner_labels[valid_idx] train_set = SignalAndTarget(X_Train, y_train) val_set = SignalAndTarget(X_val, y_val) in_fold_num += 1 hyp_param_acc, hyp_param_loss = [], []
def trainNestedCV(direct, subject, session, filename, hyp_params, parameters):
    """Run nested cross-validation for one subject and save the results.

    For every outer train/test split, the outer-training portion is split
    again into inner train/validation folds and ``clf.train_inner`` is called
    on each inner fold. The inner-fold accuracies, losses and cross-entropies
    are handed to the ``Results`` object, which is then asked for the best
    parameters by "accuracy". Those parameters are set on the classifier and
    ``clf.train_outer`` is run on the collected outer train/test sets. The
    outer-fold outputs are stored on the ``Results`` object, saved, and a
    summary is printed.

    Args:
        direct: Passed to ``load_subject`` and to
            ``Results.change_directory``.
        subject: Subject identifier; passed to ``load_subject``,
            ``format_data`` and ``Results`` and used in progress messages.
        session: Currently unused -- see the NOTE(review) below.
        filename: Passed to ``load_subject`` and ``Results``.
        hyp_params: Hyper-parameter specification forwarded to
            ``Classification`` and to the ``Results`` bookkeeping calls.
        parameters: Forwarded unchanged to ``Classification``.

    Returns:
        None. Results are written via ``subj_results.save_result()`` and a
        summary dataframe head is printed.
    """
    # NOTE(review): the session argument is ignored; load_subject is always
    # called with a literal 1. Confirm whether `session` should be passed.
    subj = load_subject(direct, subject, 1, filename)["subject"]

    # TODO: temporary test scaffolding. The labels returned by format_data are
    # discarded and replaced with random class labels, and the data is cropped.
    # The intended (currently disabled) path was:
    #   data = subj.data3D.astype(np.float32)  # convert data to 3d for deep learning
    #   labels = subj.labels.astype(np.int64)
    #   labels[:] = [x - 1 for x in labels]
    import random  # just for testing

    max_test_trials = 200  # just for testing
    data, _discarded_labels = format_data('words', subject, 4096)
    data = data[:max_test_trials, :, 0:750]
    # One random label per retained trial, so data and labels always have the
    # same length even when fewer than `max_test_trials` trials are available.
    labels = np.array(
        [random.randint(0, 3) for _ in range(len(data))]  # just for testing
    ).astype(np.int64)

    unique = np.unique(labels)
    data_params = dict(
        n_classes=len(unique),
        n_chans=6,  # n_chans = subj.n_chans
        input_time_length=subj.epoch,
    )

    # w = windows(data, subj, 500, 250, 500)  # fs = subj.sfreq  # list of windows

    num_folds = 2
    # Trials are deliberately not shuffled, to preserve their structure.
    # random_state must not be given when shuffle=False: it has no effect and
    # recent scikit-learn versions raise ValueError for that combination.
    skf = StratifiedKFold(n_splits=num_folds, shuffle=False)

    trainsetlist, testsetlist = [], []
    inner_fold_acc, inner_fold_loss, inner_fold_CE = [], [], []

    subj_results = Results(
        subject, filename,
        num_folds)  # , class_names=["apple", "orange", "car", "bus"]
    subj_results.change_directory(direct)
    # empty dataframe headed with each HP set
    subj_results.get_acc_loss_df(hyp_params, 'Fold')

    # classifier object
    clf = Classification(hyp_params, parameters, data_params, "01", "shallow",
                         "words")

    print(f"Inner-fold training for Subject {subject} in progress...")
    for inner_ind, outer_index in skf.split(data, labels):
        inner_fold, outer_fold = data[inner_ind], data[outer_index]
        inner_labels, outer_labels = labels[inner_ind], labels[outer_index]
        subj_results.concat_y_true(outer_labels)

        # Kept for the outer-fold train/test pass after parameter selection.
        trainsetlist.append(SignalAndTarget(inner_fold, inner_labels))
        testsetlist.append(SignalAndTarget(outer_fold, outer_labels))

        for train_idx, valid_idx in skf.split(inner_fold, inner_labels):
            train_set = SignalAndTarget(inner_fold[train_idx],
                                        inner_labels[train_idx])
            val_set = SignalAndTarget(inner_fold[valid_idx],
                                      inner_labels[valid_idx])

            hyp_param_acc, hyp_param_loss, hyp_param_CE = clf.train_inner(
                train_set, val_set, None, False)

            inner_fold_loss.append(hyp_param_loss)
            inner_fold_acc.append(hyp_param_acc)
            inner_fold_CE.append(hyp_param_CE)

    subj_results.fill_acc_loss_df(inner_fold_acc, inner_fold_loss,
                                  inner_fold_CE)

    # needed to select inter-subject parameters
    subj_results.get_hp_means(hyp_params, "accuracy")
    subj_results.get_best_params("accuracy")

    clf.best_params = subj_results.best_params
    clf.set_best_params()

    print(f"Best parameters selected: {clf.best_params}")
    print(
        "///////-------------------------------------------------------///////"
    )
    print(
        f"Outer-fold training and testing for Subject {subject} in progress..."
    )

    # accuracy score for each fold, combined predictions for each fold
    (scores, fold_models, predictions, probabilities,
     outer_cross_entropy) = clf.train_outer(trainsetlist, testsetlist, False)

    subj_results.outer_fold_accuracies = scores
    subj_results.y_pred = np.array(predictions)
    subj_results.y_probs = np.array(probabilities)
    subj_results.outer_fold_cross_entropies = outer_cross_entropy

    (subj_results.train_loss, subj_results.valid_loss,
     subj_results.test_loss, subj_results.train_acc,
     subj_results.valid_acc,
     subj_results.test_acc) = get_model_loss_and_acc(fold_models)

    subj_results.save_result()
    subj_results.subject_stats()

    print("")
    print(subj_results.subject_stats_df.head())