def classif_psd(state, elec, freq, n_jobs=-1):
    """Classify one PSD feature file and save the classification output.

    The number of trials kept per subject is the smallest value found in
    the ``STATE_LIST`` columns of ``info_data.csv``.  Groups and labels are
    generated from that count, the PSD ``.mat`` file for
    ``(state, freq, elec)`` is loaded and subsampled through
    ``prepare_data``, and an LDA is evaluated with
    ``StratifiedLeave2GroupsOut`` through the project's ``classification``
    helper.  The returned dictionary is written under ``SAVE_PATH/results``.

    Args:
        state: State name used in the data file name.
        elec: Electrode identifier used in the data file name.
        freq: Frequency band name used in the data file name.
        n_jobs: Forwarded to ``classification``.
    """
    trial_counts = pd.read_csv(SAVE_PATH.parent / "info_data.csv")[STATE_LIST]
    n_trials = trial_counts.min().min()
    n_subs = len(trial_counts) - 1
    n_total = n_trials * n_subs
    half = n_total / 2

    # One group id per subject, repeated for each of its trials.
    groups = [sub for sub in range(n_subs) for _ in range(n_trials)]
    # First half of the samples is class 0, the rest is class 1.
    labels = [0 if idx < half else 1 for idx in range(n_total)]

    print(state, elec, freq)

    suffix = "_{}_{}_{}_{}_{:.2f}.mat".format(
        state, freq, elec, WINDOW, OVERLAP)
    data_file_name = NAME + suffix
    data_file_path = SAVE_PATH / data_file_name
    save_file_path = SAVE_PATH / "results" / (PREFIX + data_file_name)

    raw = loadmat(data_file_path)
    prepared = prepare_data(raw, n_trials=n_trials, random_state=666)
    # A single feature: shape the samples as one column.
    features = np.array(prepared).reshape(len(prepared), 1)

    cross_val = StratifiedLeave2GroupsOut()
    estimator = LDA(solver=SOLVER)
    outcome = classification(
        estimator, cross_val, features, labels, groups, N_PERM,
        n_jobs=n_jobs)
    savemat(save_file_path, outcome)
def search_comment():
    """Return the stored comments for an image as an HTML fragment.

    The request's ``data`` form field is parsed as JSON; its ``imgname``
    path is split on ``/``, the resulting parts are passed to
    ``classification`` to obtain the group, and the last part is used as
    the photo name for the lookup in the ``comment`` table.

    Returns:
        str: One ``<p class="form-control_x">`` paragraph per comment, or a
        single placeholder paragraph when no comment exists.
    """
    import html

    def _comment_text(row):
        # DB-API rows are sequences (one column selected here); also accept
        # mapping rows in case a dict-style cursor is configured.
        return row["comment"] if isinstance(row, dict) else row[0]

    data = json.loads(request.form.get('data'))
    imgname = data['imgname'].split('/')
    group = str(classification(imgname))
    sql = "select comment from comment where `group`=%s and photoname=%s;"
    cursor.execute(sql, [group, imgname[-1]])
    results = cursor.fetchall()

    if not results:
        return "<p class=\"form-control_x\">" + "暂未有人评论" + "</p>\n"

    # Comments are user-supplied text: escape them before embedding in HTML.
    return "".join(
        "<p class=\"form-control_x\">"
        + html.escape(str(_comment_text(row)))
        + "</p>\n"
        for row in results
    )
def main(state, elec):
    """Classify one electrode using every non-Gamma PSD band as a feature.

    Labels are loaded from ``<state>_labels.mat`` and turned into labels and
    groups with ``create_groups``.  If the results file does not exist yet,
    one PSD file per non-Gamma key of ``FREQ_DICT`` is loaded, flattened to
    a column and stacked horizontally; an LDA is then evaluated with
    ``StratifiedLeave2GroupsOut`` via ``classification`` and the output is
    saved with ``savemat``.

    If any PSD file is missing, the missing names are printed and no
    classification is run (previously the run continued with ``None`` or an
    incomplete feature matrix).

    Args:
        state: State name used in the label and data file names.
        elec: Electrode identifier used in the data file names.
    """
    labels = loadmat(LABEL_PATH / state + '_labels.mat')['y'].ravel()
    labels, groups = create_groups(labels)
    print(state, elec)

    results_file_path = SAVE_PATH / 'results' / \
        'perm_PSDM_{}_{}_{}_{:.2f}_NoGamma.mat'.format(
            state, elec, WINDOW, OVERLAP)
    if path(results_file_path).isfile():
        return

    columns = []
    missing = False
    for key in FREQ_DICT:
        if key.startswith('Gamma'):
            continue
        data_file_path = SAVE_PATH / \
            'PSD_{}_{}_{}_{}_{:.2f}.mat'.format(
                state, key, elec, WINDOW, OVERLAP)
        if not path(data_file_path).isfile():
            print(path(data_file_path).name + ' Not found')
            missing = True
            continue
        temp = loadmat(data_file_path)['data'].ravel()
        # Same ordering as before: the sub-arrays end up concatenated in
        # reverse file order (last sub-array first).
        # NOTE(review): confirm this ordering matches the label ordering.
        data = np.concatenate([submat.ravel() for submat in temp[::-1]])
        columns.append(data.reshape(len(data), 1))

    if missing:
        print('please run "computePSD.py" and '
              '"group_PSD_per_subjects.py" before running this script')
        return

    final_data = np.hstack(columns) if columns else None

    sl2go = StratifiedLeave2GroupsOut()
    clf = LDA()
    save = classification(clf, sl2go, final_data, labels, groups, n_jobs=-1)
    savemat(results_file_path, save)
def send_comment():
    """Store a new comment for an image and return the updated HTML list.

    The request's ``data`` form field is parsed as JSON.  ``img2src`` gives
    the image path (split on ``/``; the parts are passed to
    ``classification`` for the group and the last part is the photo name),
    ``comment_content`` is the new comment, and ``userid`` is read from the
    session and stored in the ``phonenum`` column.

    Returns:
        str: HTML paragraphs with the new comment first, followed by the
        comments that existed before the insert.
    """
    import html

    def _comment_text(row):
        # DB-API rows are sequences (one column selected here); also accept
        # mapping rows in case a dict-style cursor is configured.
        return row["comment"] if isinstance(row, dict) else row[0]

    def _paragraph(text):
        # Comments are user-supplied text: escape before embedding in HTML.
        return ("<p class=\"form-control_x\">"
                + html.escape(str(text)) + "</p>\n")

    data = json.loads(request.form.get('data'))
    imgname = data['img2src'].split('/')
    comment_content = data['comment_content']
    userid = session.get('userid')

    # Fetch the existing comments first.
    group = str(classification(imgname))
    sql_1 = "select comment from comment where `group`=%s and photoname=%s;"
    cursor.execute(sql_1, [group, imgname[-1]])
    results = cursor.fetchall()

    # Insert the new comment.
    sql_2 = "insert into comment(`group`, photoname, comment, phonenum) values (%s, %s, %s, %s);"
    cursor.execute(sql_2, [group, imgname[-1], comment_content, userid])
    connect.commit()

    # The newly posted comment is shown at the top.
    parts = [_paragraph(comment_content)]
    parts.extend(_paragraph(_comment_text(row)) for row in results)
    return "".join(parts)
def classif_subcosp(state, freq, elec, n_jobs=-1):
    """Run (and resume) bootstrapped classification for one data file.

    Loads the ``.npy`` data file for ``(state, freq, elec)``, then for each
    remaining repetition prepares the data, runs ``classification`` with a
    ``TSclassifier`` wrapping an LDA, accumulates the outputs and saves the
    running result to ``SAVE_PATH/results`` with ``np.save``.  Finally the
    mean scores are stored and a summary is printed.

    Args:
        state: State name used in the file names.
        freq: Frequency band name used in the file names.
        elec: Electrode identifier used in the file names.
        n_jobs: Forwarded to ``classification``.
    """
    global CHANGES
    # Reset the flag so a value left over from a previous call cannot
    # trigger a spurious final save.
    CHANGES = False
    print(state, freq)

    if SUBSAMPLE or ADAPT:
        info_data = pd.read_csv(SAVE_PATH.parent / "info_data.csv")[STATE_LIST]
        if SUBSAMPLE:
            n_trials = info_data.min().min()
            # NOTE(review): hard-coded override of the computed minimum,
            # kept as in the original; confirm it is still wanted.
            n_trials = 61
        elif ADAPT:
            n_trials = info_data.min()[state]
    elif FULL_TRIAL:
        # NOTE(review): `labels` is not assigned in this function for the
        # FULL_TRIAL path but is used below; confirm where it comes from.
        groups = range(36)
    labels_og = INIT_LABELS

    file_path = (
        SAVE_PATH / "results" / PREFIX + NAME
        + "_{}_{}_{}_{}_{:.2f}.npy".format(state, freq, elec, WINDOW, OVERLAP))

    if not file_path.isfile():
        n_rep = 0
    else:
        # The results dict is written with np.save, which stores it as a
        # pickled 0-d object array: it must be loaded with allow_pickle and
        # unwrapped with .item() before it can be indexed by key.  The file
        # is produced by this script itself.
        final_save = np.load(file_path, allow_pickle=True).item()
        n_rep = int(final_save["n_rep"])
        n_splits = int(final_save["n_splits"])
    print("Starting from i={}".format(n_rep))

    file_name = NAME + "_{}_{}_{}_{}_{:.2f}.npy".format(
        state, freq, elec, WINDOW, OVERLAP)
    data_file_path = SAVE_PATH / file_name
    data_og = np.load(data_file_path)

    if FULL_TRIAL:
        cv = SSS(9)
    else:
        cv = StratifiedShuffleGroupSplit(2)
    lda = LDA()
    clf = TSclassifier(clf=lda)

    for i in range(n_rep, N_BOOTSTRAPS):
        CHANGES = True
        if FULL_TRIAL:
            data = data_og["data"]
        elif SUBSAMPLE or ADAPT:
            data, labels, groups = prepare_data(
                data_og, labels_og, n_trials=n_trials, random_state=i)
        else:
            data, labels, groups = prepare_data(data_og, labels_og)
        n_splits = cv.get_n_splits(None, labels, groups)

        save = classification(clf, cv, data, labels, groups, N_PERM,
                              n_jobs=n_jobs)

        if i == 0:
            final_save = save
        elif BOOTSTRAP:
            # Accumulate every output except the split count.
            for key, value in save.items():
                if key != "n_splits":
                    final_save[key] += value

        final_save["n_rep"] = i + 1
        np.save(file_path, final_save)

    final_save["auc_score"] = np.mean(final_save.get("auc_score", 0))
    final_save["acc_score"] = np.mean(final_save["acc_score"])
    if CHANGES:
        np.save(file_path, final_save)

    to_print = "accuracy for {} {} : {:.2f}".format(
        state, freq, final_save["acc_score"])
    if BOOTSTRAP:
        # Spread of the per-repetition mean accuracies.
        standev = np.std([
            np.mean(final_save["acc"][i * n_splits:(i + 1) * n_splits])
            for i in range(N_BOOTSTRAPS)
        ])
        to_print += " (+/- {:.2f})".format(standev)
    print(to_print)
    if PERM:
        print("pval = {}".format(final_save["acc_pvalue"]))
experiment = 'e1' # for some of the variables, we need to rescale them to a more preferable range like 0-1 name_for_scale = ['awareness'] # ['ah', 'av', 'bj', 'cm', 'db', 'ddb', 'fcm', 'kf', 'kk', 'ml', 'qa','sk', 'yv'] # get one of the participants' data participant = 'ah' df_sub = df[df['participant'] == participant] # for 1-back to 4-back for n_back in np.arange(1,5): # experiment score results = utils.classification( df_sub.dropna(), # take out nan rows feature_names, # feature columns target_name, # target column results, # the saving structure participant, # participant's name experiment, # experiment name window = n_back, # N-back chance = False, # it is NOT estimating the chance level but the empirical classification experiment name_for_scale = name_for_scale # scale some of the variables ) # empirical chance level results = utils.classification( df_sub.dropna(), feature_names, target_name, results, participant, experiment, window = n_back, chance = True, # it is to estimate the empirical chance level
def classif_cov(state):
    """Run (and resume) repeated classification on one state's data file.

    Labels and groups are built according to ``FULL_TRIAL`` / ``SUBSAMPLE``
    (or loaded from ``<state>_labels.mat`` otherwise).  For each remaining
    repetition the data is prepared, optionally reduced by deleting row and
    column ``i`` of every matrix (``REDUCED``), classified with a
    ``TSclassifier`` wrapping an LDA, and the accumulated output is saved
    with ``savemat`` after every repetition.

    Args:
        state: State name used in the label, data and result file names.
    """
    print(state)
    if FULL_TRIAL:
        labels = np.concatenate((np.ones(18), np.zeros(18)))
        groups = range(36)
    elif SUBSAMPLE:
        info_data = pd.read_csv(SAVE_PATH.parent / "info_data.csv")[STATE_LIST]
        n_trials = info_data.min().min()
        n_subs = len(info_data) - 1
        groups = [i for i in range(n_subs) for _ in range(n_trials)]
        n_total = n_trials * n_subs
        labels = [0 if i < n_total / 2 else 1 for i in range(n_total)]
    else:
        labels = loadmat(LABEL_PATH / state + "_labels.mat")["y"].ravel()
        labels, groups = create_groups(labels)

    file_path = SAVE_PATH / "results" / PREFIX + NAME + "_{}.mat".format(state)

    if not file_path.isfile():
        n_rep = 0
    else:
        final_save = proper_loadmat(file_path)
        # Cast so the value can be used as a range() bound, as the sibling
        # functions do.
        n_rep = int(final_save["n_rep"])
    print("starting from i={}".format(n_rep))

    file_name = NAME + "_{}.mat".format(state)
    data_file_path = SAVE_PATH / file_name

    if not data_file_path.isfile():
        print(data_file_path.name + " Not found")
        return

    data_og = loadmat(data_file_path)
    save = None
    for i in range(n_rep, N_BOOTSTRAPS):
        if FULL_TRIAL:
            data = data_og["data"]
        elif SUBSAMPLE:
            data = prepare_data(data_og, n_trials=n_trials, random_state=i)
        else:
            data = prepare_data(data_og)

        if REDUCED:
            # Drop row and column i from every matrix.
            data = np.asarray([
                np.delete(np.delete(submat, i, 0), i, 1) for submat in data
            ])

        if FULL_TRIAL:
            crossval = SSS(9)
        else:
            crossval = StratifiedLeave2GroupsOut()
        lda = LDA()
        clf = TSclassifier(clf=lda)
        save = classification(clf, crossval, data, labels, groups, N_PERM,
                              n_jobs=-1)

        print(save["acc_score"])
        if i == 0:
            final_save = save
        elif BOOTSTRAP or REDUCED:
            for key, value in save.items():
                final_save[key] += value

        final_save["n_rep"] = i + 1
        savemat(file_path, final_save)

    final_save["n_rep"] = N_BOOTSTRAPS
    if BOOTSTRAP:
        final_save["auc_score"] = np.mean(final_save["auc_score"])
        final_save["acc_score"] = np.mean(final_save["acc_score"])
    savemat(file_path, final_save)

    # Report the last repetition as before; when the run was already
    # complete and no repetition ran, fall back to the loaded results.
    report = final_save if save is None else save
    # The format string previously had one placeholder too many, which
    # raised TypeError.
    print("accuracy for %s : %0.2f (+/- %0.2f)" %
          (state, np.mean(report["acc_score"]), np.std(report["acc"])))
    if PERM:
        print("pval = {}".format(report["acc_pvalue"]))
def main(state):
    """Bootstrap the classification of one state and save the pooled output.

    Labels and groups depend on ``FULL_TRIAL`` / ``SUBSAMPLE``; otherwise
    they are derived from ``<state>_labels.mat`` through ``create_groups``.
    Nothing is done when the results file already exists.  For each of the
    ``N_BOOTSTRAPS`` repetitions the data file is reloaded, prepared and
    classified; the outputs of all repetitions are added together key by
    key and written with ``savemat``.

    Args:
        state: State name used in the label, data and result file names.
    """
    print(state)
    if FULL_TRIAL:
        labels = np.concatenate((np.ones(18), np.zeros(18)))
        groups = range(36)
    elif SUBSAMPLE:
        trial_counts = pd.read_csv(
            SAVE_PATH.parent / "info_data.csv")[STATE_LIST]
        # Test setting kept from the original: only the "SWS" column is used.
        trial_counts = trial_counts["SWS"]
        n_trials = trial_counts.min().min()
        n_subs = len(trial_counts) - 1
        n_total = n_trials * n_subs
        # Group ids cycle through the subjects once per trial.
        groups = [sub for _ in range(n_trials) for sub in range(n_subs)]
        half = n_total / 2
        labels = [0 if idx < half else 1 for idx in range(n_total)]
    else:
        labels = loadmat(LABEL_PATH / state + "_labels.mat")["y"].ravel()
        labels, groups = create_groups(labels)

    file_name = prefix + name + "n153_{}.mat".format(state)
    save_file_path = SAVE_PATH / "results" / file_name
    if save_file_path.isfile():
        return

    data_file_path = SAVE_PATH / name + "_{}.mat".format(state)
    if not data_file_path.isfile():
        print(data_file_path.name + " Not found")
        return

    pooled = None
    for rep in range(N_BOOTSTRAPS):
        data = loadmat(data_file_path)
        if FULL_TRIAL:
            data = data["data"]
        elif SUBSAMPLE:
            data = prepare_data(data, n_trials=n_trials, random_state=rep)
        else:
            data = prepare_data(data)

        sl2go = StratifiedLeave2GroupsOut()
        clf = TSclassifier(clf=LDA())
        save = classification(clf, sl2go, data, labels, groups, N_PERM,
                              n_jobs=-1)
        # Keep the per-repetition scores as one-element lists.
        save["acc_bootstrap"] = [save["acc_score"]]
        save["auc_bootstrap"] = [save["auc_score"]]

        if pooled is None:
            pooled = save
            continue
        for key in list(pooled):
            pooled[key] = pooled[key] + save[key]

    savemat(save_file_path, pooled)
    summary = (state, save["acc_score"], np.std(save["acc"]))
    print("accuracy for %s : %0.2f (+/- %0.2f)" % summary)
def classif_psd(state, elec, n_jobs=-1):
    """Run (and resume) bootstrapped PSD classification for every frequency.

    For each key of ``FREQ_DICT`` the PSD ``.mat`` file for
    ``(state, freq, elec)`` is loaded, and for every remaining repetition
    the data is prepared, reshaped to a single column and classified with
    an LDA using ``StratifiedShuffleGroupSplit(2)``.  Outputs are
    accumulated across repetitions and saved with ``savemat``; a summary
    line is printed per frequency.

    Args:
        state: State name used in the file names.
        elec: Electrode identifier used in the file names.
        n_jobs: Forwarded to ``classification``; intermediate results are
            saved after each repetition only when it equals ``-1``.
    """
    if SUBSAMPLE or ADAPT:
        info_data = pd.read_csv(SAVE_PATH.parent / "info_data.csv")[STATE_LIST]
        if SUBSAMPLE:
            n_trials = info_data.min().min()
        elif ADAPT:
            n_trials = info_data.min()[state]
    labels_og = INIT_LABELS

    for freq in FREQ_DICT:
        print(state, elec, freq)

        data_file_name = NAME + "_{}_{}_{}_{}_{:.2f}.mat".format(
            state, freq, elec, WINDOW, OVERLAP)
        save_file_name = PREFIX + data_file_name
        data_file_path = SAVE_PATH / data_file_name
        save_file_path = SAVE_PATH / "results" / save_file_name

        if not save_file_path.isfile():
            n_rep = 0
        else:
            final_save = proper_loadmat(save_file_path)
            n_rep = int(final_save["n_rep"])

        CHANGES = False
        print("Starting from i={}".format(n_rep))

        og_data = loadmat(data_file_path)
        crossval = StratifiedShuffleGroupSplit(2)
        clf = LDA(solver=SOLVER)

        for i in range(n_rep, N_BOOTSTRAPS):
            CHANGES = True
            if SUBSAMPLE or ADAPT:
                data, labels, groups = prepare_data(
                    og_data, labels_og, n_trials=n_trials, random_state=i)
            else:
                data, labels, groups = prepare_data(og_data, labels_og)
            data = np.array(data).reshape(-1, 1)

            save = classification(clf, crossval, data, labels, groups,
                                  N_PERM, n_jobs=n_jobs)

            if i == 0:
                final_save = save
            elif BOOTSTRAP:
                for key, value in save.items():
                    final_save[key] += value

            final_save["n_rep"] = i + 1
            if n_jobs == -1:
                savemat(save_file_path, final_save)

        if BOOTSTRAP:
            final_save["auc_score"] = np.mean(final_save["auc_score"])
            final_save["acc_score"] = np.mean(final_save["acc_score"])
        if CHANGES:
            savemat(save_file_path, final_save)

        # Each repetition appends its per-split accuracies to "acc", so one
        # repetition spans len(acc) // N_BOOTSTRAPS consecutive entries.
        # The previous code sliced in chunks of N_BOOTSTRAPS entries, which
        # mixes accuracies from different repetitions.
        acc = final_save["acc"]
        per_rep = len(acc) // N_BOOTSTRAPS if N_BOOTSTRAPS else 0
        if BOOTSTRAP and per_rep:
            standev = np.std([
                np.mean(acc[k * per_rep:(k + 1) * per_rep])
                for k in range(N_BOOTSTRAPS)
            ])
        else:
            # Single repetition: report the spread across splits instead.
            standev = np.std(acc)

        print("accuracy for {} {} : {:.2f} (+/- {:.2f})".format(
            state, elec, final_save["acc_score"], standev))
        if PERM:
            print("pval = {:.4f}".format(final_save["acc_pvalue"]))
window=[], correct=[], awareness=[], confidence=[], RT_correct=[], RT_awareness=[], RT_confidence=[], ) for n_back in range(5): # loop through the number of trials looking back # this is the part that is redundent and the code is long results = classification( df_sub, feature_names, target_name, results, participant, experiment, window=n_back, chance=False, ) temp = pd.DataFrame(results) temp.to_csv(os.path.join( saving_dir, 'att_6_features (experiment score)_{}.csv'.format(participant)), index=False) # save as a csv ################################################################################ # use success, awareness, and confidence as features np.random.seed(12345) # use judgement features feature_names = [ 'correct',
sub=[], model=[], score=[], window=[], correctness=[], awareness=[], confidence=[], ) # use success, awareness, and confidence as features np.random.seed(12345) # use judgement features feature_names = [ 'correctness', 'awareness', 'confidence', ] target_name = 'awareness' experiment = 'e1' name_for_scale = ['awareness'] for participant, df_sub in df.groupby(['participant']): for n_back in np.arange(1, 5): results = utils.classification(df_sub.dropna(), feature_names, target_name, results, participant, experiment, window=n_back, chance=False, name_for_scale=name_for_scale)
target = ( df_block[[ # 'trials', 'success' ]].shift(n_back ) # same thing for the target, but shifting downward .dropna().values) features.append(feature) targets.append(target) features = np.concatenate(features) targets = np.concatenate(targets) features, targets = shuffle(features, targets) # this is the part that is redundent and the code is long results = classification(features, targets, results, participant, experiment, window=n_back) c = pd.DataFrame(results) # tansform a dictionary object to a data frame c.to_csv('../results/Pos.csv', index=False) # save as a csv c = pd.read_csv('../results/Pos.csv') # now it is the nonparametric t test with random resampling ttest = dict(model=[], window=[], ps_mean=[], ps_std=[]) for (model, window), c_sub in c.groupby(['model', 'window']): ps = resample_ttest( c_sub['score'].values, # numpy-array baseline=0.5, # the value we want to compare against to n_ps=500, # estimate the p value 500 times n_permutation=int(5e4) # use 50000 resamplings to estimate 1 p value ) ttest['model'].append(model)
def classif_cosp(state, n_jobs=-1):
    """Run (and resume) bootstrapped classification over all frequencies.

    For every remaining repetition, the data file of each key of
    ``FREQ_DICT`` is loaded and prepared, then ``classification`` is run
    with a ``TSclassifier`` wrapping an LDA.  Outputs are accumulated across
    repetitions and saved with ``savemat``; a summary is printed at the end.

    Args:
        state: State name used in the file names.
        n_jobs: Forwarded to ``classification``; intermediate results are
            saved after each repetition only when it equals ``-1``.
    """
    global CHANGES
    # Reset the flag so a value left over from a previous call cannot
    # trigger a spurious final save.
    CHANGES = False
    print(state, "multif")

    if SUBSAMPLE or ADAPT:
        info_data = pd.read_csv(SAVE_PATH.parent / "info_data.csv")[STATE_LIST]
        if SUBSAMPLE:
            n_trials = info_data.min().min()
        elif ADAPT:
            n_trials = info_data.min()[state]
    elif FULL_TRIAL:
        groups = range(36)
    labels_og = INIT_LABELS

    file_path = (SAVE_PATH / "results" / PREFIX + NAME
                 + "_{}_{}_{:.2f}.mat".format(state, WINDOW, OVERLAP))

    if not file_path.isfile():
        n_rep = 0
    else:
        final_save = proper_loadmat(file_path)
        n_rep = int(final_save["n_rep"])
        n_splits = int(final_save["n_splits"])
    print("Starting from i={}".format(n_rep))

    if FULL_TRIAL:
        crossval = SSS(9)
    else:
        crossval = StratifiedShuffleGroupSplit(2)
    lda = LDA()
    clf = TSclassifier(clf=lda)

    # Label used in the summary line.  It is the last frequency processed,
    # as before; the fallback avoids a NameError when no repetition runs
    # because the saved run is already complete.
    last_freq = "multif"

    for i in range(n_rep, N_BOOTSTRAPS):
        CHANGES = True
        data_freqs = []
        for freq in FREQ_DICT:
            last_freq = freq
            file_name = NAME + "_{}_{}_{}_{:.2f}.mat".format(
                state, freq, WINDOW, OVERLAP)
            data_file_path = SAVE_PATH / file_name
            data_og = loadmat(data_file_path)["data"].ravel()
            data_og = np.asarray([sub.squeeze() for sub in data_og])
            if SUBSAMPLE or ADAPT:
                data, labels, groups = prepare_data(
                    data_og, labels_og, n_trials=n_trials, random_state=i)
            else:
                data, labels, groups = prepare_data(data_og, labels_og)
            data_freqs.append(data)

        n_splits = crossval.get_n_splits(None, labels, groups)

        data_freqs = np.asarray(data_freqs).swapaxes(0, 1).swapaxes(
            1, 3).swapaxes(1, 2)
        # NOTE(review): `data_freqs` is built but the classifier is given
        # `data`, i.e. only the last frequency.  Kept unchanged; confirm
        # whether the stacked array was meant to be used here.
        save = classification(clf, crossval, data, labels, groups, N_PERM,
                              n_jobs=n_jobs)

        if i == 0:
            final_save = save
        elif BOOTSTRAP:
            # Accumulate every output except the split count.
            for key, value in save.items():
                if key != "n_splits":
                    final_save[key] += value

        final_save["n_rep"] = i + 1
        if n_jobs == -1:
            savemat(file_path, final_save)

    final_save["auc_score"] = np.mean(final_save.get("auc_score", 0))
    final_save["acc_score"] = np.mean(final_save["acc_score"])
    if CHANGES:
        savemat(file_path, final_save)

    to_print = "accuracy for {} {} : {:.2f}".format(
        state, last_freq, final_save["acc_score"])
    if BOOTSTRAP:
        # Spread of the per-repetition mean accuracies.
        standev = np.std([
            np.mean(final_save["acc"][i * n_splits:(i + 1) * n_splits])
            for i in range(N_BOOTSTRAPS)
        ])
        to_print += " (+/- {:.2f})".format(standev)
    print(to_print)
    if PERM:
        print("pval = {}".format(final_save["acc_pvalue"]))
def backward_selection(clf, data, labels, cv=3, groups=None, prev_ind=None,
                       prev_score=0, index_list=None):
    """Recursively try removing features (row/column pairs) from ``data``.

    At each step the current data is scored with ``classification``.  If the
    score is at least ``prev_score``, the current index is recorded and the
    search continues on the data with that row and column removed from
    every matrix; otherwise the same data is retried with the next lower
    index.  The recursion stops when ``prev_ind`` reaches ``-1``.

    Args:
        clf: Classifier forwarded to ``classification``.
        data: Indexable collection of 2-D arrays; ``data.shape[1]`` gives
            the number of candidate indices.
        labels: Labels, indexable by an integer array.
        cv: Number of folds (the samples are then shuffled and a
            ``StratifiedKFold`` is created for the recursive calls) or a
            cross-validation object.
        groups: Forwarded to ``classification``.
        prev_ind: Index to try next; ``None`` starts from the last index.
        prev_score: Score to beat.
        index_list: List the recorded indices are appended to.  A new list
            is created when omitted; a list passed in is extended in place.

    Returns:
        tuple: ``(index_list, prev_score)``.
    """
    # A mutable default would be shared by every top-level call, leaking
    # indices from one search into the next.
    if index_list is None:
        index_list = []

    # Exit condition: we have tried everything
    if prev_ind == -1:
        return index_list, prev_score

    if prev_ind is None:
        ind = data.shape[1] - 1
    else:
        ind = prev_ind

    if isinstance(cv, int):
        index = np.random.permutation(list(range(len(data))))
        labels = labels[index]
        data = data[index]
        croval = StratifiedKFold(n_splits=cv)
    else:
        croval = cv

    # Do classification
    save = classification(clf, cv=cv, X=data, y=labels, groups=groups,
                          n_jobs=-1)
    score = np.mean(save["acc_score"])

    # Remove row and column `ind` from every matrix.
    reduced_data = np.asarray([
        np.delete(np.delete(submat, ind, 0), ind, 1) for submat in data
    ])

    # If better score we continue exploring this reduced data
    print(data.shape, ind, score, prev_score)
    if score >= prev_score:
        if prev_ind is None and ind == data.shape[1] - 1:
            ind = prev_ind
        index_list.append(ind)
        return backward_selection(
            clf,
            reduced_data,
            labels,
            croval,
            groups,
            prev_score=score,
            index_list=index_list,
        )

    # Else we use the same data but we delete the next index
    return backward_selection(
        clf,
        data,
        labels,
        croval,
        groups,
        prev_ind=ind - 1,
        prev_score=prev_score,
        index_list=index_list,
    )
window=[], correct=[], awareness=[], confidence=[], RT_correct=[], RT_awareness=[], RT_confidence=[], ) for n_back in range(5): # loop through the number of trials looking back # this is the part that is redundent and the code is long results = classification( df_sub, feature_names, target_name, results, participant, experiment, window=n_back, chance=True, # to estimate the chance level ) temp = pd.DataFrame(results) groupby = temp.columns temp['permutation'] = n_permutation c.append(temp) c = pd.concat(c) # concate #c = c.groupby(groupby).mean().reset_index() c.to_csv(os.path.join( saving_dir, 'Pos_6_features (empirical chance)_{}.csv'.format(participant)), index=False) # save as a csv ################################################################################
df_block[[ # 'trials', 'correct' ]].shift(n_back ) # same thing for the target, but shifting downward .dropna().values) features.append(feature) targets.append(target) features = np.concatenate(features) targets = np.concatenate(targets) features, targets = shuffle(features, targets) # this is the part that is redundent and the code is long results = classification(features, targets, results, participant, experiment, dot_dir, window=n_back, name='success') c = pd.DataFrame(results) c.to_csv('../results/Pos_control.csv', index=False) c = pd.read_csv('../results/Pos_control.csv') ttest = dict(model=[], window=[], ps_mean=[], ps_std=[]) for (model, window), c_sub in c.groupby(['model', 'window']): ps = resample_ttest( c_sub['score'].values, # numpy-array baseline=0.5, # the value we want to compare against to n_ps=500, # estimate the p value 500 times n_permutation=int(5e4) # use 50000 resamplings to estimate 1 p value ) ttest['model'].append(model)