def main(state, freq):
    """Where the magic happens"""
    print(state, freq)
    if FULL_TRIAL:
        labels = np.concatenate((np.ones(18), np.zeros(18)))
        groups = range(36)
    elif SUBSAMPLE:
        info_data = pd.read_csv(SAVE_PATH.parent / "info_data.csv")[STATE_LIST]
        n_trials = info_data.min().min()
        n_subs = len(info_data) - 1
        groups = [i for i in range(n_subs) for _ in range(n_trials)]
        n_total = n_trials * n_subs
        labels = [0 if i < n_total / 2 else 1 for i in range(n_total)]
    else:
        labels = loadmat(LABEL_PATH / state + "_labels.mat")["y"].ravel()
        labels, groups = create_groups(labels)

    file_path = (
        SAVE_PATH
        / "results"
        / PREFIX
        + NAME
        + "_{}_{}_{}_{:.2f}.mat".format(state, freq, WINDOW, OVERLAP)
    )
    if not file_path.isfile():
        file_name = NAME + "_{}_{}_{}_{:.2f}.mat".format(state, freq, WINDOW, OVERLAP)
        data_file_path = SAVE_PATH / file_name
        if data_file_path.isfile():
            final_save = {}
            random_seed = 0
            data = loadmat(data_file_path)
            if FULL_TRIAL:
                data = data["data"]
            elif SUBSAMPLE:
                data = prepare_data(data, n_trials=n_trials, random_state=random_seed)
            else:
                data = prepare_data(data)

            sl2go = StratifiedLeave2GroupsOut()
            lda = LDA()
            clf = TSclassifier(clf=lda)
            best_combin, best_score = backward_selection(
                clf, data, labels, sl2go, groups
            )
            final_save = {
                "best_combin_index": best_combin,
                "best_combin": CHANNEL_NAMES[best_combin],
                "score": best_score,
            }
            savemat(file_path, final_save)
            print(f"Best combin: {CHANNEL_NAMES[best_combin]}, score: {best_score}")
        else:
            print(data_file_path.name + " Not found")
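# `create_groups` is imported from a shared utils module in these scripts but
# not shown here. A minimal sketch of what it might look like, assuming it
# flattens an iterable of per-subject label sequences and tags every trial of
# subject i with group index i (an assumption, not the actual utils code):
import numpy as np

def create_groups(y):
    """Hypothetical: flatten per-subject labels, one group index per subject."""
    groups = []
    for k, sub in enumerate(y):
        # every trial of subject k belongs to group k, so grouped CV splitters
        # can keep all of a subject's trials on the same side of a split
        groups.extend([k] * len(np.asarray(sub).ravel()))
    labels = np.concatenate([np.asarray(sub).ravel() for sub in y])
    return labels, np.asarray(groups)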
def prepare_data(dico, labels=None, rm_outl=None, key="data", n_trials=None,
                 random_state=0):
    data = dico[key].ravel()
    data = np.asarray([sub.squeeze() for sub in data])
    final_data = None
    if rm_outl is not None:
        data = np.asarray([rm_outliers(sub, rm_outl) for sub in data])

    sizes = [len(sub) for sub in data]
    if n_trials is not None:
        n_sub_min = min(sizes)
        if n_trials > n_sub_min:
            print(
                "can't take {} trials, will take the minimum amount {} instead".format(
                    n_trials, n_sub_min
                )
            )
            n_trials = n_sub_min
        labels = np.asarray([[lab] * n_trials for lab in labels])
    elif labels is not None:
        labels = np.asarray([[labels[i]] * size for i, size in enumerate(sizes)])
    else:
        raise Exception(
            "Error: either specify a number of trials and the "
            "labels will be generated or give the original labels"
        )
    labels, groups = create_groups(labels)

    for submat in data:
        if submat.shape[0] == 1:
            submat = submat.ravel()
        if n_trials is not None:
            index = np.random.RandomState(random_state).choice(
                range(len(submat)), n_trials, replace=False
            )
            prep_submat = submat[index]
        else:
            prep_submat = submat

        final_data = (
            prep_submat
            if final_data is None
            else np.concatenate((prep_submat, final_data))
        )
    return np.asarray(final_data), labels, groups
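# Hypothetical usage sketch for prepare_data. The .mat layout is assumed
# (dico["data"] holding one trials-matrix per subject) and the file name is
# made up for illustration:
from scipy.io import loadmat

dico = loadmat("PSD_S2_Alpha_0_1000_0.00.mat")   # assumed file name
per_subject_labels = [0] * 18 + [1] * 18          # one label per subject
X, y, groups = prepare_data(dico, labels=per_subject_labels)
# X: stacked trials, y: one label per trial, groups: subject index per trial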
def main(state, elec):
    labels = loadmat(LABEL_PATH / state + '_labels.mat')['y'].ravel()
    labels, groups = create_groups(labels)
    final_data = None
    print(state, elec)

    results_file_path = SAVE_PATH / 'results' / \
        'perm_PSDM_{}_{}_{}_{:.2f}_NoGamma.mat'.format(
            state, elec, WINDOW, OVERLAP)
    if not path(results_file_path).isfile():
        for key in FREQ_DICT:
            if not key.startswith('Gamma'):
                # print('\nloading PSD for {} frequencies'.format(key))
                data_file_path = SAVE_PATH / \
                    'PSD_{}_{}_{}_{}_{:.2f}.mat'.format(
                        state, key, elec, WINDOW, OVERLAP)
                if path(data_file_path).isfile():
                    temp = loadmat(data_file_path)['data'].ravel()
                    data = temp[0].ravel()
                    for submat in temp[1:]:
                        data = np.concatenate((submat.ravel(), data))
                    data = data.reshape(len(data), 1)
                    final_data = data if final_data is None \
                        else np.hstack((final_data, data))
                    del temp
                else:
                    print(path(data_file_path).name + ' Not found')
                    print('please run "computePSD.py" and '
                          '"group_PSD_per_subjects.py" before '
                          'running this script')

        # print('classification...')
        sl2go = StratifiedLeave2GroupsOut()
        clf = LDA()
        save = classification(clf, sl2go, final_data, labels, groups, n_jobs=-1)
        savemat(results_file_path, save)
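# `classification` is another shared helper these scripts rely on but do not
# define. A minimal sketch under the assumption that it runs grouped
# cross-validation and returns a dict of scores; the function body and the
# dict keys ("acc_score", "acc", "acc_pvalue") are assumptions inferred from
# how the results are consumed in the snippets below:
import numpy as np
from sklearn.model_selection import cross_val_score

def classification(clf, cv, X, y, groups, n_perm=None, n_jobs=1):
    """Hypothetical: grouped CV accuracy, optionally with label permutations."""
    scores = cross_val_score(clf, X, y, groups=groups, cv=cv, n_jobs=n_jobs)
    save = {"acc_score": np.mean(scores), "acc": scores}
    if n_perm is not None:
        rng = np.random.RandomState(0)
        pscores = [
            np.mean(cross_val_score(clf, X, rng.permutation(y), groups=groups,
                                    cv=cv, n_jobs=n_jobs))
            for _ in range(n_perm)
        ]
        # +1-corrected permutation p-value
        save["acc_pvalue"] = (np.sum(np.asarray(pscores) >= save["acc_score"]) + 1) / (
            n_perm + 1
        )
    return save
# (the real helper apparently also records an "auc_score", per the snippets below)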
                                           clipnorm=args.clip_norm)
elif args.optimizer == 'rmsprop':
    optimizer = tf.keras.optimizers.RMSprop(learning_rate=args.lr,
                                            momentum=0.9,
                                            clipnorm=args.clip_norm)
else:
    optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr,
                                         clipnorm=args.clip_norm)

metrics = [
    tf.keras.metrics.Accuracy(),
    tf.keras.metrics.Precision(top_k=5)
]

history = {'train': {}, 'val': {}}
# create groups of videos to train
groups = utils.create_groups(ram_percent=args.memory_ratio,
                             seq_length=args.nb_frames,
                             version=args.dataset_version)

for epoch in range(args.epochs // args.epochs_per_group):
    epoch_history = {'train': [], 'val': []}
    # loop over the groups
    for videos, size, train in groups:
        X = np.empty(shape=(size, args.nb_frames, 256, 256, 3), dtype='uint8')
        Y = None
        i = 0
        start = time.time()
        # load training/validation data
        for v in videos:
            print('\r Loading actions: {}/{} '.format(i, X.shape[0]), end='')
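# The fragment above reads several flags from an `args` namespace. A
# hypothetical argparse setup consistent with every flag it touches; the
# defaults and choices are assumptions, not the script's actual values:
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--optimizer', default='adam',
                    choices=['sgd', 'rmsprop', 'adam'])
parser.add_argument('--lr', type=float, default=1e-3)
parser.add_argument('--clip_norm', type=float, default=1.0)
parser.add_argument('--epochs', type=int, default=100)
parser.add_argument('--epochs_per_group', type=int, default=1)
parser.add_argument('--nb_frames', type=int, default=16)
parser.add_argument('--memory_ratio', type=float, default=0.5)
parser.add_argument('--dataset_version', default='1')
args = parser.parse_args()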
sleep_list = ["AWA", "NREM", "Rem"] save_path = save_path / "PSD" classifier = "LDA" print("\nClassification of dreamers vs non dreamers") print("features : PSD") print("parameters : window = %i overlap = %0.2f" % (window, overlap)) print("Classifier : " + classifier) for sleep_state in sleep_list: print("\nProcessing state %s" % sleep_state) for elec in range(n_elec): print("electrode : %i/%i" % (elec, n_elec)) t1 = time() y = loadmat(label_path / sleep_state + "_labels.mat")["y"].ravel() y, groups = create_groups(y) for key in freq_dict: results_file_path = ( save_path / "results" / "perm_PSD_%s_%s_%i_%i_%0.2f.mat" % (sleep_state, key, elec, window, overlap) ) # results_file_path = save_path / 'results' / 'perm_PSD_EOG_%s_%s_%i_%i_%0.2f.mat' % (sleep_state, key, elec, window, overlap) # EOG if not path(results_file_path).isfile(): X = None # print('\nloading PSD for %s frequencies' % key) if sleep_state == "NREM": for n in ["S1", "S2", "SWS"]:
""" import numpy as np import sys from scipy.io import savemat, loadmat from sklearn.svm import SVC from sklearn.model_selection import RandomizedSearchCV as RS from utils import StratifiedLeave2GroupsOut, create_groups, prepare_data from params import SAVE_PATH, LABEL_PATH, WINDOW, OVERLAP, FREQ_DICT SAVE_PATH = SAVE_PATH / "psd" PREFIX = "classif_svm_" N_PERM = None STATE, ELEC = sys.argv[1], sys.argv[2][:-1] LABELS = loadmat(LABEL_PATH / STATE + "_labels.mat")["y"].ravel() LABELS, GROUPS = create_groups(LABELS) for freq in FREQ_DICT: print(STATE, ELEC, freq) data_file_name = "PSD_{}_{}_{}_{}_{:.2f}.mat".format( STATE, freq, ELEC, WINDOW, OVERLAP ) save_file_name = PREFIX + data_file_name data_file_path = SAVE_PATH / data_file_name save_file_path = SAVE_PATH / "results" / save_file_name if not save_file_path.isfile(): data = loadmat(data_file_path) data = prepare_data(data) data = np.array(data).reshape(len(data), 1)
def classif_cov(state):
    """Where the magic happens"""
    print(state)
    if FULL_TRIAL:
        labels = np.concatenate((np.ones(18), np.zeros(18)))
        groups = range(36)
    elif SUBSAMPLE:
        info_data = pd.read_csv(SAVE_PATH.parent / "info_data.csv")[STATE_LIST]
        n_trials = info_data.min().min()
        n_subs = len(info_data) - 1
        groups = [i for i in range(n_subs) for _ in range(n_trials)]
        n_total = n_trials * n_subs
        labels = [0 if i < n_total / 2 else 1 for i in range(n_total)]
    else:
        labels = loadmat(LABEL_PATH / state + "_labels.mat")["y"].ravel()
        labels, groups = create_groups(labels)

    file_path = SAVE_PATH / "results" / PREFIX + NAME + "_{}.mat".format(state)
    if not file_path.isfile():
        n_rep = 0
    else:
        final_save = proper_loadmat(file_path)
        n_rep = final_save["n_rep"]
        print("starting from i={}".format(n_rep))

    file_name = NAME + "_{}.mat".format(state)
    data_file_path = SAVE_PATH / file_name
    if data_file_path.isfile():
        data_og = loadmat(data_file_path)
        for i in range(n_rep, N_BOOTSTRAPS):
            if FULL_TRIAL:
                data = data_og["data"]
            elif SUBSAMPLE:
                data = prepare_data(data_og, n_trials=n_trials, random_state=i)
            else:
                data = prepare_data(data_og)

            if REDUCED:
                reduced_data = []
                for submat in data:
                    temp_a = np.delete(submat, i, 0)
                    temp_b = np.delete(temp_a, i, 1)
                    reduced_data.append(temp_b)
                data = np.asarray(reduced_data)

            if FULL_TRIAL:
                crossval = SSS(9)
            else:
                crossval = StratifiedLeave2GroupsOut()
            lda = LDA()
            clf = TSclassifier(clf=lda)
            save = classification(clf, crossval, data, labels, groups,
                                  N_PERM, n_jobs=-1)
            print(save["acc_score"])
            if i == 0:
                final_save = save
            elif BOOTSTRAP or REDUCED:
                for key, value in save.items():
                    final_save[key] += value
            final_save["n_rep"] = i + 1
            savemat(file_path, final_save)

        final_save["n_rep"] = N_BOOTSTRAPS
        if BOOTSTRAP:
            final_save["auc_score"] = np.mean(final_save["auc_score"])
            final_save["acc_score"] = np.mean(final_save["acc_score"])
        savemat(file_path, final_save)
        print("accuracy for %s : %0.2f (+/- %0.2f)"
              % (state, np.mean(save["acc_score"]), np.std(save["acc"])))
        if PERM:
            print("pval = {}".format(save["acc_pvalue"]))
    else:
        print(data_file_path.name + " Not found")
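# `proper_loadmat` (used above to resume an interrupted bootstrap run) is not
# shown. A hypothetical version, assuming it strips scipy's MATLAB metadata
# keys and unwraps the arrays so the dict can be updated and re-saved:
from scipy.io import loadmat

def proper_loadmat(file_path):
    data = loadmat(file_path)
    clean_data = {}
    for key, value in data.items():
        if not key.startswith("__"):   # drop __header__, __version__, __globals__
            clean_data[key] = value.squeeze().tolist()
    return clean_data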
def main(state):
    """Where the magic happens"""
    print(state)
    if FULL_TRIAL:
        labels = np.concatenate((np.ones(18), np.zeros(18)))
        groups = range(36)
    elif SUBSAMPLE:
        info_data = pd.read_csv(SAVE_PATH.parent / "info_data.csv")[STATE_LIST]
        ##### FOR A TEST #####
        info_data = info_data["SWS"]
        ##### FOR A TEST #####
        N_TRIALS = info_data.min().min()
        N_SUBS = len(info_data) - 1
        groups = [i for _ in range(N_TRIALS) for i in range(N_SUBS)]
        N_TOTAL = N_TRIALS * N_SUBS
        labels = [0 if i < N_TOTAL / 2 else 1 for i in range(N_TOTAL)]
    else:
        labels = loadmat(LABEL_PATH / state + "_labels.mat")["y"].ravel()
        labels, groups = create_groups(labels)

    file_name = prefix + name + "n153_{}.mat".format(state)
    save_file_path = SAVE_PATH / "results" / file_name
    if not save_file_path.isfile():
        data_file_path = SAVE_PATH / name + "_{}.mat".format(state)
        if data_file_path.isfile():
            final_save = None
            for i in range(N_BOOTSTRAPS):
                data = loadmat(data_file_path)
                if FULL_TRIAL:
                    data = data["data"]
                elif SUBSAMPLE:
                    data = prepare_data(data, n_trials=N_TRIALS, random_state=i)
                else:
                    data = prepare_data(data)

                sl2go = StratifiedLeave2GroupsOut()
                lda = LDA()
                clf = TSclassifier(clf=lda)
                save = classification(clf, sl2go, data, labels, groups,
                                      N_PERM, n_jobs=-1)
                save["acc_bootstrap"] = [save["acc_score"]]
                save["auc_bootstrap"] = [save["auc_score"]]
                if final_save is None:
                    final_save = save
                else:
                    for key in final_save:
                        final_save[key] = final_save[key] + save[key]

            savemat(save_file_path, final_save)
            print("accuracy for %s : %0.2f (+/- %0.2f)"
                  % (state, save["acc_score"], np.std(save["acc"])))
        else:
            print(data_file_path.name + " Not found")
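# Hypothetical entry point for the script above, assuming STATE_LIST is
# imported from params as in the sibling scripts:
if __name__ == "__main__":
    for state in STATE_LIST:
        main(state)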
print("Press 2 for average daily {}".format(feature)) print("Press 3 for {} within a population".format(feature)) if __name__ == "__main__": os.system("clear") command = int(input("Press 1 for cases or 2 for deaths:\t")) if command == 1 or command == 2: feature = "" if command == 1: feature = "cases" elif command == 2: feature = "deaths" menu(feature) analysis_menu(feature) command = int(input()) if command == 1: mongodb.display_countries() a = mongodb.dates_cases_totals(utils.create_groups(), feature) utils.plot_graphs("Number of Daily {}".format(feature), a) elif command == 2: mongodb.display_countries() a = mongodb.dates_cases_averages(utils.create_groups(), feature) utils.plot_graphs("Number of Average Daily {}".format(feature), a) elif command == 3: greater_than = int(input("You want a population greater than:\t")) less_than = int(input("You want a population less than:\t")) countries = mongodb.aggregate_population_countries( greater_than, less_than, feature) utils.plot_graphs("Population", countries)
def main(state, elec):
    """Feature selection and permutations.

    For each separation of subjects with leave 2 subjects out, we train on
    the big set (feature selection) and test on the two remaining subjects.
    For each permutation, we just permute the labels at the trial level (we
    could use permutations at the subject level, but we wouldn't get as many
    permutations).
    """
    final_data = None
    print(state, elec)
    results_file_path = (
        SAVE_PATH
        / "results"
        / "EFS_NoGamma_{}_{}_{}_{:.2f}.mat".format(state, elec, WINDOW, OVERLAP)
    )
    if not results_file_path.isfile():
        for freq in FREQS:
            data_file_path = SAVE_PATH / "psd_{}_{}_{}_{}_{:.2f}.mat".format(
                state, freq, elec, WINDOW, OVERLAP
            )
            data = loadmat(data_file_path)["data"].ravel()
            if final_data is None:
                final_data = data
            else:
                for i, submat in enumerate(final_data):
                    final_data[i] = np.concatenate((submat, data[i]), axis=0)
        final_data = np.array(list(map(np.transpose, final_data)))

        lil_labels = [0] * 18 + [1] * 18
        lil_labels = np.asarray(lil_labels)
        lil_groups = list(range(36))
        sl2go = StratifiedShuffleGroupSplit(2)

        best_freqs = []
        pvalues, pscores = [], []
        test_scores, best_scores = [], []
        for train_subjects, test_subjects in sl2go.split(
            final_data, lil_labels, lil_groups
        ):
            x_train, x_test = final_data[train_subjects], final_data[test_subjects]
            y_train, y_test = lil_labels[train_subjects], lil_labels[test_subjects]

            y_train = [[label] * len(x_train[i]) for i, label in enumerate(y_train)]
            y_train, groups = create_groups(y_train)
            x_train = np.concatenate(x_train[:], axis=0)

            nested_sl2go = StratifiedShuffleGroupSplit(2)
            clf = LDA()
            f_select = EFS(
                estimator=clf,
                max_features=x_train.shape[-1],
                cv=nested_sl2go,
                n_jobs=-1,
            )
            f_select = f_select.fit(x_train, y_train, groups)
            best_idx = f_select.best_idx_
            best_freqs.append(list(FREQS[list(best_idx)]))
            best_scores.append(f_select.best_score_)

            test_clf = LDA()
            test_clf.fit(x_train[:, best_idx], y_train)
            y_test = [[label] * len(x_test[i]) for i, label in enumerate(y_test)]
            y_test, groups = create_groups(y_test)
            x_test = np.concatenate(x_test[:], axis=0)
            test_score = test_clf.score(x_test[:, best_idx], y_test)
            test_scores.append(test_score)

            if PERM:
                pscores_cv = []
                for _ in range(N_PERM):
                    y_train = np.random.permutation(y_train)
                    y_test = np.random.permutation(y_test)

                    clf = LDA()
                    clf.fit(x_train[:, best_idx], y_train)
                    pscore = clf.score(x_test[:, best_idx], y_test)
                    pscores_cv.append(pscore)

                pvalue = compute_pval(test_score, pscores_cv)
                pvalues.append(pvalue)
                pscores.append(pscores_cv)

        score = np.mean(test_scores)
        data = {
            "score": score,
            "train_scores": best_scores,
            "test_scores": test_scores,
            "freqs": best_freqs,
            "pvalue": pvalues,
            "pscores": pscores,
        }
        savemat(results_file_path, data)
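# `compute_pval` is imported from a shared module here and in the next
# snippet. A minimal sketch, assuming the usual +1-corrected permutation
# p-value; this is an assumption, not the actual helper:
def compute_pval(score, perm_scores):
    """Hypothetical: fraction of permutation scores >= the observed score."""
    n_better = sum(1 for p in perm_scores if p >= score)
    return (n_better + 1) / (len(perm_scores) + 1)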
def main(state, elec):
    """Permutations.

    For each separation of subjects with leave 2 subjects out, we train on
    the big set and test on the two remaining subjects. For each permutation,
    we just permute the labels at the trial level (we could use permutations
    at the subject level, but we wouldn't get as many permutations).
    """
    file_name = f"EFS_NoGamma_{state}_{elec}_{WINDOW}_{OVERLAP:.2f}.mat"
    print(file_name)
    file_path = RESULT_PATH / file_name
    # keep the results dict separate from the PSD data loaded below, so the
    # p-values can be added to it and saved back to the same file
    results = loadmat(file_path)

    lil_labels = [0] * 18 + [1] * 18
    lil_labels = np.asarray(lil_labels)
    lil_groups = list(range(36))
    sl2go = StratifiedLeave2GroupsOut()

    best_freqs = list(results["freqs"].ravel())
    scores = list(results["test_scores"].ravel())
    data = load_data(state, elec)

    pscores = []
    pvalues = []
    i = 0
    for train_subjects, test_subjects in sl2go.split(data, lil_labels, lil_groups):
        x_feature, x_classif = data[train_subjects], data[test_subjects]
        y_feature = lil_labels[train_subjects]
        y_classif = lil_labels[test_subjects]

        y_feature = [
            np.array([label] * x_feature[i].shape[1])
            for i, label in enumerate(y_feature)
        ]
        y_feature, _ = create_groups(y_feature)
        y_classif = [
            np.array([label] * x_classif[i].shape[1])
            for i, label in enumerate(y_classif)
        ]
        y_classif, _ = create_groups(y_classif)

        print(best_freqs[i])
        best_idx = [
            FREQS.index(value.strip().capitalize()) for value in best_freqs[i]
        ]
        x_classif = np.concatenate(x_classif[:], axis=1).T
        x_feature = np.concatenate(x_feature[:], axis=1).T

        for _ in range(N_PERM):
            y_feature = np.random.permutation(y_feature)
            y_classif = np.random.permutation(y_classif)

            clf = LDA()
            clf.fit(x_feature[:, best_idx], y_feature)
            pscore = clf.score(x_classif[:, best_idx], y_classif)
            pscores.append(pscore)

        score = scores[i]
        pvalue = compute_pval(score, pscores)
        pvalues.append(pvalue)
        i += 1

    results["pvalue"] = pvalues
    results["pscores"] = pscores
    savemat(file_path, results)
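# `load_data` is not shown in this snippet. A hypothetical version, assuming
# it loads the per-frequency PSD vectors for one electrode and stacks them
# into one (n_freqs, n_trials) matrix per subject; the file-name pattern
# mirrors the sibling scripts and is an assumption:
import numpy as np
from scipy.io import loadmat

def load_data(state, elec):
    """Hypothetical: one (n_freqs, n_trials) matrix per subject."""
    subjects = None
    for freq in FREQS:
        file_path = SAVE_PATH / "psd_{}_{}_{}_{}_{:.2f}.mat".format(
            state, freq, elec, WINDOW, OVERLAP
        )
        psd = loadmat(file_path)["data"].ravel()   # one trial-vector per subject
        if subjects is None:
            subjects = [sub.reshape(1, -1) for sub in psd]
        else:
            subjects = [
                np.concatenate((sub, psd[i].reshape(1, -1)), axis=0)
                for i, sub in enumerate(subjects)
            ]
    return np.asarray(subjects, dtype=object)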