def load_agg_data(subject, word, experiment, voice, proc, rep_set):
    """Load last-word-aligned sentence data and build word/voice label lists.

    Loads the requested repetitions for one subject, scales the signal,
    and derives per-trial labels from the stimulus text.

    Args:
        subject: subject identifier passed through to the loader.
        word: key into TMIN/TMAX and WORD_COLS; if 'full' is a substring,
            all trials are kept, otherwise only long (>5-word) sentences.
        experiment: experiment name (also selects the stimuli file).
        voice: voice selector forwarded to the loader.
        proc: preprocessing identifier forwarded to the loader.
        rep_set: collection of repetition indices to keep.

    Returns:
        (data, labels, voice_labels, time) — trials x sensors x time array,
        per-trial word labels, per-trial voice labels, and the time axis.
    """
    all_data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='last',
        voice=voice,
        experiment=experiment,
        proc=proc,
        num_instances=1,
        reps_filter=lambda nr: [rep in rep_set for rep in range(nr)],
        sensor_type=None,
        is_region_sorted=False,
        tmin=TMIN[word],
        tmax=TMAX[word])
    # Unit rescaling of the raw signal — presumably T -> pT; TODO confirm.
    all_data *= 1e12
    stimuli_voice = list(load_data.read_stimuli(experiment))
    labels = []
    voice_labels = []
    if 'full' not in word:
        # Only long sentences (half of the trials) are kept below.
        # BUG FIX: use floor division — under Python 3, `/` yields a float
        # and np.ones would raise on a non-integer shape.
        data = np.ones(
            (all_data.shape[0] // 2, all_data.shape[1], all_data.shape[2]))
    else:
        data = all_data
    i_data = 0
    for i_sen_int, sen_int in enumerate(sen_ints):
        word_list = stimuli_voice[sen_int]['stimulus'].split()
        curr_voice = stimuli_voice[sen_int]['voice']
        if 'full' in word:
            labels.append(word_list[WORD_COLS[curr_voice][word]])
            voice_labels.append(curr_voice)
        elif len(word_list) > 5:
            # Long sentence: copy the trial into the filtered array.
            data[i_data, :, :] = all_data[i_sen_int, :, :]
            labels.append(word_list[WORD_COLS[curr_voice][word]])
            voice_labels.append(curr_voice)
            i_data += 1
    print(labels)
    print(voice_labels)
    return data, labels, voice_labels, time
def run_tgm_exp(experiment, subject, sen_type, word, win_len, overlap, fold,
                isPerm=False, alg='lr-l1', adj=None, doTimeAvg=False,
                doTestAvg=True, num_instances=1, reps_to_use=10,
                proc=load_data.DEFAULT_PROC, random_state_perm=1, force=False):
    """Run one leave-one-sentence-out fold of a diagonal TGM experiment.

    Loads word-aligned sentence data, optionally permutes the labels,
    fits the fold with models.lr_diag_loso_fold, and saves results to a
    compressed .npz. The job is skipped when a result file already exists
    under either the current or the legacy naming scheme (unless `force`).
    """
    warnings.filterwarnings(action='ignore')

    # Save Directory — create experiment- and subject-level dirs as needed.
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    # Result path under the current naming scheme.
    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             sen_type=sen_type,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rep=reps_to_use,
                             rsP=random_state_perm,
                             fold=fold)
    # Legacy result path: an existing file here also marks the job done.
    old_fname = OLD_FILE.format(dir=save_dir,
                                sub=subject,
                                sen_type=sen_type,
                                word=word,
                                win_len=win_len,
                                ov=overlap,
                                perm=bool_to_str(isPerm),
                                alg=alg,
                                adj=adj,
                                avgTm=bool_to_str(doTimeAvg),
                                avgTst=bool_to_str(doTestAvg),
                                inst=num_instances,
                                rep=reps_to_use,
                                rsP=random_state_perm,
                                fold=fold)
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return
    elif os.path.isfile(old_fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    data, labels, sen_ints, time, sensor_regions = load_data.load_sentence_data_v2(
        subject=subject,
        align_to=word,
        voice=sen_type,
        experiment=experiment,
        proc=proc,
        num_instances=num_instances,
        reps_filter=reps_to_use,
        sensor_type=None,
        is_region_sorted=False,
        tmin=None,
        tmax=None)

    tmin = time.min()
    tmax = time.max()
    # 500 samples per unit time — presumably the 500 Hz sampling rate;
    # TODO confirm against the loader.
    total_win = int((tmax - tmin) * 500)
    if win_len < 0:
        # Negative win_len means "one window spanning the whole epoch".
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)

    if isPerm:
        # Permutation test: deterministic label shuffle.
        random.seed(random_state_perm)
        random.shuffle(labels)

    l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_diag_loso_fold(
        data, labels, win_starts, win_len, sen_ints, fold,
        penalty=alg[3:],  # e.g. 'lr-l1' -> 'l1'
        adj=adj,
        doTimeAvg=doTimeAvg,
        doTestAvg=doTestAvg)

    np.savez_compressed(fname,
                        l_ints=l_ints,
                        cv_membership=cv_membership,
                        tgm_acc=tgm_acc,
                        tgm_pred=tgm_pred,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
def run_tgm_exp(experiment, subject, sen_type, word, win_len, overlap,
                isPerm=False, alg='lr-l1', adj=None, doTimeAvg=False,
                doTestAvg=True, num_instances=1, reps_to_use=10,
                proc=load_data.DEFAULT_PROC, random_state_perm=1,
                force=False, mode='acc'):
    """Run a full TGM decoding experiment for one subject.

    In mode 'acc' fits a leave-one-sentence-out TGM and saves accuracies
    and predictions; in any other mode fits classifier coefficients
    (models.lr_tgm_coef) and saves those instead. Skips the job when the
    result .npz already exists unless `force` is True.

    (Dead commented-out legacy loader code removed — see VCS history.)
    """
    warnings.filterwarnings(action='ignore')

    # Save Directory — create experiment- and subject-level dirs as needed.
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             sen_type=sen_type,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rep=reps_to_use,
                             rsP=random_state_perm,
                             mode=mode)
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    data, labels, sen_ints, time, sensor_regions = load_data.load_sentence_data_v2(
        subject=subject,
        align_to=word,
        voice=sen_type,
        experiment=experiment,
        proc=proc,
        num_instances=num_instances,
        reps_filter=reps_to_use,
        sensor_type=None,
        is_region_sorted=False,
        tmin=None,
        tmax=None)

    tmin = time.min()
    tmax = time.max()
    # 500 samples per unit time — presumably the sampling rate; TODO confirm.
    total_win = int((tmax - tmin) * 500)
    if win_len < 0:
        # Negative win_len means "one window spanning the whole epoch".
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)

    if isPerm:
        # Permutation test: deterministic label shuffle.
        random.seed(random_state_perm)
        random.shuffle(labels)

    if mode == 'acc':
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data, labels, win_starts, win_len, sen_ints,
            penalty=alg[3:],  # e.g. 'lr-l1' -> 'l1'
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            cv_membership=cv_membership,
                            tgm_acc=tgm_acc,
                            tgm_pred=tgm_pred,
                            win_starts=win_starts,
                            time=time,
                            proc=proc)
    else:
        # Coefficient mode: fit on all data and save weights + C values.
        l_ints, coef, Cs = models.lr_tgm_coef(data,
                                              labels,
                                              win_starts,
                                              win_len,
                                              penalty=alg[3:],
                                              adj=adj,
                                              doTimeAvg=doTimeAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            coef=coef,
                            Cs=Cs,
                            win_starts=win_starts,
                            time=time,
                            proc=proc)
def run_tgm_exp(experiment, subject, word, win_len, overlap, dist='cosine',
                doTimeAvg=False, proc=load_data.DEFAULT_PROC, force=False):
    """Compute windowed representational dissimilarity matrices (RDMs).

    Loads last-word-aligned data for both voices, builds per-window RDMs
    with scipy's pdist/squareform under metric `dist`, and saves the
    stacked RDMs plus labels to a compressed .npz. Skips the job when the
    result file already exists unless `force` is True.

    Fixes vs. the previous revision (behavior otherwise unchanged):
    - floor division for the preallocation shape (Python 3 safe),
    - `range` instead of `xrange`,
    - `print(...)` call instead of the Python-2 print statement, matching
      the rest of the file.
    """
    # Save Directory — create experiment- and subject-level dirs as needed.
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             dist=dist,
                             avgTm=bool_to_str(doTimeAvg))
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    voice = ['active', 'passive']
    num_instances = 1
    all_data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='last',
        voice=voice,
        experiment=experiment,
        proc=proc,
        num_instances=num_instances,
        reps_filter=None,
        sensor_type=None,
        is_region_sorted=False,
        tmin=TMIN[word],
        tmax=TMAX[word])
    # Unit rescaling of the raw signal — presumably T -> pT; TODO confirm.
    all_data *= 1e12

    stimuli_voice = list(load_data.read_stimuli(experiment))
    labels = []
    voice_labels = []
    if word != 'eos-full':
        # Only long (>5-word) sentences are kept — half of the trials.
        # Floor division keeps the shape an int under Python 3.
        data = np.ones(
            (all_data.shape[0] // 2, all_data.shape[1], all_data.shape[2]))
    else:
        data = all_data
    i_data = 0
    for i_sen_int, sen_int in enumerate(sen_ints):
        word_list = stimuli_voice[sen_int]['stimulus'].split()
        curr_voice = stimuli_voice[sen_int]['voice']
        if word == 'eos-full':
            # Label with the second-to-last token of the stimulus.
            labels.append(word_list[-2])
            voice_labels.append(curr_voice)
        elif len(word_list) > 5:
            data[i_data, :, :] = all_data[i_sen_int, :, :]
            labels.append(word_list[WORD_COLS[curr_voice][word]])
            voice_labels.append(curr_voice)
            i_data += 1
    print(labels)
    print(voice_labels)

    tmin = time.min()
    tmax = time.max()
    # 500 samples per unit time — presumably the sampling rate; TODO confirm.
    total_win = int((tmax - tmin) * 500)
    if win_len < 0:
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)
    n_time = data.shape[2]
    # Boolean time masks, one per window start.
    windows = [
        np.array([i >= w_s and i < w_s + win_len for i in range(n_time)])
        for w_s in win_starts
    ]
    n_w = len(windows)

    RDM = []
    for wi in range(n_w):
        time_to_use = windows[wi]
        data_to_use = data[:, :, time_to_use]
        if doTimeAvg:
            data_to_use = np.mean(data_to_use, axis=2)
        else:
            # Flatten sensors x time into one feature vector per trial.
            data_to_use = np.reshape(data_to_use, (data_to_use.shape[0], -1))
        curr_RDM = squareform(pdist(data_to_use, metric=dist))
        if np.any(np.isnan(curr_RDM)):
            # Debug diagnostics for NaN distances; compares against the
            # hand-rolled my_cosine on the offending pair.
            print('Data state:')
            print(np.any(np.isinf(data_to_use)))
            print(np.any(np.isnan(data_to_use)))
            print(np.min(data_to_use))
            print(np.min(np.abs(data_to_use)))
            meow = pdist(data_to_use, metric=my_cosine)
            nan_els = np.unravel_index(np.where(np.isnan(meow)),
                                       curr_RDM.shape)
            # print(nan_els)
            print('My cosine:')
            print(my_cosine(data_to_use[nan_els[0][0][0], :],
                            data_to_use[nan_els[1][0][0], :]))
        RDM.append(curr_RDM[None, ...])
    RDM = np.concatenate(RDM, axis=0)

    np.savez_compressed(fname,
                        RDM=RDM,
                        labels=labels,
                        voice_labels=voice_labels,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
def run_tgm_exp(experiment, subject, sen_type, word, win_len, overlap, fold,
                isPerm = False, alg='lr-l1', adj=None, doTimeAvg=False,
                doTestAvg=True, num_instances=1, proc=load_data.DEFAULT_PROC,
                random_state_perm=1, force=False):
    """Run one LOSO fold of a TGM experiment on last-word-aligned data.

    Labels are derived from the stimulus text according to `word`:
    'propid' (identity of the agent/verb/patient triple), 'voice',
    'senlen' (long/short), 'agent'/'patient', or a direct word-column
    lookup. Results are saved to a compressed .npz; the job is skipped
    when the result file already exists unless `force` is True.
    """
    warnings.filterwarnings(action='ignore')
    # Save Directory
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             sen_type=sen_type,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm,
                             fold=fold)
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return
    # 'pooled' decodes across both voices at once.
    if sen_type == 'pooled':
        voice = ['active', 'passive']
    else:
        voice = [sen_type]
    data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='last',
        voice=voice,
        experiment=experiment,
        proc=proc,
        num_instances=num_instances,
        reps_filter=None,
        sensor_type=None,
        is_region_sorted=False,
        tmin=0.0,
        tmax=TMAX[experiment])
    stimuli_voice = list(load_data.read_stimuli(experiment))
    # print(stimuli_voice)
    if word == 'propid':
        # Label = integer id of the unique (agent, verb, patient) triple.
        all_words = [stimuli_voice[sen_int]['stimulus'].split()
                     for sen_int in sen_ints]
        all_voices = [stimuli_voice[sen_int]['voice'] for sen_int in sen_ints]
        content_words = []
        valid_inds = []
        for i_word_list, word_list in enumerate(all_words):
            curr_voice = all_voices[i_word_list]
            if experiment == 'PassAct3':
                # PassAct3: only long (>5-word) sentences contain all three
                # content words; short ones are dropped.
                if len(word_list) > 5:
                    valid_inds.append(i_word_list)
                    content_words.append(
                        [word_list[WORD_COLS[curr_voice]['agent']],
                         word_list[WORD_COLS[curr_voice]['verb']],
                         word_list[WORD_COLS[curr_voice]['patient']]])
            else:
                valid_inds.append(i_word_list)
                content_words.append(
                    [word_list[WORD_COLS[curr_voice]['agent']],
                     word_list[WORD_COLS[curr_voice]['verb']],
                     word_list[WORD_COLS[curr_voice]['patient']]])
        # return_inverse maps each trial to its unique-triple index.
        uni_content, labels = np.unique(np.array(content_words),
                                        axis=0,
                                        return_inverse=True)
        print(uni_content)
    else:
        labels = []
        valid_inds = []
        for i_sen_int, sen_int in enumerate(sen_ints):
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            if word == 'voice':
                labels.append(curr_voice)
                valid_inds.append(i_sen_int)
            elif word == 'senlen':
                # Sentence length: long (>5 words) vs short.
                if len(word_list) > 5:
                    labels.append('long')
                else:
                    labels.append('short')
                valid_inds.append(i_sen_int)
            elif word == 'agent' or word == 'patient':
                if experiment == 'PassAct3':
                    # Short PassAct3 sentences lack the role word — drop them.
                    if len(word_list) > 5:
                        valid_inds.append(i_sen_int)
                        labels.append(word_list[WORD_COLS[curr_voice][word]])
                else:
                    labels.append(word_list[WORD_COLS[curr_voice][word]])
                    valid_inds.append(i_sen_int)
            else:
                # Any other key: direct word-column lookup.
                labels.append(word_list[WORD_COLS[curr_voice][word]])
                valid_inds.append(i_sen_int)
    # Keep only trials with a valid label.
    valid_inds = np.array(valid_inds)
    data = data[valid_inds, ...]
    sen_ints = [sen for i_sen, sen in enumerate(sen_ints)
                if i_sen in valid_inds]
    # print(labels)
    tmin = time.min()
    tmax = time.max()
    # 500 samples per unit time — presumably the sampling rate; TODO confirm.
    total_win = int((tmax - tmin) * 500)
    if win_len < 0:
        # Negative win_len means "one window spanning the whole epoch".
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)
    if isPerm:
        # Permutation test: deterministic label shuffle.
        random.seed(random_state_perm)
        random.shuffle(labels)
    l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso_fold(
        data, labels, win_starts, win_len, sen_ints, fold,
        penalty=alg[3:],  # e.g. 'lr-l1' -> 'l1'
        adj=adj,
        doTimeAvg=doTimeAvg,
        doTestAvg=doTestAvg)
    np.savez_compressed(fname,
                        l_ints=l_ints,
                        cv_membership=cv_membership,
                        tgm_acc=tgm_acc,
                        tgm_pred=tgm_pred,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
def run_tgm_exp(experiment, sen_type, word, win_len, overlap, fold,
                isPerm = False, alg='lr-l1', adj=None, doTimeAvg=False,
                doTestAvg=True, num_instances=1, proc=load_data.DEFAULT_PROC,
                random_state_perm=1, force=False):
    """Run one multi-subject LOSO fold of a noun1-aligned TGM experiment.

    Loads every subject in VALID_SUBS[experiment], asserts that all
    subjects share the same sentence order, time axis, and labels, then
    fits models.lr_tgm_loso_multisub_fold and saves the results.
    """
    warnings.filterwarnings(action='ignore')
    # Save Directory (results are pooled across subjects — no per-subject dir).
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    fname = SAVE_FILE.format(dir=top_dir,
                             sen_type=sen_type,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm,
                             fold=fold)
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return
    stimuli_voice = list(load_data.read_stimuli(experiment))
    data_list = []
    sen_ints = []
    time = []
    labels = []
    for i_sub, subject in enumerate(VALID_SUBS[experiment]):
        data, _, sen_ints_sub, time_sub, sensor_regions = load_data.load_sentence_data_v2(
            subject=subject,
            align_to='noun1',
            voice=sen_type,
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            # Keep at most the first 10 repetitions.
            reps_filter=lambda x: [i for i in range(x) if i < 10],
            sensor_type=None,
            is_region_sorted=False,
            tmin=TIME_LIMITS[experiment][sen_type]['noun1']['tmin'],
            tmax=TIME_LIMITS[experiment][sen_type]['noun1']['tmax'])
        # print(labels_sub)
        # print(data.shape)
        valid_inds = []
        labels_sub = []
        new_sen_ints_sub = []
        for i_sen_int, sen_int in enumerate(sen_ints_sub):
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            if word == 'noun2':
                # noun2 only exists in long (>5-word) sentences.
                if len(word_list) > 5:
                    labels_sub.append(word_list[WORD_COLS[sen_type][word]])
                    valid_inds.append(i_sen_int)
                    new_sen_ints_sub.append(sen_int)
            else:
                labels_sub.append(word_list[WORD_COLS[sen_type][word]])
                valid_inds.append(i_sen_int)
                new_sen_ints_sub.append(sen_int)
        valid_inds = np.array(valid_inds)
        print(labels_sub)
        data_list.append(data[valid_inds, ...])
        print(data_list[i_sub].shape)
        if i_sub == 0:
            # First subject defines the reference ordering.
            sen_ints = new_sen_ints_sub
            time = time_sub
            labels = labels_sub
        else:
            # All subsequent subjects must match the reference exactly.
            assert np.all(sen_ints == new_sen_ints_sub)
            assert np.all(time == time_sub)
            assert np.all(np.array(labels) == np.array(labels_sub))
    tmin = time.min()
    tmax = time.max()
    # 500 samples per unit time — presumably the sampling rate; TODO confirm.
    total_win = int((tmax - tmin) * 500)
    if win_len < 0:
        # Negative win_len means "one window spanning the whole epoch".
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)
    # print(win_starts)
    print(sen_ints)
    print(labels)
    print(data_list[0].shape)
    if isPerm:
        # Permutation test: deterministic label shuffle.
        random.seed(random_state_perm)
        random.shuffle(labels)
    l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso_multisub_fold(
        data_list, labels, win_starts, win_len, sen_ints, fold,
        penalty=alg[3:],  # e.g. 'lr-l1' -> 'l1'
        adj=adj,
        doTimeAvg=doTimeAvg,
        doTestAvg=doTestAvg)
    np.savez_compressed(fname,
                        l_ints=l_ints,
                        cv_membership=cv_membership,
                        tgm_acc=tgm_acc,
                        tgm_pred=tgm_pred,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
def run_tgm_exp(experiment, sen_type, word, win_len, overlap, alg='lr-l1',
                adj=None, doTimeAvg=False, num_instances=1,
                proc=load_data.DEFAULT_PROC, force=False):
    """Fit multi-subject TGM coefficient maps on last-word-aligned data.

    Loads every subject in VALID_SUBS[experiment], verifies all subjects
    share the same sentence order and time axis, derives labels from the
    stimulus text according to `word` (as in the single-subject variant),
    then fits models.lr_tgm_loso_multisub_coef and saves coefficients,
    C values, and Haufe maps to a compressed .npz.

    Bug fix vs. previous revision: the per-subject time-axis check used to
    compare `time` with itself (always true); it now compares against
    `time_sub`, consistent with the sibling multi-subject function.
    """
    warnings.filterwarnings(action='ignore')
    # Save Directory (results are pooled across subjects).
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    fname = SAVE_FILE.format(dir=top_dir,
                             sen_type=sen_type,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             inst=num_instances)
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return
    # 'pooled' decodes across both voices at once.
    if sen_type == 'pooled':
        voice = ['active', 'passive']
    else:
        voice = [sen_type]
    data_list = []
    sen_ints = []
    time = []
    for i_sub, subject in enumerate(VALID_SUBS[experiment]):
        data, _, sen_ints_sub, time_sub, _ = load_data.load_sentence_data_v2(
            subject=subject,
            align_to='last',
            voice=voice,
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=None,
            sensor_type=None,
            is_region_sorted=False,
            tmin=0.0,
            tmax=TMAX[experiment])
        data_list.append(data)
        if i_sub == 0:
            # First subject defines the reference ordering.
            sen_ints = sen_ints_sub
            time = time_sub
        else:
            assert np.all(sen_ints == sen_ints_sub)
            # BUG FIX: previously `np.all(time == time)` — a tautology that
            # never checked anything. Compare against this subject's axis.
            assert np.all(time == time_sub)
    stimuli_voice = list(load_data.read_stimuli(experiment))
    if word == 'propid':
        # Label = integer id of the unique (agent, verb, patient) triple.
        all_words = [stimuli_voice[sen_int]['stimulus'].split()
                     for sen_int in sen_ints]
        all_voices = [stimuli_voice[sen_int]['voice'] for sen_int in sen_ints]
        content_words = []
        valid_inds = []
        for i_word_list, word_list in enumerate(all_words):
            curr_voice = all_voices[i_word_list]
            if experiment == 'PassAct3':
                # PassAct3: only long (>5-word) sentences contain all three
                # content words; short ones are dropped.
                if len(word_list) > 5:
                    valid_inds.append(i_word_list)
                    content_words.append(
                        [word_list[WORD_COLS[curr_voice]['agent']],
                         word_list[WORD_COLS[curr_voice]['verb']],
                         word_list[WORD_COLS[curr_voice]['patient']]])
            else:
                valid_inds.append(i_word_list)
                content_words.append(
                    [word_list[WORD_COLS[curr_voice]['agent']],
                     word_list[WORD_COLS[curr_voice]['verb']],
                     word_list[WORD_COLS[curr_voice]['patient']]])
        uni_content, labels = np.unique(np.array(content_words),
                                        axis=0,
                                        return_inverse=True)
        print(uni_content)
    else:
        labels = []
        valid_inds = []
        for i_sen_int, sen_int in enumerate(sen_ints):
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            if word == 'voice':
                labels.append(curr_voice)
                valid_inds.append(i_sen_int)
            elif word == 'senlen':
                # Sentence length: long (>5 words) vs short.
                if len(word_list) > 5:
                    labels.append('long')
                else:
                    labels.append('short')
                valid_inds.append(i_sen_int)
            elif word == 'agent' or word == 'patient':
                if experiment == 'PassAct3':
                    # Short PassAct3 sentences lack the role word — drop them.
                    if len(word_list) > 5:
                        valid_inds.append(i_sen_int)
                        labels.append(word_list[WORD_COLS[curr_voice][word]])
                else:
                    labels.append(word_list[WORD_COLS[curr_voice][word]])
                    valid_inds.append(i_sen_int)
            else:
                # Any other key: direct word-column lookup.
                labels.append(word_list[WORD_COLS[curr_voice][word]])
                valid_inds.append(i_sen_int)
    # Keep only trials with a valid label, for every subject.
    valid_inds = np.array(valid_inds)
    data_list = [data[valid_inds, ...] for data in data_list]
    tmin = time.min()
    tmax = time.max()
    # 500 samples per unit time — presumably the sampling rate; TODO confirm.
    total_win = int((tmax - tmin) * 500)
    if win_len < 0:
        # Negative win_len means "one window spanning the whole epoch".
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)
    coef, Cs, haufe_maps = models.lr_tgm_loso_multisub_coef(
        data_list, labels, win_starts, win_len,
        penalty=alg[3:],  # e.g. 'lr-l1' -> 'l1'
        adj=adj,
        doTimeAvg=doTimeAvg)
    print('Saving {}'.format(fname))
    np.savez_compressed(fname,
                        coef=coef,
                        Cs=Cs,
                        haufe_maps=haufe_maps,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
win_starts, win_len, feature_select=doFeatSelect, doZscore=doZscore, doAvg=doTimeAvg, ddof=1) if __name__ == '__main__': data, _, sen_ints, time, _ = load_data.load_sentence_data_v2( subject='B', align_to='last', voice=['active', 'passive'], experiment='krns2', proc=load_data.DEFAULT_PROC, num_instances=1, reps_filter=None, sensor_type=None, is_region_sorted=False, tmin=0.5, tmax=1.0) stimuli_voice = list(load_data.read_stimuli('krns2')) labels = [] for i_sen_int, sen_int in enumerate(sen_ints): curr_voice = stimuli_voice[sen_int]['voice'] labels.append(curr_voice) print(labels) tmin = time.min() tmax = time.max()
inds_to_plot = np.logical_and(time_old >= (tmin + 0.5), time_old <= (tmax + 0.5)) print(inds_to_plot.shape) print(time_old.shape) print(data_old.shape) data_old = data_old[:, :, inds_to_plot] time_old = time_old[inds_to_plot] # subject, align_to, voice, experiment, proc, num_instances, reps_filter, # sensor_type = None, is_region_sorted = True): data_new, labels_new, indices_in_master_experiment_stimuli, time_new, sensor_regions = load_data_new.load_sentence_data_v2( subject='A', align_to='noun1', voice=('active', 'passive'), experiment='PassAct3', proc=None, num_instances=2, reps_filter=None, sensor_type=None, is_region_sorted=False, tmin=tmin, tmax=tmax) print(data_new.shape) print(labels_new) print(indices_in_master_experiment_stimuli) def num_stimulus_words(stimuli_dict_): return len([ w for w in load_data_new.punctuation_regex.sub( '', stimuli_dict_['stimulus']).strip().split() if len(w.strip()) > 0
def run_tgm_exp(subject, sen_type, analysis, win_len, overlap, isPerm=False,
                alg='lr-l1', adj=None, doTimeAvg=False, doTestAvg=True,
                num_instances=1, random_state_perm=1, force=False, mode='acc'):
    """Run a determiner/noun TGM experiment on krns2 data.

    Loads four half-second epochs (det1, noun1, det2, noun2), concatenates
    the subset selected by `analysis`, builds word labels from the stimulus
    text, optionally filters to the 'the-dog' / 'a-dog' pairs, and fits
    either accuracies (mode 'acc') or coefficients.

    Bug fixes vs. previous revision:
    - the permutation shuffle used to act on the pre-filter `labels` list
      while training used `new_labels`, making isPerm a no-op;
    - the coefficient branch used to receive the unfiltered `labels`,
      whose length mismatched the filtered `data`.
    Both now use `new_labels`, the labels aligned with `data`.
    """
    warnings.filterwarnings(action='ignore')
    # Save Directory
    if not os.path.exists(TOP_DIR):
        os.mkdir(TOP_DIR)
    save_dir = SAVE_DIR.format(top_dir=TOP_DIR, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             sen_type=sen_type,
                             analysis=analysis,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm,
                             mode=mode)
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return
    if sen_type == 'pooled':
        voice = ['active', 'passive']
    else:
        voice = sen_type
    experiment = 'krns2'
    # Four half-second epochs: the 0.5 s before/after noun1 (det1/noun1)
    # and before/after noun2 (det2/noun2).
    data_det1, _, sen_ints_det1, _, _ = load_data.load_sentence_data_v2(
        subject=subject, align_to='noun1', voice=voice, experiment=experiment,
        proc=load_data.DEFAULT_PROC, num_instances=num_instances,
        reps_filter=None, sensor_type=None, is_region_sorted=False,
        tmin=-0.5, tmax=0.0)
    data_n1, _, sen_ints_n1, _, _ = load_data.load_sentence_data_v2(
        subject=subject, align_to='noun1', voice=voice, experiment=experiment,
        proc=load_data.DEFAULT_PROC, num_instances=num_instances,
        reps_filter=None, sensor_type=None, is_region_sorted=False,
        tmin=0.0, tmax=0.5)
    data_det2, _, sen_ints_det2, _, _ = load_data.load_sentence_data_v2(
        subject=subject, align_to='noun2', voice=voice, experiment=experiment,
        proc=load_data.DEFAULT_PROC, num_instances=num_instances,
        reps_filter=None, sensor_type=None, is_region_sorted=False,
        tmin=-0.5, tmax=0.0)
    data_n2, _, sen_ints_n2, time, _ = load_data.load_sentence_data_v2(
        subject=subject, align_to='noun2', voice=voice, experiment=experiment,
        proc=load_data.DEFAULT_PROC, num_instances=num_instances,
        reps_filter=None, sensor_type=None, is_region_sorted=False,
        tmin=0.0, tmax=0.5)
    stimuli_voice = list(load_data.read_stimuli(experiment))
    # det1 labels are always included.
    labels = []
    for sen_int in sen_ints_det1:
        word_list = stimuli_voice[sen_int]['stimulus'].split()
        curr_voice = stimuli_voice[sen_int]['voice']
        labels.append(word_list[WORD_COLS[curr_voice]['det1']])
    if 'det-type' not in analysis:
        # Full analysis: det1 + noun1 + det2 + noun2 epochs.
        for sen_int in sen_ints_n1:
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            labels.append(word_list[WORD_COLS[curr_voice]['noun1']])
        for sen_int in sen_ints_det2:
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            labels.append(word_list[WORD_COLS[curr_voice]['det2']])
        for sen_int in sen_ints_n2:
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            labels.append(word_list[WORD_COLS[curr_voice]['noun2']])
        sen_ints = np.concatenate(
            [sen_ints_det1, sen_ints_n1, sen_ints_det2, sen_ints_n2], axis=0)
        data = np.concatenate([data_det1, data_n1, data_det2, data_n2],
                              axis=0)
    elif analysis == 'det-type-first':
        # Determiner-type analysis on the first determiner only.
        sen_ints = np.array(sen_ints_det1)
        data = data_det1
    else:
        # Determiner-type analysis on both determiners.
        for sen_int in sen_ints_det2:
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            labels.append(word_list[WORD_COLS[curr_voice]['det2']])
        sen_ints = np.concatenate([sen_ints_det1, sen_ints_det2], axis=0)
        data = np.concatenate([data_det1, data_det2], axis=0)
    # Optional restriction to a two-word contrast.
    inds_to_keep = np.ones((len(labels), ), dtype=bool)
    if analysis == 'the-dog':
        for i_label, label in enumerate(labels):
            if label != 'the' and label != 'dog':
                inds_to_keep[i_label] = False
    elif analysis == 'a-dog':
        for i_label, label in enumerate(labels):
            if label != 'a' and label != 'dog':
                inds_to_keep[i_label] = False
    data = data[inds_to_keep, :, :]
    sen_ints = sen_ints[inds_to_keep]
    new_labels = [
        labels[i_label] for i_label, _ in enumerate(labels)
        if inds_to_keep[i_label]
    ]
    print(np.unique(np.array(new_labels)))
    total_win = data.shape[-1]
    win_starts = range(0, total_win - win_len, overlap)
    if isPerm:
        # Permutation test: shuffle the labels actually used for training.
        random.seed(random_state_perm)
        random.shuffle(new_labels)
    if mode == 'acc':
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data, new_labels, win_starts, win_len, sen_ints,
            penalty=alg[3:],  # e.g. 'lr-l1' -> 'l1'
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            cv_membership=cv_membership,
                            tgm_acc=tgm_acc,
                            tgm_pred=tgm_pred,
                            win_starts=win_starts,
                            time=time)
    else:
        # Coefficient mode — labels must align with the filtered data.
        l_ints, coef, Cs = models.lr_tgm_coef(data,
                                              new_labels,
                                              win_starts,
                                              win_len,
                                              penalty=alg[3:],
                                              adj=adj,
                                              doTimeAvg=doTimeAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            coef=coef,
                            Cs=Cs,
                            win_starts=win_starts,
                            time=time)
import matplotlib
# Select the Tk backend; must run before pyplot is imported to take effect.
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from syntax_vs_semantics import load_data
import numpy as np

if __name__ == '__main__':
    # Quick visual sanity check: for each voice/alignment combination,
    # plot the trial-averaged sensors x time image for subject 'B'.
    for sen_type in ['active', 'passive']:
        for word in ['noun1', 'verb', 'last']:
            data, labels, sen_ints, time, sensor_regions = load_data.load_sentence_data_v2(
                subject='B',
                align_to=word,
                voice=sen_type,
                experiment='krns2',
                proc=load_data.DEFAULT_PROC,
                num_instances=1,
                reps_filter=None,
                sensor_type=None,
                is_region_sorted=False,
                tmin=0.0,
                tmax=3.0)
            # Average over trials -> 2D sensors x time image.
            data_to_plot = np.squeeze(np.mean(data, axis=0))
            fig, ax = plt.subplots()
            h = ax.imshow(data_to_plot,
                          interpolation='nearest',
                          aspect='auto')
            ax.set_title('{} {}'.format(sen_type, word))
            # NOTE(review): source formatting is ambiguous — show() may have
            # been intended once after both loops instead of per figure;
            # confirm against the original script.
            plt.show()
def run_tgm_exp(experiment, sen_type, word, win_len, overlap, alg='lr-l1',
                adj=None, doTimeAvg=False, num_instances=1,
                proc=load_data.DEFAULT_PROC, force=False):
    """Fit multi-subject TGM coefficient maps on word-aligned data.

    Loads every subject in VALID_SUBS[experiment] aligned to `word`,
    checks that all subjects share the same sentence order, time axis,
    and labels, fits models.lr_tgm_loso_multisub_coef, and writes the
    coefficients, C values, and Haufe maps to a compressed .npz. The job
    is skipped when the result file already exists unless `force`.
    """
    warnings.filterwarnings(action='ignore')

    # Save Directory — pooled across subjects, so only the experiment dir.
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    fname = SAVE_FILE.format(dir=top_dir,
                             sen_type=sen_type,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             inst=num_instances)
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    subject_data = []
    sen_ints = []
    time = []
    labels = []
    time_window = TIME_LIMITS[experiment][sen_type][word]
    for subject_index, subject in enumerate(VALID_SUBS[experiment]):
        loaded = load_data.load_sentence_data_v2(
            subject=subject,
            align_to=word,
            voice=sen_type,
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            # Keep at most the first 10 repetitions.
            reps_filter=lambda n_reps: [r for r in range(n_reps) if r < 10],
            sensor_type=None,
            is_region_sorted=False,
            tmin=time_window['tmin'],
            tmax=time_window['tmax'])
        data, labels_sub, sen_ints_sub, time_sub, sensor_regions = loaded
        subject_data.append(data)
        if subject_index == 0:
            # The first subject defines the reference ordering.
            sen_ints = sen_ints_sub
            time = time_sub
            labels = labels_sub
            continue
        # Every later subject must match the reference exactly.
        assert np.all(sen_ints == sen_ints_sub)
        assert np.all(time == time_sub)
        assert np.all(np.array(labels) == np.array(labels_sub))

    data_list = subject_data
    tmin = time.min()
    tmax = time.max()
    # 500 samples per unit time — presumably the sampling rate; TODO confirm.
    total_win = int((tmax - tmin) * 500)
    if win_len < 0:
        # Negative win_len means "one window spanning the whole epoch".
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)

    coef, Cs, haufe_maps = models.lr_tgm_loso_multisub_coef(
        data_list,
        labels,
        win_starts,
        win_len,
        penalty=alg[3:],  # e.g. 'lr-l1' -> 'l1'
        adj=adj,
        doTimeAvg=doTimeAvg)

    print('Saving {}'.format(fname))
    np.savez_compressed(fname,
                        coef=coef,
                        Cs=Cs,
                        haufe_maps=haufe_maps,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
def run_tgm_exp(experiment, subject, win_len, overlap, fold, isPerm=False,
                alg='lr-l1', adj=None, doTimeAvg=False, doTestAvg=True,
                num_instances=1, proc=load_data.DEFAULT_PROC,
                random_state_perm=1, force=False):
    """Run one fold of a cross-epoch (noun1 vs noun2) TGM experiment.

    Loads a noun1-aligned epoch (0.0-0.5) and a noun2-aligned epoch
    (0.0-1.0), remaps both label sets through NEW_LABELS, and fits
    models.lr_cross_tgm_loso_fold across the two epochs, saving results
    to a compressed .npz.
    """
    warnings.filterwarnings(action='ignore')
    # Save Directory
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm,
                             fold=fold)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return
    # Epoch 1: first 0.5 s after noun1 onset.
    n1_data, labels, n1_sen_ints, n1_time, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='noun1',
        voice=['active', 'passive'],
        experiment=experiment,
        proc=proc,
        num_instances=num_instances,
        # Keep at most the first 10 repetitions.
        reps_filter=lambda x: [i for i in range(x) if i < 10],
        sensor_type=None,
        is_region_sorted=False,
        tmin=0.0,
        tmax=0.5)
    # Remap raw labels through the experiment's relabeling table.
    n1_labels = [NEW_LABELS[experiment][lab] for lab in labels]
    # Epoch 2: first 1.0 s after noun2 onset.
    n2_data, labels, n2_sen_ints, n2_time, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='noun2',
        voice=['active', 'passive'],
        experiment=experiment,
        proc=proc,
        num_instances=num_instances,
        reps_filter=lambda x: [i for i in range(x) if i < 10],
        sensor_type=None,
        is_region_sorted=False,
        tmin=0.0,
        tmax=1.0)
    n2_labels = [NEW_LABELS[experiment][lab] for lab in labels]
    # Window grids are built per epoch since the epochs differ in length.
    n1_tmin = n1_time.min()
    n1_tmax = n1_time.max()
    # 500 samples per unit time — presumably the sampling rate; TODO confirm.
    n1_total_win = int((n1_tmax - n1_tmin) * 500)
    n1_win_starts = range(0, n1_total_win - win_len, overlap)
    n2_tmin = n2_time.min()
    n2_tmax = n2_time.max()
    n2_total_win = int((n2_tmax - n2_tmin) * 500)
    n2_win_starts = range(0, n2_total_win - win_len, overlap)
    if isPerm:
        # Permutation test: deterministic shuffle of both label lists.
        random.seed(random_state_perm)
        random.shuffle(n1_labels)
        random.shuffle(n2_labels)
    l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_cross_tgm_loso_fold(
        data_list=[n1_data, n2_data],
        labels_list=[n1_labels, n2_labels],
        win_starts_list=[n1_win_starts, n2_win_starts],
        win_len=win_len,
        sen_ints_list=[n1_sen_ints, n2_sen_ints],
        fold=fold,
        penalty=alg[3:],  # e.g. 'lr-l1' -> 'l1'
        adj=adj,
        doTimeAvg=doTimeAvg,
        doTestAvg=doTestAvg)
    np.savez_compressed(fname,
                        l_ints=l_ints,
                        cv_membership=cv_membership,
                        tgm_acc=tgm_acc,
                        tgm_pred=tgm_pred,
                        n1_win_starts=n1_win_starts,
                        n2_win_starts=n2_win_starts,
                        n1_time=n1_time,
                        n2_time=n2_time,
                        proc=proc)
def run_tgm_exp(experiment, subject, word, win_len, overlap, isPerm=False,
                alg='lr-l1', adj=None, doTimeAvg=False, doTestAvg=True,
                num_instances=1, proc=load_data.DEFAULT_PROC,
                random_state_perm=1, force=False):
    """Run a voice-decoding TGM experiment (logistic regression, SVM, or GNB).

    For word == 'voice' the data come from a last-word-aligned window;
    otherwise active and passive trials are loaded verb-aligned with
    offset windows and concatenated. Labels are always the trial's voice.
    Classifier family is chosen from `alg` ('lr...', 'svm...', else GNB).
    """
    warnings.filterwarnings(action='ignore')
    # Save Directory
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm)
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return
    if word == 'voice':
        # Single last-word-aligned window, both voices together.
        data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
            subject=subject,
            align_to='last',
            voice=['active', 'passive'],
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=None,
            sensor_type=None,
            is_region_sorted=False,
            tmin=0.5,
            tmax=1.0)
    else:
        # Verb-aligned: active trials 0.0-0.5, passive trials -0.5-0.0,
        # then concatenated. Presumably this aligns the same sentence
        # position across voices — TODO confirm.
        data_act, _, sen_ints_act, time, _ = load_data.load_sentence_data_v2(
            subject=subject,
            align_to='verb',
            voice=['active'],
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=None,
            sensor_type=None,
            is_region_sorted=False,
            tmin=0.0,
            tmax=0.5)
        data_pass, _, sen_ints_pass, _, _ = load_data.load_sentence_data_v2(
            subject=subject,
            align_to='verb',
            voice=['passive'],
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=None,
            sensor_type=None,
            is_region_sorted=False,
            tmin=-0.5,
            tmax=0.0)
        data = np.concatenate([data_act, data_pass], axis=0)
        sen_ints = np.concatenate([sen_ints_act, sen_ints_pass], axis=0)
    stimuli_voice = list(load_data.read_stimuli(experiment))
    # Label every trial with its sentence's voice.
    labels = []
    for i_sen_int, sen_int in enumerate(sen_ints):
        curr_voice = stimuli_voice[sen_int]['voice']
        labels.append(curr_voice)
    print(labels)
    tmin = time.min()
    tmax = time.max()
    # 500 samples per unit time — presumably the sampling rate; TODO confirm.
    total_win = int((tmax - tmin) * 500)
    if win_len < 0:
        # Negative win_len means "one window spanning the whole epoch".
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)
    if isPerm:
        # Permutation test: deterministic label shuffle.
        random.seed(random_state_perm)
        random.shuffle(labels)
    if 'lr' in alg:
        # Logistic regression; alg suffix is the penalty ('None' -> None).
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data,
            labels,
            win_starts,
            win_len,
            sen_ints,
            penalty=str_to_none(alg[3:]),
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg)
    elif 'svm' in alg:
        # SVM; alg suffix after 'svm-' is the penalty.
        l_ints, cv_membership, tgm_acc, tgm_pred = models.svc_tgm_loso(
            data,
            labels,
            win_starts,
            win_len,
            sen_ints,
            sub_rs=1,
            penalty=alg[4:],
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg,
            ddof=1,
            C=None)
    else:
        # Gaussian Naive Bayes fallback; adj/alg flags are reinterpreted
        # as z-scoring and feature-selection switches.
        if adj == 'zscore':
            doZscore = True
        else:
            doZscore = False
        if 'None' in alg:
            doFeatSelect = False
        else:
            doFeatSelect = True
        tgm_pred, l_ints, cv_membership, feature_masks, num_feat_selected = models.nb_tgm_loso(
            data,
            labels,
            sen_ints,
            1,
            win_starts,
            win_len,
            feature_select=doFeatSelect,
            doZscore=doZscore,
            doAvg=doTimeAvg,
            ddof=1)
        # GNB returns predictions only; derive the accuracy TGM here.
        tgm_acc = tgm_from_preds_GNB(tgm_pred, l_ints, cv_membership)
    print('Max Acc: %.2f' % np.max(np.mean(tgm_acc, axis=0)))
    np.savez_compressed(fname,
                        l_ints=l_ints,
                        cv_membership=cv_membership,
                        tgm_acc=tgm_acc,
                        tgm_pred=tgm_pred,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
inst_grid = AxesGrid(inst_fig, 111, nrows_ncols=(len(inst_list), 1), axes_pad=0.7, cbar_mode='single', cbar_location='right', cbar_pad=0.5, cbar_size='2%', share_all=True) for i_inst, num_instances in enumerate(inst_list): data, labels, sen_ints, time, sensor_regions = load_data.load_sentence_data_v2( subject=subject, align_to='noun1', voice=[sen_type], experiment=experiment, proc=proc, num_instances=num_instances, reps_filter=lambda x: [i for i in range(x) if i < 10], sensor_type=None, is_region_sorted=False, tmin=-1.0, tmax=4.5) # time_to_plot = range(180, 254) # data = data[:, :, time_to_plot] # time = time[time_to_plot] if num_instances == 1: data = np.squeeze(data[sen_list[0], :, :]) else: data = np.squeeze(data[sen_id, :, :]) data_to_plot = data[sorted_inds, ::2] print(np.max(data_to_plot))
ni=num_instances, tmin=tmin, tmax=tmax, i_sensor=i_sensor, voice=args.voice) if os.path.isfile(result_fname) and not str_to_bool(args.force): print('Job already completed. Skipping Job.') print(result_fname) else: data, labels, sen_ints, time, sensor_regions = load_data.load_sentence_data_v2( subject=sub, align_to='last', voice=voice, experiment=exp, proc=proc, num_instances=num_instances, reps_filter=None, sensor_type=None, is_region_sorted=False, tmin=tmin, tmax=tmax) data = data * 1e12 sen_ints = np.array(sen_ints) if i_sensor < 0: do_transpose = False i_sensor = np.ones((data.shape[1], ), dtype=bool) else: do_transpose = True sen0_data = data[sen_ints == sen0, ...]