def run_graph(name):
    """Run the graph pipeline for dataset ``name`` and pickle each stage.

    Parses ``data/<name>/graph`` with ``lds.BIDSParser``, loads the
    "func" / ".edgelist" modality frame into an ``lds.GraphDataSet``, and
    then writes three pickles into ``data/<name>``:

    * ``graph_dm.pkl``       -- distance matrix built with ``lms.FroCorr``
    * ``graph_embed_dm.pkl`` -- 10-component MDS embedding of that matrix
    * ``graph_clust_dm.pkl`` -- ``lcl.HGMMClustering`` object after
      ``cluster()`` has been called on it

    Args:
        name: Dataset directory name under ``data``.

    Returns:
        None. Results are only written to disk.
    """
    base_dir = "data"

    def _save(filename, payload):
        # Pickle ``payload`` to data/<name>/<filename>.
        with open(os.path.join(base_dir, name, filename), 'wb') as handle:
            pkl.dump(payload, handle)

    # Locate and load the edge-list files for this dataset.
    parser = lds.BIDSParser(os.path.join(base_dir, "%s/graph" % name))
    graph_ds = lds.GraphDataSet(parser.getModalityFrame("func", ".edgelist"))

    # Distance matrix over the graph data.
    dist_matrix = lds.DistanceMatrix(graph_ds, lms.FroCorr)
    dist_matrix.name = "graph-DistanceMatrix"
    _save('graph_dm.pkl', dist_matrix)

    # Embed the distance matrix with MDS.
    mds = leb.MDSEmbedder(num_components=10)
    embedded = mds.embed(dist_matrix)
    _save('graph_embed_dm.pkl', embedded)

    # Cluster the embedding and store the clustering object itself.
    clustering = lcl.HGMMClustering(embedded, 4)
    clustering.cluster()
    _save('graph_clust_dm.pkl', clustering)
def run_pheno(name):
    """Run the phenotypic pipeline on the CSV at ``name`` and pickle each stage.

    The dataset label is the name of the directory that contains the CSV
    file. All outputs go to ``data/<label>/pheno``:

    * ``dm.pkl``            -- distance matrix (``lms.VectorDifferenceNorm``)
    * ``embed_dm.pkl``      -- 10-component MDS embedding
    * ``hgmm_clust_dm.pkl`` -- ``lcl.HGMMClustering`` after ``cluster()``
    * ``km_clust_dm.pkl``   -- ``lcl.AdaptiveKMeans`` after ``cluster()``

    Args:
        name: Path to the phenotypic CSV file.

    Returns:
        None. Results are only written to disk.
    """
    base_dir = "data"

    # Label the dataset after the CSV's parent directory.
    label = os.path.basename(os.path.dirname(name))
    out_dir = os.path.join(base_dir, label, 'pheno')

    def _save(filename, payload):
        # Pickle ``payload`` into the pheno output directory.
        with open(os.path.join(out_dir, filename), 'wb') as handle:
            pkl.dump(payload, handle)

    def _cluster_and_save(model, filename):
        # Fit the clustering model, then persist the fitted object.
        model.cluster()
        _save(filename, model)

    # Load the CSV and fill missing values using the "mean" strategy.
    # (Metadata export via CDS.saveMetaData is currently disabled.)
    csv_ds = lds.CSVDataSet(name, name=label)
    csv_ds.imputeColumns("mean")

    # Distance matrix over the phenotypic data.
    dist_matrix = lds.DistanceMatrix(csv_ds, lms.VectorDifferenceNorm)
    _save('dm.pkl', dist_matrix)

    # Embed the distance matrix with MDS.
    mds = leb.MDSEmbedder(num_components=10)
    embedded = mds.embed(dist_matrix)
    _save('embed_dm.pkl', embedded)

    # Two clusterings of the same embedding, saved side by side.
    _cluster_and_save(lcl.HGMMClustering(embedded, 4), 'hgmm_clust_dm.pkl')
    _cluster_and_save(lcl.AdaptiveKMeans(embedded), 'km_clust_dm.pkl')
def run_eeg(name):
    """Run the EEG pipeline for dataset ``name`` and pickle each stage.

    Parses ``data/<name>/eeg`` with ``lds.BIDSParser`` and builds an
    ``lds.EEGDataSet`` from the "preprocessed" / ".pkl" modality frame,
    then computes a distance matrix, an MDS embedding, two clusterings,
    and channel-location artifacts.

    Files written to ``data/<name>``:
        ``eeg_ds.pkl``, ``eeg_dm.pkl``, ``eeg_embed_dm.pkl``,
        ``eeg_chanlocs.pkl``, ``eeg_spatial_dm.pkl``

    Files written to ``data/<name>/eeg``:
        ``hgmm_clust_dm.pkl``, ``km_clust_dm.pkl``

    Args:
        name: Dataset directory name under ``data``.

    Returns:
        None. Results are only written to disk.
    """
    BASE = "data"
    DATASET = "%s/eeg" % (name)
    root = os.path.join(BASE, DATASET)
    bp = lds.BIDSParser(root)

    # NOTE(review): only the first six rows of the modality frame are used;
    # this looks like a cap left over from development -- confirm.
    dataset_descriptor = bp.getModalityFrame("preprocessed", ".pkl").iloc[:6]

    eds = lds.EEGDataSet(dataset_descriptor)
    with open(os.path.join(BASE, name, 'eeg_ds.pkl'), 'wb') as pkl_loc:
        pkl.dump(eds, pkl_loc)

    # Create a lemur distance matrix based on the EEG data
    DM = lds.DistanceMatrix(eds, lms.FroCorr)
    DM.name = "eeg-DistanceMatrix"
    with open(os.path.join(BASE, name, 'eeg_dm.pkl'), 'wb') as pkl_loc:
        pkl.dump(DM, pkl_loc)

    # Create an embedded distance matrix object under MDS
    MDSEmbedder = leb.MDSEmbedder(num_components=10)
    EEG_Embedded = MDSEmbedder.embed(DM)
    with open(os.path.join(BASE, name, 'eeg_embed_dm.pkl'), 'wb') as pkl_loc:
        pkl.dump(EEG_Embedded, pkl_loc)

    # Cluster the embedding two ways. Unlike the other outputs, the
    # clustering pickles are written under ``root`` (data/<name>/eeg).
    hgmm = lcl.HGMMClustering(EEG_Embedded, 4)
    hgmm.cluster()
    with open(os.path.join(root, 'hgmm_clust_dm.pkl'), 'wb') as pkl_loc:
        pkl.dump(hgmm, pkl_loc)

    clustered = lcl.AdaptiveKMeans(EEG_Embedded)
    clustered.cluster()
    with open(os.path.join(root, 'km_clust_dm.pkl'), 'wb') as pkl_loc:
        pkl.dump(clustered, pkl_loc)

    # Channel locations: save columns 1..3 of the CSV as a raw array.
    # ``DataFrame.as_matrix()`` was deprecated in pandas 0.23 and removed in
    # pandas 1.0; ``.values`` yields the same array on old and new versions.
    chanlocs = pd.read_csv("data/%s/eeg/chanlocs.csv" % (name))
    with open(os.path.join(BASE, name, 'eeg_chanlocs.pkl'), 'wb') as pkl_loc:
        pkl.dump(chanlocs.values[:, 1:4], pkl_loc)

    # Distance matrix between channels based on their X/Y/Z columns.
    spatial = lds.DataSet(chanlocs[["X", "Y", "Z"]], "Spatial")
    spatialDM = lds.DistanceMatrix(spatial, lms.VectorDifferenceNorm)
    with open(os.path.join(BASE, name, 'eeg_spatial_dm.pkl'), 'wb') as pkl_loc:
        pkl.dump(spatialDM, pkl_loc)
def run_fmri(name):
    """Run the fMRI pipeline for dataset ``name`` and pickle each stage.

    Parses ``data/<name>/fmri`` with ``lds.BIDSParser``, loads the
    "func" / "nii.gz" modality frame into an ``lds.fMRIDataSet``, and
    writes two pickles into ``data/<name>``:

    * ``fmri_dm.pkl``       -- distance matrix built with ``lms.DiffAve``
    * ``fmri_embed_dm.pkl`` -- 10-component MDS embedding of that matrix

    Progress markers are printed to stdout between stages.

    Args:
        name: Dataset directory name under ``data``.

    Returns:
        None. Results are only written to disk.
    """
    base_dir = "data"

    def _pickle_to(filename, payload):
        # Pickle ``payload`` to data/<name>/<filename>.
        with open(os.path.join(base_dir, name, filename), 'wb') as handle:
            pkl.dump(payload, handle)

    # Locate and load the fMRI volumes for this dataset.
    parser = lds.BIDSParser(os.path.join(base_dir, "%s/fmri" % name))
    frame = parser.getModalityFrame("func", "nii.gz")
    fmri_ds = lds.fMRIDataSet(frame)
    print('check_point0')

    # Distance matrix over the fMRI data; the third positional argument
    # (True) is forwarded to lds.DistanceMatrix unchanged.
    dist_matrix = lds.DistanceMatrix(fmri_ds, lms.DiffAve, True)
    dist_matrix.name = "fmri-DistanceMatrix"
    print('check_point1')
    _pickle_to('fmri_dm.pkl', dist_matrix)

    # Embed the distance matrix with MDS.
    mds = leb.MDSEmbedder(num_components=10)
    print('check_point2')
    _pickle_to('fmri_embed_dm.pkl', mds.embed(dist_matrix))
    print('finished')
# Notebook-style EEG walkthrough executed as top-level statements: build an
# EEG dataset, its distance matrix and MDS embedding, a channel-location
# distance matrix, and then save one spark-line plot per resource.
# NOTE(review): `BASE` and `dataset_descriptor` are not assigned anywhere in
# this span; presumably they are defined earlier in the file -- confirm.
out_base = os.path.join(BASE, "eeg_derivatives")
# NOTE(review): "deriatives" looks like a typo for "derivatives"; it is part
# of the on-disk path, so it is left untouched here.
out_emb_base = os.path.join(BASE, "eeg_embedded_deriatives")
# Make sure an "agg" subdirectory exists under both output directories.
os.makedirs(out_base + "/agg", exist_ok=True)
os.makedirs(out_emb_base + "/agg", exist_ok=True)

# In[2]:

eds = lds.EEGDataSet(dataset_descriptor)

# Create a lemur distance matrix based on the EEG data
DM = lds.DistanceMatrix(eds, lms.FroCorr)
DM.name = "eeg-DistanceMatrix"

# In[3]:

# Create an embedded distance matrix object under MDS
MDSEmbedder = leb.MDSEmbedder(num_components=10)
EEG_Embedded = MDSEmbedder.embed(DM)

# In[4]:

# Channel locations are read from a fixed path; the X/Y/Z columns feed a
# distance matrix between channels.
chanlocs = pd.read_csv("data/chanlocs.csv")
spatial = lds.DataSet(chanlocs[["X", "Y", "Z"]], "Spatial")
spatialDM = lds.DistanceMatrix(spatial, lms.VectorDifferenceNorm)

# In[5]:

# Plot every resource in the EEG dataset, saving under `out_base`.
for i in range(eds.n):
    single_ds = eds.getResourceDS(i)
    lpl.SparkLinePlotter(single_ds, mode="savediv", base_path=out_base).plot(sample_freq=500)
def run_modality(name, modality):
    """Run the generic pipeline for one modality of dataset ``name``.

    For every (datatype, extension) pair configured for ``modality`` the
    function loads the dataset from ``data/<name>/<modality>`` and writes
    the following pickles into ``data/<name>/<modality>/<datatype>``:

    * ``ds.pkl``            -- the loaded dataset object
    * ``dm.pkl``            -- its distance matrix
    * ``embed_dm.pkl``      -- 10-component MDS embedding
    * ``hgmm_clust_dm.pkl`` / ``km_clust_dm.pkl`` -- clusterings, only when
      the dataset reports more than 10 items (``DS.n > 10``)

    If the output directory for a datatype already exists, that datatype is
    skipped entirely. For ``modality == 'eeg'`` the channel locations from
    ``data/<name>/eeg/chanlocs.csv`` are additionally saved as
    ``chanlocs.pkl`` and ``spatial_dm.pkl`` in ``data/<name>/eeg``.

    Args:
        name: Dataset directory name under ``data``.
        modality: One of ``'eeg'``, ``'fmri'`` or ``'graph'``.

    Returns:
        Tuple ``(bp, modality_list)``: the ``lds.BIDSParser`` for the
        modality root and the list of (datatype, extension) pairs used.

    Raises:
        ValueError: If ``modality`` is not one of the supported values.
    """
    # For each modality, set specific settings. This runs before anything
    # touches the parser or the filesystem so that an unsupported modality
    # fails fast with ValueError.
    if modality == 'eeg':
        modality_list = [('preprocessed', '.pkl')]
        DS_type = lds.EEGDataSet
        metric = lms.FroCorr
    elif modality == 'fmri':
        modality_list = [('func', 'nii.gz')]
        DS_type = lds.fMRIDataSet
        metric = lms.DiffAve
    elif modality == 'graph':
        modality_list = [('func', '.edgelist')]
        DS_type = lds.GraphDataSet
        metric = lms.FroCorr
    else:
        raise ValueError('Needs to be eeg, fmri, or graph')

    # Set root paths and parse data
    BASE = 'data'
    DATASET = '%s/%s' % (name, modality)
    root = os.path.join(BASE, DATASET)
    bp = lds.BIDSParser(root)

    def _save(directory, filename, payload):
        # Pickle ``payload`` to <directory>/<filename>.
        with open(os.path.join(directory, filename), 'wb') as pkl_loc:
            pkl.dump(payload, pkl_loc)

    # If EEG, save important metadata pkls
    if modality == 'eeg':
        eeg_dir = "data/%s/eeg" % (name)
        chanlocs = pd.read_csv("data/%s/eeg/chanlocs.csv" % (name))
        # ``DataFrame.as_matrix()`` was deprecated in pandas 0.23 and removed
        # in pandas 1.0; ``.values`` yields the same array on all versions.
        _save(eeg_dir, 'chanlocs.pkl', chanlocs.values[:, 1:4])
        spatial = lds.DataSet(chanlocs[["X", "Y", "Z"]], "Spatial")
        spatialDM = lds.DistanceMatrix(spatial, lms.VectorDifferenceNorm)
        _save(eeg_dir, 'spatial_dm.pkl', spatialDM)

    # Iterate through potential files
    for datatype, f_ext in modality_list:
        dataset_descriptor = bp.getModalityFrame(datatype, f_ext)
        DS = DS_type(dataset_descriptor)
        print(DS)

        # An existing output directory means this datatype is skipped.
        curr_dir = os.path.join(root, datatype)
        if os.path.exists(curr_dir):
            continue
        os.makedirs(curr_dir)

        # Save the dataset
        _save(curr_dir, 'ds.pkl', DS)

        # Create a lemur distance matrix; fMRI passes an extra positional
        # True to lds.DistanceMatrix.
        if modality == 'fmri':
            DM = lds.DistanceMatrix(DS, metric, True)
        else:
            DM = lds.DistanceMatrix(DS, metric)
        DM.name = "%s-DistanceMatrix" % (modality)
        _save(curr_dir, 'dm.pkl', DM)

        # Create an embedded distance matrix object under MDS
        MDSEmbedder = leb.MDSEmbedder(num_components=10)
        embedded = MDSEmbedder.embed(DM)
        _save(curr_dir, 'embed_dm.pkl', embedded)

        ##### Clustering
        # NOTE(review): both clusterers are treated as guarded by the size
        # check below -- confirm this matches the intended layout.
        if DS.n > 10:
            clustered = lcl.HGMMClustering(embedded, 4)
            clustered.cluster()
            _save(curr_dir, 'hgmm_clust_dm.pkl', clustered)

            clustered = lcl.AdaptiveKMeans(embedded)
            clustered.cluster()
            _save(curr_dir, 'km_clust_dm.pkl', clustered)

    # Return modality list
    return bp, modality_list