# imports assumed by this snippet (compute_correlation is BrainIAK's FCMA utility)
import numpy as np
from scipy import stats
from brainiak.fcma.util import compute_correlation


def time_segment_matching_accuracy(data, win_size=6):
    nsubjs = len(data)
    (ndim, nsample) = data[0].shape
    accu = np.zeros(shape=nsubjs)
    nseg = nsample - win_size
    # mysseg prediction
    trn_data = np.zeros((ndim*win_size, nseg), order='f')
    # the trn data also include the tst data, but will be subtracted when
    # calculating A
    for m in range(nsubjs):
        for w in range(win_size):
            trn_data[w*ndim:(w+1)*ndim, :] += data[m][:, w:(w+nseg)]
    for tst_subj in range(nsubjs):
        tst_data = np.zeros((ndim*win_size, nseg), order='f')
        for w in range(win_size):
            tst_data[w*ndim:(w+1)*ndim, :] = data[tst_subj][:, w:(w+nseg)]
        A = np.nan_to_num(stats.zscore((trn_data - tst_data), axis=0, ddof=1))
        B = np.nan_to_num(stats.zscore(tst_data, axis=0, ddof=1))
        # compute correlation matrix
        corr_mtx = compute_correlation(B.T, A.T)
        # exclude segments overlapping the tested segment
        for i in range(nseg):
            for j in range(nseg):
                if abs(i - j) < win_size and i != j:
                    corr_mtx[i, j] = -np.inf
        max_idx = np.argmax(corr_mtx, axis=1)
        accu[tst_subj] = np.sum(max_idx == np.arange(nseg)) / float(nseg)
    return accu
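# A minimal usage sketch for time_segment_matching_accuracy (assumption:
# synthetic data standing in for real per-subject feature x TR matrices).
# Subjects sharing a common time course should score well above the ~1/nseg
# chance level.
rng = np.random.RandomState(0)
shared_course = rng.randn(10, 100)            # 10 features x 100 timepoints
demo_data = [shared_course + 0.1 * rng.randn(10, 100) for _ in range(4)]
print(time_segment_matching_accuracy(demo_data, win_size=6))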
def calculate_gene_expression_similarity(reduced_stat_map_data, mask="full"):
    store_file = "/ahba_data/store_max1_reduced.h5"
    subcortex_mask = "/ahba_data/subcortex_mask.npy"

    results_dfs = []
    with pd.HDFStore(store_file, 'r') as store:
        for donor_id in store.keys():
            print("Loading expression data (%s)" % donor_id)
            expression_data = store.get(donor_id.replace(".", "_"))

            print("Getting statmap values (%s)" % donor_id)
            nifti_values = reduced_stat_map_data[expression_data.columns]

            print("Removing missing values (%s)" % donor_id)
            na_mask = np.isnan(nifti_values)
            if mask == "subcortex":
                na_mask = np.logical_or(
                    na_mask,
                    np.isnan(np.load(subcortex_mask)[expression_data.columns]))
            elif mask == "cortex":
                na_mask = np.logical_or(
                    na_mask,
                    np.logical_not(np.isnan(
                        np.load(subcortex_mask)[expression_data.columns])))
            else:
                assert mask == "full"

            nifti_values = np.array(nifti_values)[np.logical_not(na_mask)]
            expression_data.drop(expression_data.columns[na_mask], axis=1,
                                 inplace=True)

            print("z scoring (%s)" % donor_id)
            expression_data = pd.DataFrame(zscore(expression_data, axis=1),
                                           columns=expression_data.columns,
                                           index=expression_data.index)
            nifti_values = zscore(nifti_values)

            print("Calculating linear regressions (%s)" % donor_id)
            regression_results = np.linalg.lstsq(
                np.c_[nifti_values, np.ones_like(nifti_values)],
                expression_data.T)
            results_df = pd.DataFrame({"slope": regression_results[0][0]},
                                      index=expression_data.index)

            results_df.columns = pd.MultiIndex.from_tuples(
                [(donor_id[1:], c,) for c in results_df.columns],
                names=['donor_id', 'parameter'])

            results_dfs.append(results_df)

    print("Concatenating results")
    results_df = pd.concat(results_dfs, axis=1)
    del results_dfs

    t, p = ttest_1samp(results_df, 0.0, axis=1)
    group_results_df = pd.DataFrame({"t": t, "p": p}, columns=['t', 'p'],
                                    index=expression_data.index)
    _, group_results_df["p (FDR corrected)"], _, _ = multipletests(
        group_results_df.p, method='fdr_bh')
    group_results_df["variance explained (mean)"] = (
        results_df.xs('slope', axis=1, level=1) ** 2 * 100).mean(axis=1)
    group_results_df["variance explained (std)"] = (
        results_df.xs('slope', axis=1, level=1) ** 2 * 100).std(axis=1)
    del results_df

    probe_info = pd.read_csv("/ahba_data/probe_info_max1.csv",
                             index_col=0).drop(['chromosome', "gene_id"],
                                               axis=1)
    group_results_df = group_results_df.join(probe_info)
    group_results_df = group_results_df[[
        "gene_symbol", "entrez_id.1", "gene_name", "t", "p",
        "p (FDR corrected)", "variance explained (mean)",
        "variance explained (std)"]]

    return group_results_df
def run(self):
    self.results = []

    self.loader = ConnectivityDataLoader()
    self.X, self.y = self.loader.setup_analysis(
        self.path, self.roi_list, self.directory, self.condition_list,
        self.subjects).filter(self.filter_).get_data()

    X = self.X
    y = self.y
    X = zscore(X, axis=1)  # Sample-wise
    y = zscore(np.float_(y))

    self.fs = FeatureSelectionIterator()
    self.fs.setup_analysis(self.fs_algorithm,
                           self.fs_ranking_fx).run(X, y).select_first(80)

    self.reg = RegressionAnalysis().setup_analysis(self.cv_schema,
                                                   self.learner,
                                                   self.error_fx)

    # Speedup stuff
    schema = ShuffleSplit(12, n_iter=1, test_size=0.25)
    self.perm_reg = RegressionAnalysis().setup_analysis(schema,
                                                        self.learner,
                                                        self.error_fx)

    self.perm = PermutationAnalysis().setup_analysis(
        self.reg, n_permutation=self.n_permutations, dimension='features')

    for i, set_ in enumerate(self.fs):
        if i > 78:
            X_ = X[:, set_]
            y_ = y

            reg_res = self.reg.run(X_, y_)  # To be selected
            n_dist = self.perm.run(X_, y_)
            p_res = self.perm.pvalues(reg_res)

            self.results.append([reg_res, n_dist, p_res])

    # self.save()
    return self.results
def sfn(l, msk, myrad, bcast_var):
    # Arguments:
    # l -- a list of 4D arrays, containing data from a single searchlight
    # msk -- a 3D binary array, mask of this searchlight
    # myrad -- an integer, sl_rad
    # bcast_var -- whatever is broadcasted

    # extract training and testing data
    train_data = []
    test_data = []
    d1, d2, d3, ntr = l[0].shape
    nvx = d1 * d2 * d3
    for s in l:
        train_data.append(np.reshape(s[:, :, :, :int(ntr/2)],
                                     (nvx, int(ntr/2))))
        test_data.append(np.reshape(s[:, :, :, int(ntr/2):],
                                    (nvx, ntr - int(ntr/2))))

    # train an srm model
    srm = SRM(bcast_var[0], bcast_var[1])
    srm.fit(train_data)

    # transform test data
    shared_data = srm.transform(test_data)
    for s in range(len(l)):
        shared_data[s] = np.nan_to_num(stats.zscore(shared_data[s],
                                                    axis=1, ddof=1))

    # run experiment
    accu = time_segment_matching_accuracy(shared_data)

    # return: can also return several values. In that case, the final output
    # will be a 3D array of tuples
    return np.mean(accu)
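# A hedged sketch of wiring a kernel like sfn into BrainIAK's Searchlight
# (assumptions: brainiak is installed and synthetic arrays stand in for real
# BOLD runs; this is illustrative, not a tested pipeline).
import numpy as np
from brainiak.searchlight.searchlight import Searchlight

rng = np.random.RandomState(0)
subj_data = [rng.randn(10, 10, 10, 40) for _ in range(3)]  # 3 subjects
mask = np.ones((10, 10, 10), dtype=bool)
sl = Searchlight(sl_rad=1, max_blk_edge=5)
sl.distribute(subj_data, mask)
sl.broadcast((10, 5))                # bcast_var: (SRM iterations, features)
result = sl.run_searchlight(sfn)     # 3D array of mean matching accuracies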
def run(self):
    self.loader = ConnectivityDataLoader()
    self.X, self.y = self.loader.setup_analysis(
        self.path, self.roi_list, self.directory, self.condition_list,
        self.subjects).filter(self.filter_).get_data()

    X = self.X
    y = self.y
    X = zscore(X, axis=1)  # Sample-wise
    y = zscore(np.float_(y))

    self.fs = FeatureSelectionIterator().setup_analysis(self.fs_algorithm,
                                                        self.fs_ranking_fx)

    self.reg = RegressionAnalysis().setup_analysis(self.cv_schema,
                                                   self.learner,
                                                   self.error_fx,
                                                   feature_selection=self.fs)

    self.perm = PermutationAnalysis().setup_analysis(
        self.reg, n_permutation=self.n_permutations, dimension='labels')

    self.results = []

    reg_res = self.reg.run(X, y)  # To be selected
    perm_res = self.perm.run(X, y)
    self.results.append([reg_res, perm_res])

    # self.save()
    return self.results
def zscore_function(rep):
    """
    This function applies the zscore() transform to every value in the
    replications.

    Args:
        rep : LIST[rep1, rep2, rep3, ...]
            Each repN is a set of biological measurements used to compute
            the likelihood result.

    Returns:
        LIST[rep1, rep2, rep3, ...]
            The transformed replications.
    """
    rep = map(lambda x: np.asmatrix(np.transpose(
        np.array([sss.zscore(item)
                  for item in np.transpose(np.asarray(x))]))), rep)
    return list(rep)
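# A quick usage sketch (synthetic replications; `sss` is scipy.stats, as in
# the function above): every column of each replication is z-scored.
import numpy as np
import scipy.stats as sss

reps = [np.arange(12.0).reshape(4, 3), np.arange(12.0).reshape(4, 3) ** 2]
normed = zscore_function(reps)
print(normed[0].shape)   # each replication keeps its original shape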
def run(self):
    self.loader = ConnectivityDataLoader()
    self.X, self.y = self.loader.setup_analysis(
        self.path, self.roi_list, self.directory, self.condition_list,
        self.subjects).filter(self.filter_).get_data()

    X = self.X
    y = self.y
    X = zscore(X, axis=1)  # Sample-wise
    y = zscore(np.float_(y))

    self.fs = FeatureSelectionIterator()
    self.fs.setup_analysis(self.fs_algorithm,
                           self.fs_ranking_fx).run(X, y).select_first(80)

    self.reg = RegressionAnalysis().setup_analysis(self.cv_schema,
                                                   self.learner,
                                                   self.error_fx)

    self.results = []
    for set_ in self.fs:
        X_ = X[:, set_]
        y_ = y

        reg_res = self.reg.run(X_, y_)  # To be selected
        self.results.append([reg_res])

    self.save()
    return self.results
def add_normalized_te(self, normed_prefix="norm"):
    """
    z-score normalize the TE values.

    Creates new columns corresponding to normed TEs, beginning with
    'normed_prefix'. Normed TEs are first logged (base 2) and then z-score
    normalized.
    """
    print("Normalizing TE...")
    te_cols = [c for c in self.table.columns if c.startswith("TE_")]
    for col in te_cols:
        normed_col = "%s_%s" % (normed_prefix, col)
        self.table[normed_col] = zscore(self.table[col].apply(log2).dropna())
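# A hedged sketch of the same transform outside the class: one hypothetical
# TE column is logged (base 2) and then z-scored, mirroring the method above.
import pandas as pd
from numpy import log2
from scipy.stats import zscore

table = pd.DataFrame({"TE_heart": [0.5, 1.0, 2.0, 4.0]})
table["norm_TE_heart"] = zscore(table["TE_heart"].apply(log2).dropna())
print(table)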
def process_input(subjects_files, mask):
    """Process input to obtain data suitable for SRM"""
    mask_suffix = "_" + mask + ".nii.gz"
    srm_input = []
    for subject_files in subjects_files:
        srm_input_subject = []
        for path in subject_files:
            if path.name.endswith(mask_suffix):
                continue
            img = nib.load(str(path))
            mask_path = \
                str(path.with_suffix("").with_suffix("")) + mask_suffix
            mask_img = nib.load(mask_path)
            srm_input_subject.append(nilearn.masking.apply_mask(img,
                                                                mask_img))
        srm_input.append(stats.zscore(np.concatenate(srm_input_subject),
                                      axis=0, ddof=1).T)
    return srm_input
def get_feature_weights_matrix(weights, sets, mask, indices):
    """
    Function used to compute the average weight matrix in case of several
    cross-validation folds and feature selection for each fold.

    Parameters
    ----------
    weights : ndarray shape n_folds x n_selected_features
        The weights matrix with the shape specified in the signature
    sets : ndarray shape n_folds x n_selected_features
        This represents the index in the square matrix of the feature
        selected by the algorithm in each cross-validation fold
    mask : ndarray shape n_roi x n_roi
        The mask matrix of the valid ROIs selected. Important: this matrix
        should be triangular with the lower part set to zero.
    indices : tuple
        This is equal to np.nonzero(mask)

    Returns
    -------
    matrix : ndarray n_roi x n_roi
        It returns the average weights across cross-validation folds in
        square form.
    """
    weights = weights.squeeze()
    filling_vector = np.zeros(np.count_nonzero(mask))
    counting_vector = np.zeros(np.count_nonzero(mask))

    for s, w in zip(sets, weights):
        filling_vector[s] += zscore(w)
        counting_vector[s] += 1

    avg_weights = np.nan_to_num(filling_vector / counting_vector)
    mask[indices] = avg_weights
    matrix = np.nan_to_num(copy_matrix(mask, diagonal_filler=0))

    return matrix
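# A small synthetic walk-through of get_feature_weights_matrix. `copy_matrix`
# is an external helper in the original codebase; the stand-in below (an
# assumption) just mirrors the upper triangle so the sketch runs end to end.
import numpy as np
from scipy.stats import zscore

def copy_matrix(m, diagonal_filler=0):
    m = m + m.T
    np.fill_diagonal(m, diagonal_filler)
    return m

mask = np.triu(np.ones((3, 3)), k=1)          # valid ROI pairs (upper triangle)
indices = np.nonzero(mask)
weights = np.array([[0.5, 1.5], [1.0, 2.0]])  # n_folds x n_selected_features
sets = np.array([[0, 2], [0, 1]])             # per-fold indices into the triangle
print(get_feature_weights_matrix(weights, sets, mask, indices))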
def searchlight(coords, human_bounds, mask, subjs, song_idx, song_bounds,
                srm_k, hrf):
    """run searchlight

    Create searchlight object and perform voxel function at each searchlight
    location

    Parameters
    ----------
    coords : voxel by xyz ndarray (2D, Vx3)
    human_bounds : array of human-annotated event boundaries (1D)
    mask : 3D brain mask
    subjs : list of subject IDs
    song_idx : song index (scalar)
    song_bounds : array of song boundaries (1D)
    srm_k : # of SRM features (scalar)
    hrf : hemodynamic lag in TRs (scalar)

    Returns
    -------
    3D data: brain (or ROI) filled with searchlight function scores (3D)
    """

    stride = 5
    radius = 5
    min_vox = srm_k
    nPerm = 1000
    SL_allvox = []
    SL_results = []
    datadir = '/jukebox/norman/jamalw/MES/prototype/link/scripts/data/searchlight_input/'

    for x in range(0, np.max(coords, axis=0)[0] + stride, stride):
        for y in range(0, np.max(coords, axis=0)[1] + stride, stride):
            for z in range(0, np.max(coords, axis=0)[2] + stride, stride):
                if not os.path.isfile(datadir + subjs[0] + '/' + str(x) +
                                      '_' + str(y) + '_' + str(z) + '.npy'):
                    continue
                D = distance.cdist(coords,
                                   np.array([x, y, z]).reshape((1, 3)))[:, 0]
                SL_vox = D <= radius
                data = []
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) +
                                        '_' + str(y) + '_' + str(z) + '.npy')
                    subj_regs = np.genfromtxt(datadir + subjs[i] +
                                              '/EPI_mcf1.par')
                    motion = subj_regs.T
                    regr = linear_model.LinearRegression()
                    regr.fit(motion[:, 0:2511].T, subj_data[:, :, 0].T)
                    subj_data1 = (subj_data[:, :, 0]
                                  - np.dot(regr.coef_, motion[:, 0:2511])
                                  - regr.intercept_[:, np.newaxis])
                    data.append(np.nan_to_num(
                        stats.zscore(subj_data1, axis=1, ddof=1)))
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) +
                                        '_' + str(y) + '_' + str(z) + '.npy')
                    subj_regs = np.genfromtxt(datadir + subjs[i] +
                                              '/EPI_mcf2.par')
                    motion = subj_regs.T
                    regr = linear_model.LinearRegression()
                    regr.fit(motion[:, 0:2511].T, subj_data[:, :, 1].T)
                    subj_data2 = (subj_data[:, :, 1]
                                  - np.dot(regr.coef_, motion[:, 0:2511])
                                  - regr.intercept_[:, np.newaxis])
                    data.append(np.nan_to_num(
                        stats.zscore(subj_data2, axis=1, ddof=1)))
                print("Running Searchlight")
                # only run function on searchlights with voxels greater than
                # or equal to min_vox
                if data[0].shape[0] >= min_vox:
                    SL_match = HMM(data, human_bounds, song_idx, song_bounds,
                                   srm_k, hrf)
                    SL_results.append(SL_match)
                    SL_allvox.append(np.array(np.nonzero(SL_vox)[0]))

    voxmean = np.zeros((coords.shape[0], nPerm + 1))
    vox_SLcount = np.zeros(coords.shape[0])
    for sl in range(len(SL_results)):
        voxmean[SL_allvox[sl], :] += SL_results[sl]
        vox_SLcount[SL_allvox[sl]] += 1
    voxmean = voxmean / vox_SLcount[:, np.newaxis]
    vox_z = np.zeros((coords.shape[0], nPerm + 1))
    for p in range(nPerm + 1):
        vox_z[:, p] = ((voxmean[:, p] - np.mean(voxmean[:, 1:], axis=1))
                       / np.std(voxmean[:, 1:], axis=1))

    return vox_z, voxmean
def searchlight(coords, mask, subjs, set_srm):
    """run searchlight

    Create searchlight object and perform voxel function at each searchlight
    location

    Parameters
    ----------
    coords : voxel by xyz ndarray (2D, Vx3)
    mask : x by y by z (e.g. 91, 109, 91)
    subjs : list of subject IDs

    Returns
    -------
    3D data: brain (or ROI) filled with searchlight function scores (3D)
    """

    stride = 5
    radius = 5
    min_vox = 10
    nPerm = 1000
    SL_allvox = []
    SL_results = []
    voxISC = np.zeros(coords.shape[0])
    datadir = '/jukebox/norman/jamalw/MES/prototype/link/scripts/data/searchlight_input/'

    for x in range(0, np.max(coords, axis=0)[0] + stride, stride):
        for y in range(0, np.max(coords, axis=0)[1] + stride, stride):
            for z in range(0, np.max(coords, axis=0)[2] + stride, stride):
                if not os.path.isfile(datadir + subjs[0] + '/' + str(x) +
                                      '_' + str(y) + '_' + str(z) + '.npy'):
                    continue
                D = distance.cdist(coords,
                                   np.array([x, y, z]).reshape((1, 3)))[:, 0]
                SL_vox = D <= radius
                data = []
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) +
                                        '_' + str(y) + '_' + str(z) + '.npy')
                    data.append(np.nan_to_num(
                        stats.zscore(subj_data[:, :, 0], axis=1, ddof=1)))
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) +
                                        '_' + str(y) + '_' + str(z) + '.npy')
                    data.append(np.nan_to_num(
                        stats.zscore(subj_data[:, :, 1], axis=1, ddof=1)))
                print("Running Searchlight")
                SL_isc_mean_results = isc_srm(data, set_srm)
                SL_results.append(SL_isc_mean_results)
                SL_allvox.append(np.array(np.nonzero(SL_vox)[0]))

    voxmean = np.zeros((coords.shape[0]))
    vox_SLcount = np.zeros(coords.shape[0])
    for sl in range(len(SL_results)):
        voxmean[SL_allvox[sl]] += SL_results[sl]
        vox_SLcount[SL_allvox[sl]] += 1
    print("Voxmean: ", voxmean.shape)
    print("vox_SLcount: ", vox_SLcount)
    voxmean = voxmean / vox_SLcount

    return voxmean
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.patches as patches
import numpy as np
import brainiak.eventseg.event
from scipy import stats
from scipy.stats import norm, zscore, pearsonr
from scipy.signal import gaussian, convolve
from sklearn import decomposition
from brainiak.funcalign.srm import SRM

datadir = '/tigress/jamalw/MES/prototype/link/scripts/chris_dartmouth/data/'

songs = ['St Pauls Suite', 'I Love Music', 'Moonlight Sonata',
         'Change of the Guard', 'Waltz of Flowers', 'The Bird', 'Island',
         'Allegro Moderato', 'Finlandia', 'Early Summer',
         'Capriccio Espagnole', 'Symphony Fantastique',
         'Boogie Stop Shuffle', 'My Favorite Things', 'Blue Monk',
         'All Blues']

# Load in data
train = np.nan_to_num(stats.zscore(np.load(datadir + 'A1_run1_n25.npy'),
                                   axis=1, ddof=1))
test = np.nan_to_num(stats.zscore(np.load(datadir + 'A1_run2_n25.npy'),
                                  axis=1, ddof=1))

# Convert data into lists where each element is voxels by samples
train_list = []
test_list = []
for i in range(0, train.shape[2]):
    train_list.append(train[:, :, i])
    test_list.append(test[:, :, i])

# Initialize model
print('Building Model')
srm = SRM(n_iter=10, features=10)

# Fit model to training data (run 1)
print('Training Model')
def sigmoid(x, k, c):
    y = 1 / (1 + np.exp(-k * x)) + c
    return y

def logarithm(x, a, b):
    y = a + np.log(x + b)
    return y

def exponential(x, k):
    y = np.exp(k * x)
    return y

X = X / X.std(axis=0)
y = y / y.std()

X = zscore(X, axis=0)
y = zscore(y, axis=0)

x_ = np.linspace(-1.5, 1.5, 100)

error_conn = []
mse_ = []
func = [exponential, sigmoid]
shift = 200
for i in range(100):
    mse__ = []
    pl.figure()
    pl.scatter(X[:, shift + i], y)
    for f in func:
        try:
            popt, pcov = curve_fit(f, X[:, shift + i], y)
        except RuntimeError:
def standardize_values(df, prop_name):
    std_dev = 3
    z_scores = stats.zscore(df.loc[:, prop_name])
    return df[np.abs(z_scores) < std_dev]
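# A quick usage sketch (hypothetical data): rows whose z-score for the given
# column is 3 or more standard deviations from the mean are dropped.
import numpy as np
import pandas as pd
from scipy import stats

rng = np.random.RandomState(0)
df = pd.DataFrame({"height": np.r_[rng.normal(170, 5, 50), 260]})
print(len(standardize_values(df, "height")))  # the 260 row is removed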
])

human_bounds = np.load(ann_dirs + songs1[song_number] + '/' +
                       songs1[song_number] + '_beh_seg.npy') + hrf
human_bounds = np.append(0, np.append(human_bounds, durs1[song_number]))

start_run1 = song_bounds1[song_number]
end_run1 = song_bounds1[song_number + 1]
start_run2 = song_bounds2[songs2.index(songs1[song_number])]
end_run2 = song_bounds2[songs2.index(songs1[song_number]) + 1]

# Load in data
run1 = stats.zscore(np.load(datadir + 'fdr_01_' + roi +
                            '_split_merge_no_srm_run1_n25.npy'),
                    axis=1, ddof=1)
run2 = stats.zscore(np.load(datadir + 'fdr_01_' + roi +
                            '_split_merge_no_srm_run2_n25.npy'),
                    axis=1, ddof=1)

#if do_srm == 0:
run1DataAvg = np.mean(run1, axis=2)
run2DataAvg = np.mean(run2, axis=2)

song1 = run1DataAvg[:, start_run1:end_run1]
song2 = run2DataAvg[:, start_run2:end_run2]

#elif do_srm == 1:
# Convert data into lists where each element is voxels by samples
#run1_list = []
                          features_orb_4, features_orb_5, features_orb_6,
                          features_orb_7, features_orb_8,
                          features_orb_9), axis=0)
else:
    features = np.load("features.npy")

if load_features_flat:
    print("Loading flattened feature data")
    features_flatten = np.load("features_flatten.npy")

data_y = np.load("data_y.npy")

if limit_data:
    data_y = data_y[:image_count]

if remove_outliers:
    # Remove outliers
    print("Removing Outliers")
    z = np.abs(stats.zscore(data[:, 6].astype(int)))
    # print(np.where(z > threshold))
    print(data.shape)
    print(data_y.shape)
    data = data[(z < threshold)]
    data_y = data_y[(z < threshold)]
    print("Outliers Removed")
    print(data.shape)
    print(data_y.shape)

# used to reduce the image pool to run faster tests
np.random.seed(42)
if limit_data:
def compute_mesh_weights(mesh, weight_type='conformal', cot_threshold=None,
                         z_threshold=None):
    """
    Compute a weight matrix.

    W is a sparse weight matrix and W(i,j) = 0 if vertex i and vertex j are
    not connected in the mesh.

    Details are presented in:
    Desbrun, M., Meyer, M., & Alliez, P. (2002).
    Intrinsic parameterizations of surface meshes.
    Computer Graphics Forum, 21(3), 209-218.
    https://doi.org/10.1111/1467-8659.00580
    and
    Reuter, M., Biasotti, S., & Giorgi, D. (2009).
    Discrete Laplace-Beltrami operators for shape analysis and segmentation.
    Computers & Graphics, 33(3), 381-390.
    https://doi.org/10.1016/j.cag.2009.03.005

    Additional checks and thresholds are applied to ensure finite values.

    :param mesh:
    :param weight_type: choice across conformal, fem, meanvalue, authalic
    :param cot_threshold:
    :param z_threshold:
    :return:
    """
    # cot_threshold = 0.00001
    # print('angle threshold')
    print('    Computing mesh weights of type ' + weight_type)
    vert = mesh.vertices
    poly = mesh.faces

    Nbv = vert.shape[0]
    W = sparse.lil_matrix((Nbv, Nbv))
    femB = sparse.lil_matrix((Nbv, Nbv))
    if weight_type == 'conformal' or weight_type == 'fem':
        threshold = 0.0001  # np.spacing(1)??
        threshold_needed = 0
        for i in range(3):
            i1 = np.mod(i, 3)
            i2 = np.mod(i + 1, 3)
            i3 = np.mod(i + 2, 3)
            pp = vert[poly[:, i2], :] - vert[poly[:, i1], :]
            qq = vert[poly[:, i3], :] - vert[poly[:, i1], :]
            cr = np.cross(pp, qq)
            area = np.sqrt(np.sum(np.power(cr, 2), 1)) / 2
            # nopp = np.apply_along_axis(np.linalg.norm, 1, pp)
            # noqq = np.apply_along_axis(np.linalg.norm, 1, qq)
            noqq = np.sqrt(np.sum(qq * qq, 1))
            nopp = np.sqrt(np.sum(pp * pp, 1))
            thersh_nopp = np.where(nopp < threshold)[0]
            thersh_noqq = np.where(noqq < threshold)[0]
            if len(thersh_nopp) > 0:
                nopp[thersh_nopp] = threshold
                threshold_needed += len(thersh_nopp)
            if len(thersh_noqq) > 0:
                noqq[thersh_noqq] = threshold
                threshold_needed += len(thersh_noqq)
            # print(np.min(noqq))
            pp = pp / np.vstack((nopp, np.vstack((nopp, nopp)))).transpose()
            qq = qq / np.vstack((noqq, np.vstack((noqq, noqq)))).transpose()
            ang = np.arccos(np.sum(pp * qq, 1))
            # ############## preventing infs in weights
            inds_zeros = np.where(ang == 0)[0]
            ang[inds_zeros] = threshold
            threshold_needed_angle = len(inds_zeros)
            # ##########################################
            cot = 1 / np.tan(ang)
            if cot_threshold is not None:
                thresh_inds = cot < 0
                cot[thresh_inds] = cot_threshold
                threshold_needed_angle += np.count_nonzero(thresh_inds)
            W = W + sparse.coo_matrix((cot, (poly[:, i2], poly[:, i3])),
                                      shape=(Nbv, Nbv))
            W = W + sparse.coo_matrix((cot, (poly[:, i3], poly[:, i2])),
                                      shape=(Nbv, Nbv))
            femB = femB + sparse.coo_matrix(
                (area / 12, (poly[:, i2], poly[:, i3])), shape=(Nbv, Nbv))
            femB = femB + sparse.coo_matrix(
                (area / 12, (poly[:, i3], poly[:, i2])), shape=(Nbv, Nbv))
        # if weight_type == 'fem':
        #     W.data = W.data / 2

        nnz = W.nnz
        if z_threshold is not None:
            z_weights = sss.zscore(W.data)
            inds_out = np.where(np.abs(z_weights) > z_threshold)[0]
            W.data[inds_out] = np.mean(W.data)
            print('    -Zscore threshold needed for ', len(inds_out),
                  ' values = ', 100 * len(inds_out) / nnz, ' %')
            # inds_out_inf = np.where(z_weights < -z_thresh)[0]
            # inds_out_sup = np.where(z_weights > z_thresh)[0]
            # val_inf = np.max(W.data[inds_out_inf])
            # W.data[inds_out_inf] = val_inf
            # val_sup = np.min(W.data[inds_out_sup])
            # W.data[inds_out_sup] = val_sup
            # print('    -Zscore threshold needed for ',
            #       len(inds_out_inf) + len(inds_out_sup), ' values-')
        print('    -edge length threshold needed for ', threshold_needed,
              ' values = ', 100 * threshold_needed / nnz, ' %')
        if cot_threshold is not None:
            print('    -cot threshold needed for ', threshold_needed_angle,
                  ' values = ', 100 * threshold_needed_angle / nnz, ' %')

    if weight_type == 'meanvalue':
        for i in range(3):
            i1 = np.mod(i, 3)
            i2 = np.mod(i + 1, 3)
            i3 = np.mod(i + 2, 3)
            pp = vert[poly[:, i2], :] - vert[poly[:, i1], :]
            qq = vert[poly[:, i3], :] - vert[poly[:, i1], :]
            rr = vert[poly[:, i2], :] - vert[poly[:, i3], :]
            # normalize the vectors
            noqq = np.sqrt(np.sum(qq * qq, 1))
            nopp = np.sqrt(np.sum(pp * pp, 1))
            norr = np.sqrt(np.sum(rr * rr, 1))
            pp = pp / np.vstack((nopp, np.vstack((nopp, nopp)))).transpose()
            qq = qq / np.vstack((noqq, np.vstack((noqq, noqq)))).transpose()
            rr = rr / np.vstack((norr, np.vstack((norr, norr)))).transpose()
            # compute angles
            angi1 = np.arccos(np.sum(pp * qq, 1)) / 2
            qq = -qq
            angi2 = np.arccos(np.sum(rr * qq, 1)) / 2
            W = W + sparse.coo_matrix(
                (np.tan(angi1) / norr, (poly[:, i1], poly[:, i3])),
                shape=(Nbv, Nbv))
            W = W + sparse.coo_matrix(
                (np.tan(angi2) / norr, (poly[:, i3], poly[:, i1])),
                shape=(Nbv, Nbv))
        nnz = W.nnz

    if weight_type == 'authalic':
        for i in range(3):
            i1 = np.mod(i, 3)
            i2 = np.mod(i + 1, 3)
            i3 = np.mod(i + 2, 3)
            pp = vert[poly[:, i2], :] - vert[poly[:, i1], :]
            qq = vert[poly[:, i3], :] - vert[poly[:, i1], :]
            rr = vert[poly[:, i2], :] - vert[poly[:, i3], :]
            # normalize the vectors
            noqq = np.sqrt(np.sum(qq * qq, 1))
            nopp = np.sqrt(np.sum(pp * pp, 1))
            norr = np.sqrt(np.sum(rr * rr, 1))
            pp = pp / np.vstack((nopp, np.vstack((nopp, nopp)))).transpose()
            qq = qq / np.vstack((noqq, np.vstack((noqq, noqq)))).transpose()
            rr = rr / np.vstack((norr, np.vstack((norr, norr)))).transpose()
            # compute angles
            angi1 = np.arccos(np.sum(pp * qq, 1)) / 2
            cot1 = 1 / np.tan(angi1)
            qq = -qq
            angi2 = np.arccos(np.sum(rr * qq, 1)) / 2
            cot2 = 1 / np.tan(angi2)
            W = W + sparse.coo_matrix(
                (cot1 / norr ** 2, (poly[:, i3], poly[:, i1])),
                shape=(Nbv, Nbv))
            W = W + sparse.coo_matrix(
                (cot2 / norr ** 2, (poly[:, i1], poly[:, i3])),
                shape=(Nbv, Nbv))
        nnz = W.nnz

    li = np.hstack(W.data)
    nb_Nan = len(np.where(np.isnan(li))[0])
    nb_neg = len(np.where(li < 0)[0])
    print('    -number of Nan in weights: ', nb_Nan, ' = ',
          100 * nb_Nan / nnz, ' %')
    print('    -number of Negative values in weights: ', nb_neg, ' = ',
          100 * nb_neg / nnz, ' %')

    return W.tocsr(), femB.tocsr()
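# A hedged usage sketch for compute_mesh_weights: any mesh object exposing
# .vertices (Nx3) and .faces (Mx3) should work; trimesh is an assumption here,
# as are module-level `sparse` (scipy.sparse) and `sss` (scipy.stats) imports
# matching the function above.
import trimesh

tetra = trimesh.Trimesh(
    vertices=[[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]],
    faces=[[0, 1, 2], [0, 1, 3], [0, 2, 3], [1, 2, 3]],
    process=False)
W, femB = compute_mesh_weights(tetra, weight_type='conformal')
print(W.shape)   # (4, 4) sparse CSR weight matrix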
run1_masked = []
run2_masked = []
indices = np.where((mask_img > 0) & (parcels == 77))

for s in range(len(subjs)):
    # Load subjects nifti and motion data then clean (run1)
    print("Loading Run1 BOLD subj num: " + str(s + 1))
    run1 = nib.load(datadir + 'subjects/' + subjs[s] +
                    '/analysis/run1.feat/trans_filtered_func_data.nii'
                    ).get_data()[:, :, :, 0:2511]

    print("Loading Run1 Motion Regressors")
    motion_run1 = np.genfromtxt(motion_dir + subjs[s] + '/EPI_mcf1.par')

    print("Cleaning Run1 BOLD Data")
    clean_run1 = stats.zscore(clean_data(run1[indices][:], motion_run1),
                              axis=1, ddof=1)
    run1_masked.append(run1[indices][:])

    # Load subjects nifti and motion data then clean (run2)
    print("Loading Run2 BOLD subj num: " + str(s + 1))
    run2 = nib.load(datadir + 'subjects/' + subjs[s] +
                    '/analysis/run2.feat/trans_filtered_func_data.nii'
                    ).get_data()[:, :, :, 0:2511]

    print("Loading Run2 Motion Regressors")
    motion_run2 = np.genfromtxt(motion_dir + subjs[s] + '/EPI_mcf2.par')

    print("Cleaning Run2 BOLD Data")
    clean_run2 = stats.zscore(clean_data(run2[indices][:], motion_run2),
                              axis=1, ddof=1)
# Convert data to a list of arrays matching SRM input.
# Each element is a matrix of voxels by TRs.
# Also, concatenate data from both hemispheres in the brain.
movie_data = []
for s in range(subjects):
    if s % size == rank:
        movie_data.append(
            np.concatenate([movie_data_left[:, :, s],
                            movie_data_right[:, :, s]], axis=0))
    else:
        movie_data.append(None)

# Z-score the data
for subject in range(subjects):
    if movie_data[subject] is not None:
        movie_data[subject] = stats.zscore(movie_data[subject],
                                           axis=1, ddof=1)

# Run SRM with the movie data
srm = brainiak.funcalign.srm.SRM(n_iter=10, features=50, comm=comm)
srm.fit(movie_data)


# We define a function to present the output of the experiment.
def plot_confusion_matrix(cm, title="Confusion Matrix"):
    """Plots a confusion matrix for each subject"""
    import matplotlib.pyplot as plt
    import math
    plt.figure()
    subjects = len(cm)
    root_subjects = math.sqrt(subjects)
def calculate_gene_expression_similarity(reduced_stat_map_data):
    store_file = "/ahba_data/store_max1_reduced.h5"

    results_dfs = []
    with pd.HDFStore(store_file, 'r') as store:
        for donor_id in store.keys():
            print("Loading expression data (%s)" % donor_id)
            expression_data = store.get(donor_id.replace(".", "_"))

            print("Getting statmap values (%s)" % donor_id)
            nifti_values = reduced_stat_map_data[expression_data.columns]

            print("Removing missing values (%s)" % donor_id)
            na_mask = np.isnan(nifti_values)
            nifti_values = np.array(nifti_values)[np.logical_not(na_mask)]
            expression_data.drop(expression_data.columns[na_mask], axis=1,
                                 inplace=True)

            print("z scoring (%s)" % donor_id)
            expression_data = pd.DataFrame(zscore(expression_data, axis=1),
                                           columns=expression_data.columns,
                                           index=expression_data.index)
            nifti_values = zscore(nifti_values)

            print("Calculating linear regressions (%s)" % donor_id)
            regression_results = np.linalg.lstsq(
                np.c_[nifti_values, np.ones_like(nifti_values)],
                expression_data.T)
            results_df = pd.DataFrame({"slope": regression_results[0][0]},
                                      index=expression_data.index)

            results_df.columns = pd.MultiIndex.from_tuples(
                [(donor_id[1:], c,) for c in results_df.columns],
                names=['donor_id', 'parameter'])

            results_dfs.append(results_df)

    print("Concatenating results")
    results_df = pd.concat(results_dfs, axis=1)
    del results_dfs

    t, p = ttest_1samp(results_df, 0.0, axis=1)
    group_results_df = pd.DataFrame({"t": t, "p": p}, columns=['t', 'p'],
                                    index=expression_data.index)
    _, group_results_df["p (FDR corrected)"], _, _ = multipletests(
        group_results_df.p, method='fdr_bh')
    group_results_df["variance explained (mean)"] = (
        results_df.xs('slope', axis=1, level=1) ** 2 * 100).mean(axis=1)
    group_results_df["variance explained (std)"] = (
        results_df.xs('slope', axis=1, level=1) ** 2 * 100).std(axis=1)
    del results_df

    probe_info = pd.read_csv("/ahba_data/probe_info_max1.csv",
                             index_col=0).drop(['chromosome', "gene_id"],
                                               axis=1)
    group_results_df = group_results_df.join(probe_info)
    group_results_df = group_results_df[[
        "gene_symbol", "entrez_id.1", "gene_name", "t", "p",
        "p (FDR corrected)", "variance explained (mean)",
        "variance explained (std)"]]

    return group_results_df
movie_file = sio.loadmat('data/sl_movie_data.mat')
movie_data = movie_file['data']

# Dataset size parameters
dim1, dim2, dim3, ntr, nsubj = movie_data.shape

# preprocess data: z-score and set NaN to 0
all_data = []  # first half train, second half test
for s in range(nsubj):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # To ignore "RuntimeWarning: invalid value encountered in
        # true_divide". There are some 0 voxels in the data which I have to
        # keep, so there will be a warning when z-scoring the data. It should
        # be safe to ignore this warning. If your data does not contain 0
        # voxels, you can remove the 2 lines above.
        train_tmp = np.nan_to_num(
            stats.zscore(movie_data[:, :, :, :int(ntr/2), s],
                         axis=3, ddof=1))
        test_tmp = np.nan_to_num(
            stats.zscore(movie_data[:, :, :, int(ntr/2):, s],
                         axis=3, ddof=1))
    all_data.append(np.concatenate((train_tmp, test_tmp), axis=3))

# print information
if rank == 0:
    print('searchlight length is {}'.format(sl_rad))
    print('number of features in SRM: {}'.format(nfeature))
    print('number of subjects is: {}'.format(len(all_data)))
    print('number of TR is: {}'.format(ntr))
    print('brain data dimension is {}-by-{}-by-{}'.format(dim1, dim2, dim3))

# Generate mask: mask is a 3D binary array, with active voxels being 1. I
# simply set all voxels to be active in this example, but you should set the
# mask to fit your ROI in practice.
mask = np.ones((dim1, dim2, dim3), dtype=bool)
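# Why the zscore calls above are wrapped in np.nan_to_num (illustrative): a
# constant (e.g. all-zero) voxel has zero variance, so zscore returns NaN,
# and nan_to_num maps it back to 0 rather than propagating NaNs downstream.
import numpy as np
from scipy import stats

flat_voxel = np.zeros((1, 10))
print(np.nan_to_num(stats.zscore(flat_voxel, axis=1, ddof=1)))  # all zeros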
# srm_k = initial_srm_k

# run SRM on masked data
if runNum == 0:
    shared_data = SRM_V1(run2, run1, srm_k, n_iter)
elif runNum == 1:
    shared_data = SRM_V1(run1, run2, srm_k, n_iter)

# perform cross-validation style HMM for n_folds
for n in range(n_folds):
    np.random.seed(n)
    subj_list_shuffle = np.random.permutation(shared_data)

    # convert data from list to numpy array and z-score in time
    shared_data_stack = stats.zscore(np.dstack(subj_list_shuffle),
                                     axis=1, ddof=1)

    # split subjects into two groups
    others = np.mean(shared_data_stack[:, start_idx:end_idx, :13], axis=2)
    loo = np.mean(shared_data_stack[:, start_idx:end_idx, 13:], axis=2)

    # fit HMM to song data and return match data where first entry is true
    # match score and all others are permutation scores
    print("Fitting HMM")
    WvA[n, :], bounds[n, :] = HMM(others, loo, human_bounds)

# take average of WvA scores and bounds over folds
avgWvA = fisher_mean(WvA, axis=0)
avgBounds = np.mean(bounds, axis=0)

# compute z-score
for i in range(int(np.max(parcels))):
    print("Parcel Num: ", str(i + 1))

    # get indices where mask and parcels overlap
    indices = np.where((mask_img.get_data() > 0) & (parcels == i + 1))

    # load masked data across subjects
    run1 = np.load(parcel_dir + "parcel" + str(i + 1) + "_run1.npy")
    run2 = np.load(parcel_dir + "parcel" + str(i + 1) + "_run2.npy")

    # run SRM on masked data
    if runNum == 0:
        shared_data = SRM_V1(run2, run1, srm_k, n_iter)
    elif runNum == 1:
        shared_data = SRM_V1(run1, run2, srm_k, n_iter)

    data = np.mean(stats.zscore(np.dstack(shared_data), axis=1, ddof=1),
                   axis=2)[:, start_idx:end_idx]

    # fit HMM to song data and return match data where first entry is true
    # match score and all others are permutation scores
    print("Fitting HMM")
    SL_match = HMM(data, human_bounds)

    # compute z-score
    match_z = (SL_match[0] - np.mean(SL_match[1:])) / (np.std(SL_match[1:]))

    # compute z-score for euclid by flipping sign after z-scoring
    # match_z = ((SL_match[0] - np.mean(SL_match[1:])) / (np.std(SL_match[1:]))) * -1

    # convert z-score to p-value
    match_p = st.norm.sf(match_z)
songs_run2 = [
    'St_Pauls_Suite', 'I_Love_Music', 'Moonlight_Sonata',
    'Change_of_the_Guard', 'Waltz_of_Flowers', 'The_Bird', 'Island',
    'Allegro_Moderato', 'Finlandia', 'Early_Summer', 'Capriccio_Espagnole',
    'Symphony_Fantastique', 'Boogie_Stop_Shuffle', 'My_Favorite_Things',
    'Blue_Monk', 'All_Blues'
]

durs_run2 = np.array([90, 180, 180, 90, 135, 180, 180, 225, 225, 135, 90,
                      135, 225, 225, 90, 135])

# Load in data
run1 = np.nan_to_num(
    stats.zscore(np.load(datadir +
                         'fdr_01_lprec_full_split_merge_run1_n25.npy'),
                 axis=1, ddof=1))
run2 = np.nan_to_num(
    stats.zscore(np.load(datadir +
                         'fdr_01_lprec_full_split_merge_run2_n25.npy'),
                 axis=1, ddof=1))

nSubj = run1.shape[2]

# Convert data into lists where each element is voxels by samples
run1_list = []
run2_list = []
for i in range(0, nSubj):
    run1_list.append(run1[:, :, i])
    run2_list.append(run2[:, :, i])
# Original series.
X0, X1 = gen_series()

fig, (ax0, ax1) = plt.subplots(nrows=2, ncols=1, figsize=(8, 6))
ax0.plot(X0, '-r.')
ax0.set_title('Time series X0(t) vs t')
ax1.plot(X1, '-r.')
ax1.set_title('Time series X1(t) vs t')
plt.show()

"""An essential and necessary step for MSSA is to normalize both time series.
That means to remove the mean value and to divide it by the standard
deviation (for each series separately)."""
X0_zs, X1_zs = stats.zscore(X0), stats.zscore(X1)


# Using shifted time series.
def shift(arr, n, order='forward'):
    if isinstance(arr, np.ndarray):
        arr = arr.tolist()
    if order == 'forward':
        shifted = arr[n:] + [0] * n
    elif order == 'reversed':
        shifted = [0] * n + arr[:-n]
    else:
        print("Order %s not recognized. Try forward or reversed" % order)
        return None
    return shifted
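# Behavior of the shift helper on a short series, for reference:
# 'forward' drops the first n points and zero-pads the end, while
# 'reversed' zero-pads the front and drops the last n points.
print(shift([1, 2, 3, 4], 1, order='forward'))   # [2, 3, 4, 0]
print(shift([1, 2, 3, 4], 1, order='reversed'))  # [0, 1, 2, 3]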
def analyze_results(directory, conditions, n_permutations=1000.):
    """Write the results of the regression analysis

    Parameters
    ----------
    directory : string or list of strings
        Path or list of paths where results are written.
    conditions : string or list of strings
        Conditions to be analyzed.

    Returns
    -------
    fig : instance of matplotlib.pyplot.Figure
        The figure handle.
    """

    res_path = '/media/robbis/DATA/fmri/monks/0_results/'
    subjects = np.loadtxt('/media/robbis/DATA/fmri/monks/attributes_struct.txt',
                          dtype=str)
    path = '/media/robbis/DATA/fmri/monks/'
    roi_list = []
    roi_list = np.loadtxt('/media/robbis/DATA/fmri/templates_fcmri/findlab_rois.txt',
                          delimiter=',',
                          dtype=str)

    if isinstance(directory, str):
        directory = [directory]
    if isinstance(conditions, str):
        conditions = [conditions]

    for dir_ in directory:
        for cond_ in conditions:

            fname_ = os.path.join(res_path, dir_,
                                  cond_ + '_values_1000_50.npz')
            results_ = np.load(fname_)
            values_ = results_['arr_0'].tolist()
            errors_ = values_['error']        # values_['errors_']
            sets_ = values_['features']       # values_['sets_']
            weights_ = values_['weights']     # values_['weights_']
            samples_ = values_['subjects']    # values_['samples_']

            fname_ = os.path.join(res_path, dir_,
                                  cond_ + '_permutation_1000_50.npz')
            results_ = np.load(fname_)
            values_p = results_['arr_0'].tolist()
            errors_p = values_p['error']      # values_p['errors_p']
            sets_p = values_p['features']     # values_p['sets_p']
            weights_p = values_p['weights']   # values_p['weights_p']
            samples_p = values_p['subjects']  # values_p['samples_p']

            errors_p = np.nanmean(errors_p, axis=1)

            print('-----------' + dir_ + '-------------')
            print(cond_)
            print('MSE = ' + str(errors_[:, 0].mean()) + ' -- p ' +
                  str(np.count_nonzero(errors_p[:, 0] <
                                       errors_[:, 0].mean()) / n_permutations))
            print('COR = ' + str(np.nanmean(errors_[:, 1])) + ' -- p ' +
                  str(np.count_nonzero(errors_p[:, 1] >
                                       np.nanmean(errors_[:, 1])) / n_permutations))

            directory_ = dir_
            learner_ = "SVR_C_1"
            prename = "%s_%s" % (cond_, learner_)

            ######## Get matrix infos ###############
            conn_test = ConnectivityLoader(res_path,
                                           subjects,
                                           directory_,
                                           roi_list)

            # Get nan mask to correctly fill matrix
            nan_mask = conn_test.get_results(['Samatha', 'Vipassana'])
            # Transform matrix into float of ones
            mask_ = np.float_(~np.bool_(nan_mask))
            # Get the upper part of the matrix
            mask_ = np.triu(mask_, k=1)
            mask_indices = np.nonzero(mask_)
            n_bins = np.count_nonzero(mask_)

            ###### Plot of distributions of errors and permutations #########
            # errors_p = np.nanmean(errors_p, axis=1)

            fig_ = pl.figure()
            bpp = pl.boxplot(errors_p, showfliers=False, showmeans=True,
                             patch_artist=True)
            bpv = pl.boxplot(errors_, showfliers=False, showmeans=True,
                             patch_artist=True)
            fname = "%s_perm_1000_boxplot.png" % (prename)

            for box_, boxp_ in zip(bpv['boxes'], bpp['boxes']):
                box_.set_facecolor('lightgreen')
                boxp_.set_facecolor('lightslategrey')

            pl.xticks(np.array([1, 2]), ['MSE', 'COR'])
            pl.savefig(os.path.join(res_path, directory_, fname))
            pl.close()

            n_permutations = float(errors_p[:, 0].shape[0])

            ##### Plot of connection distributions ########
            pl.figure()
            h_values_p, _ = np.histogram(sets_p.flatten(),
                                         bins=np.arange(0, n_bins + 1))
            # pl.plot(zscore(h_values_p))
            pl.hist(zscore(h_values_p), bins=25)

            fname = "%s_features_set_dist.png" % (prename)
            pl.savefig(os.path.join(res_path, directory_, fname))

            pl.figure()
            h_values_, _ = np.histogram(sets_.flatten(),
                                        bins=np.arange(0, n_bins + 1))
            pl.plot(zscore(h_values_))

            fname = "%s_features_set_cross_validation.png" % (prename)
            pl.savefig(os.path.join(res_path, directory_, fname))
            pl.close('all')

            ######## Plot connectivity stuff ###########
            weights_ = weights_.squeeze()
            filling_vector = np.zeros(np.count_nonzero(mask_))
            counting_vector = np.zeros(np.count_nonzero(mask_))

            for s, w in zip(sets_, weights_):
                filling_vector[s] += zscore(w)
                counting_vector[s] += 1

            # Calculate the average weights and then zscore
            avg_weights = np.nan_to_num(filling_vector / counting_vector)

            mask_[mask_indices] = avg_weights
            matrix_ = np.nan_to_num(copy_matrix(mask_, diagonal_filler=0))

            names_lr, colors_lr, index_, coords, _ = get_atlas_info(dir_)

            '''
            matrix_[matrix_ == 0] = np.nan
            matrix_[np.abs(matrix_) < 1] = np.nan
            '''

            size_w = np.zeros_like(matrix_)
            size_w[mask_indices] = np.abs(avg_weights)
            size_w = np.nan_to_num(copy_matrix(size_w, diagonal_filler=0))
            size_w = np.sum(size_w, axis=0)

            f, _ = plot_connectivity_circle_edited(
                matrix_[index_][:, index_],
                names_lr[index_],
                node_colors=colors_lr[index_],
                node_size=2 * size_w[index_] ** 2,
                con_thresh=1.4,
                title=cond_,
                node_angles=circular_layout(names_lr, list(names_lr)),
                fontsize_title=19,
                fontsize_names=13,
                fontsize_colorbar=13,
                colorbar_size=0.3,
                colormap='bwr',
                # colormap=cm_,
                vmin=-3.,
                vmax=3.,
                fig=pl.figure(figsize=(16, 16)))

            fname = "%s_features_weight.png" % (prename)
            f.savefig(os.path.join(res_path, directory_, fname),
                      facecolor='black', dpi=150)

            for d_ in ['x', 'y', 'z']:
                fname = "%s_connectome_feature_weight_%s.png" % (prename, d_)
                fname = os.path.join(res_path, directory_, fname)
                plot_connectome(matrix_,
                                coords,
                                colors_lr,
                                2 * size_w ** 2,
                                1.4,
                                fname,
                                # cmap=pl.cm.bwr,
                                title=None,
                                display_=d_,
                                # max_=3.,
                                # min_=3.
                                )

            fname = "%s_connections_list_feature_weights.txt" % (prename)
            fname = os.path.join(res_path, directory_, fname)
            # print_connections(matrix_, names_lr, fname)

            #########
            mask_ = np.float_(~np.bool_(nan_mask))
            mask_ = np.triu(mask_, k=1)
            mask_indices = np.nonzero(mask_)
            mask_[mask_indices] = h_values_
            matrix_ = np.nan_to_num(copy_matrix(mask_, diagonal_filler=0))

            size_ = np.zeros_like(matrix_)
            size_[mask_indices] = counting_vector
            size_ = np.nan_to_num(copy_matrix(size_, diagonal_filler=0))
            size_ = np.sum(size_, axis=0)

            f, _ = plot_connectivity_circle_edited(
                matrix_[index_][:, index_],
                names_lr[index_],
                node_colors=colors_lr[index_],
                node_size=size_[index_] * 5,
                con_thresh=15.,
                title=cond_,
                node_angles=circular_layout(names_lr, list(names_lr)),
                fontsize_title=19,
                fontsize_names=13,
                fontsize_colorbar=13,
                colorbar_size=0.3,
                # colormap='bwr',
                # colormap='terrain',
                # vmin=40,
                fig=pl.figure(figsize=(16, 16)))

            fname = "%s_features_choices.png" % (prename)
            f.savefig(os.path.join(res_path, directory_, fname),
                      facecolor='black', dpi=150)

            for d_ in ['x', 'y', 'z']:
                fname = "%s_connectome_feature_choices_%s.png" % (prename, d_)
                fname = os.path.join(res_path, directory_, fname)
                plot_connectome(matrix_,
                                coords,
                                colors_lr,
                                4. * size_,
                                15.,
                                fname,
                                title=None,
                                max_=50.,
                                min_=0.,
                                display_=d_)

            fname = "%s_connections_list_feature_choices.txt" % (prename)
            fname = os.path.join(res_path, directory_, fname)
            # print_connections(matrix_, names_lr, fname)

            pl.close('all')
def removeOutliers(data, max=4):
    # Remove outliers: keep a row only if every column is within `max`
    # standard deviations of its column mean
    z_scores = stats.zscore(data)
    abs_z_scores = np.abs(z_scores)
    filtered_entries = (abs_z_scores < max).all(axis=1)
    return data[filtered_entries]
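# A minimal sketch on a synthetic 2-D array: a row is kept only if *every*
# column's z-score is below the cutoff, via .all(axis=1).
import numpy as np
from scipy import stats

rng = np.random.RandomState(1)
X = rng.normal(0, 1, size=(200, 3))
X[0] = [25.0, 0.0, 0.0]               # plant one extreme row
print(removeOutliers(X, 4).shape)     # the planted row is dropped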
import matplotlib.patches as patches
import numpy as np
import brainiak.eventseg.event
from scipy import stats
from scipy.stats import zscore, pearsonr
from scipy.signal import gaussian, convolve
from sklearn import decomposition
from brainiak.funcalign.srm import SRM
import nibabel as nib

datadir = '/jukebox/norman/jamalw/MES/prototype/link/scripts/chris_dartmouth/data/'

# Load in data
train = np.nan_to_num(
    stats.zscore(np.load(datadir + 'precuneus_k12ish_run1_n25.npy'),
                 axis=1, ddof=1))
test = np.nan_to_num(
    stats.zscore(np.load(datadir + 'precuneus_k12ish_run2_n25.npy'),
                 axis=1, ddof=1))

# Convert data into lists where each element is voxels by samples
train_list = []
test_list = []
for i in range(0, train.shape[2]):
    train_list.append(train[:, :, i])
    test_list.append(test[:, :, i])

# Initialize model
print('Building Model')
subjects = movie_data_left.shape[2]

# Convert data to a list of arrays matching SRM input.
# Each element is a matrix of voxels by TRs.
# Also, concatenate data from both hemispheres in the brain.
movie_data = []
for s in range(subjects):
    if s % size == rank:
        movie_data.append(
            np.concatenate([movie_data_left[:, :, s],
                            movie_data_right[:, :, s]], axis=0))
    else:
        movie_data.append(None)

# Z-score the data
for subject in range(subjects):
    if movie_data[subject] is not None:
        movie_data[subject] = stats.zscore(movie_data[subject],
                                           axis=1, ddof=1)

# Run SRM with the movie data
srm = brainiak.funcalign.srm.SRM(n_iter=10, features=50, comm=comm)
srm.fit(movie_data)


# We define a function to present the output of the experiment.
def plot_confusion_matrix(cm, title="Confusion Matrix"):
    """Plots a confusion matrix for each subject"""
    import matplotlib.pyplot as plt
    import math
    plt.figure()
    subjects = len(cm)
    root_subjects = math.sqrt(subjects)
    cols = math.ceil(root_subjects)
def HMM(X, K, loo_idx, song_idx, song_bounds):
    """fit hidden markov model

    Fit HMM to average data and cross-validate with leftout subject using
    within song and between song average correlations

    Parameters
    ----------
    X : list of voxel by time ndarrays (2D); first half is run 1, second
        half is run 2
    K : # of events for HMM (scalar)
    loo_idx : index of the left-out subject (scalar)
    song_idx : song index (scalar)
    song_bounds : array of song boundaries (1D)

    Returns
    -------
    z: z-score after performing permuted cross-validation analysis
    """

    w = 6
    srm_k = 45
    nPerm = 1000
    within_across = np.zeros(nPerm + 1)
    run1 = [X[i] for i in np.arange(0, int(len(X) / 2))]
    run2 = [X[i] for i in np.arange(int(len(X) / 2), len(X))]
    print('Building Model')
    srm = SRM(n_iter=10, features=srm_k)
    print('Training Model')
    srm.fit(run1)
    print('Testing Model')
    shared_data = srm.transform(run2)
    shared_data = stats.zscore(np.dstack(shared_data), axis=1, ddof=1)
    others = np.mean(
        shared_data[:, :, np.arange(shared_data.shape[-1]) != loo_idx],
        axis=2)
    loo = shared_data[:, song_bounds[song_idx]:song_bounds[song_idx + 1],
                      loo_idx]
    nTR = loo.shape[1]

    # Fit to all but one subject
    ev = brainiak.eventseg.event.EventSegment(K)
    ev.fit(others[:, song_bounds[song_idx]:song_bounds[song_idx + 1]].T)
    events = np.argmax(ev.segments_[0], axis=1)

    ####
    # plot searchlights
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches
    shared_data = srm.transform(run2)
    avg_response = sum(shared_data) / len(shared_data)
    plt.figure(figsize=(10, 10))
    plt.imshow(np.corrcoef(avg_response[:, 0:89].T))
    bounds = np.where(np.diff(np.argmax(ev.segments_[0], axis=1)))[0]
    ax = plt.gca()
    bounds_aug = np.concatenate(([0], bounds, [nTR]))
    for i in range(len(bounds_aug) - 1):
        rect1 = patches.Rectangle((bounds_aug[i], bounds_aug[i]),
                                  bounds_aug[i + 1] - bounds_aug[i],
                                  bounds_aug[i + 1] - bounds_aug[i],
                                  linewidth=3, edgecolor='w',
                                  facecolor='none', label='Model Fit')
        ax.add_patch(rect1)
    plt.title('HMM Fit to A1 SRM K = ' + str(srm_k),
              fontsize=18, fontweight='bold')
    plt.savefig('plots/St_Pauls SRM K = ' + str(srm_k))
    ####

    # Compute correlations separated by w in time
    corrs = np.zeros(nTR - w)
    for t in range(nTR - w):
        corrs[t] = pearsonr(loo[:, t], loo[:, t + w])[0]

    # Compute within vs across boundary correlations, for real and permuted
    # bounds
    for p in range(nPerm + 1):
        within = corrs[events[:-w] == events[w:]].mean()
        across = corrs[events[:-w] != events[w:]].mean()
        within_across[p] = within - across

        np.random.seed(p)
        events = np.zeros(nTR, dtype=int)
        events[np.random.choice(nTR, K - 1, replace=False)] = 1
        events = np.cumsum(events)

    return within_across
def HMM(X, K, song_idx, song_bounds):
    """fit hidden markov model

    Fit HMM to average data and cross-validate with leftout subjects using
    within song and between song average correlations

    Parameters
    ----------
    X : list of 50 (contains 2 runs per subject) 2D (voxels x full time
        course) arrays
    K : # of events for HMM (scalar)
    song_idx : song index (scalar)
    song_bounds : array of song boundaries (1D)

    Returns
    -------
    wVa score: final score after performing cross-validation of leftout
        subjects
    """

    w = 6
    nPerm = 1000
    hrf = 5
    within_across = np.zeros(nPerm + 1)
    run1 = [X[i] for i in np.arange(0, int(len(X) / 2))]
    run2 = [X[i] for i in np.arange(int(len(X) / 2), len(X))]
    print('Building Model')
    srm = SRM(n_iter=10, features=30)
    print('Training Model')
    srm.fit(run1)
    print('Testing Model')
    shared_data = srm.transform(run2)
    shared_data = stats.zscore(np.dstack(shared_data), axis=1, ddof=1)
    others = np.mean(
        shared_data[:, song_bounds[song_idx] + hrf:
                    song_bounds[song_idx + 1] + hrf, :13], axis=2)
    loo = np.mean(
        shared_data[:, song_bounds[song_idx] + hrf:
                    song_bounds[song_idx + 1] + hrf, 13:], axis=2)
    nTR = loo.shape[1]

    # Fit to all but one subject
    ev = brainiak.eventseg.event.EventSegment(K)
    ev.fit(others.T)
    events = np.argmax(ev.segments_[0], axis=1)

    # Compute correlations separated by w in time
    corrs = np.zeros(nTR - w)
    for t in range(nTR - w):
        corrs[t] = pearsonr(loo[:, t], loo[:, t + w])[0]

    # Compute within vs across boundary correlations, for real and permuted
    # bounds
    for p in range(nPerm + 1):
        within = corrs[events[:-w] == events[w:]].mean()
        across = corrs[events[:-w] != events[w:]].mean()
        within_across[p] = within - across

        np.random.seed(p)
        events = np.zeros(nTR, dtype=int)
        events[np.random.choice(nTR, K - 1, replace=False)] = 1
        events = np.cumsum(events)

    print((within_across[0] - np.mean(within_across[1:])) /
          np.std(within_across[1:]))

    return within_across
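# The permutation z-score printed above, in isolation (made-up numbers):
# entry 0 is the true within-minus-across score and entries 1: form the
# null distribution built from the permuted event boundaries.
import numpy as np

wva = np.array([0.30, 0.05, -0.02, 0.01, 0.04])
z = (wva[0] - np.mean(wva[1:])) / np.std(wva[1:])
print(z)   # ~10 standard deviations above the permuted scores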
try:
    retcode = call(cmd, shell=True)
    if retcode < 0:
        print("File download was terminated by signal", -retcode,
              file=sys.stderr)
    else:
        print("File download returned", retcode, file=sys.stderr)
except OSError as e:
    print("File download failed:", e, file=sys.stderr)

# get fMRI data and scanner RAS coordinates
all_data = scipy.io.loadmat(file_name)
data = all_data['data']
R = all_data['R']

# Z-score the data
data = stats.zscore(data, axis=1, ddof=1)
n_voxel, n_tr = data.shape

# Run TFA with downloaded data
from brainiak.factoranalysis.tfa import TFA
# uncomment below line to get help message on TFA
# help(TFA)

tfa = TFA(K=5,
          max_num_voxel=int(n_voxel * 0.5),
          max_num_tr=int(n_tr * 0.5),
          verbose=True)
tfa.fit(data, R)

print("\n centers of latent factors are:")
print(tfa.get_centers(tfa.local_posterior_))
def searchlight(coords, K, mask, loo_idx, subjs, song_idx, song_bounds):
    """run searchlight

    Create searchlight object and perform voxel function at each searchlight
    location

    Parameters
    ----------
    coords : voxel by xyz ndarray (2D, Vx3)
    K : # of events for HMM (scalar)
    mask : 3D brain mask
    loo_idx : index of the left-out subject (scalar)
    subjs : list of subject IDs
    song_idx : song index (scalar)
    song_bounds : array of song boundaries (1D)

    Returns
    -------
    3D data: brain (or ROI) filled with searchlight function scores (3D)
    """

    stride = 5
    radius = 5
    min_vox = 10
    nPerm = 1000
    SL_allvox = []
    SL_results = []
    datadir = '/tigress/jamalw/MES/prototype/link/scripts/data/searchlight_input/'

    for x in range(0, np.max(coords, axis=0)[0] + stride, stride):
        for y in range(0, np.max(coords, axis=0)[1] + stride, stride):
            for z in range(0, np.max(coords, axis=0)[2] + stride, stride):
                if not os.path.isfile(datadir + subjs[0] + '/' + str(x) +
                                      '_' + str(y) + '_' + str(z) + '.npy'):
                    continue
                D = distance.cdist(coords,
                                   np.array([x, y, z]).reshape((1, 3)))[:, 0]
                SL_vox = D <= radius
                data = []
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) +
                                        '_' + str(y) + '_' + str(z) + '.npy')
                    data.append(np.nan_to_num(
                        stats.zscore(subj_data[:, :, 0], axis=1, ddof=1)))
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) +
                                        '_' + str(y) + '_' + str(z) + '.npy')
                    data.append(np.nan_to_num(
                        stats.zscore(subj_data[:, :, 1], axis=1, ddof=1)))
                print("Running Searchlight")
                SL_within_across = HMM(data, K, loo_idx, song_idx,
                                       song_bounds)
                SL_results.append(SL_within_across)
                SL_allvox.append(np.array(np.nonzero(SL_vox)[0]))

    voxmean = np.zeros((coords.shape[0], nPerm + 1))
    vox_SLcount = np.zeros(coords.shape[0])
    for sl in range(len(SL_results)):
        voxmean[SL_allvox[sl], :] += SL_results[sl]
        vox_SLcount[SL_allvox[sl]] += 1
    voxmean = voxmean / vox_SLcount[:, np.newaxis]
    vox_z = np.zeros((coords.shape[0], nPerm + 1))
    for p in range(nPerm + 1):
        vox_z[:, p] = ((voxmean[:, p] - np.mean(voxmean[:, 1:], axis=1))
                       / np.std(voxmean[:, 1:], axis=1))

    return vox_z, voxmean
# nn = n.loadtxt('notas_compositores.txt')
# nn = n.loadtxt('notas_filosofos.txt')
# nn = n.loadtxt('notas_aleatorias.txt')
nn = n.array([[r.uniform(1, 9) for x in range(_nc)] for y in range(_na)])
# nn_lista = nn.tolist()

# print('GRADE COUNTS')
# print([sum([[int(round(z)) for z in x].count(y) for x in nn_lista]) for y in range(1, 10)])

print('\nGRADES')
for i in range(len(nn)):
    print('%s & %s \\' % (agentes[i], ' & '.join([str(x) for x in nn[i]])))
print('MEAN', n.mean(nn))

print('\nZ-SCORES OF THE GRADES')
_zs1 = sss.zscore(nn)
for i in range(len(_zs1)):
    print([round(x, ndigits=2) for x in _zs1[i]])
print('MEAN', n.mean(n.abs(_zs1)))

# correlation matrix computation
# preprocessing
for i in range(nn.shape[1]):
    nn[:, i] = (nn[:, i] - nn[:, i].mean()) / nn[:, i].std()

# pearson
print('\nCOVARIANCE MATRIX')
covm = n.cov(nn.T, bias=1)
for i in range(len(covm)):
    print([round(x, ndigits=2) for x in covm[i]])
print('MEAN', n.mean(n.abs(covm)))
# run 1 times
song_bounds_run1 = np.array([0, 225, 314, 494, 628, 718, 898, 1032, 1122,
                             1301, 1436, 1660, 1749, 1973, 2198, 2377, 2511])

songs_run1 = ['Finlandia', 'Blue_Monk', 'I_Love_Music', 'Waltz_of_Flowers',
              'Capriccio_Espagnole', 'Island', 'All_Blues', 'St_Pauls_Suite',
              'Moonlight_Sonata', 'Symphony_Fantastique', 'Allegro_Moderato',
              'Change_of_the_Guard', 'Boogie_Stop_Shuffle',
              'My_Favorite_Things', 'The_Bird', 'Early_Summer']

durs_run1 = np.array([225, 90, 180, 135, 90, 180, 135, 90, 180, 135, 225,
                      90, 225, 225, 180, 135])

# run 2 times
song_bounds_run2 = np.array([0, 90, 270, 449, 538, 672, 851, 1031, 1255,
                             1480, 1614, 1704, 1839, 2063, 2288, 2377, 2511])

songs_run2 = ['St_Pauls_Suite', 'I_Love_Music', 'Moonlight_Sonata',
              'Change_of_the_Guard', 'Waltz_of_Flowers', 'The_Bird',
              'Island', 'Allegro_Moderato', 'Finlandia', 'Early_Summer',
              'Capriccio_Espagnole', 'Symphony_Fantastique',
              'Boogie_Stop_Shuffle', 'My_Favorite_Things', 'Blue_Monk',
              'All_Blues']

durs_run2 = np.array([90, 180, 180, 90, 135, 180, 180, 225, 225, 135, 90,
                      135, 225, 225, 90, 135])

# Load in data
run1 = np.nan_to_num(
    stats.zscore(np.load(datadir +
                         'fdr_01_bil_A1_split_merge_no_srm_run1_n25.npy'),
                 axis=1, ddof=1))
run2 = np.nan_to_num(
    stats.zscore(np.load(datadir +
                         'fdr_01_bil_A1_split_merge_no_srm_run2_n25.npy'),
                 axis=1, ddof=1))

nSubj = run1.shape[2]
nboot = 50
wVa_results = np.zeros((16, len(K_set), nboot))

np.random.seed(bootNum)

for b in range(nboot):
    resamp_subjs = np.random.choice(nSubj, size=nSubj, replace=True)
    run1_resample = run1[:, :, resamp_subjs]
    run2_resample = run2[:, :, resamp_subjs]
# print('GRADE COUNTS')
# print([sum([[int(round(z)) for z in x].count(y) for x in nn_lista]) for y in range(1, 10)])

print('\nGRADES')
for i in range(len(nn)):
    print('%s & %s \\' % (agentes[i], ' & '.join([str(x) for x in nn[i]])))
print('MEAN', n.mean(nn))

# correlation matrix computation
# preprocessing
# for i in range(nn.shape[1]):
#     nn[:, i] = (nn[:, i] - nn[:, i].mean()) / nn[:, i].std()

print('\nZ-SCORES')
_zscores = sss.zscore(nn)
for i in range(len(_zscores)):
    print([round(x, ndigits=2) for x in _zscores[i]])

# now using the standardized grades
nn = _zscores

# pearson
print('\nCOVARIANCE MATRIX')
covm = n.cov(nn.T, bias=1)
for i in range(len(covm)):
    print([round(x, ndigits=2) for x in covm[i]])
print('MEAN', n.mean(n.abs(covm)))


def _cov(x, y):
    ax.set_xticks([])
    ax.text(0.35, 1, f'Cluster {cluster}', fontsize=32, va='bottom',
            transform=ax.transAxes)
fig.show()

from pandas import DataFrame

dataframe = DataFrame(model._means, columns=vectorizer.get_feature_names())
wordcloud(dataframe)

# Z-score charts
from scipy.stats import stats

zscore = stats.zscore(model._means, axis=0)
zscore_df = DataFrame(zscore, columns=vectorizer.get_feature_names())

import matplotlib.pyplot as pl
from pandas import concat

def zscore(dataframe, term_count=10):
    fig = pl.figure(figsize=(15, 20))
    for i in range(dataframe.shape[0]):
        cluster_df = dataframe.iloc[[i]].T
        cluster_df = cluster_df.rename(index=str, columns={i: 'Z-score'})
        cluster_df = cluster_df.sort_values(by=['Z-score'])
        if term_count:
            half_term_count = term_count // 2
            sliced_df = concat([cluster_df[:half_term_count],
    if idx % size == rank:
        # download data
        file_name = os.path.join(data_dir, 's' + str(idx) + '.mat')
        cmd = 'curl --location -o ' + file_name + ' ' + url[idx]
        try:
            retcode = call(cmd, shell=True)
            if retcode < 0:
                print("File download was terminated by signal", -retcode,
                      file=sys.stderr)
            else:
                print("File download returned", retcode, file=sys.stderr)
        except OSError as e:
            print("File download failed:", e, file=sys.stderr)
        all_data = scipy.io.loadmat(file_name)
        bold = all_data['data']
        # z-score the data
        bold = stats.zscore(bold, axis=1, ddof=1)
        data.append(bold)
        R.append(all_data['R'])

n_voxel, n_tr = data[0].shape

# Run HTFA with downloaded data
from brainiak.factoranalysis.htfa import HTFA
# uncomment below line to get help message on HTFA
# help(HTFA)

K = 5
htfa = HTFA(K=K,
            n_subj=n_subj,
            max_global_iter=5,
            max_local_iter=2,
def __init__(
    self,
    data: pd.DataFrame,
    outcome_variable: str,
    regression_variables: List[str],
    covariates: Optional[List[str]] = None,
    min_n: int = 200,
    report_categorical_betas: bool = False,
    standardize_data: bool = False,
    encoding: str = "additive",
    edge_encoding_info: Optional[pd.DataFrame] = None,
    process_num: Optional[int] = None,
):
    # base class init
    # This takes in minimal regression params (data, outcome_variable,
    # covariates) and initializes additional parameters (outcome dtype,
    # regression variables, error, and warnings)
    super().__init__(
        data=data,
        outcome_variable=outcome_variable,
        regression_variables=regression_variables,
        covariates=covariates,
    )

    # Custom init involving kwargs passed to this regression
    self.min_n = min_n
    self.report_categorical_betas = report_categorical_betas
    self.standardize_data = standardize_data
    if process_num is None:
        process_num = multiprocessing.cpu_count()
    self.process_num = process_num
    if encoding not in self.KNOWN_ENCODINGS:
        raise ValueError(f"Genotypes provided with unknown 'encoding': {encoding}")
    elif encoding == "edge" and edge_encoding_info is None:
        raise ValueError(
            "'edge_encoding_info' must be provided when using edge encoding"
        )
    else:
        self.encoding = encoding
        self.edge_encoding_info = edge_encoding_info

    # Ensure the data output type is compatible
    # Set 'self.family' and 'self.use_t' which are dependent on the outcome dtype
    if self.outcome_dtype == "categorical":
        raise NotImplementedError(
            "Categorical Outcomes are not yet supported for this type of regression."
        )
    elif self.outcome_dtype == "continuous":
        self.description += (
            f"Continuous Outcome (family = Gaussian): '{self.outcome_variable}'"
        )
        self.family = sm.families.Gaussian(link=sm.families.links.identity())
        self.use_t = True
    elif self.outcome_dtype == "binary":
        # Use the order according to the categorical
        counts = self.data[self.outcome_variable].value_counts().to_dict()
        categories = self.data[self.outcome_variable].cat.categories
        codes, categories = zip(*enumerate(categories))
        self.data[self.outcome_variable].replace(categories, codes, inplace=True)
        self.description += (
            f"Binary Outcome (family = Binomial): '{self.outcome_variable}'\n"
            f"\t{counts[categories[0]]:,} occurrences of '{categories[0]}' coded as 0\n"
            f"\t{counts[categories[1]]:,} occurrences of '{categories[1]}' coded as 1"
        )
        self.family = sm.families.Binomial(link=sm.families.links.logit())
        self.use_t = False
    else:
        raise ValueError(
            "The outcome variable's type could not be determined. "
            "Please report this error."
        )

    # Log missing outcome values
    na_outcome_count = self.data[self.outcome_variable].isna().sum()
    self.description += (
        f"\nUsing {len(self.data) - na_outcome_count:,} of "
        f"{len(self.data):,} observations"
    )
    if na_outcome_count > 0:
        self.description += (
            f"\n\t{na_outcome_count:,} are missing a value for the outcome variable"
        )

    # Standardize continuous variables in the data if needed
    # Use ddof=1 in the zscore calculation (used for StdErr) to match R
    if self.standardize_data:
        if self.outcome_dtype == "continuous":
            self.data[self.outcome_variable] = stats.zscore(
                self.data[self.outcome_variable], nan_policy="omit", ddof=1
            )
        continuous_rvs = self.regression_variables["continuous"]
        self.data[continuous_rvs] = stats.zscore(
            self.data[continuous_rvs], nan_policy="omit", ddof=1
        )
        continuous_covars = [
            rv
            for rv, rv_type in self.covariate_types.items()
            if rv_type == "continuous"
        ]
        self.data[continuous_covars] = stats.zscore(
            self.data[continuous_covars], nan_policy="omit", ddof=1
        )

    # Finish updating description
    self.description += (
        f"\nRegressing "
        f"{sum([len(v) for v in self.regression_variables.values()]):,} variables"
    )
    for k, v in self.regression_variables.items():
        self.description += f"\n\t{len(v):,} {k} variables"