def estimate_smoothness(self, overwrite=None, imgtype='zstat'):
    """ estimate smoothness of Z maps
    using FSL's smoothness estimation """
    log_to_file(
        self.dirs.logfile,
        sys._getframe().f_code.co_name,
        headspace=2)
    func_args = inspect.getargvalues(
        inspect.currentframe()).locals
    log_to_file(
        self.dirs.logfile,
        stringify_dict(func_args))

    if overwrite is None:
        overwrite = self.overwrite
    output_file = os.path.join(
        self.dirs.dirs['metadata'],
        'smoothness_est.csv')
    if os.path.exists(output_file) and not overwrite:
        if self.verbose:
            print('using existing smoothness file')
        smoothness_df = pandas.read_csv(output_file)
        return smoothness_df

    # use nipype's interface to the FSL smoothest command
    est = SmoothEstimate()
    smoothness = []
    for teamID in self.complete_image_sets['unthresh']:
        for hyp in range(1, 10):
            if hyp not in self.teams[teamID].images['unthresh'][imgtype]:
                # fill missing data with nan
                print('no zstat present for', teamID, hyp)
                smoothness.append(
                    [teamID, hyp, numpy.nan, numpy.nan, numpy.nan])
                continue
            infile = self.teams[teamID].images['unthresh'][imgtype][hyp]
            if not os.path.exists(infile):
                print('no image present:', infile)
                continue
            if self.verbose:
                print('estimating smoothness for hyp', hyp)

            est.inputs.zstat_file = infile
            est.inputs.mask_file = self.dirs.MNI_mask
            est.terminal_output = 'file_split'
            smoothest_output = est.run()
            smoothness.append([
                teamID, hyp,
                smoothest_output.outputs.dlh,
                smoothest_output.outputs.volume,
                smoothest_output.outputs.resels])
            self.teams[teamID].logs['smoothest'] = (
                smoothest_output.runtime.stdout,
                smoothest_output.runtime.stderr)

    smoothness_df = pandas.DataFrame(
        smoothness,
        columns=['teamID', 'hyp', 'dlh', 'volume', 'resels'])
    smoothness_df.to_csv(output_file)
    return smoothness_df

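# A minimal sketch of the underlying smoothness call, assuming nipype and
# FSL are installed; 'zstat.nii.gz' and 'mask.nii.gz' are placeholder paths
# for illustration, not files produced by the pipeline above.
def _demo_smoothness_estimate(zstat_file='zstat.nii.gz',
                              mask_file='mask.nii.gz'):
    from nipype.interfaces.fsl import SmoothEstimate

    est = SmoothEstimate()
    est.inputs.zstat_file = zstat_file
    est.inputs.mask_file = mask_file
    result = est.run()  # wraps the FSL 'smoothest' command
    # dlh: smoothness estimate; volume: mask volume in voxels;
    # resels: size of a resolution element
    return (result.outputs.dlh,
            result.outputs.volume,
            result.outputs.resels)
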
def compute_image_stats(self, datatype='zstat', overwrite=None):
    """ compute std and range on statistical images """
    log_to_file(
        self.dirs.logfile,
        sys._getframe().f_code.co_name,
        headspace=2)
    func_args = inspect.getargvalues(
        inspect.currentframe()).locals
    log_to_file(
        self.dirs.logfile,
        stringify_dict(func_args))

    if overwrite is None:
        overwrite = self.overwrite

    # set up directories
    unthresh_concat_dir = self.dirs.get_output_dir(
        'unthresh_concat_%s' % datatype)
    unthresh_range_dir = self.dirs.get_output_dir(
        'unthresh_range_%s' % datatype)
    unthresh_std_dir = self.dirs.get_output_dir(
        'unthresh_std_%s' % datatype)

    for hyp in range(1, 10):
        unthresh_file = os.path.join(
            unthresh_concat_dir, 'hypo%d.nii.gz' % hyp)
        range_outfile = os.path.join(
            unthresh_range_dir, 'hypo%d.nii.gz' % hyp)
        std_outfile = os.path.join(
            unthresh_std_dir, 'hypo%d.nii.gz' % hyp)

        if not os.path.exists(range_outfile) \
                or not os.path.exists(std_outfile) \
                or overwrite:
            unthresh_img = nibabel.load(unthresh_file)
            # get_fdata() replaces the deprecated get_data()
            unthresh_data = unthresh_img.get_fdata()
            concat_data = numpy.nan_to_num(unthresh_data)

            # compute range
            datarange = numpy.max(concat_data, axis=3) \
                - numpy.min(concat_data, axis=3)
            range_img = nibabel.Nifti1Image(
                datarange, affine=unthresh_img.affine)
            range_img.to_filename(range_outfile)

            # compute standard deviation
            datastd = numpy.std(concat_data, axis=3)
            std_img = nibabel.Nifti1Image(
                datastd, affine=unthresh_img.affine)
            std_img.to_filename(std_outfile)

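# A self-contained sketch of the voxelwise statistics computed above,
# using synthetic data in place of the concatenated team images; axis 3
# indexes teams, matching the 4th dimension of the concat files. Purely
# illustrative - not part of the pipeline.
def _demo_image_stats():
    import numpy

    data = numpy.random.randn(4, 4, 4, 20)  # x, y, z, 20 teams
    data = numpy.nan_to_num(data)
    # voxelwise range and standard deviation across teams
    datarange = numpy.max(data, axis=3) - numpy.min(data, axis=3)
    datastd = numpy.std(data, axis=3)
    assert datarange.shape == (4, 4, 4)
    assert datastd.shape == (4, 4, 4)
    return datarange, datastd
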
def compute_image_stats(self, datatype='zstat', overwrite=None):
    """ compute std and range on statistical images """
    log_to_file(
        self.dirs.logfile,
        '\n\n%s' % sys._getframe().f_code.co_name)
    func_args = inspect.getargvalues(
        inspect.currentframe()).locals
    log_to_file(
        self.dirs.logfile,
        stringify_dict(func_args))

    if overwrite is None:
        overwrite = self.overwrite

    for hyp in range(1, 10):
        unthresh_file = os.path.join(
            self.dirs.dirs['output'],
            'unthresh_concat_%s/hypo%d.nii.gz' % (datatype, hyp))

        range_outfile = os.path.join(
            self.dirs.dirs['output'],
            'unthresh_range_%s/hypo%d.nii.gz' % (datatype, hyp))
        if not os.path.exists(os.path.join(
                self.dirs.dirs['output'],
                'unthresh_range_%s' % datatype)):
            os.mkdir(os.path.join(
                self.dirs.dirs['output'],
                'unthresh_range_%s' % datatype))

        std_outfile = os.path.join(
            self.dirs.dirs['output'],
            'unthresh_std_%s/hypo%d.nii.gz' % (datatype, hyp))
        if not os.path.exists(os.path.join(
                self.dirs.dirs['output'],
                'unthresh_std_%s' % datatype)):
            os.mkdir(os.path.join(
                self.dirs.dirs['output'],
                'unthresh_std_%s' % datatype))

        if not os.path.exists(range_outfile) \
                or not os.path.exists(std_outfile) \
                or overwrite:
            unthresh_img = nibabel.load(unthresh_file)
            # get_fdata() replaces the deprecated get_data()
            unthresh_data = unthresh_img.get_fdata()
            concat_data = numpy.nan_to_num(unthresh_data)

            # compute range
            datarange = numpy.max(concat_data, axis=3) \
                - numpy.min(concat_data, axis=3)
            range_img = nibabel.Nifti1Image(
                datarange, affine=unthresh_img.affine)
            range_img.to_filename(range_outfile)

            # compute standard deviation
            datastd = numpy.std(concat_data, axis=3)
            std_img = nibabel.Nifti1Image(
                datastd, affine=unthresh_img.affine)
            std_img.to_filename(std_outfile)

def get_thresh_similarity(narps, dataset='resampled'):
    """
    For each pair of thresholded images, compute the similarity
    of the thresholded/binarized maps using the Jaccard coefficient.
    Computation with zeros per:
    https://stackoverflow.com/questions/37003272/how-to-compute-jaccard-similarity-from-a-pandas-dataframe # noqa
    Also compute Jaccard on only nonzero pairs (a la scipy).
    """
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(
        narps.dirs.dirs['logs'],
        'AnalyzeMaps-%s.txt' % func_name)
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    for hyp in hypnums:
        print('analyzing thresh similarity for hypothesis', hyp)
        maskdata, labels = get_concat_data(
            hyp,
            narps.dirs.MNI_mask,
            narps.dirs.dirs['output'],
            imgtype='thresh',
            dataset=dataset)

        pctagree = matrix_pct_agreement(maskdata)
        median_pctagree = numpy.median(
            pctagree[numpy.triu_indices_from(pctagree, 1)])
        log_to_file(
            logfile,
            'hyp %d: median pctagree similarity: %f' %
            (hyp, median_pctagree))

        df_pctagree = pandas.DataFrame(
            pctagree, index=labels, columns=labels)
        df_pctagree.to_csv(os.path.join(
            narps.dirs.dirs['metadata'],
            'pctagree_hyp%d.csv' % hyp))

        seaborn.clustermap(
            df_pctagree,
            cmap='jet',
            figsize=(16, 16),
            method='ward')
        plt.title(hypotheses_full[hyp])
        plt.savefig(os.path.join(
            narps.dirs.dirs['figures'],
            'hyp%d_pctagree_map_thresh.pdf' % hyp),
            bbox_inches='tight')
        plt.savefig(os.path.join(
            narps.dirs.dirs['figures'],
            'hyp%d_pctagree_map_thresh.png' % hyp),
            bbox_inches='tight')
        plt.close()

        # get Jaccard for nonzero voxels
        jacsim_nonzero = 1 - squareform(pdist(maskdata, 'jaccard'))
        median_jacsim_nonzero = numpy.median(
            jacsim_nonzero[numpy.triu_indices_from(jacsim_nonzero, 1)])
        log_to_file(
            logfile,
            'hyp %d: median jaccard similarity (nonzero): %f' %
            (hyp, median_jacsim_nonzero))

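# A hedged sketch of pairwise percent agreement on binarized maps. The
# definition assumed here for matrix_pct_agreement (the proportion of
# voxels on which each pair of binary maps agrees, i.e. 1 - Hamming
# distance) is an assumption for illustration, along with the median of
# the upper triangle as logged above.
def _demo_pct_agreement():
    import numpy
    from sklearn.metrics import pairwise_distances

    # 10 synthetic binary maps over 1000 voxels
    maskdata = (numpy.random.randn(10, 1000) > 1).astype(int)
    pctagree = 1 - pairwise_distances(maskdata, metric='hamming')
    # summarize over unique team pairs (upper triangle, excluding diagonal)
    median_pctagree = numpy.median(
        pctagree[numpy.triu_indices_from(pctagree, 1)])
    return median_pctagree
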
def create_concat_images(self, datatype='resampled',
                         imgtypes=None,
                         overwrite=None):
    """ create images concatenated across teams
    ordered by self.complete_image_sets """
    log_to_file(
        self.dirs.logfile,
        '\n\n%s' % sys._getframe().f_code.co_name)
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    log_to_file(self.dirs.logfile, stringify_dict(func_args))

    if imgtypes is None:
        imgtypes = ['thresh', 'unthresh']
    if overwrite is None:
        overwrite = self.overwrite

    for imgtype in imgtypes:
        self.dirs.dirs['concat_%s' % imgtype] = os.path.join(
            self.dirs.dirs['output'],
            '%s_concat_%s' % (imgtype, datatype))
        for hyp in range(1, 10):
            outfile = os.path.join(
                self.dirs.dirs['concat_%s' % imgtype],
                'hypo%d.nii.gz' % hyp)
            if not os.path.exists(os.path.dirname(outfile)):
                os.mkdir(os.path.dirname(outfile))
            if not os.path.exists(outfile) or overwrite:
                if self.verbose:
                    print('%s - hypo %d: creating concat file' %
                          (imgtype, hyp))
                concat_teams = [
                    teamID for teamID in self.complete_image_sets
                    if os.path.exists(
                        self.teams[teamID].images[imgtype][datatype][hyp])]
                self.all_maps[imgtype][datatype] = [
                    self.teams[teamID].images[imgtype][datatype][hyp]
                    for teamID in concat_teams]

                # use nilearn NiftiMasker to load data
                # and save to a new file
                masker = nilearn.input_data.NiftiMasker(
                    mask_img=self.dirs.MNI_mask)
                concat_data = masker.fit_transform(
                    self.all_maps[imgtype][datatype])
                concat_img = masker.inverse_transform(concat_data)
                concat_img.to_filename(outfile)
            else:
                if self.verbose:
                    print('%s - hypo %d: using existing file' %
                          (imgtype, hyp))
    return self.all_maps

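# A minimal sketch of the NiftiMasker round trip used above, assuming
# nilearn is available; the mask and input images are generated
# synthetically rather than loaded from team directories.
def _demo_masker_concat():
    import numpy
    import nibabel
    import nilearn.input_data

    affine = numpy.eye(4)
    mask = nibabel.Nifti1Image(
        numpy.ones((4, 4, 4), dtype='uint8'), affine)
    imgs = [nibabel.Nifti1Image(numpy.random.randn(4, 4, 4), affine)
            for _ in range(3)]

    masker = nilearn.input_data.NiftiMasker(mask_img=mask)
    # fit_transform returns an (images x voxels) matrix
    data = masker.fit_transform(imgs)
    # inverse_transform packs it back into a 4D image (x, y, z, images)
    concat_img = masker.inverse_transform(data)
    assert concat_img.shape == (4, 4, 4, 3)
    return concat_img
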
def get_thresh_similarity(narps, dataset='resampled'):
    """
    For each pair of thresholded images, compute the similarity
    of the thresholded/binarized maps using the Jaccard coefficient.
    Computation with zeros per:
    https://stackoverflow.com/questions/37003272/how-to-compute-jaccard-similarity-from-a-pandas-dataframe # noqa
    Also compute Jaccard on only nonzero pairs (a la scipy).
    """
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(
        narps.dirs.dirs['logs'],
        '%s-%s.txt' % (sys.argv[0].split('.')[0], func_name))
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    output_dir = os.path.join(narps.dirs.dirs['output'], 'jaccard_thresh')
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    for hyp in hypnums:
        print('creating Jaccard map for hypothesis', hyp)
        maskdata, labels = get_masked_data(
            hyp,
            narps.dirs.MNI_mask,
            narps.dirs.dirs['output'],
            imgtype='thresh',
            dataset=dataset)

        # similarity over all voxels (1 - Hamming distance),
        # counting shared zeros as agreement
        jacsim = 1 - pairwise_distances(maskdata, metric="hamming")
        # Jaccard similarity, ignoring voxels where both maps are zero
        jacsim_nonzero = 1 - squareform(pdist(maskdata, 'jaccard'))

        df = pandas.DataFrame(jacsim, index=labels, columns=labels)
        df.to_csv(os.path.join(
            output_dir, 'jacsim_thresh_hyp%d.csv' % hyp))
        df_nonzero = pandas.DataFrame(
            jacsim_nonzero, index=labels, columns=labels)
        df_nonzero.to_csv(os.path.join(
            output_dir, 'jacsim_nonzero_thresh_hyp%d.csv' % hyp))

        seaborn.clustermap(df, cmap='jet', figsize=(16, 16), method='ward')
        plt.title(hypotheses[hyp])
        plt.savefig(os.path.join(
            narps.dirs.dirs['figures'],
            'hyp%d_jaccard_map_thresh.pdf' % hyp))
        plt.close()

        seaborn.clustermap(
            df_nonzero, cmap='jet', figsize=(16, 16), method='ward')
        plt.title(hypotheses[hyp])
        plt.savefig(os.path.join(
            narps.dirs.dirs['figures'],
            'hyp%d_jaccard_nonzero_map_thresh.pdf' % hyp))
        plt.close()

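# A small illustrative sketch contrasting the two similarity measures
# above: the Hamming-based score counts agreement on all voxels
# (including shared zeros), while scipy's 'jaccard' distance ignores
# voxel pairs where both maps are zero, so the two diverge sharply for
# sparse maps.
def _demo_hamming_vs_jaccard():
    import numpy
    from scipy.spatial.distance import pdist, squareform
    from sklearn.metrics import pairwise_distances

    a = numpy.zeros(100, dtype=int)
    b = numpy.zeros(100, dtype=int)
    a[:10] = 1
    b[5:15] = 1  # overlap of 5 voxels
    maskdata = numpy.vstack([a, b])

    sim_with_zeros = 1 - pairwise_distances(maskdata, metric='hamming')
    sim_nonzero = 1 - squareform(pdist(maskdata, 'jaccard'))
    # agreement including zeros is high (0.90); Jaccard is 5/15 = 1/3
    return sim_with_zeros[0, 1], sim_nonzero[0, 1]
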
def create_mean_thresholded_images(self, datatype='resampled',
                                   overwrite=None, thresh=1e-5):
    """ create overlap maps for thresholded images """
    log_to_file(
        self.dirs.logfile,
        sys._getframe().f_code.co_name,
        headspace=2)
    func_args = inspect.getargvalues(
        inspect.currentframe()).locals
    log_to_file(
        self.dirs.logfile,
        stringify_dict(func_args))

    imgtype = 'thresh'
    if overwrite is None:
        overwrite = self.overwrite
    output_dir = self.dirs.get_output_dir('overlap_binarized_thresh')
    concat_dir = self.dirs.get_output_dir(
        '%s_concat_%s' % (imgtype, datatype))

    for hyp in range(1, 10):
        outfile = os.path.join(output_dir, 'hypo%d.nii.gz' % hyp)
        if not os.path.exists(outfile) or overwrite:
            if self.verbose:
                print('%s - hypo %d: creating overlap file' %
                      (imgtype, hyp))
            concat_file = os.path.join(concat_dir, 'hypo%d.nii.gz' % hyp)
            concat_img = nibabel.load(concat_file)
            # get_fdata() replaces the deprecated get_data()
            concat_data = concat_img.get_fdata()
            # binarize each team's map and average across teams
            concat_data = (concat_data > thresh).astype('float')
            concat_mean = numpy.mean(concat_data, 3)
            concat_mean_img = nibabel.Nifti1Image(
                concat_mean, affine=concat_img.affine)
            concat_mean_img.to_filename(outfile)
        else:
            if self.verbose:
                print('%s - hypo %d: using existing file' %
                      (imgtype, hyp))

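# A self-contained sketch of the overlap computation above: binarize
# each team's thresholded map at a small threshold, then average across
# the team dimension to get the proportion of teams active at each
# voxel. Synthetic data only.
def _demo_overlap_map(thresh=1e-5):
    import numpy

    concat_data = numpy.random.rand(4, 4, 4, 20)  # 20 teams
    binarized = (concat_data > thresh).astype('float')
    overlap = numpy.mean(binarized, 3)  # proportion of teams per voxel
    assert overlap.min() >= 0 and overlap.max() <= 1
    return overlap
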
def analyze_clusters(narps,
                     dendrograms,
                     membership,
                     dataset='zstat',
                     corr_type='spearman',
                     thresh=2.,
                     vmax=5.,
                     rand_thresh=0.2):
    """
    Use the dendrogram computed by seaborn clustermap to identify clusters,
    then create a separate mean statistical map for each cluster.
    """
    # if dendrograms is None or membership is None:
    #     with open(os.path.join(
    #             narps.dirs.dirs['output'],
    #             'unthresh_dendrograms_%s.pkl' % corr_type), 'rb') as f:
    #         dendrograms, membership = pickle.load(f)

    func_args = inspect.getargvalues(inspect.currentframe()).locals
    # remove these to keep logs more tractable
    del func_args['membership']
    del func_args['dendrograms']
    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(
        narps.dirs.dirs['logs'],
        'AnalyzeMaps-%s.txt' % func_name)
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    mean_smoothing = {}
    mean_decision = {}
    cluster_metadata = {}
    cluster_metadata_df = pandas.DataFrame(
        columns=['hyp%d' % i for i in hypnums],
        index=narps.metadata.teamID)

    masker = nilearn.input_data.NiftiMasker(mask_img=narps.dirs.MNI_mask)

    for i, hyp in enumerate(hypnums):
        log_to_file(logfile, 'hyp %d' % hyp)
        # set cluster indices back to int, for consistency with above
        clusters = [int(x) for x in list(membership[str(hyp)].keys())]
        clusters.sort()

        fig, ax = plt.subplots(len(clusters), 1, figsize=(12, 12))
        cluster_metadata[hyp] = {}
        mean_smoothing[str(hyp)] = {}
        mean_decision[str(hyp)] = {}
        for j, cl in enumerate(clusters):
            log_to_file(
                logfile,
                'hyp %d cluster %d (%s)' %
                (hyp, cl, cluster_colors[j + 1]))
            # get all images for this cluster and average them
            member_maps = []
            member_smoothing = []
            member_decision = []
            for member in membership[str(hyp)][str(cl)]:
                cid = narps.teams[member].datadir_label
                infile = os.path.join(
                    narps.dirs.dirs['output'],
                    '%s/%s/hypo%d_unthresh.nii.gz' % (dataset, cid, hyp))
                if os.path.exists(infile):
                    member_maps.append(infile)
                    member_smoothing.append(
                        narps.metadata.query(
                            'varnum==%d' % hyp).query(
                            'teamID=="%s"' % member)['fwhm'].iloc[0])
                    member_decision.append(
                        narps.metadata.query(
                            'varnum==%d' % hyp).query(
                            'teamID=="%s"' % member)['Decision'].iloc[0])

            log_to_file(logfile, membership[str(hyp)][str(cl)])
            cluster_metadata[hyp][cl] = narps.metadata[
                narps.metadata.teamID.isin(membership[str(hyp)][str(cl)])]
            for m in membership[str(hyp)][str(cl)]:
                cluster_metadata_df.loc[m, 'hyp%d' % hyp] = cl

            log_to_file(
                logfile,
                'N cluster %d maps: %d' % (cl, len(member_maps)))
            mean_smoothing[str(hyp)][str(cl)] = numpy.mean(
                numpy.array(member_smoothing))
            mean_decision[str(hyp)][str(cl)] = numpy.mean(
                numpy.array(member_decision))
            log_to_file(
                logfile,
                'mean fwhm: %f' % mean_smoothing[str(hyp)][str(cl)])
            log_to_file(
                logfile,
                'pYes: %f' % mean_decision[str(hyp)][str(cl)])

            maskdata = masker.fit_transform(member_maps)
            meandata = numpy.mean(maskdata, 0)
            mean_img = masker.inverse_transform(meandata)
            mean_filename = os.path.join(
                narps.dirs.dirs['output'],
                'cluster_maps/hyp%d_cluster%d_mean.nii.gz' % (hyp, cl))
            if not os.path.exists(os.path.dirname(mean_filename)):
                os.mkdir(os.path.dirname(mean_filename))
            mean_img.to_filename(mean_filename)

            nilearn.plotting.plot_stat_map(
                mean_img,
                threshold=thresh,
                vmax=vmax,
                display_mode="z",
                colorbar=True,
                title='H%d - cluster %d [%s] (pYes = %0.2f)' % (
                    hyp, cl,
                    cluster_colornames[cluster_colors[j + 1]],
                    mean_decision[str(hyp)][str(cl)]),
                cut_coords=cut_coords,
                axes=ax[j])
        log_to_file(logfile, '')
        log_to_file(logfile, '')

        plt.savefig(os.path.join(
            narps.dirs.dirs['figures'],
            'hyp%d_cluster_means.pdf' % hyp),
            bbox_inches='tight')
        plt.savefig(os.path.join(
            narps.dirs.dirs['figures'],
            'hyp%d_cluster_means.png' % hyp),
            bbox_inches='tight')
        plt.close(fig)

    # save cluster metadata to data frame
    cluster_metadata_df = cluster_metadata_df.dropna()
    cluster_metadata_df = cluster_metadata_df[
        ~cluster_metadata_df.index.duplicated(keep='first')]
    cluster_metadata_df.to_csv(os.path.join(
        narps.dirs.dirs['metadata'], 'cluster_metadata_df.csv'))

    # compute clustering similarity across hypotheses
    log_to_file(logfile, 'Computing cluster similarity (Rand score)')
    log_to_file(
        logfile,
        'pairs with adjusted Rand index > %f' % rand_thresh)
    randmtx = numpy.zeros((10, 10))
    for i, j in enumerate(hypnums):
        for k in hypnums[i:]:
            if j == k:
                continue
            randmtx[j, k] = sklearn.metrics.adjusted_rand_score(
                cluster_metadata_df['hyp%d' % j],
                cluster_metadata_df['hyp%d' % k])
            if randmtx[j, k] > rand_thresh:
                log_to_file(
                    logfile,
                    '%d, %d: %f' % (j, k, randmtx[j, k]))
    numpy.savetxt(
        os.path.join(
            narps.dirs.dirs['output'],
            'cluster_membership_Rand_indices.csv'),
        randmtx)

    # are the same teams in the main cluster each time?
    main_cluster_teams = []
    print('index:', cluster_metadata_df.index)
    for i, hyp in enumerate(hypnums):
        # find the main (largest) cluster
        clusters = cluster_metadata_df.loc[:, 'hyp%d' % hyp]
        clusters.index = cluster_metadata_df.index
        cnt = clusters.value_counts()
        largest_cluster = cnt.index[0]
        main_cluster_teams = main_cluster_teams + \
            clusters[clusters == largest_cluster].index.tolist()
    main_cluster_counts = Counter(main_cluster_teams)
    # teams appearing in the main cluster for every analyzed hypothesis
    consistent_teams = [
        m for m in main_cluster_counts
        if main_cluster_counts[m] == len(hypnums)]
    log_to_file(
        logfile,
        'Number of teams consistently in main cluster: %d' %
        len(consistent_teams))
    return cluster_metadata_df

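# A minimal sketch of the cluster-similarity measure used above: the
# adjusted Rand index compares two cluster assignments over the same
# teams, yielding 1.0 for identical partitions and roughly 0 for
# chance-level agreement. Illustrative labels only.
def _demo_adjusted_rand():
    import sklearn.metrics

    labels_hyp1 = [1, 1, 2, 2, 3, 3]
    labels_hyp2 = [2, 2, 3, 3, 1, 1]  # same partition, relabeled
    score = sklearn.metrics.adjusted_rand_score(labels_hyp1, labels_hyp2)
    assert score == 1.0  # ARI is invariant to label permutation
    return score
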
def mk_correlation_maps_unthresh(narps,
                                 corr_type='spearman',
                                 n_clusters=None,
                                 dataset='zstat',
                                 vox_mask_thresh=1.0):
    """
    Create correlation maps for unthresholded images.
    These correlation matrices are clustered using Ward clustering,
    with the number of clusters for each hypothesis determined by
    visual examination.
    vox_mask_thresh controls which voxels are analyzed, in terms of
    the proportion of teams with signal in the voxel; defaults to 100%.
    """
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(
        narps.dirs.dirs['logs'],
        'AnalyzeMaps-%s.txt' % func_name)
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    if n_clusters is None:
        n_clusters = {1: 3, 2: 3, 5: 3, 6: 3, 7: 3, 8: 3, 9: 3}

    dendrograms = {}
    membership = {}
    cc_unthresh = {}
    output_dir = narps.dirs.get_output_dir('correlation_unthresh')

    for i, hyp in enumerate(hypnums):
        print('creating correlation map for hypothesis', hyp)
        membership[str(hyp)] = {}
        maskdata, labels = get_concat_data(
            hyp,
            narps.dirs.MNI_mask,
            narps.dirs.dirs['output'],
            dataset=dataset,
            vox_mask_thresh=vox_mask_thresh,
            logfile=logfile)

        # compute correlation of all datasets with mean
        if 'mean_corr' not in locals():
            mean_corr = pandas.DataFrame(
                numpy.zeros((len(labels), len(hypnums))),
                columns=['H%d' % h for h in hypnums],
                index=labels)
        meandata = numpy.mean(maskdata, 0)
        for t in range(maskdata.shape[0]):
            mean_corr.iloc[t, i] = scipy.stats.spearmanr(
                maskdata[t, :], meandata).correlation

        # cluster datasets
        if corr_type == 'spearman':
            cc = scipy.stats.spearmanr(maskdata.T).correlation
        else:  # use Pearson
            cc = numpy.corrcoef(maskdata)
        cc = numpy.nan_to_num(cc)
        df = pandas.DataFrame(cc, index=labels, columns=labels)
        df.to_csv(os.path.join(
            output_dir, '%s_unthresh_hyp%d.csv' % (corr_type, hyp)))

        ward_linkage = scipy.cluster.hierarchy.ward(cc)

        # add 1 to cluster labels so they start at 1
        # rather than zero - for clarity in paper
        clustlabels = [
            s[0] + 1 for s in scipy.cluster.hierarchy.cut_tree(
                ward_linkage, n_clusters=n_clusters[hyp])]
        print('clustlabels:', clustlabels)

        # get decisions for column colors
        md = narps.metadata.query('varnum==%d' % hyp).set_index('teamID')
        decision_colors = ['r', 'g']
        col_colors = [
            decision_colors[md.loc[teamID, 'Decision']]
            for teamID in labels]
        row_colors = [cluster_colors[s] for s in clustlabels]
        print('row_colors:', row_colors)

        cm = seaborn.clustermap(
            df,
            cmap='vlag',
            figsize=(16, 16),
            method='ward',
            row_colors=row_colors,
            col_colors=col_colors,
            center=0,
            vmin=-1,
            vmax=1)
        plt.title('H%d:' % hyp + hypotheses_full[hyp])
        cc_unthresh[hyp] = (cc, labels)
        plt.savefig(os.path.join(
            narps.dirs.dirs['figures'],
            'hyp%d_%s_map_unthresh.pdf' % (hyp, corr_type)),
            bbox_inches='tight')
        plt.savefig(os.path.join(
            narps.dirs.dirs['figures'],
            'hyp%d_%s_map_unthresh.png' % (hyp, corr_type)),
            bbox_inches='tight')
        plt.close()
        dendrograms[hyp] = ward_linkage

        # get cluster membership
        for j in cm.dendrogram_row.reordered_ind:
            cl = clustlabels[j]
            if str(cl) not in membership[str(hyp)]:
                membership[str(hyp)][str(cl)] = []
            membership[str(hyp)][str(cl)].append(labels[j])

    # save cluster data to file so that we don't have to rerun everything
    with open(os.path.join(
            output_dir,
            'unthresh_cluster_membership_%s.json' % corr_type), 'w') as f:
        json.dump(membership, f)

    # also save correlation info
    median_corr = mean_corr.median(1).sort_values()
    median_corr_df = pandas.DataFrame(
        median_corr, columns=['median_corr'])
    median_corr_df.to_csv(os.path.join(
        narps.dirs.dirs['metadata'], 'median_pattern_corr.csv'))
    log_to_file(
        logfile,
        'median correlation between teams: %f' %
        numpy.median(cc[numpy.triu_indices_from(cc, 1)]))

    return (dendrograms, membership)

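# A compact sketch of the clustering recipe above: build a Spearman
# correlation matrix across maps, run Ward linkage on it, and cut the
# tree at a fixed number of clusters (labels shifted to start at 1).
# Synthetic data stands in for the masked team maps.
def _demo_corr_clustering(n_clusters=3):
    import numpy
    import scipy.cluster.hierarchy
    import scipy.stats

    maskdata = numpy.random.randn(12, 500)  # 12 teams x 500 voxels
    # spearmanr on the transpose correlates rows (teams) of maskdata
    cc = scipy.stats.spearmanr(maskdata.T).correlation
    cc = numpy.nan_to_num(cc)
    ward_linkage = scipy.cluster.hierarchy.ward(cc)
    clustlabels = [
        s[0] + 1 for s in scipy.cluster.hierarchy.cut_tree(
            ward_linkage, n_clusters=n_clusters)]
    assert len(clustlabels) == 12
    return clustlabels
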
def convert_to_zscores(self, map_metadata_file=None, overwrite=None):
    """ convert rectified images to z scores
    - unthresholded images could be either t or z images
    - if they are already z then just copy
    - use metadata supplied by teams to determine image type
    """
    log_to_file(
        self.dirs.logfile,
        '\n\n%s' % sys._getframe().f_code.co_name)
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    log_to_file(self.dirs.logfile, stringify_dict(func_args))

    if overwrite is None:
        overwrite = self.overwrite
    if map_metadata_file is None:
        map_metadata_file = os.path.join(
            self.dirs.dirs['orig'],
            'narps_neurovault_images_details.csv')
    unthresh_stat_type = get_map_metadata(map_metadata_file)
    metadata = get_metadata(self.metadata_file)

    n_participants = metadata[['n_participants', 'NV_collection_string']]
    n_participants.index = metadata.teamID

    unthresh_stat_type = unthresh_stat_type.merge(
        n_participants, left_index=True, right_index=True)

    for teamID in self.complete_image_sets:
        if teamID not in unthresh_stat_type.index:
            print('no map metadata for', teamID)
            continue
        # this is a bit of a kludge: since some contrasts include all
        # subjects but others only include some, and we don't have the
        # number of participants in each group, we just use the entire
        # number
        n = unthresh_stat_type.loc[teamID, 'n_participants']

        for hyp in range(1, 10):
            infile = self.teams[teamID].images['unthresh']['rectified'][hyp]
            if not os.path.exists(infile):
                print('skipping', infile)
                continue

            zstat_file = os.path.join(
                self.dirs.dirs['zstat'],
                self.teams[teamID].datadir_label,
                'hypo%d_unthresh.nii.gz' % hyp)
            self.teams[teamID].images['unthresh']['zstat'][hyp] = zstat_file

            if not overwrite and os.path.exists(zstat_file):
                continue

            # compare the map type case-insensitively for both t and z
            unthresh_type = unthresh_stat_type.loc[
                teamID, 'unthresh_type'].lower()
            if unthresh_type == 't':
                if not os.path.exists(os.path.dirname(zstat_file)):
                    os.mkdir(os.path.dirname(zstat_file))
                print("converting %s (hyp %d) to z - %d participants" %
                      (teamID, hyp, n))
                TtoZ(infile, zstat_file, n - 1)
            elif unthresh_type == 'z':
                if not os.path.exists(os.path.dirname(zstat_file)):
                    os.mkdir(os.path.dirname(zstat_file))
                if not os.path.exists(zstat_file):
                    print('copying', teamID)
                    shutil.copy(infile, os.path.dirname(zstat_file))
            else:
                # if it's not T or Z then we skip it as it's not usable
                print('skipping %s - other data type' % teamID)

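# A hedged sketch of the statistical conversion TtoZ performs (an
# assumption about its internals, for illustration only): map each t
# value through the t CDF with the given degrees of freedom, then
# through the inverse normal CDF. Using the survival function for
# positive values keeps precision in the upper tail.
def _demo_t_to_z(tstat, df):
    import numpy
    from scipy import stats

    tstat = numpy.asarray(tstat, dtype=float)
    z = numpy.where(
        tstat >= 0,
        # sf-based form avoids CDF values rounding to 1.0 in the tail
        stats.norm.isf(stats.t.sf(tstat, df)),
        stats.norm.ppf(stats.t.cdf(tstat, df)))
    return z

# example (hypothetical n=108, so df = n - 1 = 107 as in the call above):
# _demo_t_to_z([2.0, -1.5], df=107)
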
def create_rectified_images(self, map_metadata_file=None, overwrite=None):
    """ create rectified images
    - contrasts 5 and 6 were negative contrasts
    - some teams uploaded images in which negative values provided
      evidence in favor of the contrast
    - using metadata provided by teams, we identify these images and
      flip their valence so that all maps present positive evidence
      for each contrast
    """
    log_to_file(
        self.dirs.logfile,
        '\n\n%s' % sys._getframe().f_code.co_name)
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    log_to_file(self.dirs.logfile, stringify_dict(func_args))

    if map_metadata_file is None:
        map_metadata_file = os.path.join(
            self.dirs.dirs['orig'],
            'narps_neurovault_images_details.csv')
    map_metadata = get_map_metadata(map_metadata_file)
    if overwrite is None:
        overwrite = self.overwrite

    for teamID in self.complete_image_sets:
        for hyp in range(1, 10):
            if hyp in [5, 6]:
                mdstring = map_metadata.query(
                    'teamID == "%s"' % teamID)[
                    'hyp%d_direction' % hyp].iloc[0]
                rectify = mdstring.split()[0] == 'Negative'
            elif hyp == 9:
                # manual fix for one team with reversed maps
                if teamID in ['R7D1']:
                    mdstring = map_metadata.query(
                        'teamID == "%s"' % teamID)[
                        'hyp%d_direction' % hyp].iloc[0]
                    rectify = True
                else:
                    rectify = False
            else:
                # just copy the other hypotheses directly
                rectify = False

            # load data from unthresh map within
            # positive voxels of thresholded mask
            unthresh_file = self.teams[teamID].images[
                'unthresh']['resampled'][hyp]

            rectified_file = os.path.join(
                self.dirs.dirs['rectified'],
                self.teams[teamID].datadir_label,
                'hypo%d_unthresh.nii.gz' % hyp)
            self.teams[teamID].images['unthresh']['rectified'][hyp] = \
                rectified_file

            if not os.path.exists(os.path.dirname(rectified_file)):
                os.mkdir(os.path.dirname(rectified_file))

            if overwrite or not os.path.exists(rectified_file):
                if rectify:
                    # values were flipped for negative contrasts
                    print('rectifying hyp', hyp, 'for', teamID)
                    print(mdstring)
                    print('')
                    img = nibabel.load(unthresh_file)
                    img_rectified = nilearn.image.math_img(
                        'img*-1', img=img)
                    img_rectified.to_filename(rectified_file)
                    self.rectified_list.append((teamID, hyp))
                else:
                    # just copy original
                    shutil.copy(unthresh_file, rectified_file)

    # write list of rectified teams to disk
    if len(self.rectified_list) > 0:
        with open(os.path.join(
                self.dirs.dirs['metadata'],
                'rectified_images_list.txt'), 'w') as f:
            for teamID, hyp in self.rectified_list:
                f.write('%s\t%s\n' % (teamID, hyp))

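# A minimal sketch of the sign flip applied above, assuming nilearn is
# available; math_img evaluates the formula voxelwise on the supplied
# image. Synthetic input, illustrative only.
def _demo_rectify():
    import numpy
    import nibabel
    import nilearn.image

    img = nibabel.Nifti1Image(numpy.random.randn(4, 4, 4), numpy.eye(4))
    img_rectified = nilearn.image.math_img('img*-1', img=img)
    flipped = img_rectified.get_fdata()
    assert numpy.allclose(flipped, -img.get_fdata())
    return img_rectified
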
def create_rectified_images(self, map_metadata_file=None, overwrite=None):
    """ create rectified images
    - contrasts 5 and 6 were negative contrasts
    - some teams uploaded images in which negative values provided
      evidence in favor of the contrast
    - using metadata provided by teams, we identify these images and
      flip their valence so that all maps present positive evidence
      for each contrast
    """
    log_to_file(
        self.dirs.logfile,
        sys._getframe().f_code.co_name,
        headspace=2)
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    log_to_file(self.dirs.logfile, stringify_dict(func_args))

    if overwrite is None:
        overwrite = self.overwrite

    for teamID in self.complete_image_sets['unthresh']:
        if not hasattr(self.teams[teamID], 'rectify'):
            print('no rectification data for %s, skipping' % teamID)
            continue
        for hyp in range(1, 10):
            if hyp not in self.teams[teamID].rectify:
                print('no rectification data for %s hyp%d, skipping' %
                      (teamID, hyp))
                continue
            rectify = self.teams[teamID].rectify[hyp]

            # load data from unthresh map within
            # positive voxels of thresholded mask
            unthresh_file = self.teams[teamID].images[
                'unthresh']['resampled'][hyp]

            rectified_file = os.path.join(
                self.dirs.dirs['rectified'],
                self.teams[teamID].datadir_label,
                'hypo%d_unthresh.nii.gz' % hyp)
            self.teams[teamID].images['unthresh']['rectified'][hyp] = \
                rectified_file

            if not os.path.exists(os.path.dirname(rectified_file)):
                os.mkdir(os.path.dirname(rectified_file))

            if overwrite or not os.path.exists(rectified_file):
                if rectify:
                    # values were flipped for negative contrasts
                    print('rectifying hyp', hyp, 'for', teamID)
                    img = nibabel.load(unthresh_file)
                    img_rectified = nilearn.image.math_img(
                        'img*-1', img=img)
                    img_rectified.to_filename(rectified_file)
                    self.rectified_list.append((teamID, hyp))
                else:
                    # just copy original
                    shutil.copy(unthresh_file, rectified_file)

    # write list of rectified teams to disk
    if len(self.rectified_list) > 0:
        with open(os.path.join(
                self.dirs.dirs['metadata'],
                'rectified_images_list.txt'), 'w') as f:
            for teamID, hyp in self.rectified_list:
                f.write('%s\t%s%s' % (teamID, hyp, os.linesep))

def create_concat_images(self, datatype='resampled',
                         create_voxel_map=False,
                         imgtypes=None,
                         overwrite=None):
    """ create images concatenated across teams,
    ordered by self.complete_image_sets

    create_voxel_map: if True, create a map showing the proportion
    of teams with a nonzero value at each voxel
    """
    log_to_file(
        self.dirs.logfile,
        sys._getframe().f_code.co_name,
        headspace=2)
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    log_to_file(self.dirs.logfile, stringify_dict(func_args))

    if imgtypes is None:
        imgtypes = ['thresh', 'unthresh']
    if overwrite is None:
        overwrite = self.overwrite

    for imgtype in imgtypes:
        concat_dir = self.dirs.get_output_dir(
            '%s_concat_%s' % (imgtype, datatype))
        for hyp in range(1, 10):
            outfile = os.path.join(concat_dir, 'hypo%d.nii.gz' % hyp)
            if self.verbose:
                print(outfile)
            if not os.path.exists(outfile) or overwrite:
                if self.verbose:
                    print('%s - hypo %d: creating concat file' %
                          (imgtype, hyp))
                concat_teams = [
                    teamID for teamID in self.complete_image_sets[imgtype]
                    if os.path.exists(
                        self.teams[teamID].images[imgtype][datatype][hyp])]
                self.all_maps[imgtype][datatype] = [
                    self.teams[teamID].images[imgtype][datatype][hyp]
                    for teamID in concat_teams]

                # use nilearn NiftiMasker to load data
                # and save to a new file
                masker = nilearn.input_data.NiftiMasker(
                    mask_img=self.dirs.MNI_mask)
                concat_data = masker.fit_transform(
                    self.all_maps[imgtype][datatype])
                concat_img = masker.inverse_transform(concat_data)
                concat_img.to_filename(outfile)

                if create_voxel_map:
                    # proportion of teams with nonzero values at each voxel
                    # (get_fdata() replaces the deprecated get_data())
                    concat_data = nibabel.load(outfile).get_fdata()
                    voxel_map = numpy.mean(
                        numpy.abs(concat_data) > 1e-6, 3)
                    voxel_img = nibabel.Nifti1Image(
                        voxel_map, affine=concat_img.affine)
                    mapfile = outfile.replace(
                        '.nii.gz', '_voxelmap.nii.gz')
                    assert mapfile != outfile
                    voxel_img.to_filename(mapfile)

                # save team ID and files to a label file for provenance
                labelfile = outfile.replace('.nii.gz', '.labels')
                with open(labelfile, 'w') as f:
                    for i, team in enumerate(concat_teams):
                        f.write('%s\t%s%s' % (
                            team,
                            self.all_maps[imgtype][datatype][i],
                            os.linesep))
            else:
                if self.verbose:
                    print('%s - hypo %d: using existing file' %
                          (imgtype, hyp))
    return self.all_maps

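# A self-contained sketch of the voxel map computed above: the
# proportion of teams with a (near-)nonzero value at each voxel of the
# concatenated 4D image. Synthetic data only.
def _demo_voxel_map():
    import numpy

    concat_data = numpy.random.randn(4, 4, 4, 20)  # 20 teams
    concat_data[..., :5] = 0  # pretend 5 teams have empty maps
    voxel_map = numpy.mean(numpy.abs(concat_data) > 1e-6, 3)
    assert voxel_map.max() <= 0.75  # at most 15/20 teams nonzero
    return voxel_map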