def load_data(self):
        """ Loads data to set up classificaiton problem. Most importantly self.data is filled in, which consists
        of a Numpy array (length = number of regions) with X and y data for each region """
        from neurosynth.analysis.reduce import average_within_regions

        all_ids = self.dataset.image_table.ids

        high_thresh = average_within_regions(
            self.dataset, self.mask_img, threshold=self.thresh)
        low_thresh = average_within_regions(
            self.dataset, self.mask_img, threshold=self.thresh_low)
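        # Both are (n_regions x n_studies) matrices from average_within_regions; with a
        # threshold set, each entry flags whether a study activates that region, which
        # drives the boolean indexing below.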

        self.data = np.empty(high_thresh.shape[0], dtype=object)
        for i, on_mask in enumerate(high_thresh):
            on_data = self.dataset.get_feature_data(
                ids=np.array(all_ids)[np.where(on_mask == True)[0]]).dropna()

            off_mask = low_thresh[i]
            off_ids = list(
                set(all_ids) - set(np.array(all_ids)[np.where(off_mask == True)[0]]))
            off_data = self.dataset.feature_table.get_feature_data(
                ids=off_ids).dropna()

            y = np.array([0] * off_data.shape[0] + [1] * on_data.shape[0])
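            # 0 = studies outside the region (at the low threshold), 1 = studies that activate it.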
            X = np.vstack((np.array(off_data), np.array(on_data)))

            from sklearn.preprocessing import scale
            X = scale(X, with_mean=False)
            self.data[i] = (X, y)

        self.feature_names = self.dataset.get_feature_data().columns.tolist()
        self.n_regions = self.data.shape[0]
Example #2
 def test_roi_averaging(self):
     """ Test averaging within region labels in a mask. """
     filename = get_test_data_path() + 'sgacc_mask.nii.gz'
     avg_vox = reduce.average_within_regions(self.dataset, filename)
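     # avg_vox has one row per region label in the mask and one column per study.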
     n_studies = self.dataset.image_table.data.shape[1]
     self.assertEqual(n_studies, avg_vox.shape[1])
     self.assertGreater(avg_vox.sum(), 0.05)
Example #3
    def load_data(self, features):
        """ Loads ids and data for each individual mask """

        print "Loading data from neurosynth..."

        from neurosynth.analysis.reduce import average_within_regions

        if self.mask_img is None:
            self.y = self.dataset.get_image_data()
        elif isinstance(self.mask_img, basestring):
            if self.mask_img.endswith(".pkl"):
                import cPickle
                self.y = cPickle.load(open(self.mask_img, 'rb'))
            else:
                # ADD FEATURE TO FILTER BY FEATURES
                self.y = average_within_regions(
                    self.dataset, self.mask_img)
        else:
            self.y = self.mask_img

        self.mask_num = self.y.shape[0]

        from neurosynth.analysis.classify import regularize

        X = self.dataset.get_feature_data(features=features)
        self.X = regularize(X, method='scale')

        self.set_dims()
Example #4
    def load_mask_data(self, features=None):
        """ Loads ids and data for each individual mask """
        from neurosynth.analysis.reduce import average_within_regions

        # ADD FEATURE TO FILTER BY FEATURES
        masks_by_studies = average_within_regions(
            self.dataset, self.mask_img, threshold=self.thresh)
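        # One row per mask region; entries mark which studies exceed the activation threshold in that region.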

        study_ids = self.dataset.feature_table.data.index

        print "Loading data from neurosynth..."

        pb = tools.ProgressBar(len(list(masks_by_studies)), start=True)

        self.ids_by_masks = []
        self.data_by_masks = []
        for mask in masks_by_studies:

            m_ids = study_ids[np.where(mask == True)[0]]
            self.ids_by_masks.append(m_ids)
            self.data_by_masks.append(self.dataset.get_feature_data(ids=m_ids, features=features))
            pb.next()

        self.mask_num = masks_by_studies.shape[0]

        self.feature_names = self.dataset.get_feature_data(features=features).columns.tolist()
    def load_data(self, features, X_threshold):
        """ Load data into c_data """
        from neurosynth.analysis.reduce import average_within_regions

        # Load Masks by studies matrix

        # ADD FEATURE TO FILTER BY FEATURES
        masks_by_studies = average_within_regions(self.dataset, self.mask_img, threshold=self.thresh)

        study_ids = self.dataset.feature_table.data.index

        print "Loading data from neurosynth..."

        pb = tools.ProgressBar(len(list(masks_by_studies)), start=True)

        self.ids_by_masks = []
        self.data_by_masks = []
        for mask in masks_by_studies:

            m_ids = study_ids[np.where(mask == True)[0]]
            self.ids_by_masks.append(m_ids)
            self.data_by_masks.append(self.dataset.get_feature_data(ids=m_ids))
            pb.next()

        self.mask_num = masks_by_studies.shape[0]    
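        # Every ordered pair of regions becomes its own two-class problem below.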
        self.mask_pairs = list(itertools.permutations(range(0, self.mask_num), 2))

        filename = path.join(mkdtemp(), 'c_data.dat')
        self.c_data = np.memmap(filename, dtype='object',
                                mode='w+', shape=(self.mask_num, self.mask_num))
        # Load data
        for pair in self.mask_pairs:
            reg1_ids = self.ids_by_masks[pair[0]]
            reg2_ids = self.ids_by_masks[pair[1]]

            reg1_set = list(set(reg1_ids) - set(reg2_ids))
            reg2_set = list(set(reg2_ids) - set(reg1_ids))
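            # Studies appearing in both regions are dropped so the two classes do not overlap.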

            x1 = self.data_by_masks[pair[0]]
            x1 = np.array(x1)[np.where(np.in1d(reg1_ids, reg1_set))[0]]

            x2 = self.data_by_masks[pair[1]]
            x2 = np.array(x2)[np.where(np.in1d(reg2_ids, reg2_set))[0]] 

            y = np.array([0]*len(reg1_set) + [1]*len(reg2_set))

            X = np.vstack((x1, x2))

            if X_threshold is not None:
                X = binarize(X, X_threshold)

            from neurosynth.analysis.classify import regularize
            X = regularize(X, method='scale')

            self.c_data[pair] = (X, y)

        if self.memsave:
            self.data_by_masks = []
            self.ids_by_masks = []
Example #6
    def __init__(self, neurosynth_dataset, image_file_name, word_counts_file):

        self.word_subset = neurosynth_dataset.get_feature_names()
        self.word_string_to_index = {}
        for i in range(len(self.word_subset)):
            self.word_string_to_index[self.word_subset[i]] = i
        self.dois = list(neurosynth_dataset.feature_table.ids)
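        # Region-by-study matrix of mean activation within each region of image_file_name.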
        self.average_activation = nsar.average_within_regions(neurosynth_dataset, image_file_name)
        self.get_word_subset_counts(self.word_subset, word_counts_file)
Example #7
	def save_roi_data(self,atlas='all'):
		"""save roi data for all atlases in atlas_list in a json file in data subfolder"""
		if atlas == 'all':
			for atlas in self.atlas_list:
				res = nr.average_within_regions(self.dataset,atlas)
				j=json.dumps(res.tolist())
				atlas_name = 'data/'+ path.basename(atlas)[:-4] + '.json'
				#self.json_files.append(atlas_name)
				jfile = open(atlas_name,'w')
				jfile.write(j)
				jfile.close()
		if atlas == 'craddock':
				atlas = self.atlas_list[3]
				res = nr.average_within_regions(self.dataset,atlas)
				j=json.dumps(res.tolist())
				atlas_name = 'data/'+ path.basename(atlas)[:-4] + '.json'
				jfile = open(atlas_name,'w')
				jfile.write(j)
				jfile.close()
Example #9
    def load_data(self, features, X_threshold):
        """ Load data into c_data """
        # Load data for each mask
        self.load_mask_data(features)

        filename = path.join(mkdtemp(), 'c_data.dat')
        self.c_data = np.memmap(filename, dtype='object',
                                mode='w+', shape=(self.mask_num))
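        # c_data will hold one (X, y) classification problem per region, filled in below.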

        all_ids = self.dataset.image_table.ids

        # If a low thresh is set, then get ids for studies at that threshold
        if self.thresh_low is not None:
            ids_by_masks_low = []
            from neurosynth.analysis.reduce import average_within_regions
            masks_by_studies_low = average_within_regions(
                self.dataset, self.mask_img, threshold=self.thresh_low)
            for mask in masks_by_studies_low:
                m_ids = np.array(all_ids)[np.where(mask == True)[0]]
                ids_by_masks_low.append(m_ids)       

        # Set up data into c_data
        for num, on_ids in enumerate(self.ids_by_masks):

            # If a low threshold is set, then use that to filter "off_ids", otherwise use "on_ids"
            if self.thresh_low is not None:
                off_ids = list(set(all_ids) - set(ids_by_masks_low[num]))
            else:
                off_ids = list(set(all_ids) - set(on_ids))

            on_data = self.data_by_masks[num].dropna()

            off_data = self.dataset.get_feature_data(ids=off_ids).dropna()

            y = np.array([0] * off_data.shape[0] + [1] * on_data.shape[0])

            X = np.vstack((np.array(off_data), np.array(on_data)))

            from neurosynth.analysis.classify import regularize
            X = regularize(X, method='scale')

            if X_threshold is not None:
                X = binarize(X, X_threshold)

            self.c_data[num] = (X, y)

        if self.memsave:
            self.data_by_masks = []
            self.ids_by_masks = []

        self.comparisons = range(0, self.mask_num)

        self.comp_dims = (self.mask_num, )
Example #10
File: mv.py  Project: csddzh/NS_Classify
def bootstrap_mv_full(dataset, clf, scorer, mask, features=None, processes=None, boot_n=100, method='combinatorial', outfile=None, thresh_high=0.05, thresh_low=0):
    from neurosynth.analysis.reduce import average_within_regions

    if processes != 1:
        from multiprocessing import Pool
        pool = Pool(processes=processes)
    else:
        pool = itertools
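        # itertools.imap (Python 2) has the same calling convention as Pool.imap,
        # so the serial path can reuse the pool.imap(...) loop below unchanged.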

    pb = tools.ProgressBar(boot_n, start=True)

    if method == 'shannons':
        cols = ['shannons', 'region', 'boot_n']
    else:
        cols = ['score', 'num_features', 'feature', 'region', 'boot_n']

    X = dataset.get_feature_data(features=features)
    y_high = average_within_regions(dataset, mask, threshold=thresh_high)
    y_low = average_within_regions(dataset, mask, threshold=thresh_low)
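    # Region-by-study activation matrices at the high and low thresholds; both are handed to every bootstrap worker.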
    # Get feature names
    overall_results = []
    for result in pool.imap(bootstrap_mv_full_parallel, itertools.izip(itertools.repeat((
            X, y_high, y_low, clf, scorer, method)), range(boot_n))):
        pb.next()

        if result is not None:
            if method != 'shannons':
                for row in result:
                    overall_results.append(row)
            else:
                overall_results.append(result)

            if outfile is not None:
                pd.DataFrame(overall_results, columns=cols).to_csv(outfile)
                print "Saved"

    overall_results = pd.DataFrame(overall_results, columns=cols)
    overall_results.region += 1
    return overall_results
Example #11
 def _roi_association(self, imgs_to_decode, value='z', binarize=None):
     """ Computes the strength of association between activation in a mask
     and presence/absence of a semantic feature. This is essentially a
     generalization of the voxel-wise reverse inference z-score to the
     multivoxel case.
     """
     imgs_to_decode = imgs_to_decode.squeeze()
     x = average_within_regions(self.dataset, imgs_to_decode).astype(float)
     y = self.dataset.feature_table.data[self.feature_names].values
     if binarize is not None:
         y[y > binarize] = 1.
         y[y < 1.] = 0.
     r = self._xy_corr(x.T, y)
     if value == 'r':
         return r
     elif value == 'z':
         f_r = np.arctanh(r)
         return f_r*np.sqrt(y.shape[0]-3)
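The last two lines above are the Fisher r-to-z transform of the ROI/feature correlations. A minimal standalone sketch of the same arithmetic, with made-up values for r and n (not taken from any of these examples):

import numpy as np

r = 0.1                              # hypothetical correlation between ROI activation and a feature
n = 11000                            # hypothetical number of studies
z = np.arctanh(r) * np.sqrt(n - 3)   # Fisher transform; approximately N(0, 1) under the null
print(z)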
Example #13
print "Getting ready to cluster"
ns_regions = Parallel(n_jobs=1)(delayed(cluster_ward)(dataset, d, r, regions)
                                for d, r in distances)

print "Classifying"
### For number of regions
for n_ix, n_regions in enumerate(regions):
    ns_matched_regions = zip(*ns_regions)[n_ix]

    all_predictions = []
    ### For each fold, predict activation with topics using corresponding clustering
    for fold_i, (train_index, test_index) in enumerate(cver):
        ys = (dataset.feature_table.data.values > 0.001).astype('int').T

        X = average_within_regions(dataset, ns_matched_regions[fold_i]).T
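        # X: studies-by-regions matrix of mean activation within each matched cluster for this fold.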
        match_predictions = Parallel(n_jobs=-1)(
            delayed(fit_predict)(classifier, X[train_index, :], X[
                test_index, :], y[train_index], y[test_index]) for y in ys)
        match_predictions = pd.DataFrame(match_predictions,
                                         columns=['y_test', 'y_pred'])
        match_predictions['region'] = dataset.get_feature_names()
        match_predictions['atlas'] = 'ns_%s' % name
        match_predictions['fold'] = fold_i
        match_predictions['n_regions'] = n_regions
        all_predictions.append(match_predictions)

    all_predictions = pd.concat(all_predictions)
    all_predictions.to_pickle(
        '/home/delavega/projects/classification/results/cv_clust_predict/topics_%s_%d.pkl'
        % (name, n_regions))
predictions = []
n_regions = int(match_regions.get_data().max())

### Generate matched clustering for each fold of ns data
ns_regions = Parallel(n_jobs=-1)(delayed(cv_cluster)(
    dataset, reference.data[:, train_index], match_regions, train_index)
                                 for train_index, _ in cver)

all_predictions = []
### For each fold, predict activation with topics using corresponding clustering
for fold_i, (train_index, test_index) in enumerate(cver):
    X_train = dataset.feature_table.data.iloc[train_index, :]
    X_test = dataset.feature_table.data.iloc[test_index, :]

    ## Predict using matched neurosynth atlas
    ys = average_within_regions(dataset, ns_regions[fold_i],
                                threshold=0.05).astype('int64')
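    # Binary region-by-study labels: which studies activate each matched region at the 0.05 threshold.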
    ns_predictions = Parallel(n_jobs=-1)(delayed(fit_predict)(
        classifier, X_train, X_test, y[train_index], y[test_index])
                                         for y in ys)
    ns_predictions = pd.DataFrame(ns_predictions, columns=['y_test', 'y_pred'])
    ns_predictions['region'] = range(0, n_regions + 1)
    ns_predictions['atlas'] = 'ns_craddock'
    ns_predictions['fold'] = fold_i
    all_predictions.append(ns_predictions)

    ## Predict using original craddock atlas
    ys = average_within_regions(dataset, match_regions,
                                threshold=0.05).astype('int64')
    match_predictions = Parallel(n_jobs=-1)(delayed(fit_predict)(
        classifier, X_train, X_test, y[train_index], y[test_index])
                                            for y in ys)