def neurosynthInit(dbsize):
    """Build a Neurosynth ``Dataset`` for the given term-set size.

    Args:
        dbsize: Size label of the term set. It is converted with ``str()``
            and selects the ``data/<dbsize>terms/`` directory, so both
            ``525`` and ``"525"`` work.

    Returns:
        The ``Dataset`` built from that directory's ``database.txt`` with
        the features from ``features.txt`` added.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and
    # is valid syntax on Python 3.
    print("Initializing Neurosynth database...")
    # str() lets callers pass the size as an int; plain concatenation raised
    # TypeError for anything that was not already a string.
    term_dir = 'data/' + str(dbsize) + 'terms/'
    dataset = Dataset(term_dir + 'database.txt')
    dataset.add_features(term_dir + 'features.txt')
    return dataset
def neurosynthInit(dbsize):
    """Initialize the Neurosynth database and return the Dataset object.

    Args:
        dbsize: Term-set size; converted with ``str()`` and used to pick
            the ``data/<dbsize>terms/`` directory.

    Returns:
        The ``Dataset`` built from ``database.txt`` in that directory, with
        the features from ``features.txt`` added.
    """
    # The bare print statement is a syntax error on Python 3; this form
    # prints the same text on both major versions.
    print("Initializing Neurosynth database...")
    term_dir = 'data/' + str(dbsize) + 'terms/'
    db = Dataset(term_dir + 'database.txt')
    db.add_features(term_dir + 'features.txt')
    return db
def get_dataset(self, force_load=False):
    """Populate ``self.dataset``, preferring the pickle in ``self.datadir``.

    Args:
        force_load: When true, ignore an existing ``dataset.pkl`` and
            rebuild from ``database.txt`` and ``features.txt``. The rebuilt
            dataset is saved back to ``dataset.pkl``.
    """
    pickle_path = os.path.join(self.datadir, 'dataset.pkl')
    rebuild = force_load or not os.path.exists(pickle_path)
    if rebuild:
        print('loading database - this takes a few minutes')
        self.dataset = Dataset(os.path.join(self.datadir, 'database.txt'))
        self.dataset.add_features(os.path.join(self.datadir, 'features.txt'))
        self.dataset.save(pickle_path)
        return
    print('loading database from', pickle_path)
    self.dataset = Dataset.load(pickle_path)
def __init__(self, db, dataset=None, studies=None, features=None,
             reset_db=False, reset_dataset=False, download_data=False):
    """
    Initialize instance from a pickled Neurosynth Dataset instance or a
    pair of study and feature .txt files.

    Args:
        db: the SQLAlchemy database connection to use.
        dataset: an optional filename of a pickled neurosynth Dataset
            instance.
        studies: name of file containing activation data. If passed, a new
            Dataset instance will be constructed.
        features: name of file containing feature data.
        reset_db: if True, will drop and re-create all database tables
            before adding new content. If False (default), will add
            content incrementally.
        reset_dataset: if True, will regenerate the pickled Neurosynth
            dataset.
        download_data: if True, ignores any existing files and downloads
            the latest Neurosynth data files from GitHub.

    Raises:
        ValueError: if a new Dataset has to be generated but ``studies``
            or ``features`` is missing.
    """
    if (studies is not None and not os.path.exists(studies)) \
            or settings.RESET_ASSETS:
        print("WARNING: RESETTING ALL NEUROSYNTH ASSETS!")
        self.reset_assets(download_data)

    # Load or create Neurosynth Dataset instance
    needs_new_dataset = (
        dataset is None
        or reset_dataset
        or (isinstance(dataset, str) and not os.path.exists(dataset))
    )
    if needs_new_dataset:
        print("\tInitializing a new Dataset...")
        if studies is None or features is None:
            # The message names the actual parameters; it used to say
            # "analyses", which is not an argument of this method.
            raise ValueError(
                "To generate a new Dataset instance, both studies and "
                "features must be provided.")
        dataset = Dataset(studies)
        dataset.add_features(features)
        dataset.save(settings.PICKLE_DATABASE)
    else:
        print("Loading existing Dataset...")
        dataset = Dataset.load(dataset)
        if features is not None:
            dataset.add_features(features)

    self.dataset = dataset
    self.db = db

    if reset_db:
        print("WARNING: RESETTING DATABASE!!!")
        self.reset_database()
def _getdata():
    """Download the Neurosynth data and return it as a ``Dataset``.

    The files are downloaded into ``data/``, which is created if missing.
    The resulting dataset is also pickled to ``data/dataset.pkl`` so later
    runs can load it instead of rebuilding.
    """
    LOG.warning("Downloading and processing Neurosynth database")
    os.makedirs("data", exist_ok=True)

    # Imported lazily, exactly where the original did it.
    from neurosynth.base.dataset import download

    download(path="data", unpack=True)

    dataset = Dataset("data/database.txt")
    dataset.add_features("data/features.txt")
    dataset.save("data/dataset.pkl")
    return dataset
def test_dataset_save_and_load(self):
    """Smoke test: save and load a Dataset with and without mappables."""
    # mkstemp creates the file itself; mktemp only returns a name and is
    # deprecated because another process can claim that name first.
    fd, path = tempfile.mkstemp()
    os.close(fd)
    try:
        self.dataset.save(path, keep_mappables=True)
        # The file already exists, so check that save() wrote into it.
        self.assertGreater(os.path.getsize(path), 0)
        dataset = Dataset.load(path)
        self.assertIsNotNone(dataset)
        self.assertIsNotNone(dataset.mappables)
        self.assertEqual(len(dataset.mappables), 5)

        # Now with the mappables deleted
        dataset.save(path)
        self.assertGreater(os.path.getsize(path), 0)
        dataset = Dataset.load(path)
        self.assertEqual(len(dataset.mappables), 0)
    finally:
        # Remove the temp file even when an assertion above fails.
        os.unlink(path)
def create_voxel_x_feature_matrix(path_to_dataset, path_to_image_files):
    """Build an int16 matrix with one column per feature of a pickled Dataset.

    Args:
        path_to_dataset: Path passed to ``Dataset.load``.
        path_to_image_files: Prefix for the per-feature images. It is
            concatenated as-is with ``<feature>_pFgA_z.nii.gz``, so it
            needs its own trailing separator.

    Returns:
        Array of shape ``(dataset.volume.num_vox_in_mask, n_features)``.
        Column ``i`` holds ``dataset.volume.mask(...)`` of the image for
        feature ``i``.
    """
    dataset = Dataset.load(path_to_dataset)
    names = dataset.get_feature_names()
    shape = (dataset.volume.num_vox_in_mask, len(names))
    matrix = zeros(shape, dtype=int16)
    for column, name in enumerate(names):
        image_file = path_to_image_files + name + '_pFgA_z.nii.gz'
        matrix[:, column] = dataset.volume.mask(image_file)
    return matrix
def generate_maps(terms, output_dir):
    """Run a meta-analysis per term and pickle each ``pFgA_z`` image.

    Args:
        terms: Sequence of term strings understood by
            ``dataset.get_ids_by_features``.
        output_dir: Base directory. Maps are written to
            ``<output_dir>/maps/<term>_pFgA_z.pkl``, with spaces in the
            term replaced by underscores.
    """
    import os

    f, d = download_data()
    output_dir = "%s/maps" % (output_dir)
    # Create the target directory up front so the first pickle.dump does
    # not fail on a missing path.
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    print("Deriving pickled maps to extract relationships from...")
    # The two pandas.read_csv calls that used to sit here loaded both files
    # in full and their results were never used.
    dataset = Dataset(d)
    dataset.add_features(f)

    total = len(terms)
    for t, term in enumerate(terms):
        print("Generating P(term|activation) for term %s, %s of %s"
              % (term, t, total))
        ids = dataset.get_ids_by_features(term)
        maps = meta.MetaAnalysis(dataset, ids)
        term_name = term.replace(" ", "_")
        # The context manager closes the file; the bare open() leaked it.
        with open("%s/%s_pFgA_z.pkl" % (output_dir, term_name), "wb") as handle:
            pickle.dump(maps.images["pFgA_z"], handle)
def test_dataset_initializes(self):
    """ Test whether dataset initializes properly. """
    data_dir = get_test_data_path()
    # Keep the instance built through the two-argument constructor so it is
    # actually checked; it used to be created and thrown away.
    built = Dataset(data_dir + 'test_dataset.txt',
                    data_dir + 'test_features.txt')
    for dataset in (built, self.dataset):
        self.assertIsNotNone(dataset.masker)
        self.assertIsNotNone(dataset.image_table)
        self.assertEqual(len(dataset.mappables), 5)
        self.assertIsNotNone(dataset.r)
    # NOTE(review): the original passed 'field' as the assertion *message*.
    # It was probably meant to be assertEqual(..., 'field') -- confirm
    # against the test data before tightening this check.
    self.assertIsNotNone(
        self.dataset.mappables[0].data['extra_field'].iloc[2])
def test_dataset_save_and_load(self):
    """Smoke test: a saved Dataset can be loaded back with its image table."""
    # mkstemp creates the file itself; mktemp only returns a name and is
    # deprecated because another process can claim that name first.
    fd, path = tempfile.mkstemp()
    os.close(fd)
    try:
        self.dataset.save(path)
        # The file already exists, so check that save() wrote into it.
        self.assertGreater(os.path.getsize(path), 0)
        dataset = Dataset.load(path)
        self.assertIsNotNone(dataset)
        self.assertEqual(len(dataset.image_table.ids), 5)
    finally:
        # Remove the temp file even when an assertion above fails.
        os.unlink(path)
def __init__(self, db, dataset=None, studies=None, features=None,
             reset_db=False, reset_dataset=False, download_data=True):
    """
    Initialize instance from a pickled Neurosynth Dataset instance or a
    pair of study and feature .txt files.

    Args:
        db: the SQLAlchemy database connection to use.
        dataset: an optional filename of a pickled neurosynth Dataset
            instance. Note that the Dataset must contain the list of
            Mappables (i.e., save() must have been called with
            keep_mappables set to True).
        studies: name of file containing activation data. If passed, a new
            Dataset instance will be constructed.
        features: name of file containing feature data.
        reset_db: if True, will drop and re-create all database tables
            before adding new content. If False (default), will add
            content incrementally.
        reset_dataset: if True, will regenerate the pickled Neurosynth
            dataset.
        download_data: if True (default), ignores any existing files and
            downloads the latest Neurosynth data files from GitHub.

    Raises:
        ValueError: if a new Dataset has to be generated but ``studies``
            or ``features`` is missing.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` so the
    # filename check keeps working on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if (studies is not None and not os.path.exists(studies)) \
            or settings.RESET_ASSETS:
        print("WARNING: RESETTING ALL NEUROSYNTH ASSETS!")
        self.reset_assets(download_data)

    # Load or create Neurosynth Dataset instance
    needs_new_dataset = (
        dataset is None
        or reset_dataset
        or (isinstance(dataset, string_types)
            and not os.path.exists(dataset))
    )
    if needs_new_dataset:
        print("\tInitializing a new Dataset...")
        if studies is None or features is None:
            # The message names the actual parameters; it used to say
            # "analyses", which is not an argument of this method.
            raise ValueError(
                "To generate a new Dataset instance, both studies and "
                "features must be provided.")
        dataset = Dataset(studies)
        dataset.add_features(features)
        dataset.save(settings.PICKLE_DATABASE, keep_mappables=True)
    else:
        print("\tLoading existing Dataset...")
        dataset = Dataset.load(dataset)
        if features is not None:
            dataset.add_features(features)

    self.dataset = dataset
    self.db = db

    if reset_db:
        print("WARNING: RESETTING DATABASE!!!")
        self.reset_database()
def fetch_neurosynth_dataset(data_dir, return_pkl=True):
    """Downloads the Neurosynth dataset

    Parameters
    ----------
    data_dir : str
        Directory in which to download the dataset. It is created,
        including missing parent directories, when it does not exist.
    return_pkl : bool
        If true, creates and returns the .pkl file. Otherwise returns the
        dataset and features files.

    Returns
    -------
    tuple, str
        If return_pkl is false, returns a tuple containing the path to the
        database.txt and the features.txt file. Otherwise returns the path
        to the .pkl file.
    """
    if not os.path.isdir(data_dir):
        # makedirs also creates missing parents; os.mkdir raised for a
        # nested path such as "cache/neurosynth".
        os.makedirs(data_dir)

    dataset_file = os.path.join(data_dir, "database.txt")
    if not os.path.isfile(dataset_file):
        logging.info("Downloading the Neurosynth dataset.")
        download(data_dir, unpack=True)
    feature_file = os.path.join(data_dir, "features.txt")

    if not return_pkl:
        return (dataset_file, feature_file)

    pkl_file = os.path.join(data_dir, "dataset.pkl")
    if not os.path.isfile(pkl_file):
        logging.info(
            "Converting Neurosynth data to a .pkl file. This may take a while."
        )
        dataset = Dataset(dataset_file, feature_file)
        dataset.save(pkl_file)
    return pkl_file
def save_dataset_instance(db_filename, kw_filename, instance_filename):
    """Build a Dataset from files under ``./raw_data/`` and pickle it there.

    Args:
        db_filename: Base name (no extension) of the database ``.txt`` file.
        kw_filename: Base name (no extension) of the features ``.txt`` file.
        instance_filename: Base name (no extension) of the ``.pkl`` file
            to write.

    Returns:
        The newly built Dataset.
    """
    raw_dir = './raw_data/'
    database_path = raw_dir + db_filename + '.txt'
    dataset = Dataset(database_path)
    features_path = raw_dir + kw_filename + '.txt'
    dataset.add_features(features_path)
    pickle_path = raw_dir + instance_filename + '.pkl'
    dataset.save(pickle_path)
    return dataset
def extract_relations(terms, maps_dir, output_dir):
    """Correlate the ``pFgA_z`` maps of every pair of the requested terms.

    A pickled map is loaded for every term column of the features file
    (every column after the first). Maps missing from ``maps_dir`` are
    generated and pickled first. Each unordered pair of ``terms`` is then
    scored with ``pearsonr`` and the ``(term1, term2, score)`` tuples are
    handed to ``save_relations``.

    Args:
        terms: A term string or a sequence of term strings to relate.
        maps_dir: Directory holding (or receiving) ``<term>_pFgA_z.pkl``.
        output_dir: Passed through to ``save_relations``.
    """
    if isinstance(terms, str):
        terms = [terms]

    f, d = download_data()
    features = pandas.read_csv(f, sep="\t")
    allterms = features.columns.tolist()
    allterms.pop(0)  # pmid

    dataset = Dataset(d)
    dataset.add_features(f)

    image_matrix = pandas.DataFrame(columns=range(228453))
    for term in allterms:
        term_name = term.replace(" ", "_")
        pickled_map = "%s/%s_pFgA_z.pkl" % (maps_dir, term_name)
        if not os.path.exists(pickled_map):
            print("Generating P(term|activation) for term %s" % (term))
            ids = dataset.get_ids_by_features(term)
            maps = meta.MetaAnalysis(dataset, ids)
            # Context managers close the pickle files; the bare open()
            # calls left every handle open.
            with open(pickled_map, "wb") as handle:
                pickle.dump(maps.images["pFgA_z"], handle)
        with open(pickled_map, "rb") as handle:
            map_data = pickle.load(handle)
        image_matrix.loc[term] = map_data

    tuples = []
    for position, term1 in enumerate(terms):
        print("Extracting NeuroSynth relationships for term %s..." % (term1))
        # Pair each term only with the ones after it, so every unordered
        # pair is scored exactly once.
        for term2 in terms[position + 1:]:
            score = pearsonr(image_matrix.loc[term1],
                             image_matrix.loc[term2])[0]
            tuples.append((term1, term2, score))

    save_relations(output_dir=output_dir, relations=tuples)
class TestAnalysis(unittest.TestCase):
    """Test case for the analysis tools; every test body is still a stub."""

    def setUp(self):
        """Build a Dataset from the test database and attach its features."""
        dataset = Dataset('data/test_dataset.txt')
        dataset.add_features('data/test_features.txt')
        self.dataset = dataset

    def test_meta_analysis(self):
        """Placeholder for the full meta-analysis stream test."""

    def test_decoder(self):
        """Placeholder for the decoder test."""

    def test_coactivation(self):
        """Placeholder for the seed-based coactivation test."""

    def test_roi_averaging(self):
        """Placeholder for the ROI averaging test."""

    def test_get_random_voxels(self):
        """Placeholder for the random-voxel selection test."""
def functional_preference_profile_prep():
    """Load the inputs for the functional preference profile analysis.

    Returns:
        A ``(dataset, nicknames, word_keys)`` tuple:

        * ``dataset`` is loaded from ``data/neurosynth_60_0.4.pkl``.
        * ``nicknames`` is the topic table with a ``topic_name`` column
          (topic number joined with the first three top words by
          underscores), sorted by that column.
        * ``word_keys`` is the topic-key table with ``top_2`` (first two
          top words) and ``topic_name`` (``"topic"`` plus the topic id).
    """
    from neurosynth.base.dataset import Dataset

    def _topic_label(row):
        # "<number>_<word1>_<word2>_<word3>"
        leading_words = row.top_words.split(' ')[0:3]
        return '_'.join([str(row.topic_number)] + leading_words)

    def _first_two_words(text):
        words = text.split(' ')
        return words[0] + ' ' + words[1]

    dataset = Dataset.load("data/neurosynth_60_0.4.pkl")

    nicknames = pd.read_csv('data/v4-topics-60.txt', delimiter='\t')
    nicknames['topic_name'] = nicknames.apply(_topic_label, axis=1)
    nicknames = nicknames.sort_values('topic_name')

    word_keys = pd.read_csv("data/topic_keys60-july_cognitive.csv")
    word_keys['top_2'] = word_keys['Top words'].apply(_first_two_words)
    word_keys['topic_name'] = "topic" + word_keys['topic'].astype('str')

    return dataset, nicknames, word_keys
def import_neurosynth_git(self):
    """Load the Neurosynth dataset from the checkout at ``self.npath``.

    Sets ``self.dataset`` (from ``data/dataset.pkl`` when it can be opened,
    otherwise rebuilt from ``data/database.txt`` and ``data/features.txt``)
    and ``self.feature_list`` (the dataset's feature names).
    """
    # Make the neurosynth checkout importable, without piling up duplicate
    # entries when this method runs more than once.
    if self.npath not in sys.path:
        sys.path.append(self.npath)
    from neurosynth.base.dataset import Dataset

    data_prefix = self.npath + os.sep + 'data/'
    try:
        # NOTE: unpickling can execute arbitrary code; only point npath at
        # a checkout you trust.
        # The context manager closes the handle the bare open() leaked.
        with open(data_prefix + 'dataset.pkl', 'rb') as handle:
            self.dataset = cPickle.load(handle)
    except IOError:
        # No readable pickle: build the Dataset from the raw text files.
        self.dataset = Dataset(data_prefix + 'database.txt')
        self.dataset.add_features(data_prefix + 'features.txt')

    self.feature_list = self.dataset.get_feature_names()
def __init__(
    self,
    metric="emd",
    image_type="pAgF",
    name=None,
    multi=True,
    image_transform="block_reduce",
    downsample=8,
    auto_save=True,
):
    """Configure the analysis and load the Neurosynth dataset.

    Args:
        metric: A callable, or ``"emd"`` to use ``euclidean_emd``.
        image_type: Stored as ``self.image_type``.
        name: Analysis name; defaults to a timestamp of the form
            ``analysis_from_<month>-<day>_<hour>-<minute>-<second>``.
        multi: Stored as ``self.multi``.
        image_transform: A callable, or ``"block_reduce"`` to use
            ``block_reduce`` with ``factor=downsample``.
        downsample: Factor handed to the default ``block_reduce`` transform.
        auto_save: Stored as ``self.auto_save``.

    Raises:
        ValueError: if ``metric`` or ``image_transform`` is neither a
            callable nor one of the recognised names.
    """
    self.image_type = image_type
    self.multi = multi
    self.downsample = downsample
    self.auto_save = auto_save

    if callable(metric):
        self.metric = metric
    elif metric == "emd":
        self.metric = euclidean_emd
    else:
        raise ValueError(
            "{metric} is not a valid metric".format(metric=metric))

    if callable(image_transform):
        self.image_transform = image_transform
    elif image_transform == "block_reduce":
        from functools import partial
        self.image_transform = partial(block_reduce, factor=downsample)
    else:
        # The two adjacent literals used to join without a space and read
        # "...is not a validtransform function".
        raise ValueError(
            "{image_transform} is not a valid transform function".format(
                image_transform=image_transform))

    self.name = name if name else time.strftime("analysis_from_%m-%d_%H-%M-%S")

    try:
        self.data = Dataset.load("data/dataset.pkl")
    except FileNotFoundError:
        # No pickled dataset yet: download and build it.
        self.data = _getdata()
"""Create a new Dataset instance from a database file and load features.

This is basically the example from the quickstart in the README. Assumes
you have database.txt and features.txt files in the current dir.
"""
# The docstring above used to sit after the imports, where it was only a
# discarded string expression and not the module docstring.
from neurosynth.base.dataset import Dataset
from neurosynth.analysis import meta

# Create Dataset instance from a database file.
dataset = Dataset('database.txt')

# Load features from file
dataset.add_features('features.txt')

# Pickle the Dataset to file so we can use Dataset.load() next time
# instead of having to sit through the generation process again.
dataset.save('dataset.pkl')

# Get Mappable IDs for all features that start with 'emo'
ids = dataset.get_ids_by_features('emo*', threshold=0.001)

# Run a meta-analysis and save results
ma = meta.MetaAnalysis(dataset, ids)
ma.save_results('emotion')
def get_test_dataset(prefix='test'):
    """Build a Dataset from the files in the test data directory.

    Args:
        prefix: File-name prefix. The dataset is read from
            ``<prefix>_dataset.txt`` and its features from
            ``<prefix>_features.txt``. The default ``'test'`` reproduces
            the previously hard-coded file names.

    Returns:
        The Dataset with features added.
    """
    test_data_path = get_test_data_path()
    dataset = Dataset(test_data_path + '%s_dataset.txt' % prefix)
    dataset.add_features(test_data_path + '%s_features.txt' % prefix)
    return dataset
class TestBase(unittest.TestCase): def setUp(self): """ Create a new Dataset and add features. """ self.dataset = Dataset('data/test_dataset.txt') self.dataset.add_features('data/test_features.txt') def test_dataset_initializes(self): """ Test whether dataset initializes properly. """ self.assertIsNotNone(self.dataset.volume) self.assertIsNotNone(self.dataset.image_table) self.assertEqual(len(self.dataset.mappables), 5) self.assertIsNotNone(self.dataset.volume) self.assertIsNotNone(self.dataset.r) def test_image_table_loads(self): """ Test ImageTable initialization. """ self.assertIsNotNone(self.dataset.image_table) it = self.dataset.image_table self.assertEqual(len(it.ids), 5) self.assertIsNotNone(it.volume) self.assertIsNotNone(it.r) self.assertEqual(it.data.shape, (228453, 5)) # Add tests for values in table def test_feature_table_loads(self): """ Test FeatureTable initialization. """ tt = self.dataset.feature_table self.assertIsNotNone(tt) self.assertEqual(len(self.dataset.list_features()), 5) self.assertEqual(tt.data.shape, (5,5)) self.assertEqual(tt.feature_names[3], 'f4') self.assertEqual(tt.data[0,0], 0.0003) def test_feature_search(self): """ Test feature-based Mappable search. Tests both the FeatureTable method and the Dataset wrapper. """ tt = self.dataset.feature_table features = tt.search_features(['f*']) self.assertEqual(len(features), 4) d = self.dataset ids = d.get_ids_by_features(['f*'], threshold=0.001) self.assertEqual(len(ids), 4) img_data = d.get_ids_by_features(['f1', 'f3', 'g1'], 0.001, func='max', get_image_data=True) self.assertEqual(img_data.shape, (228453, 5)) def test_selection_by_mask(self): """ Test mask-based Mappable selection. Only one peak in the test dataset (in study5) should be within the sgACC. """ ids = self.dataset.get_ids_by_mask('data/sgacc_mask.nii.gz') self.assertEquals(len(ids), 1) self.assertEquals('study5', ids[0]) def test_selection_by_peaks(self): """ Test peak-based Mappable selection. 
""" ids = self.dataset.get_ids_by_peaks(np.array([[3, 30, -9]])) self.assertEquals(len(ids), 1) self.assertEquals('study5', ids[0]) # def test_invalid_coordinates_ignored(self): """ Test dataset contains 3 valid coordinates and one outside mask. But this won't work
# Script: run neurosynth's `magic` clustering on a pickled Dataset using
# the coactivation method and four topic features.
from neurosynth.analysis.cluster import magic
from neurosynth.base.dataset import Dataset

# mydir = "/home/delavega/projects/permutation_clustering/"
mydir = "../data/"

# NOTE: mydir already ends in "/", so this path contains "//".
dataset = Dataset.load(mydir + '/datasets/abs_60topics_filt_jul.pkl')

# NOTE(review): roi_mask and global_mask are defined but not passed to
# magic() below -- confirm whether they are used further down the file.
roi_mask = mydir + '../masks/new_medial_fc_30.nii.gz'
global_mask = mydir + "../masks/MNI152_T1_2mm_brain.nii.gz"

magic(dataset, 10, method='coactivation',
      features=['topic57', 'topic32', 'topic39', 'topic44'],
      output_dir='../results/ef_cluster/all_cluster/',
      min_studies_per_voxel=100, filename='okay')
# -*- coding: utf-8 -*- # Here I use Yeo to test Neurosynth's classify functions from neurosynth.base.dataset import Dataset from neurosynth.analysis import classify import os import itertools import re import numpy as np import pdb import sys from nipype.interfaces import fsl from sklearn.ensemble import GradientBoostingClassifier dataset = Dataset.load('../data/pickled.pkl') masklist = ['7Networks_Liberal_1.nii.gz', '7Networks_Liberal_2.nii.gz', '7Networks_Liberal_3.nii.gz', '7Networks_Liberal_4.nii.gz', '7Networks_Liberal_5.nii.gz', '7Networks_Liberal_6.nii.gz', '7Networks_Liberal_7.nii.gz'] rootdir = '../masks/Yeo_JNeurophysiol11_MNI152/standardized/' class maskClassifier: def __init__(self, classifier=GradientBoostingClassifier(), param_grid={'max_features': np.arange(2, 140, 44), 'n_estimators': np.arange(5, 141, 50), 'learning_rate': np.arange(0.05, 1, 0.1)}, thresh = 0.08) diffs = {}
# Script: build and pickle two Datasets from the same revised database,
# one with abstract word features and one with abstract topic features.
import neurosynth.base.dataset
from neurosynth.base.dataset import Dataset

# Show which installed copy of the dataset module is in use. The
# parenthesised form is valid on Python 3 and prints the same on Python 2.
print(neurosynth.base.dataset.__file__)

dataset = Dataset('../data/unprocessed/abstract/full_database_revised.txt')
dataset.add_features('../data/unprocessed/abstract/abstract_features.txt')
dataset.save('../data/datasets/dataset_abs_words_pandas.pkl')

# Rebuilt from scratch rather than reused, so the topic features are not
# added on top of the word features.
dataset = Dataset('../data/unprocessed/abstract/full_database_revised.txt')
dataset.add_features('../data/unprocessed/abstract_topics/doc_features100.txt')
dataset.save('../data/datasets/dataset_abs_topics_pandas.pkl')
class NeurosynthMerge:
    """Run Neurosynth meta-analyses over a feature list merged by a thesaurus."""

    def __init__(self, thesaurus, npath, outdir, test_mode=False):
        """
        Generates a new set of images using the neurosynth repository
        combining across terms in a thesaurus.

        Args:
            - thesaurus: A list of tuples where:[('term that will be the
                name of the file', 'the other term', 'expression combining
                the terms')]
                - the last expression is alphanumeric and separated by:
                (& for and) (&~ for andnot) (| for or)
            - npath: directory where the neurosynth git repository is
                locally on your machine
                (https://github.com/neurosynth/neurosynth)
            - outdir: directory where the generated images will be saved
            - test_mode: when true, the code will run an abridged version
                for test purposes (as implemented by test.Neurosynth.py)
        """
        self.thesaurus = thesaurus
        self.npath = npath
        self.outdir = outdir
        self.import_neurosynth_git()
        from neurosynth.analysis import meta

        # Drop every term of each tuple from the feature list and append
        # the tuple's last element (the combining expression).
        for triplet in thesaurus:
            self.feature_list = [feature for feature in self.feature_list
                                 if feature not in triplet]
            self.feature_list.append(triplet[-1])

        # This makes an abridged version of feature_list for testing
        # purposes.
        if test_mode:
            self.feature_list = [triplet[-1] for triplet in thesaurus]

        # Compiled once instead of on every iteration. The raw string
        # avoids the invalid "\W" escape warning.
        non_word = re.compile(r'\W+')

        # Run meta-analyses on the new feature set and save the results to
        # outdir.
        for feature in self.feature_list:
            self.ids = self.dataset.get_ids_by_expression(feature,
                                                          threshold=0.001)
            ma = meta.MetaAnalysis(self.dataset, self.ids)
            # Use the text before the first non-word character as the file
            # name, to avoid characters that are illegal in file names.
            feat_fname = non_word.split(feature)[0]
            # Save the results (many different types of files)
            ma.save_results(self.outdir + os.sep + feat_fname)

    def import_neurosynth_git(self):
        """Load the Neurosynth dataset from the checkout at ``self.npath``.

        Sets ``self.dataset`` (from ``data/dataset.pkl`` when it can be
        opened, otherwise rebuilt from ``data/database.txt`` and
        ``data/features.txt``) and ``self.feature_list``.
        """
        # Make the neurosynth checkout importable, without piling up
        # duplicate entries when this method runs more than once.
        if self.npath not in sys.path:
            sys.path.append(self.npath)
        from neurosynth.base.dataset import Dataset

        data_prefix = self.npath + os.sep + 'data/'
        try:
            # NOTE: unpickling can execute arbitrary code; only point npath
            # at a checkout you trust.
            # The context manager closes the handle the bare open() leaked.
            with open(data_prefix + 'dataset.pkl', 'rb') as handle:
                self.dataset = cPickle.load(handle)
        except IOError:
            # No readable pickle: build the Dataset from the raw files.
            self.dataset = Dataset(data_prefix + 'database.txt')
            self.dataset.add_features(data_prefix + 'features.txt')

        # Get names of features.
        self.feature_list = self.dataset.get_feature_names()
def __init__(self, dbsize):
    """Load the Neurosynth database for the given term-set size.

    Args:
        dbsize: Term-set size; converted with ``str()`` and used to pick
            the ``data/<dbsize>terms/`` directory.

    Sets ``self.db`` (the Dataset with features added), ``self.ids`` (the
    result of ``self.getIDs()``) and ``self.decoder`` (``None`` for now).
    """
    # Parenthesised single-argument print behaves the same on Python 2 and
    # is valid syntax on Python 3.
    print("Initializing Neurosynth database...")
    term_dir = "data/" + str(dbsize) + "terms/"
    self.db = Dataset(term_dir + "database.txt")
    self.db.add_features(term_dir + "features.txt")
    self.ids = self.getIDs()
    self.decoder = None
class NeuroSynth:
    """Wrapper around a Neurosynth Dataset for meta-analysis and decoding."""

    def __init__(self, dbsize):
        """Load the database under ``data/<dbsize>terms/`` and cache its ids."""
        print("Initializing Neurosynth database...")
        term_dir = "data/" + str(dbsize) + "terms/"
        self.db = Dataset(term_dir + "database.txt")
        self.db.add_features(term_dir + "features.txt")
        self.ids = self.getIDs()
        self.decoder = None
        # No custom masker by default; decode() falls back to the
        # dataset's own masker.
        self.masker = None

    def neurosynthContrast(self, papers1, papers2, fdr, outdir=None,
                           outprefix=None, image_list=None):
        """Run a meta-analysis contrasting two sets of papers.

        Results are saved to ``outdir`` when it is given.

        Returns:
            The ``images`` of the resulting ``MetaAnalysis``.
        """
        ma = meta.MetaAnalysis(self.db, papers1, papers2, q=float(fdr))
        if outdir:
            print("Saving results to %s" % (outdir))
            ma.save_results(outdir, prefix=outprefix, prefix_sep="_",
                            image_list=image_list)
        return ma.images

    def neurosynthMeta(self, papers, fdr, outdir=None, outprefix=None,
                       image_list=None):
        """Run a meta-analysis over the ids in ``papers`` that are in the database.

        Returns:
            The ``images`` of the ``MetaAnalysis``, or ``None`` when none
            of the given ids is in the database.
        """
        valid_ids = self.get_valid_ids(papers)
        if not valid_ids:
            print("No studies found in database for ids in question!")
            return None
        ma = meta.MetaAnalysis(self.db, valid_ids, q=float(fdr))
        if outdir:
            print("Saving results to output directory %s" % (outdir))
            ma.save_results(outdir, prefix=outprefix, prefix_sep="_",
                            image_list=image_list)
        return ma.images

    def get_valid_ids(self, papers):
        """Return the entries of ``papers`` whose integer value is in ``self.ids``.

        Each entry is stripped of spaces and converted with ``int()`` for
        the lookup only; the entries are returned unchanged.
        """
        # One set build replaces a linear scan of self.ids per paper.
        known_ids = set(self.ids)
        valid_ids = [x for x in papers if int(x.strip(" ")) in known_ids]
        print("Found %s valid ids." % (str(len(valid_ids))))
        return valid_ids

    def decode(self, images, outfile, mrs=None, round=4):
        """Decode images against the database, or against custom maps.

        Args:
            images: Image file names to decode.
            outfile: Where the decoding table is saved.
            mrs: Optional list of image file names to compare against
                instead of the Neurosynth database.
            round: Number of decimals kept for the correlations computed
                when ``mrs`` is given.

        Returns:
            The decoding result.
        """
        if not self.decoder:
            self.decoder = decode.Decoder(self.db)

        # If mrs is not specified, do decoding against neurosynth database
        if not mrs:
            return self.decoder.decode(images, save=outfile)

        # self.masker was never assigned before, so this branch always
        # raised AttributeError. Fall back to the dataset's masker.
        # NOTE(review): assumes the Dataset exposes `.masker` -- confirm
        # for the installed neurosynth version.
        masker = getattr(self, "masker", None)
        if masker is None:
            masker = self.db.masker

        # This is akin to the traditional neurosynth method: Pearson's r
        # between every image to decode and every comparison image.
        imgs_to_compare = imageutils.load_imgs(mrs, masker)
        imgs_to_decode = imageutils.load_imgs(images, masker)
        x, y = imgs_to_compare.astype(float), imgs_to_decode.astype(float)
        x, y = x - x.mean(0), y - y.mean(0)
        x, y = x / np.sqrt((x ** 2).sum(0)), y / np.sqrt((y ** 2).sum(0))
        result = np.around(x.T.dot(y).T, round)

        features = [os.path.basename(m) for m in mrs]
        rownames = [os.path.basename(m) for m in images]
        df = pd.DataFrame(result, columns=features)
        df.index = rownames
        df.to_csv(outfile, sep="\t")
        return result

    def getFeatures(self, dataset):
        """Return the feature names of ``dataset``."""
        return dataset.get_feature_names()

    def getIDs(self):
        """Return the ids of the database's image table."""
        return self.db.image_table.ids

    def getAuthor(self, db, id):
        """Return the author names stored for one article id.

        The lookup uses ``self.db``; the ``db`` argument is not used.
        """
        article = self.db.get_mappables(id)
        data = article[0].__dict__["data"]
        return [name.strip("^ ") for name in data["authors"].split(",")]

    def getAuthors(self, db):
        """Return the sorted unique author names over all mappables of ``db``."""
        names = []
        for article in db.mappables:
            raw_authors = article.__dict__["data"]["authors"]
            names.extend(name.strip("^ ") for name in raw_authors.split(","))
        return list(np.unique(names))

    def getPaperMeta(self, db, pmid):
        """Return metadata tuples for the mappables of ``db`` whose id matches ``pmid``.

        Each tuple is ``(journal, title, year, doi, id, authors, peaks)``.
        """
        wanted = str(pmid)
        m = []
        for article in db.mappables:
            data = article.__dict__["data"]
            if data["id"] == wanted:
                m.append((data["journal"], data["title"], data["year"],
                          data["doi"], data["id"], data["authors"],
                          data["peaks"]))
        return m
from sklearn.cluster import KMeans, DBSCAN, MiniBatchKMeans
from sklearn import metrics
from scipy import stats

# Locations of the test data and of the atlas files used below.
base_path = '/home/pauli/Development/neurobabel/'
test_data_path = base_path + 'ACE/'
masker_filename = base_path + 'atlases/whs_sd/WHS_SD_rat_one_sm_v2.nii.gz'
atlas_filename = base_path + 'atlases/whs_sd/WHS_SD_rat_atlas_brain_sm_v2.nii.gz'

# NOTE(review): a Masker object is built here, but Dataset() below is given
# the mask *file name*, not this object -- confirm which one is intended.
mask = nib.load(masker_filename)
masker = Masker(mask)
r = 1.0
# Coordinates tagged 'BREGMA' are converted with bregma_to_whs();
# the target space is 'WHS'.
transform = {'BREGMA': transformations.bregma_to_whs()}
target = 'WHS'

# load data set
dataset = Dataset(os.path.join(test_data_path, 'db_bregma_cog_atlas_export.txt'), masker=masker_filename, r=r, transform=transform, target=target)
dataset.feature_table = FeatureTable(dataset)
dataset.add_features(os.path.join(test_data_path, "db_bregma_cog_atlas_features.txt"))  # add features
fn = dataset.get_feature_names()
features = dataset.get_feature_data()
n_xyz, n_articles = dataset.image_table.data.shape

# do topic modeling (LSA)
n_components = 20
svd = TruncatedSVD(n_components=n_components)
X = svd.fit_transform(features)
# Keep the unscaled projection; X is replaced by its standardised version.
X_orig = X.copy()
X = StandardScaler().fit_transform(X_orig)
# db = DBSCAN(eps=10.0, min_samples=10).fit(X)
# Script: keep voxels whose mean image-table value exceeds a threshold and
# fit k-means on them for several cluster counts.
from neurosynth.base.dataset import Dataset
from sklearn.cluster import KMeans
from sklearn.cluster import Ward
import numpy as np
from neurosynth.base.imageutils import save_img
from scipy import sparse

dataset = Dataset.load("../data/datasets/abs_topics_filt.pkl")

# Parenthesised single-argument print behaves the same on Python 2 and is
# valid syntax on Python 3.
print("Filtering voxels...")
data = dataset.image_table.data.toarray()
# Keep rows (voxels) whose mean across columns exceeds the threshold.
voxel_mask = data.mean(axis=1) > 0.0135
good_voxels = data[voxel_mask]
good_voxels = sparse.csr_matrix(good_voxels)

for i in [20, 30, 40, 50]:
    print("Clustering...")
    print(i)
    k_means = KMeans(init='k-means++', n_clusters=i, n_jobs=16)
    k_means.fit(good_voxels)
    # ward = Ward(n_clusters=30)
    # ward.fit(good_voxels)
def dataset(self):
    """Load and return the Dataset pickled at ``settings.PICKLE_DATABASE``."""
    pickle_path = settings.PICKLE_DATABASE
    loaded = Dataset.load(pickle_path)
    return loaded
def get_test_dataset(prefix='test'):
    """Build a Dataset from ``<prefix>_dataset.txt`` and ``<prefix>_features.txt``.

    Both files are looked up in the directory returned by
    ``get_test_data_path()``.

    Args:
        prefix: File-name prefix selecting which test files to load.

    Returns:
        The Dataset with the features added.
    """
    data_dir = get_test_data_path()
    database_file = data_dir + '%s_dataset.txt' % prefix
    result = Dataset(database_file)
    features_file = data_dir + '%s_features.txt' % prefix
    result.add_features(features_file)
    return result
def load_dataset_instance(instance_filename):
    """Load a pickled Dataset from ``./raw_data/<instance_filename>.pkl``.

    Args:
        instance_filename: Base name of the pickle, without the ``.pkl``
            extension.

    Returns:
        The Dataset returned by ``Dataset.load``.
    """
    pickle_path = './raw_data/' + instance_filename + '.pkl'
    return Dataset.load(pickle_path)
# Script: compute correlation distances between an ROI's data and a saved
# reference, scale them, and run Ward linkage on the result. Both the
# distances and the linkage are dumped with joblib.
from neurosynth.base.dataset import Dataset
import joblib
from sklearn.metrics import pairwise_distances
from sklearn.preprocessing import scale
from neurosynth.analysis.cluster import Clusterable

dataset = Dataset.load('/projects/delavega/dbs/db_v6_topics-100.pkl')

from fastcluster import ward

# Clusterable built from the dataset and the l_70 mask image.
roi = Clusterable(
    dataset,
    '/home/delavega/projects/classification/masks/l_70_mask.nii.gz')

# Previously saved reference object; its `.data` is the comparison set.
saved_pca = '/projects/delavega/clustering/dv_v6_reference_pca.pkl'
reference = joblib.load(saved_pca)

distances = pairwise_distances(roi.data, reference.data, metric='correlation')
# Standardise along axis 1 of the distance matrix.
distances = scale(distances, axis=1)
joblib.dump(
    distances,
    '/home/delavega/projects/clustering/results/hierarchical/v6_distances_l_70_scaled.pkl'
)

Z = ward(distances)
joblib.dump(
    Z,
    '/home/delavega/projects/clustering/results/hierarchical/v6_ward_l70_scaled.pkl'
)
from neurosynth.analysis import meta base_path = '/home/pauli/Development/neurobabel/' test_data_path = base_path + 'ACE/' masker_filename = base_path + 'atlases/whs_sd/WHS_SD_rat_one_sm_v2.nii.gz' atlas_filename = base_path + 'atlases/whs_sd/WHS_SD_rat_atlas_brain_sm_v2.nii.gz' mask = nb.load(masker_filename) masker = Masker(mask) r = 1.0 # transform = {'BREGMA': transformations.bregma_to_whs()} #transform = {'BREGMA': transformations.identity()} transform = {'BREGMA': transformations.bregma_to_whs()} target = 'WHS' # load data set dataset = Dataset(os.path.join(test_data_path, 'db_bregma_export.txt'), masker=masker_filename, r=r, transform=transform, target=target) dataset.feature_table = FeatureTable(dataset) dataset.add_features(os.path.join(test_data_path, "db_bregma_features.txt")) # add features fn = dataset.get_feature_names() def get_whs_labels(filename=os.path.join(base_path, "atlases/whs_sd/WHS_SD_rat_atlas_v2.label")): ''' load the names of all labelled areas in the atlas (e.g. brainstem), return list of them ''' in_file = open(filename, 'r') lines = in_file.readlines() labels = {} for line in lines: start = line.find("\"") + 1 if start > 0: stop = line.find("\"", start) label = line[start:stop] idx = line.split()[0]
# Script: restrict the 30-cluster Ward image to voxels whose mean
# image-table value exceeds a threshold, and save the result.
from neurosynth.base.dataset import Dataset
import neurosynth.base.imageutils as it

dataset = Dataset.load("../data/datasets/abs_topics_filt.pkl")

# Parenthesised single-argument print behaves the same on Python 2 and is
# valid syntax on Python 3.
print("Filtering voxels...")
data = dataset.image_table.data.toarray()
# Keep rows (voxels) whose mean across columns exceeds the threshold.
voxel_mask = data.mean(axis=1) > 0.005

img = it.load_imgs('../masks/ward/30.nii.gz', dataset.masker)
good_voxels = img[voxel_mask]

it.save_img(good_voxels, "../masks/ward/30_masked.nii.gz", dataset.masker)
# Script: bootstrap a LassoLarsIC model over the cognitive topics for the
# medial-FC k=9 cluster labels. Usage: <script> <iterations> <job_id>
from sklearn.metrics import roc_auc_score
import sys
import csv
from base.mv import bootstrap_mv_full
from neurosynth.base.dataset import Dataset
from sklearn.linear_model import LassoLarsIC

print(sys.argv)

# Validate the command line before the dataset is loaded, so a wrong
# invocation fails immediately.
try:
    cmd, iterations, job_id = sys.argv
except ValueError:
    # Only the unpacking error is caught now; the bare except also
    # swallowed KeyboardInterrupt and SystemExit.
    raise Exception("Incorrect number of arguments")

dataset = Dataset.load("../permutation_clustering/abs_60topics_filt_jul.pkl")

# Keep the topics flagged "T" in the second column of the key file.
# NOTE: the 'rU' mode is kept for Python 2; Python 3.11+ rejects it, so
# switch to plain 'r' when porting.
with open('topic_keys60-july_cognitive.csv', 'rU') as key_file:
    cognitive_topics = ['topic' + topic[0]
                        for topic in csv.reader(key_file)
                        if topic[1] == "T"]

# NOTE(review): the output name contains a space before the job id
# ('..._60_ <job_id>.csv'); kept as-is because downstream tooling may
# expect it -- confirm whether it is intentional.
results = bootstrap_mv_full(
    dataset, LassoLarsIC(), roc_auc_score,
    '../permutation_clustering/results/medial_fc_30_kmeans/kmeans_k9/cluster_labels.nii.gz',
    features=cognitive_topics, processes=None, boot_n=int(iterations),
    outfile='results/bootstrap_full_mv_' + str(iterations) +
            '_mFC__LASSO_LARS_60_ ' + str(job_id) + '.csv')
# pass # def fit(self, X): # self.cf = MFastHCluster(method='ward') # self.cf.linkage(X) # def predict(self, n): # for i in range(1, self.cf.cut(0).shape[0]): # labels = self.cf.cut(i) # if np.bincount(labels).shape[0] == n: # break # return labels mydir = "/projects/delavega/clustering/" dataset = Dataset.load(mydir + 'abs_60topics_filt_jul.pkl') roi_mask = mydir + 'masks/new_medial_fc_30.nii.gz' ns = [3, 9] save_images = True output_dir = join(mydir, 'results/MFC/') out_model = None roi = Clusterable(dataset, roi_mask, min_studies=80) reference = Clusterable(dataset, min_studies=80) reduce_reference = sk_decomp.RandomizedPCA(100) reference = reference.transform(reduce_reference, transpose=True) # distances = pairwise_distances(roi.data, reference.data, # metric='correlation')
t = dlmread('/Users/lukechang/Dropbox/Github/toolbox/Python/ImageProcessing/tal2icbm_fsl.mat'); dat.volInfo.mat = inv(t)*dat.volInfo.mat; dat.fullpath = '/Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/Friend.nii'; write(dat) # 2) Reorient using FSL - Unix fslreorient2std Friend Friend_Or # 3) Coregister to 2mm MNI space - Unix /usr/local/fsl/bin/flirt -in /Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/Friend_Or.nii.gz -ref /usr/local/fsl/data/standard/MNI152_T1_2mm_brain -out /Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/Friend_Or_Mni.nii.gz -omat /Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/Friend_Or_Mni.mat -bins 256 -cost corratio -searchrx -90 90 -searchry -90 90 -searchrz -90 90 -dof 12 -interp trilinear # 4) Decode - Python DATASET_FILE = '/Users/lukechang/Dropbox/Github/neurosynth/topics.pkl' PREFIX = '/Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/' INFILE = 'Friend_Or_Mni.nii.gz' dataset = Dataset.load(DATASET_FILE) decoder = decode.Decoder(dataset) #takes awhile to load, should only do this once. img = imageutils.load_imgs(PREFIX + INFILE, decoder.mask) result = decoder.decode(img) np.savetxt(PREFIX + 'Friend_Decoded.txt', result) # 5) Threshold at .001 - unix fslmaths Friend_Or_Mni -thr 3 Friend_Or_Mni_001 # 6) Decode thresholded map - python DATASET_FILE = '/Users/lukechang/Dropbox/Github/neurosynth/topics.pkl' PREFIX = '/Users/lukechang/Research/Trust_Friend/Analyses/NeurosynthDecode/' INFILE = 'Friend_Or_Mni_001.nii.gz' dataset = Dataset.load(DATASET_FILE) decoder = decode.Decoder(dataset) #takes awhile to load, should only do this once. img = imageutils.load_imgs(PREFIX + INFILE, decoder.mask)
from neurosynth.base.dataset import Dataset
from neurosynth.analysis.cluster import Clusterable
from sklearn import decomposition as sk_decomp
import pickle

# Build the whole-brain reference Clusterable (no ROI mask), reduce it with a
# 100-component PCA, and cache the result as a pickle.

dataset = Dataset.load(
    '/home/delavega/projects/classification/data/datasets/abs_60topics_filt_jul.pkl'
)
out = '/projects/delavega/clustering/dv_v5_reference_min_80_pca.pkl'

reference = Clusterable(dataset, min_studies=80)

# Call form with a single argument prints the same text on Python 2 and 3.
print("Running PCA")
# NOTE(review): RandomizedPCA is not available in newer scikit-learn
# releases -- confirm the pinned version before upgrading.
reduce_reference = sk_decomp.RandomizedPCA(100)
reference = reference.transform(reduce_reference, transpose=True)

# Pickles are binary data: open with 'wb' (text mode breaks on Python 3 and
# can corrupt the stream on Windows) and let the context manager close the
# handle so the file is flushed even if dumping fails part-way.
with open(out, 'wb') as out_file:
    pickle.dump(reference, out_file)
###
# Permutation null: repeatedly shuffle the classification labels and rerun
# the classifier, collecting scores from which a confidence interval around
# the null hypothesis can be calculated.
from sklearn.linear_model import RidgeClassifier
from base.classifiers import OnevsallClassifier
from neurosynth.base.dataset import Dataset
from sklearn.metrics import roc_auc_score
import pickle
from random import shuffle


def shuffle_data(classifier):
    """Shuffle, in place, element 1 of every entry in ``classifier.c_data``."""
    for entry in classifier.c_data:
        labels = entry[1]
        shuffle(labels)


n_permutations = 500

d_abs_topics_filt = Dataset.load('../data/datasets/abs_topics_filt_july.pkl')

clf = OnevsallClassifier(
    d_abs_topics_filt,
    '../masks/Ward/50.nii.gz',
    cv='4-Fold',
    thresh=10,
    thresh_low=0,
    memsave=True,
    classifier=RidgeClassifier())
clf.load_data(None, None)
clf.initalize_containers(None, None, None)

results = []
for i in range(n_permutations):
    shuffle_data(clf)
    clf.classify(scoring=roc_auc_score, processes=8, class_weight=None)
    # Newest scores go in front of the ones collected so far.
    results[0:0] = list(clf.class_score)
    print(i),
neurosynth.set_logging_level('info') # <markdowncell> # ## Creating a new dataset # # Next, we create a Dataset, which is the core object most Neurosynth tools operate on. We initialize a Dataset by passing in a database file, which is essentially just a giant list of activation coordinates and associated study IDs. This file can be downloaded from the Neurosynth website or installed from the data submodule (see the Readme for instructions). # # Creating the object will take a few minutes on most machines, as we need to process about 200,000 activations drawn from nearly 6,000 studies. Once that's done, we also need to add some features to the Dataset. Features are just variables associated with the studies in our dataset; literally any dimension a study could be coded on can constitute a feature that Neurosynth can use. In practice, the default set of features included in the data download includes 500 psychological terms (e.g., 'language', 'emotion', 'memory', etc.) that occur with some frequency in the dataset. So when we're talking about the "emotion" feature, we're really talking about how frequently each study in the Dataset uses the word 'emotion' in the full-text of the corresponding article. # # Let's go ahead and create a dataset and add some features: # <codecell> # Create a new Dataset instance dataset = Dataset('data/database.txt') # Add some features dataset.add_features('data/features.txt') # <markdowncell> # Because this takes a while, we'll save our Dataset object to disk. That way, the next time we want to use it, we won't have to sit through the whole creation operation again: # <codecell> dataset.save('dataset.pkl') # <markdowncell> # Now in future, instead of waiting, we could just load the dataset from file:
def setUp(self):
    """Build the Dataset fixture from 'data/test_dataset.txt' and load the
    features in 'data/test_features.txt' into it."""
    fixture = Dataset('data/test_dataset.txt')
    self.dataset = fixture
    fixture.add_features('data/test_features.txt')
import csv
import sys

from sklearn.metrics import roc_auc_score
from sklearn.naive_bayes import GaussianNB

from base.mv import bootstrap_mv_full
from neurosynth.base.dataset import Dataset

# Usage: <script> <iterations> <job_id>
#   iterations -- number of bootstrap iterations (passed on as boot_n)
#   job_id     -- tag appended to the output file name

print(sys.argv)

# Validate the command line before the slow dataset load so that a malformed
# invocation fails immediately. Unpacking a list of the wrong length raises
# ValueError; nothing else is expected here, so nothing else is caught.
try:
    cmd, iterations, job_id = sys.argv
except ValueError:
    raise Exception("Incorrect number of arguments")

dataset = Dataset.load("../permutation_clustering/abs_60topics_filt_jul.pkl")

# Keep only the topics whose second CSV column is "T". The context manager
# closes the key file, which the original left open.
with open('topic_keys60-july_cognitive.csv', 'rU') as topic_file:
    cognitive_topics = [
        'topic' + topic[0] for topic in csv.reader(topic_file)
        if topic[1] == "T"
    ]

# NOTE(review): the output name contains a space after '_mFC_60_'. That looks
# accidental, but it is kept because existing result files may rely on it.
results = bootstrap_mv_full(
    dataset,
    GaussianNB(),
    roc_auc_score,
    '../permutation_clustering/results/medial_fc_30_kmeans/kmeans_k9/cluster_labels.nii.gz',
    features=cognitive_topics,
    processes=None,
    boot_n=int(iterations),
    outfile='results/bootstrap_full_mv_' + str(iterations) + '_mFC_60_ ' +
    str(job_id) + '.csv')
from sklearn.linear_model import LogisticRegression from sklearn.naive_bayes import GaussianNB, MultinomialNB from sklearn.linear_model import LassoCV, LassoLarsCV, LassoLarsIC, RidgeClassifier, RidgeClassifierCV from sklearn.ensemble import GradientBoostingClassifier from base.tools import Logger from base.pipelines import pipeline from base.classifiers import PairwiseClassifier, OnevsallClassifier from neurosynth.base.dataset import Dataset from sklearn.metrics import roc_auc_score now = datetime.datetime.now() n_topics = 60 dataset = Dataset.load('../data/0.6/datasets/db_v6_topics-%d.pkl' % n_topics) # cognitive_topics = ['topic' + topic[0] for topic in csv.reader( # open('../data/unprocessed/abstract_topics_filtered/topic_sets/topic_keys' + str(topics) + '-july_cognitive.csv', 'rU')) if topic[1] == "T"] # junk_topics = ['topic' + topic[0] for topic in csv.reader( # open('../data/unprocessed/abstract_topics_filtered/topic_sets/topic_keys' + str(topics) + '-july_cognitive.csv', 'rU')) if topic[1] == "F"] # Analyses def complete_analysis(dataset, dataset_name, name, masklist, processes=1, features=None):
xfm2vol.run()


# make masks to input into neurosynth
def cluster2masks(clusterfile):
    """Write one binary NIfTI mask per cluster label in *clusterfile*.

    For every integer label x from 1 up to the largest value in the volume,
    a mask that is 1 where the volume equals x (0 elsewhere) is saved as
    ``<clusterfile>_clustermask<x>.nii``.

    Args:
        clusterfile: path of the label volume to split.
    """
    clustermap = nb.load(clusterfile).get_data()
    # The maximum may be returned as a float; range() needs an integer.
    n_clusters = int(clustermap.max())
    for x in range(1, n_clusters + 1):
        clustermask = (clustermap == x).astype(int)
        # NOTE(review): the affine is passed as None, so the masks do not
        # carry the source image's affine -- confirm this is intended.
        nImg = nb.Nifti1Image(clustermask, None)
        nb.save(
            nImg,
            os.path.abspath(clusterfile + '_clustermask' + str(x) + '.nii'))


cluster2masks(volume_file)

# Build the Neurosynth dataset once and reuse the saved copy afterwards.
dataset_file = '/home/raid3/watanabe/neurosynth/data/dataset.pkl'
if not os.path.exists(dataset_file):
    dataset = Dataset('/home/raid3/watanabe/neurosynth/data/database.txt')
    dataset.add_features('/home/raid3/watanabe/neurosynth/data/features.txt')
    dataset.save(dataset_file)
else:
    # Dataset.load is the counterpart of Dataset.save. Reading the file with
    # a raw cPickle.load bypasses it and also left the file handle open.
    dataset = Dataset.load(dataset_file)

# Look up the studies and features for one of the masks written above.
clustermask = volume_file + '_clustermask' + str(3) + '.nii'
ids = dataset.get_ids_by_mask(clustermask)
features = dataset.feature_table.get_features_by_ids(ids)

#mri_surf2vol --identity fsaverage4 --surfval /scr/ilz1/Data/attemptsurface.nii --hemi 'lh' --o /scr/ilz1/Data/results/surf2volume.nii --template /scr/ilz1/Data/freesurfer/fsaverage4/mri/orig.mgz
if mask_img_data.shape != (91, 109, 91): resampled_roi = resample_to_img( roi, mni152_2mm, interpolation="nearest", copy=True ) resampled_file = join(map_dir, "{0}_mni2mm.nii.gz".format(file)) resampled_roi.to_filename(resampled_file) roi_files[i] = resampled_file plot_glass_brain( resampled_file, output_file=join(map_dir, "{0}_mni2mm.png".format(basename(file))), ) print("loading dataset...") tds = datetime.now() dataset = Dataset("/Users/kbottenh/Dropbox/Data/neurosynth-v0.7/database.txt") tdf = datetime.now() print("dataset loaded! only took {0}".format((tdf - tds))) for i in np.arange(0, len(mask_names)): print("{0}\nmeta-analyzing {1}...".format(datetime.now(), mask_names[i])) tmas = datetime.now() ids = dataset.get_studies( mask=roi_files[i], ) ma = meta.MetaAnalysis(dataset, ids) ma.save_results( output_dir=sink_dir, prefix=mask_names[i],
from base.classifiers import OnevsallContinuous
from neurosynth.analysis import cluster
from neurosynth.base.dataset import Dataset
from sklearn.metrics import r2_score
from sklearn.linear_model import Ridge
import cPickle
from sklearn.decomposition import RandomizedPCA

dataset = Dataset.load('../data/datasets/abs_60topics_filt_jul.pkl')

roi_mask = '../masks/mpfc_nfp.nii.gz'
global_mask = "../masks/MNI152_T1_2mm_brain.nii.gz"

n_regions = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# print resolution

clf_file = "../data/clfs/all_vox_Ridge_mpfc.pkl"

# Reuse a previously fitted classifier when one can be loaded; otherwise fit
# a new one and try to cache it.
print("trying to load")
try:
    clf = OnevsallContinuous.load(clf_file)
except Exception:
    # `except Exception` instead of a bare `except:` so Ctrl-C and
    # SystemExit are no longer mistaken for a failed load.
    print("Loading failed")
    clf = OnevsallContinuous(dataset, None, classifier=Ridge())
    clf.classify(scoring=r2_score, processes=8)
    try:
        clf.save(clf_file)
    except Exception as save_error:
        # Caching stays best-effort, but a failure is reported rather than
        # silently discarded.
        print("Saving failed: %s" % save_error)

reduc = RandomizedPCA(n_components=100)
from neurosynth.base.dataset import Dataset
from neurosynth.analysis import meta
import os

# Build the dataset from the activation database and attach the features.
dataset = Dataset('database.txt')
dataset.add_features('features.txt')

# Show which features are available.
feature_names = dataset.get_feature_names()
print(feature_names)

# Select studies matching the 'emo*' feature pattern at threshold 0.001 and
# report how many were found.
ids = dataset.get_ids_by_features('emo*', threshold=0.001)
n_studies = len(ids)
print(n_studies)

# Meta-analyze the selected studies; results are saved under 'emotion'.
ma = meta.MetaAnalysis(dataset, ids)
ma.save_results('emotion')
from neurosynth.base.dataset import Dataset
from neurosynth.analysis import decode

# Start from a previously saved Dataset (the file written by the
# create_a_new_dataset_and_load_features example works here).
dataset = Dataset.load('dataset.pkl')

# Restrict the decoder to a handful of features. Without an explicit subset
# every feature in the Dataset is loaded, which is slow because a
# meta-analysis image has to be generated for each one.
features = ['emotion', 'pain', 'somatosensory', 'wm', 'inhibition']
decoder = decode.Decoder(dataset, features=features)

# Three sample images: coactivation maps for the ventral, dorsal and
# posterior insula clusters, drawn from data reported in
# Chang, Yarkoni, Khaw, & Sanfey (2012); see the paper for details.
images = ['vIns.nii.gz', 'dIns.nii.gz', 'pIns.nii.gz']

# The output is an image x features matrix, also written to file. With the
# default settings the decoder uses Pearson correlation, so each value is
# the correlation between an input image and a feature's meta-analysis image.
result = decoder.decode(images, save='decoding_results.txt')
class Neurosynth:
    """Build and cache Neurosynth meta-analysis data for a set of concepts.

    Every step stores its result under ``datadir`` and reuses that cache on
    later calls unless ``force_load`` is set. The methods depend on state
    produced by earlier ones: the dataset and concept list are needed before
    PMIDs can be collected, PMIDs before images, and so on.
    """

    def __init__(self,
                 datadir='../data/neurosynth',
                 verbose=True,
                 ma_count_thresh=16,
                 meta_image='consistency_z',
                 resolution=3):
        """Record the settings and make sure the Neurosynth data is present.

        If ``database.txt`` is missing from *datadir*, the data is downloaded
        to /tmp and extracted into *datadir*. The ``ma_images`` directory is
        created if needed.

        Args:
            datadir: directory holding the data files and all caches.
            verbose: stored on the instance; not consulted by this class.
            ma_count_thresh: PMID-count threshold used when deciding which
                concepts get a meta-analysis image / design-matrix column.
            meta_image: suffix selecting which meta-analysis images are used
                (files matching ``*_<meta_image>.nii.gz``).
            resolution: resolution in mm used in cache file names.
        """
        self.dataset = None
        self.concepts = None
        self.concepts_df = None
        self.concept_pmids = {}
        self.datadir = datadir
        self.datafile = os.path.join(datadir, 'database.txt')
        self.verbose = verbose
        self.ma_count_thresh = ma_count_thresh
        self.meta_image = meta_image
        self.resolution = resolution
        self.imagedir_resampled = None
        self.image_concepts = None
        self.imgdata = None
        self.desmtx = None

        if not os.path.exists(self.datafile):
            print('downloading neurosynth data')
            ns.dataset.download(path='/tmp', unpack=True)
            print('extracting data')
            if not os.path.exists(self.datadir):
                # makedirs also creates missing parents (e.g. '../data').
                os.makedirs(self.datadir)
            # The context manager closes the archive before it is deleted.
            with tarfile.open("/tmp/current_data.tar.gz", 'r:gz') as tfile:
                tfile.extractall(self.datadir)
            os.remove("/tmp/current_data.tar.gz")
            print('done creating dataset in', self.datadir)

        self.imagedir = os.path.join(self.datadir, 'ma_images')
        if not os.path.exists(self.imagedir):
            os.makedirs(self.imagedir)

    def get_dataset(self, force_load=False):
        """Load the Dataset into ``self.dataset``.

        Uses the pickled ``dataset.pkl`` when it exists and *force_load* is
        false; otherwise rebuilds the Dataset from ``database.txt`` and
        ``features.txt`` and saves a new pickle.
        """
        pickle_file = os.path.join(self.datadir, 'dataset.pkl')
        if os.path.exists(pickle_file) and not force_load:
            print('loading database from', pickle_file)
            self.dataset = Dataset.load(pickle_file)
        else:
            print('loading database - this takes a few minutes')
            self.dataset = Dataset(os.path.join(self.datadir, 'database.txt'))
            self.dataset.add_features(
                os.path.join(self.datadir, 'features.txt'))
            self.dataset.save(pickle_file)

    def get_concepts(self, force_load=False):
        """Populate ``self.concepts_df`` and ``self.concepts``.

        The concept table comes from the cached ``concepts_df.csv`` when
        available (and *force_load* is false), otherwise from
        ``get_concept()``, in which case the cache is written.
        ``self.concepts`` is the table's ``name`` column as a list.
        """
        concepts_file = os.path.join(self.datadir, 'concepts_df.csv')
        if os.path.exists(concepts_file) and not force_load:
            print('using cached cognitive atlas concepts')
            self.concepts_df = pandas.read_csv(concepts_file)
        else:
            self.concepts_df = get_concept().pandas
            self.concepts_df.to_csv(concepts_file)
        self.concepts = self.concepts_df.name.tolist()

    def get_concept_pmids(self, retmax=2000000, force_load=False):
        """Find, for each concept, the Neurosynth studies PubMed returns.

        Every concept name is sent to PubMed (via Entrez) as a quoted phrase
        and the returned PMIDs are intersected with the ids in the
        Neurosynth image table. Results are stored in
        ``self.concept_pmids`` and pickled to ``concept_pmids.pkl``; that
        pickle is reused unless *force_load* is true.

        Requires ``get_dataset`` and ``get_concepts`` to have been called.

        Args:
            retmax: maximum number of records requested per search; the
                search is repeated with the reported count if it is larger.
            force_load: ignore an existing cache.
        """
        pmid_file = os.path.join(self.datadir, 'concept_pmids.pkl')
        if os.path.exists(pmid_file) and not force_load:
            print('using cached concept_pmids')
            with open(pmid_file, 'rb') as f:
                self.concept_pmids = pickle.load(f)
            return

        print('loading all neurosynth pmids')
        all_neurosynth_ids = self.dataset.image_table.ids.tolist()
        for concept in self.concepts:
            # Pause between requests to the remote service.
            time.sleep(0.5)
            handle = Entrez.esearch(db="pubmed",
                                    retmax=retmax,
                                    term='"%s"' % concept)
            record = Entrez.read(handle)
            handle.close()
            # make sure we got all the records - rerun if we didn't
            if int(record['Count']) > retmax:
                handle = Entrez.esearch(db="pubmed",
                                        retmax=int(record['Count']),
                                        term='"%s"' % concept)
                record = Entrez.read(handle)
                handle.close()
            records_int = [int(i) for i in record['IdList']]
            ns_pmids = intersect(all_neurosynth_ids, records_int)
            print('pubmed found', len(ns_pmids), 'matching pmids for',
                  concept)
            self.concept_pmids[concept] = ns_pmids

        with open(pmid_file, 'wb') as f:
            pickle.dump(self.concept_pmids, f)

    def get_concept_images(self, force_load=False):
        """Run a meta-analysis per concept and build the voxel mask.

        Concepts with fewer than ``ma_count_thresh`` PMIDs are skipped, as
        are concepts whose ``*_specificity_z.nii.gz`` image already exists
        (unless *force_load*). Afterwards ``mask_image.nii.gz`` is written
        if missing or *force_load* is set; it marks voxels whose value
        varies across the selected concept images.

        Raises:
            RuntimeError: if the mask must be built but no concept images
                matching ``meta_image`` exist.
        """
        for c in self.concept_pmids.keys():
            prefix = c.replace(' ', '-')
            if not force_load and os.path.exists(
                    os.path.join(self.imagedir,
                                 '%s_specificity_z.nii.gz' % prefix)):
                continue
            if len(self.concept_pmids[c]) < self.ma_count_thresh:
                continue
            print('running meta-analysis for', c)
            ma = meta.MetaAnalysis(self.dataset, self.concept_pmids[c])
            ma.save_results(self.imagedir, prefix)

        mask_file = os.path.join(self.imagedir, 'mask_image.nii.gz')
        if force_load or not os.path.exists(mask_file):
            concept_images = glob.glob(
                os.path.join(self.imagedir, '*_%s.nii.gz' % self.meta_image))
            if not concept_images:
                # Previously this fell through and failed while reading the
                # affine from a stale loop variable.
                raise RuntimeError(
                    'no concept images matching *_%s.nii.gz in %s' %
                    (self.meta_image, self.imagedir))
            # The (91, 109, 91) volume size is hard-coded.
            imgdata = numpy.zeros((91, 109, 91, len(concept_images)))
            print('loading concept images to compute std')
            for i, image_file in enumerate(concept_images):
                imgdata[:, :, :, i] = nibabel.load(image_file).get_data()
            # Keep voxels with non-zero standard deviation across images;
            # constant voxels are masked out.
            imgstd = numpy.std(imgdata, axis=3)
            maskdata = (imgstd > 0).astype('int')
            # Take the affine from the last image, named explicitly instead
            # of relying on the loop variable surviving the loop.
            reference_affine = nibabel.load(concept_images[-1]).affine
            maskimg = nibabel.Nifti1Image(maskdata, affine=reference_affine)
            maskimg.to_filename(mask_file)

    def get_resampled_images(self, shape=None, affine=None, force_load=False):
        """Resample the concept images (and an MNI brain mask) to a new grid.

        Args:
            shape: target shape. When omitted, the 3 mm default grid
                ([60, 72, 60] with its matching affine) is used and any
                *affine* passed is ignored, as before.
            affine: 4x4 target affine; required whenever *shape* is given.
            force_load: resample images even if the output already exists.

        Raises:
            ValueError: if *shape* is given without *affine*.
        """
        # use 3 mm as default
        if not shape:
            shape = [60, 72, 60]
            affine = numpy.array([[-3, 0, 0, 90], [0, 3, 0, -126],
                                  [0, 0, 3, -72], [0, 0, 0, 1]])
        elif affine is None:
            # Previously this crashed with an opaque TypeError on affine[1, 1].
            raise ValueError('affine must be provided together with shape')
        # Accept nested lists as well as arrays.
        affine = numpy.asarray(affine)

        # The resolution is read off the y-axis voxel size of the affine.
        self.resolution = int(affine[1, 1])
        print('resampling data to %d mm' % self.resolution)
        self.imagedir_resampled = os.path.join(
            self.datadir, 'ma_images_%dmm' % self.resolution)
        if not os.path.exists(self.imagedir_resampled):
            os.mkdir(self.imagedir_resampled)

        concept_images = glob.glob(
            os.path.join(self.imagedir, '*_%s.nii.gz' % self.meta_image))
        for c in concept_images:
            resampled_file = os.path.join(self.imagedir_resampled,
                                          os.path.basename(c))
            if force_load or not os.path.exists(resampled_file):
                img = nilearn.image.resample_img(c,
                                                 target_affine=affine,
                                                 target_shape=shape)
                img.to_filename(resampled_file)

        mask_file = os.path.join(self.datadir,
                                 'mask_%dmm.nii.gz' % self.resolution)
        if not os.path.exists(mask_file):
            # make MNI mask at chosen resolution (needs $FSLDIR to be set)
            mask = os.path.join(
                os.environ['FSLDIR'],
                'data/standard/MNI152_T1_2mm_brain_mask.nii.gz')
            maskimg = nilearn.image.resample_img(mask,
                                                 target_affine=affine,
                                                 target_shape=shape)
            maskimg.to_filename(mask_file)

    def load_concept_images(self, force_load=True):
        """Load the resampled concept images into ``self.imgdata``.

        Also sets ``self.image_concepts`` to the part of each image's file
        name before the first underscore. A cached ``imgdata_<res>mm.npy``
        is used when it passes the size check below; otherwise the images
        are masked with ``mask_<res>mm.nii.gz`` and the cache is rewritten.

        Args:
            force_load: accepted for symmetry with the other methods but
                currently ignored -- NOTE(review): confirm whether it should
                bypass the cache.
        """
        concept_images = glob.glob(
            os.path.join(self.imagedir_resampled,
                         '*_%s.nii.gz' % self.meta_image))
        concept_images.sort()
        self.image_concepts = [
            os.path.basename(i).split('_')[0] for i in concept_images
        ]

        cache_file = os.path.join(self.datadir,
                                  'imgdata_%dmm.npy' % self.resolution)
        if os.path.exists(cache_file):
            self.imgdata = numpy.load(cache_file)
            # make sure it's the right size
            # NOTE(review): this compares axis 1 with the image count; if the
            # masker output is (images x voxels) the check should use axis 0
            # -- verify.
            if self.imgdata.shape[1] == len(concept_images):
                print('using cached concept image data')
                return

        # NOTE(review): target grid is fixed at the 3 mm default even when
        # self.resolution differs -- confirm.
        masker = nilearn.input_data.NiftiMasker(
            mask_img=os.path.join(self.datadir,
                                  'mask_%dmm.nii.gz' % self.resolution),
            target_shape=[60, 72, 60],
            target_affine=numpy.array([[-3, 0, 0, 90], [0, 3, 0, -126],
                                       [0, 0, 3, -72], [0, 0, 0, 1]]))
        print('loading concept image data')
        self.imgdata = masker.fit_transform(concept_images)
        numpy.save(cache_file, self.imgdata)

    def save(self):
        """Pickle this instance to ``<datadir>/neurovault_<res>mm.pkl``."""
        with open('%s/neurovault_%dmm.pkl' % (self.datadir, self.resolution),
                  'wb') as f:
            pickle.dump(self, f)

    def build_design_matrix(self, force_load=False):
        """Build the PMID x concept indicator matrix in ``self.desmtx``.

        Rows are all PMIDs found in ``self.concept_pmids`` (sorted), columns
        are concepts, and a cell is 1 when the PMID belongs to the concept.
        Columns whose total is not greater than ``ma_count_thresh`` are
        dropped. The result is cached in ``desmtx.csv`` and reused unless
        *force_load* is true.
        """
        desmtx_file = os.path.join(self.datadir, 'desmtx.csv')
        if not force_load and os.path.exists(desmtx_file):
            # DataFrame.from_csv no longer exists in pandas; read_csv with
            # index_col=0 restores the PMID index written by to_csv.
            self.desmtx = pandas.read_csv(desmtx_file, index_col=0)
            print('using cached design matrix')
            return

        print('building design matrix')
        unique_pmids = set()
        for pmids in self.concept_pmids.values():
            unique_pmids.update(pmids)
        all_concept_pmids = sorted(unique_pmids)
        all_concepts = list(self.concept_pmids.keys())
        self.desmtx = pandas.DataFrame(data=0,
                                       index=all_concept_pmids,
                                       columns=all_concepts)

        for k, pmids in self.concept_pmids.items():
            # Single .loc assignment instead of chained indexing, which may
            # write to a temporary copy.
            self.desmtx.loc[pmids, k] = 1

        # drop columns with too few matches
        # NOTE(review): this uses '>' while get_concept_images keeps concepts
        # with '>=' ma_count_thresh PMIDs -- confirm which is intended.
        # (.ix no longer exists in pandas; .loc accepts the boolean mask.)
        self.desmtx = self.desmtx.loc[:,
                                      self.desmtx.sum() > self.ma_count_thresh]
        self.desmtx.to_csv(desmtx_file)
# path of WHS atlas files
resource_dir = path.join(path.pardir, 'resources')

# make sure we have the data
dataset_dir = path.join(path.expanduser('~'), 'Documents', 'neurosynth-data')
database_path = path.join(dataset_dir, 'database_bregma.txt')
neurosynth_data_url = 'https://github.com/wmpauli/neurosynth-data'
if not path.exists(database_path):
    print("Please download dataset from %s and store it in %s" %
          (neurosynth_data_url, dataset_dir))
    # Stop here: without the database the Dataset constructor below can only
    # fail, and that later error would bury the instructions just printed.
    raise SystemExit(1)

# load dataset, both image table and feature table
r = 1.0  # 1mm smoothing kernel
transform = {'BREGMA': transformations.bregma_to_whs()}
target = 'WHS'
masker_filename = path.join(resource_dir, 'WHS_SD_rat_brainmask_sm_v2.nii.gz')
# Reuse the path that was just checked instead of rebuilding it.
dataset = Dataset(database_path,
                  masker=masker_filename,
                  r=r,
                  transform=transform,
                  target=target)
dataset.feature_table = FeatureTable(dataset)
dataset.add_features(path.join(dataset_dir,
                               "features_bregma.txt"))  # add features
fn = dataset.get_feature_names()

# get the ids of studies where this feature occurs
# (`feature` is expected to be defined earlier in the script)
ids = dataset.get_ids_by_features(('%s*' % feature), threshold=0.1)
ma = meta.MetaAnalysis(dataset, ids)
results_path = path.join('results', 'meta', feature)
if not path.exists(results_path):
    makedirs(results_path)

print("saving results to: %s" % results_path)
ma.save_results(results_path)

# note, figure 2 of the manuscript was produced by plotting the z-score
# statistical maps for forward inference (pAgF_z.nii.gz) and reverse
# inference (pFgA_z.nii.gz)
resampled_roi = resample_to_img(roi, mni152_2mm, interpolation='nearest', copy=True) resampled_file = join(map_dir, '{0}_mni2mm.nii.gz'.format(file)) resampled_roi.to_filename(resampled_file) roi_files[i] = resampled_file plot_glass_brain(resampled_file, output_file=join( map_dir, '{0}_mni2mm.png'.format(basename(file)))) # In[16]: print('loading dataset...') tds = datetime.now() dataset = Dataset('/Users/Katie/Dropbox/Data/neurosynth-v0.7/database.txt') dataset.add_features('/Users/Katie/Dropbox/Data/neurosynth-v0.7/features.txt') tdf = datetime.now() print('dataset loaded! only took {0}'.format((tdf - tds))) for i in np.arange(0, len(mask_names)): print('{0}\nmeta-analyzing {1}...'.format(datetime.now(), mask_names[i])) tmas = datetime.now() ids = dataset.get_studies(mask=roi_files[i], ) ma = meta.MetaAnalysis(dataset, ids) ma.save_results( output_dir=sink_dir, prefix=mask_names[i], image_list=['association-test_z', 'association-test_z_FDR_0.01']) tmaf = datetime.now()