def test_parcel_feature_multi_subj():
    """Test the parcellation feature interface with multiple subjects.

    A Ward parcellation of random data is computed for each subject, the
    labellings are stacked column-wise, and features of increasing
    dimension are attached to the resulting multi-subject parcellation.
    """
    # prepare some data
    shape = (5, 5, 5)
    nb_parcel = 10
    nb_subj = 5
    v = []
    for _ in range(nb_subj):
        data = np.random.randn(np.prod(shape))
        domain = grid_domain_from_array(np.ones(shape))
        g = field_from_coo_matrix_and_data(domain.topology, data)
        u, _ = g.ward(nb_parcel)
        v.append(u)

    # one column of labels per subject; the last subject's labelling `u`
    # is reused as the template
    v = np.array(v).T
    msp = MultiSubjectParcellation(domain, u, v)

    # test a multi_dimensional feature
    # dimension 1
    msp.make_feature('data', np.random.randn(np.prod(shape), nb_subj))
    assert msp.get_feature('data').shape == (nb_parcel, nb_subj)

    # dimension > 1
    dim = 4
    msp.make_feature('data', np.random.randn(np.prod(shape), nb_subj, dim))
    assert msp.get_feature('data').shape == (nb_parcel, nb_subj, dim)

    # msp.features['data'] has been overridden, so there is still one key.
    # list() is required: on Python 3 ``keys()`` is a view object that never
    # compares equal to a list.
    assert list(msp.features.keys()) == ['data']
def test_parcel_feature_multi_subj():
    """Check make_feature / get_feature on a multi-subject parcellation."""
    grid_shape = (5, 5, 5)
    n_voxels = np.prod(grid_shape)
    nb_parcel, nb_subj = 10, 5

    # one Ward labelling of random data per subject
    per_subject_labels = []
    for _ in range(nb_subj):
        values = np.random.randn(n_voxels)
        domain = grid_domain_from_binary_array(np.ones(grid_shape))
        field = field_from_coo_matrix_and_data(domain.topology, values)
        labels, _inertia = field.ward(nb_parcel)
        per_subject_labels.append(labels)

    # subjects along the columns; the last subject's domain and labels are
    # the ones handed over as template
    individual_labels = np.array(per_subject_labels).T
    msp = MultiSubjectParcellation(domain, labels, individual_labels)

    # feature with one value per voxel and subject
    msp.make_feature('data', np.random.randn(n_voxels, nb_subj))
    reduced = msp.get_feature('data')
    assert reduced.shape == (nb_parcel, nb_subj)

    # feature with several values per voxel and subject
    dim = 4
    msp.make_feature('data', np.random.randn(n_voxels, nb_subj, dim))
    reduced = msp.get_feature('data')
    assert reduced.shape == (nb_parcel, nb_subj, dim)

    # the second make_feature call replaced 'data' rather than adding a key
    feature_names = list(msp.features.keys())
    assert feature_names == ['data']
def HROI_from_watershed(domain, data, threshold=NINF):
    """Build a HierarchicalROI from the watershed of a data field.

    Parameters
    ----------
    domain : discrete_domain.StructuredDomain instance
        Definition of the spatial context.
    data : array of shape (domain.size)
        The corresponding data field.
    threshold : float, optional
        Thresholding level handed to the watershed.

    Returns
    -------
    nroi : ``HierarchicalROI`` instance
        Carries a ``seed`` ROI feature holding the indices returned by the
        watershed. When `threshold` is above the data maximum, an empty
        structure is returned instead (all labels -1, no parents, no
        ``seed`` feature).
    """
    nothing_above_threshold = threshold > data.max()
    if nothing_above_threshold:
        # return an empty HROI structure
        return HierarchicalROI(domain, -np.ones(data.shape), np.array([]))

    field = field_from_coo_matrix_and_data(domain.topology, data)
    seeds, labels = field.custom_watershed(0, threshold)

    # parents[i] == i for every region
    hroi = HierarchicalROI(domain, labels, np.arange(seeds.size).astype(int))
    hroi.set_roi_feature('seed', seeds)
    return hroi
def HROI_from_watershed(domain, data, threshold=NINF):
    """Instantiate a HierarchicalROI as the watershed of a dataset.

    Parameters
    ----------
    domain : discrete_domain.StructuredDomain instance
        Definition of the spatial context.
    data : array of shape (domain.size)
        The corresponding data field.
    threshold : float, optional
        Thresholding level.

    Returns
    -------
    hroi : HierarchicalROI instance
        The regions found by the watershed, with a `seed` ROI feature.
        If `threshold` exceeds ``data.max()`` the structure is empty and
        has no `seed` feature.
    """
    if threshold > data.max():
        # empty HROI structure: every point labelled -1, no region at all
        background = -np.ones(data.shape)
        no_parents = np.array([])
        return HierarchicalROI(domain, background, no_parents)

    watershed_field = field_from_coo_matrix_and_data(domain.topology, data)
    seed_idx, region_label = watershed_field.custom_watershed(0, threshold)

    # one entry per seed, each region being registered as its own parent
    self_parents = np.arange(seed_idx.size).astype(int)
    hroi = HierarchicalROI(domain, region_label, self_parents)
    hroi.set_roi_feature('seed', seed_idx)
    return hroi
def test_parcel_interface():
    """Check the basic attributes of a single-subject parcellation."""
    grid_shape = (5, 5, 5)
    nb_parcel = 10

    # Ward parcellation of a random field defined on a full grid
    values = np.random.randn(np.prod(grid_shape))
    domain = grid_domain_from_array(np.ones(grid_shape))
    field = field_from_coo_matrix_and_data(domain.topology, values)
    labels, _inertia = field.ward(nb_parcel)

    # number of points carrying each label
    expected_population = np.array(
        [np.sum(labels == parcel) for parcel in range(nb_parcel)])

    # the same labelling serves as template and as individual labels
    msp = MultiSubjectParcellation(domain, labels, labels)
    assert msp.nb_parcel == nb_parcel
    assert msp.nb_subj == 1
    population = msp.population().ravel()
    assert (population == expected_population).all()
def test_parcel_interface():
    """Smoke-test the parcellation interface on one subject."""
    # random data on a fully populated 5 x 5 x 5 grid
    dims = (5, 5, 5)
    nb_parcel = 10
    signal = np.random.randn(np.prod(dims))
    domain = grid_domain_from_binary_array(np.ones(dims))
    graph_field = field_from_coo_matrix_and_data(domain.topology, signal)
    ward_labels, _ = graph_field.ward(nb_parcel)

    # reference parcel sizes, counted directly from the labels
    parcel_sizes = []
    for parcel in range(nb_parcel):
        parcel_sizes.append(np.sum(ward_labels == parcel))
    parcel_sizes = np.array(parcel_sizes)

    # instantiate a parcellation whose template is its only subject
    msp = MultiSubjectParcellation(domain, ward_labels, ward_labels)
    assert msp.nb_parcel == nb_parcel
    assert msp.nb_subj == 1
    assert (msp.population().ravel() == parcel_sizes).all()
def HROI_as_discrete_domain_blobs(domain, data, threshold=NINF, smin=0,
                                  criterion='size'):
    """Build a HierarchicalROI from the blob decomposition of a data field.

    Parameters
    ----------
    domain : discrete_domain.StructuredDomain instance
        Definition of the spatial context.
    data : array of shape (domain.size)
        The corresponding data field.
    threshold : float, optional
        Thresholding level.
    smin : float, optional
        A threshold on the criterion, forwarded to the agglomeration step.
    criterion : string, optional
        To be chosen among 'size' or 'volume'.

    Returns
    -------
    nroi : HierarchicalROI instance
        With a `signal` feature, unless `threshold` is above the data
        maximum, in which case an empty structure is returned.
    """
    if threshold > data.max():
        # return an empty HROI structure
        return HierarchicalROI(domain, -np.ones(data.shape), np.array([]))

    field = field_from_coo_matrix_and_data(domain.topology, data)
    _, parents, label = field.threshold_bifurcations(th=threshold)
    hroi = HierarchicalROI(domain, label, parents)

    # create a signal feature: the data values selected for each region id
    flat_data = np.ravel(data)
    signal = []
    for roi_id in hroi.get_id():
        signal.append(flat_data[hroi.select_id(roi_id, roi=False)])
    hroi.set_feature('signal', signal)

    # agglomerate regions in order to compact the structure if necessary
    return hroi_agglomeration(hroi, criterion=criterion, smin=smin)
def HROI_as_discrete_domain_blobs(domain, data, threshold=NINF, smin=0,
                                  criterion='size'):
    """Instantiate a HierarchicalROI as the blob decomposition of data
    in a certain domain.

    Parameters
    ----------
    domain : discrete_domain.StructuredDomain instance
        Definition of the spatial context.
    data : array of shape (domain.size)
        The corresponding data field.
    threshold : float, optional
        Thresholding level.
    criterion : string, optional
        To be chosen among 'size' or 'volume'.
    smin : float, optional
        A threshold on the criterion.

    Returns
    -------
    nroi : HierarchicalROI instance with a `signal` feature.
        An empty instance (labels all -1, no parents) when no data value
        reaches `threshold`.
    """
    above_all_data = threshold > data.max()
    if above_all_data:
        empty_label = -np.ones(data.shape)
        empty_parents = np.array([])
        return HierarchicalROI(domain, empty_label, empty_parents)

    # hierarchical decomposition of the thresholded field
    blob_field = field_from_coo_matrix_and_data(domain.topology, data)
    bifurcations = blob_field.threshold_bifurcations(th=threshold)
    _idx, parents, label = bifurcations
    nroi = HierarchicalROI(domain, label, parents)

    # per-region signal, read from the flattened data
    values = np.ravel(data)
    per_region = [values[nroi.select_id(region, roi=False)]
                  for region in nroi.get_id()]
    nroi.set_feature('signal', per_region)

    # compact the structure if necessary
    compacted = hroi_agglomeration(nroi, criterion=criterion, smin=smin)
    return compacted
def HROI_as_discrete_domain_blobs(domain, data, threshold=NINF, smin=0,
                                  rid='', criterion='size'):
    """Instantiate a HierarchicalROI as the blob decomposition of data
    in a certain domain.

    Parameters
    ----------
    domain : discrete_domain.StructuredDomain instance
        Definition of the spatial context.
    data : array of shape (domain.size)
        The corresponding data field.
    threshold : float, optional
        Thresholding level.
    smin : float, optional
        A threshold on region size or cardinality, forwarded to the
        agglomeration step.
    rid : string, optional
        A region identifier, forwarded to ``HierarchicalROI``.
    criterion : string, optional
        Agglomeration criterion, forwarded to ``hroi_agglomeration``.

    Returns
    -------
    nroi : HierarchicalROI instance
        Empty (labels all -1, no parents) when `threshold` is above the
        data maximum; otherwise it has had a `signal` feature made from
        the data before agglomeration.
    """
    if threshold > data.max():
        return HierarchicalROI(domain, -np.ones(data.shape), np.array([]),
                               rid=rid)

    field = field_from_coo_matrix_and_data(domain.topology, data)
    _, parents, label = field.threshold_bifurcations(th=threshold)
    hroi = HierarchicalROI(domain, label, parents, rid=rid)

    # Create a signal feature from the data laid out as a single column
    column_shape = (np.size(data), 1)
    hroi.make_feature('signal', np.reshape(data, column_shape))

    # agglomerate regions in order to compact the structure if necessary
    return hroi_agglomeration(hroi, criterion=criterion, smin=smin)
def test_parcel_interface_multi_subj():
    """Check the parcellation interface with several subjects."""
    grid_shape = (5, 5, 5)
    n_voxels = np.prod(grid_shape)
    nb_parcel, nb_subj = 10, 5

    # one Ward labelling of random data per subject
    per_subject = []
    for _ in range(nb_subj):
        values = np.random.randn(n_voxels)
        domain = grid_domain_from_binary_array(np.ones(grid_shape))
        field = field_from_coo_matrix_and_data(domain.topology, values)
        labels, _inertia = field.ward(nb_parcel)
        per_subject.append(labels)
    individual_labels = np.array(per_subject).T

    # expected population: per parcel (rows), counts along axis 0 of the
    # stacked labels
    expected = np.array([np.sum(individual_labels == parcel, 0)
                         for parcel in range(nb_parcel)])

    # the last subject's domain and labelling serve as the template
    msp = MultiSubjectParcellation(domain, labels, individual_labels)
    assert msp.nb_parcel == nb_parcel
    assert msp.nb_subj == nb_subj
    assert (msp.population() == expected).all()
def test_parcel_interface_multi_subj():
    """Exercise nb_parcel, nb_subj and population() with five subjects."""
    # prepare some data
    dims = (5, 5, 5)
    nb_parcel = 10
    nb_subj = 5

    stacked = []
    for _subject in range(nb_subj):
        signal = np.random.randn(np.prod(dims))
        domain = grid_domain_from_array(np.ones(dims))
        graph_field = field_from_coo_matrix_and_data(domain.topology, signal)
        ward_labels, _ = graph_field.ward(nb_parcel)
        stacked.append(ward_labels)
    # transpose so that each column holds one subject's labels
    stacked = np.array(stacked).T

    # reference counts, built parcel by parcel
    counts = []
    for parcel in range(nb_parcel):
        counts.append(np.sum(stacked == parcel, 0))
    counts = np.array(counts)

    # instantiate a parcellation (template = labels of the last subject)
    msp = MultiSubjectParcellation(domain, ward_labels, stacked)
    assert msp.nb_parcel == nb_parcel
    assert msp.nb_subj == nb_subj
    assert (msp.population() == counts).all()
def test_parcel_feature():
    """Simply test the parcellation feature interface (single subject).

    Covers feature creation, the shape of the feature read back, feature
    preservation through ``copy()``, and a multi-dimensional feature.
    """
    # prepare some data
    shape = (5, 5, 5)
    nb_parcel = 10
    data = np.random.randn(np.prod(shape), 1)
    domain = grid_domain_from_binary_array(np.ones(shape))
    g = field_from_coo_matrix_and_data(domain.topology, data)
    u, _ = g.ward(nb_parcel)

    # instantiate a parcellation
    msp = MultiSubjectParcellation(domain, u, u)
    msp.make_feature('data', data)
    assert msp.get_feature('data').shape == (nb_parcel, 1)

    # test with a copy: the copy must expose the same feature values as the
    # original (comparing msp2 with itself could never fail)
    msp2 = msp.copy()
    assert (msp2.get_feature('data') == msp.get_feature('data')).all()

    # test a multi_dimensional feature
    dim = 4
    msp.make_feature('new', np.random.randn(np.prod(shape), 1, dim))
    assert msp.get_feature('new').shape == (nb_parcel, 1, dim)
def test_parcel_feature():
    """Simply test the parcellation feature interface (single subject).

    Covers feature creation, the shape of the feature read back, feature
    preservation through ``copy()``, and a multi-dimensional feature.
    """
    # prepare some data
    shape = (5, 5, 5)
    nb_parcel = 10
    data = np.random.randn(np.prod(shape), 1)
    domain = grid_domain_from_array(np.ones(shape))
    g = field_from_coo_matrix_and_data(domain.topology, data)
    u, _ = g.ward(nb_parcel)

    # instantiate a parcellation
    msp = MultiSubjectParcellation(domain, u, u)
    msp.make_feature('data', data)
    assert msp.get_feature('data').shape == (nb_parcel, 1)

    # test with a copy: the copy must expose the same feature values as the
    # original (comparing msp2 with itself could never fail)
    msp2 = msp.copy()
    assert (msp2.get_feature('data') == msp.get_feature('data')).all()

    # test a multi_dimensional feature
    dim = 4
    msp.make_feature('new', np.random.randn(np.prod(shape), 1, dim))
    assert msp.get_feature('new').shape == (nb_parcel, 1, dim)
def HROI_from_watershed(domain, data, threshold=NINF, rid=''):
    """Instantiate a HierarchicalROI as the watershed of a certain dataset.

    Parameters
    ----------
    domain : discrete_domain.StructuredDomain instance
        Definition of the spatial context.
    data : array of shape (domain.size)
        The corresponding data field.
    threshold : float, optional
        Thresholding level.
    rid : string, optional
        Identifier forwarded to ``HierarchicalROI``.

    Returns
    -------
    nroi : HierarchicalROI instance
        Carries a `seed` ROI feature holding the indices returned by the
        watershed; empty (and without that feature) when `threshold` is
        above the data maximum.

    Notes
    -----
    FIXME (kept from the original authors): should be a sub-domain (?)
    """
    if threshold > data.max():
        return HierarchicalROI(domain, -np.ones(data.shape), np.array([]),
                               rid=rid)

    field = field_from_coo_matrix_and_data(domain.topology, data)
    seeds, labels = field.custom_watershed(0, threshold)

    hroi = HierarchicalROI(domain, labels, np.arange(seeds.size), rid=rid)
    # custom ROI feature: the watershed indices, stored under 'seed'
    hroi.set_roi_feature('seed', seeds)
    return hroi
def make_parcellation_surf_from_files(beta_files, mesh_file,
                                      parcellation_file, nbparcel, method,
                                      mu=10., verbose=0):
    """Parcellate a surface mesh from functional textures and coordinates.

    The clustering features are the functional values read from
    `beta_files`, stacked with the domain coordinates scaled by
    ``mu / np.std(coord)``.

    Parameters
    ----------
    beta_files : list of paths
        Texture files; the first item returned by ``read_texture`` is used
        for each.
    mesh_file : path
        Mesh from which the domain is built.
    parcellation_file : path or None
        Where the label texture is written; nothing is written if None.
    nbparcel : int
        Number of desired parcels.
    method : {'ward', 'gkm', 'ward_and_gkm', 'kmeans'}
        'kmeans': plain k-means on the features;
        'ward': Ward's clustering on the domain topology;
        'gkm': geodesic k-means from randomly drawn seeds;
        'ward_and_gkm': geodesic k-means initialized by Ward's labels.
    mu : float, optional
        Relative weight of the coordinates in the features.
    verbose : int, optional
        If non-zero, print parcel variances and the output path.

    Returns
    -------
    u : array
        The parcel label returned by the clustering for each domain point.
    label_image : path or None
        `parcellation_file`, returned unchanged.

    Raises
    ------
    ValueError
        If `method` is not one of the supported names.
    """
    if method not in ['ward', 'gkm', 'ward_and_gkm', 'kmeans']:
        raise ValueError('unknown method')

    # step 1: load the data ----------------------------
    # 1.1 the domain
    logger.info('domain from mesh: %s', mesh_file)
    domain = domain_from_mesh(mesh_file)
    coord = domain.coord

    # 1.2 read the functional data (transposed so files end up last)
    beta = np.array([read_texture(b)[0] for b in beta_files]).T
    logger.info('beta: %s', str(beta.shape))
    scaled_coord = mu * coord / np.std(coord)
    logger.info('mu * coord / np.std(coord): %s', scaled_coord.shape)
    feature = np.hstack((beta, scaled_coord))

    # step 2: parcellate the data ---------------------------
    # Strings are compared by value (== / !=): ``is not 'kmeans'`` relied on
    # string interning and would build the graph for no reason otherwise.
    if method == 'kmeans':
        _, u, _ = kmeans(feature, nbparcel)
    else:
        g = field_from_coo_matrix_and_data(domain.topology, feature)
        if method == 'ward':
            u, _ = g.ward(nbparcel)
        elif method == 'gkm':
            seeds = np.argsort(np.random.rand(g.V))[:nbparcel]
            _, u, _ = g.geodesic_kmeans(seeds)
        else:  # 'ward_and_gkm'
            w, _ = g.ward(nbparcel)
            _, u, _ = g.geodesic_kmeans(label=w)

    lpa = SubDomains(domain, u, 'parcellation')

    if verbose:
        # per-parcel variances, averaged with the parcel sizes as weights
        var_beta = np.array(
            [np.var(beta[lpa.label == k], 0).sum() for k in range(lpa.k)])
        var_coord = np.array(
            [np.var(coord[lpa.label == k], 0).sum() for k in range(lpa.k)])
        size = lpa.get_size()
        vf = np.dot(var_beta, size) / size.sum()
        va = np.dot(var_coord, size) / size.sum()
        # single-argument print(): same output on Python 2 and Python 3
        print('%s functional variance %s anatomical variance %s'
              % (nbparcel, vf, va))

    # step 3: write the resulting label image
    label_image = parcellation_file
    if label_image is not None:
        write_texture(u.astype(np.int32), label_image)
        if verbose:
            print("Wrote the parcellation images as %s" % label_image)

    return u, label_image
def make_parcellation_surf_from_files(beta_files, mesh_file,
                                      parcellation_file, nbparcel, method,
                                      mu=10., verbose=0):
    """Parcellate a surface mesh from functional textures and coordinates.

    The clustering features are the functional values read from
    `beta_files`, stacked with the domain coordinates scaled by
    ``mu / np.std(coord)``.

    Parameters
    ----------
    beta_files : list of paths
        Texture files; the first item returned by ``read_texture`` is used
        for each.
    mesh_file : path
        Mesh from which the domain is built.
    parcellation_file : path or None
        Where the label texture is written; nothing is written if None.
    nbparcel : int
        Number of desired parcels.
    method : {'ward', 'gkm', 'ward_and_gkm', 'kmeans'}
        'kmeans': plain k-means on the features;
        'ward': Ward's clustering on the domain topology;
        'gkm': geodesic k-means from randomly drawn seeds;
        'ward_and_gkm': geodesic k-means initialized by Ward's labels.
    mu : float, optional
        Relative weight of the coordinates in the features.
    verbose : int, optional
        If non-zero, print parcel variances and the output path.

    Returns
    -------
    u : array
        The parcel label returned by the clustering for each domain point.
    label_image : path or None
        `parcellation_file`, returned unchanged.

    Raises
    ------
    ValueError
        If `method` is not one of the supported names.
    """
    if method not in ['ward', 'gkm', 'ward_and_gkm', 'kmeans']:
        raise ValueError('unknown method')

    # step 1: load the data ----------------------------
    # 1.1 the domain
    pyhrf.verbose(3, 'domain from mesh: %s' % mesh_file)
    domain = domain_from_mesh(mesh_file)
    coord = domain.coord

    # 1.2 read the functional data (transposed so files end up last)
    beta = np.array([read_texture(b)[0] for b in beta_files]).T
    pyhrf.verbose(3, 'beta: %s' % str(beta.shape))
    scaled_coord = mu * coord / np.std(coord)
    # str() is required: '%s' % shape unpacks the shape tuple and raises
    # TypeError as soon as the array has more than one dimension
    pyhrf.verbose(3, 'mu * coord / np.std(coord): %s'
                  % str(scaled_coord.shape))
    feature = np.hstack((beta, scaled_coord))

    # step 2: parcellate the data ---------------------------
    # Strings are compared by value (== / !=): ``is not 'kmeans'`` relied on
    # string interning and would build the graph for no reason otherwise.
    if method == 'kmeans':
        _, u, _ = kmeans(feature, nbparcel)
    else:
        g = field_from_coo_matrix_and_data(domain.topology, feature)
        if method == 'ward':
            u, _ = g.ward(nbparcel)
        elif method == 'gkm':
            seeds = np.argsort(np.random.rand(g.V))[:nbparcel]
            _, u, _ = g.geodesic_kmeans(seeds)
        else:  # 'ward_and_gkm'
            w, _ = g.ward(nbparcel)
            _, u, _ = g.geodesic_kmeans(label=w)

    lpa = SubDomains(domain, u, 'parcellation')

    if verbose:
        # per-parcel variances, averaged with the parcel sizes as weights
        var_beta = np.array(
            [np.var(beta[lpa.label == k], 0).sum() for k in range(lpa.k)])
        var_coord = np.array(
            [np.var(coord[lpa.label == k], 0).sum() for k in range(lpa.k)])
        size = lpa.get_size()
        vf = np.dot(var_beta, size) / size.sum()
        va = np.dot(var_coord, size) / size.sum()
        # single-argument print(): same output on Python 2 and Python 3
        print('%s functional variance %s anatomical variance %s'
              % (nbparcel, vf, va))

    # step 3: write the resulting label image
    label_image = parcellation_file
    if label_image is not None:
        write_texture(u.astype(np.int32), label_image)
        if verbose:
            print("Wrote the parcellation images as %s" % label_image)

    return u, label_image
def bsa_dpmm(bf, gf0, sub, gfc, dmax, thq, ths, verbose=0):
    """Estimation of the population level model of activation density
    using dpmm and inference

    Parameters
    ----------
    bf : list of nipy.labs.spatial_models.hroi.HierarchicalROI instances
        representing individual ROIs.
        Let nr be the number of terminal regions across subjects.
    gf0 : list of arrays, concatenated to shape (nr)
        the mixture-based prior probability
        that the terminal regions are true positives
    sub : list of arrays, concatenated to shape (nr)
        the subject index associated with the terminal regions
    gfc : list of arrays, concatenated to shape (nr, coord.shape[1])
        the coordinates of the terminal regions
    dmax : float > 0
        expected cluster std in the common space in units of coord
    thq : float in the [0, 1] interval
        p-value of the prevalence test
    ths : float in the range [0, nsubj]
        null hypothesis on region prevalence that is rejected during
        inference
    verbose : int, optional
        verbosity mode

    Returns
    -------
    crmap : array of shape (nnodes)
        the resulting group-level labelling of the space
    LR : instance of sbf.LandmarkRegions
        describes the ROIs found in inter-subject inference.
        If no such thing can be defined LR is set to None.
    bf : list of HierarchicalROI instances
        the input list; its non-empty items receive 'posterior_proba',
        'prior_proba' and 'label' ROI features
    p : array of shape (nnodes)
        likelihood of the data under H1 over some sampling grid

    Notes
    -----
    When `sub` is empty the function returns at once with `crmap` filled
    with -1, ``LR = None`` and `p` filled with zeros.
    """
    dom = bf[0].domain
    n_subj = len(bf)

    # the builtin ``int`` replaces the ``np.int`` alias, which recent NumPy
    # releases no longer provide
    crmap = -np.ones(dom.size, int)
    LR = None
    p = np.zeros(dom.size)
    if len(sub) < 1:
        return crmap, LR, bf, p

    # imported only once there is something to estimate
    from nipy.algorithms.graph.field import field_from_coo_matrix_and_data

    sub = np.concatenate(sub).astype(int)
    gfc = np.concatenate(gfc)
    gf0 = np.concatenate(gf0)

    g0 = 1. / dom.local_volume.sum()

    # prepare the DPMM
    dim = dom.em_dim
    g1 = g0
    prior_precision = 1. / (dmax * dmax) * np.ones((1, dim))
    dof = 10
    burnin = 100
    nis = 1000
    # nis = number of iterations to estimate p
    p, q = dpmm(gfc, 0.5, g0, g1, dof, prior_precision, 1 - gf0,
                sub, burnin, dom.coord, nis)

    if verbose:
        h1, c1 = np.histogram((1 - gf0), bins=100)
        h2, c2 = np.histogram(q, bins=100)
        try:
            # plotting is best effort: skipped when matplotlib is missing
            import matplotlib.pylab as pl
            pl.figure()
            pl.plot(1 - gf0, q, '.')
            pl.figure()
            pl.bar(c1[:len(h1)], h1, width=0.005)
            pl.bar(c2[:len(h2)] + 0.003, h2, width=0.005, color='r')
        except ImportError:
            pass
        # single-argument print(): same output on Python 2 and Python 3
        print('Number of candidate regions %i, regions found %i' % (
            np.size(q), q.sum()))

    Fbeta = field_from_coo_matrix_and_data(dom.topology, p)
    _, label = Fbeta.custom_watershed(0, g0)

    # append some information to the hroi in each subject
    for s, bfs in enumerate(bf):
        if bfs.k > 0:
            leaves_pos = [bfs.select_id(k) for k in bfs.get_leaves_id()]
            us = -np.ones(bfs.k).astype(int)

            # set posterior proba
            lq = np.zeros(bfs.k)
            lq[leaves_pos] = q[sub == s]
            bfs.set_roi_feature('posterior_proba', lq)

            # set prior proba
            lq = np.zeros(bfs.k)
            lq[leaves_pos] = 1 - gf0[sub == s]
            bfs.set_roi_feature('prior_proba', lq)

            # label each ROI after the domain point closest to its mean
            # position
            pos = np.asarray(
                [np.mean(coords, 0) for coords in bfs.get_coord()])
            midx = [np.argmin(np.sum((dom.coord - pos[k]) ** 2, 1))
                    for k in range(bfs.k)]
            j = label[np.array(midx)]
            us[leaves_pos] = j[leaves_pos]

            # when parent regions has similarly labelled children,
            # include it also
            us = bfs.make_forest().propagate_upward(us)
            bfs.set_roi_feature('label', us)

    # derive the group-level landmarks
    # with a threshold on the number of subjects
    # that are represented in each one
    LR, nl = build_LR(bf, thq, ths, dmax, verbose=verbose)

    # make a group-level map of the landmark position
    crmap = _relabel_(label, nl)
    return crmap, LR, bf, p
def fixed_parcellation(mask_image, betas, nbparcel, nn=6, method='ward',
                       write_dir=None, mu=10., verbose=0, fullpath=None):
    """Fixed parcellation of a given dataset

    Parameters
    ----------
    mask_image : image or path
        mask image from which the grid domain is built
    betas : list of paths
        activation images from the subject
    nbparcel : int
        number of desired parcels
    nn : int, optional
        number of nearest neighbors to define the image topology
        (6, 18 or 26)
    method : string, optional
        clustering method used, to be chosen among
        'ward', 'gkm', 'ward_and_gkm', 'kmeans'
        'ward': Ward's clustering algorithm
        'gkm': Geodesic k-means algorithm, random initialization
        'ward_and_gkm': idem, initialized by Ward's clustering
        'kmeans': plain k-means on the features
    write_dir : string, optional
        write directory, used only when `fullpath` is None
    mu : float, optional
        the relative weight of anatomical information
    verbose : int, optional
        verbosity mode
    fullpath : string, optional
        path of the output image.
        If write_dir and fullpath are None then no file output.
        If only fullpath is None then it is the write dir + a name
        depending on the method.

    Returns
    -------
    lpa : SubDomains instance
        the parcellation of the domain

    Raises
    ------
    ValueError
        if `method` or `nn` is not one of the supported values

    Notes
    -----
    Ward's method takes time (about 6 minutes for a 60K voxels dataset)

    Geodesic k-means is 'quick and dirty'

    Ward's + GKM is expensive but quite good

    To reduce CPU time, rather use nn=6 (especially with Ward)
    """
    # validate the arguments before any import or file access
    if method not in ['ward', 'gkm', 'ward_and_gkm', 'kmeans']:
        raise ValueError('unknown method')
    if nn not in [6, 18, 26]:
        raise ValueError('nn should be 6,18 or 26')

    # step 1: load the data ----------------------------
    # 1.1 the domain
    domain = grid_domain_from_image(mask_image, nn)
    coord = domain.coord

    # 1.2 read the functional data, arranged so that the first axis has
    # length domain.size
    beta = np.array([domain.make_feature_from_image(b) for b in betas])
    if len(beta.shape) > 2:
        beta = np.squeeze(beta)
    if beta.shape[0] != domain.size:
        beta = beta.T
    feature = np.hstack((beta, mu * coord / np.std(coord)))

    # step 2: parcellate the data ---------------------------
    # Strings are compared by value: ``is not 'kmeans'`` relied on string
    # interning and would build the graph for no reason otherwise.
    if method == 'kmeans':
        _, u, _ = kmeans(feature, nbparcel)
    else:
        # only the graph-based methods need the field machinery
        from nipy.algorithms.graph.field import (
            field_from_coo_matrix_and_data)
        g = field_from_coo_matrix_and_data(domain.topology, feature)
        if method == 'ward':
            u, _ = g.ward(nbparcel)
        elif method == 'gkm':
            seeds = np.argsort(np.random.rand(g.V))[:nbparcel]
            _, u, _ = g.geodesic_kmeans(seeds)
        else:  # 'ward_and_gkm'
            w, _ = g.ward(nbparcel)
            _, u, _ = g.geodesic_kmeans(label=w)

    lpa = SubDomains(domain, u)

    if verbose:
        # per-parcel variances, averaged with the parcel sizes as weights
        var_beta = np.array(
            [np.var(beta[lpa.label == k], 0).sum() for k in range(lpa.k)])
        var_coord = np.array(
            [np.var(coord[lpa.label == k], 0).sum() for k in range(lpa.k)])
        size = lpa.get_size()
        vf = np.dot(var_beta, size) / size.sum()
        va = np.dot(var_coord, size) / size.sum()
        # single-argument print(): same output on Python 2 and Python 3
        print('%s functional variance %s anatomical variance %s'
              % (nbparcel, vf, va))

    # step 3: write the resulting label image
    if fullpath is not None:
        label_image = fullpath
    elif write_dir is not None:
        label_image = os.path.join(write_dir, "parcel_%s.nii" % method)
    else:
        label_image = None

    if label_image is not None:
        lpa_img = lpa.to_image(
            fid='id', roi=True, descrip='Intra-subject parcellation image')
        save(lpa_img, label_image)
        if verbose:
            print("Wrote the parcellation images as %s" % label_image)

    return lpa
def _bsa_dpmm(hrois, prior_h0, subjects, coords, sigma, prevalence_pval,
              prevalence_threshold, dof=10, alpha=.5, n_iter=1000,
              burnin=100, algorithm='density'):
    """Estimation of the population level model of activation density
    using dpmm and inference

    Parameters
    ----------
    hrois : list of nipy.labs.spatial_models.hroi.HierarchicalROI instances
        representing individual ROIs; items may be None.
        Let nr be the number of terminal regions across subjects.
    prior_h0 : array of shape (nr)
        mixture-based prior probability attached to the terminal regions;
        ``1 - prior_h0`` is what gets stored as the 'prior_proba' feature
        (presumably the probability of the null hypothesis -- confirm
        against the caller)
    subjects : array of shape (nr)
        subject index associated with the terminal regions
    coords : array of shape (nr, coord.shape[1])
        coordinates of the terminal regions
    sigma : float > 0
        expected cluster scatter in the common space in units of coord
    prevalence_pval : float in the [0, 1] interval
        p-value of the prevalence test
    prevalence_threshold : float in the range [0, nsubj]
        null hypothesis on region prevalence
    dof : float > 0, optional
        degrees of freedom of the prior
    alpha : float > 0, optional
        creation parameter of the DPMM
    n_iter : int, optional
        number of iterations of the DPMM used to estimate the density
    burnin : int, optional
        number of burn-in iterations handed to the DPMM
    algorithm : {'density', 'co-occurrence'}, optional
        algorithm used in the DPMM inference

    Returns
    -------
    landmarks : instance of sbf.LandmarkRegions
        describes the ROIs found in inter-subject inference.
        If no such thing can be defined landmarks is set to None.
    hrois : list of HierarchicalROI instances
        the input list, whose items have received 'posterior_proba',
        'prior_proba' and 'label' ROI features

    Raises
    ------
    ValueError
        if `algorithm` is neither 'density' nor 'co-occurrence'
    """
    from nipy.algorithms.graph.field import field_from_coo_matrix_and_data
    domain = hrois[0].domain
    n_subjects = len(hrois)

    landmarks = None
    if len(subjects) < 1:
        # no terminal region at all: nothing to infer
        return landmarks, hrois

    null_density = 1. / domain.local_volume.sum()

    # prepare the DPMM
    dim = domain.em_dim
    prior_precision = 1. / (sigma ** 2) * np.ones((1, dim))

    # n_iter = number of iterations to estimate density
    if algorithm == 'density':
        density, post_proba = _dpmm(
            coords, alpha, null_density, dof, prior_precision, prior_h0,
            subjects, domain.coord, n_iter=n_iter, burnin=burnin)
        # associate labels with coords: each region takes the watershed
        # label of the domain point closest to it
        Fbeta = field_from_coo_matrix_and_data(domain.topology, density)
        _, label = Fbeta.custom_watershed(0, null_density)
        midx = np.array(
            [np.argmin(np.sum((domain.coord - coord_) ** 2, 1))
             for coord_ in coords])
        components = label[midx]
    elif algorithm == 'co-occurrence':
        post_proba, density, co_clustering = _dpmm(
            coords, alpha, null_density, dof, prior_precision, prior_h0,
            subjects, n_iter=n_iter, burnin=burnin, co_clust=True)
        contingency_graph = wgraph_from_coo_matrix(co_clustering)
        if contingency_graph.E > 0:
            contingency_graph.remove_edges(contingency_graph.weights > .5)

        components = contingency_graph.cc()
        # every region below the null density gets a label of its own
        components[density < null_density] = components.max() + 1 + \
            np.arange(np.sum(density < null_density))
    else:
        raise ValueError('Unknown algorithm')

    # append some information to the hroi in each subject
    for subject in range(n_subjects):
        bfs = hrois[subject]
        if bfs is None:
            continue
        if bfs.k == 0:
            bfs.set_roi_feature('label', np.array([]))
            continue

        leaves_pos = [bfs.select_id(k) for k in bfs.get_leaves_id()]

        # save posterior proba
        post_proba_ = np.zeros(bfs.k)
        post_proba_[leaves_pos] = post_proba[subjects == subject]
        bfs.set_roi_feature('posterior_proba', post_proba_)

        # save prior proba
        prior_proba = np.zeros(bfs.k)
        prior_proba[leaves_pos] = 1 - prior_h0[subjects == subject]
        bfs.set_roi_feature('prior_proba', prior_proba)

        # assign labels to ROIs; the builtin ``int`` replaces the
        # ``np.int`` alias, which recent NumPy releases no longer provide
        roi_label = -np.ones(bfs.k).astype(int)
        roi_label[leaves_pos] = components[subjects == subject]
        # when parent regions has similarly labelled children,
        # include it also
        roi_label = bfs.make_forest().propagate_upward(roi_label)
        bfs.set_roi_feature('label', roi_label)

    # derive the group-level landmarks
    # with a threshold on the number of subjects
    # that are represented in each one
    landmarks, new_labels = build_landmarks(
        domain, coords, subjects, np.array(components), 1 - prior_h0,
        prevalence_pval, prevalence_threshold, sigma)

    # relabel the regions
    _update_hroi_labels(hrois, new_labels)

    return landmarks, hrois
def _bsa_dpmm(hrois, prior_h0, subjects, coords, sigma, prevalence_pval,
              prevalence_threshold, dof=10, alpha=.5, n_iter=1000,
              burnin=100, algorithm='density'):
    """Estimation of the population level model of activation density
    using dpmm and inference

    Parameters
    ----------
    hrois : list of nipy.labs.spatial_models.hroi.HierarchicalROI instances
        representing individual ROIs; items may be None.
        Let nr be the number of terminal regions across subjects.
    prior_h0 : array of shape (nr)
        mixture-based prior probability attached to the terminal regions;
        ``1 - prior_h0`` is what gets stored as the 'prior_proba' feature
        (presumably the probability of the null hypothesis -- confirm
        against the caller)
    subjects : array of shape (nr)
        subject index associated with the terminal regions
    coords : array of shape (nr, coord.shape[1])
        coordinates of the terminal regions
    sigma : float > 0
        expected cluster scatter in the common space in units of coord
    prevalence_pval : float in the [0, 1] interval
        p-value of the prevalence test
    prevalence_threshold : float in the range [0, nsubj]
        null hypothesis on region prevalence
    dof : float > 0, optional
        degrees of freedom of the prior
    alpha : float > 0, optional
        creation parameter of the DPMM
    n_iter : int, optional
        number of iterations of the DPMM used to estimate the density
    burnin : int, optional
        number of burn-in iterations handed to the DPMM
    algorithm : {'density', 'co-occurrence'}, optional
        algorithm used in the DPMM inference

    Returns
    -------
    landmarks : instance of sbf.LandmarkRegions
        describes the ROIs found in inter-subject inference.
        If no such thing can be defined landmarks is set to None.
    hrois : list of HierarchicalROI instances
        the input list, whose items have received 'posterior_proba',
        'prior_proba' and 'label' ROI features

    Raises
    ------
    ValueError
        if `algorithm` is neither 'density' nor 'co-occurrence'
    """
    from nipy.algorithms.graph.field import field_from_coo_matrix_and_data
    domain = hrois[0].domain
    n_subjects = len(hrois)

    landmarks = None
    if len(subjects) < 1:
        # no terminal region at all: nothing to infer
        return landmarks, hrois

    null_density = 1. / domain.local_volume.sum()

    # prepare the DPMM
    dim = domain.em_dim
    prior_precision = 1. / (sigma ** 2) * np.ones((1, dim))

    # n_iter = number of iterations to estimate density
    if algorithm == 'density':
        density, post_proba = _dpmm(
            coords, alpha, null_density, dof, prior_precision, prior_h0,
            subjects, domain.coord, n_iter=n_iter, burnin=burnin)
        # associate labels with coords: each region takes the watershed
        # label of the domain point closest to it
        Fbeta = field_from_coo_matrix_and_data(domain.topology, density)
        _, label = Fbeta.custom_watershed(0, null_density)
        midx = np.array(
            [np.argmin(np.sum((domain.coord - coord_) ** 2, 1))
             for coord_ in coords])
        components = label[midx]
    elif algorithm == 'co-occurrence':
        post_proba, density, co_clustering = _dpmm(
            coords, alpha, null_density, dof, prior_precision, prior_h0,
            subjects, n_iter=n_iter, burnin=burnin, co_clust=True)
        contingency_graph = wgraph_from_coo_matrix(co_clustering)
        if contingency_graph.E > 0:
            contingency_graph.remove_edges(contingency_graph.weights > .5)

        components = contingency_graph.cc()
        # every region below the null density gets a label of its own
        components[density < null_density] = components.max() + 1 + \
            np.arange(np.sum(density < null_density))
    else:
        raise ValueError('Unknown algorithm')

    # append some information to the hroi in each subject
    for subject in range(n_subjects):
        bfs = hrois[subject]
        if bfs is None:
            continue
        if bfs.k == 0:
            bfs.set_roi_feature('label', np.array([]))
            continue

        leaves_pos = [bfs.select_id(k) for k in bfs.get_leaves_id()]

        # save posterior proba
        post_proba_ = np.zeros(bfs.k)
        post_proba_[leaves_pos] = post_proba[subjects == subject]
        bfs.set_roi_feature('posterior_proba', post_proba_)

        # save prior proba
        prior_proba = np.zeros(bfs.k)
        prior_proba[leaves_pos] = 1 - prior_h0[subjects == subject]
        bfs.set_roi_feature('prior_proba', prior_proba)

        # assign labels to ROIs; the builtin ``int`` replaces the
        # ``np.int`` alias, which recent NumPy releases no longer provide
        roi_label = -np.ones(bfs.k).astype(int)
        roi_label[leaves_pos] = components[subjects == subject]
        # when parent regions has similarly labelled children,
        # include it also
        roi_label = bfs.make_forest().propagate_upward(roi_label)
        bfs.set_roi_feature('label', roi_label)

    # derive the group-level landmarks
    # with a threshold on the number of subjects
    # that are represented in each one
    landmarks, new_labels = build_landmarks(
        domain, coords, subjects, np.array(components), 1 - prior_h0,
        prevalence_pval, prevalence_threshold, sigma)

    # relabel the regions
    _update_hroi_labels(hrois, new_labels)

    return landmarks, hrois
def bsa_dpmm(bf, gf0, sub, gfc, dmax, thq, ths, verbose=0):
    """Estimation of the population level model of activation density
    using dpmm and inference

    Parameters
    ----------
    bf : list of nipy.labs.spatial_models.hroi.HierarchicalROI instances
        representing individual ROIs.
        Let nr be the number of terminal regions across subjects.
    gf0 : list of arrays, concatenated to shape (nr)
        the mixture-based prior probability
        that the terminal regions are true positives
    sub : list of arrays, concatenated to shape (nr)
        the subject index associated with the terminal regions
    gfc : list of arrays, concatenated to shape (nr, coord.shape[1])
        the coordinates of the terminal regions
    dmax : float > 0
        expected cluster std in the common space in units of coord
    thq : float in the [0, 1] interval
        p-value of the prevalence test
    ths : float in the range [0, nsubj]
        null hypothesis on region prevalence that is rejected during
        inference
    verbose : int, optional
        verbosity mode

    Returns
    -------
    crmap : array of shape (nnodes)
        the resulting group-level labelling of the space
    LR : instance of sbf.LandmarkRegions
        describes the ROIs found in inter-subject inference.
        If no such thing can be defined LR is set to None.
    bf : list of HierarchicalROI instances
        the input list; its non-empty items receive 'posterior_proba',
        'prior_proba' and 'label' ROI features
    p : array of shape (nnodes)
        likelihood of the data under H1 over some sampling grid

    Notes
    -----
    When `sub` is empty the function returns at once with `crmap` filled
    with -1, ``LR = None`` and `p` filled with zeros.
    """
    dom = bf[0].domain
    n_subj = len(bf)

    # the builtin ``int`` replaces the ``np.int`` alias, which recent NumPy
    # releases no longer provide
    crmap = -np.ones(dom.size, int)
    LR = None
    p = np.zeros(dom.size)
    if len(sub) < 1:
        return crmap, LR, bf, p

    # imported only once there is something to estimate
    from nipy.algorithms.graph.field import field_from_coo_matrix_and_data

    sub = np.concatenate(sub).astype(int)
    gfc = np.concatenate(gfc)
    gf0 = np.concatenate(gf0)

    g0 = 1. / dom.local_volume.sum()

    # prepare the DPMM
    dim = dom.em_dim
    g1 = g0
    prior_precision = 1. / (dmax * dmax) * np.ones((1, dim))
    dof = 10
    burnin = 100
    nis = 1000
    # nis = number of iterations to estimate p
    p, q = dpmm(gfc, 0.5, g0, g1, dof, prior_precision, 1 - gf0,
                sub, burnin, dom.coord, nis)

    if verbose:
        h1, c1 = np.histogram((1 - gf0), bins=100)
        h2, c2 = np.histogram(q, bins=100)
        try:
            # plotting is best effort: skipped when matplotlib is missing
            import matplotlib.pylab as pl
            pl.figure()
            pl.plot(1 - gf0, q, '.')
            pl.figure()
            pl.bar(c1[:len(h1)], h1, width=0.005)
            pl.bar(c2[:len(h2)] + 0.003, h2, width=0.005, color='r')
        except ImportError:
            pass
        # single-argument print(): same output on Python 2 and Python 3
        print('Number of candidate regions %i, regions found %i' % (
            np.size(q), q.sum()))

    Fbeta = field_from_coo_matrix_and_data(dom.topology, p)
    _, label = Fbeta.custom_watershed(0, g0)

    # append some information to the hroi in each subject
    for s, bfs in enumerate(bf):
        if bfs.k > 0:
            leaves_pos = [bfs.select_id(k) for k in bfs.get_leaves_id()]
            us = -np.ones(bfs.k).astype(int)

            # set posterior proba
            lq = np.zeros(bfs.k)
            lq[leaves_pos] = q[sub == s]
            bfs.set_roi_feature('posterior_proba', lq)

            # set prior proba
            lq = np.zeros(bfs.k)
            lq[leaves_pos] = 1 - gf0[sub == s]
            bfs.set_roi_feature('prior_proba', lq)

            # label each ROI after the domain point closest to its mean
            # position
            pos = np.asarray(
                [np.mean(coords, 0) for coords in bfs.get_coord()])
            midx = [np.argmin(np.sum((dom.coord - pos[k]) ** 2, 1))
                    for k in range(bfs.k)]
            j = label[np.array(midx)]
            us[leaves_pos] = j[leaves_pos]

            # when parent regions has similarly labelled children,
            # include it also
            us = bfs.make_forest().propagate_upward(us)
            bfs.set_roi_feature('label', us)

    # derive the group-level landmarks
    # with a threshold on the number of subjects
    # that are represented in each one
    LR, nl = build_LR(bf, thq, ths, dmax, verbose=verbose)

    # make a group-level map of the landmark position
    crmap = _relabel_(label, nl)
    return crmap, LR, bf, p