def step(self):
    """Run one update pass over the velocity and density buffers.

    Reads ``visc``, ``diff``, ``dt``, the velocity buffers ``Vx``/``Vy``
    with their companions ``Vx0``/``Vy0``, and the density buffers
    ``density``/``s`` from ``self``, then hands them to the free functions
    ``diffuse``, ``project`` and ``advect`` in a fixed order.

    None of the helper return values are used and no attribute is
    reassigned here, so any effect of this method comes from the helpers
    modifying the arrays they are given.

    NOTE(review): the leading 0/1/2 argument of ``diffuse``/``advect`` is
    forwarded as-is; it presumably tells the helper which field it is
    handling (e.g. for boundary treatment) -- confirm against the helpers.
    """
    visc = self.visc
    diff = self.diff
    dt = self.dt
    Vx = self.Vx
    Vy = self.Vy
    Vx0 = self.Vx0
    Vy0 = self.Vy0
    s = self.s
    density = self.density

    # Velocity, pass 1: diffuse each component with the viscosity, then project.
    diffuse(1, Vx0, Vx, visc, dt)
    diffuse(2, Vy0, Vy, visc, dt)
    project(Vx0, Vy0, Vx, Vy)

    # Velocity, pass 2: advect each component along (Vx0, Vy0), then project
    # again with the buffer roles swapped.
    advect(1, Vx, Vx0, Vx0, Vy0, dt)
    advect(2, Vy, Vy0, Vx0, Vy0, dt)
    project(Vx, Vy, Vx0, Vy0)

    # Density: diffuse with the diffusion coefficient, then advect along the
    # velocity buffers (Vx, Vy).
    diffuse(0, s, density, diff, dt)
    advect(0, density, s, Vx, Vy, dt)
# Keep the row labels before the table is turned into a plain array.
# NOTE(review): `mutations` exposes `.index`, so it is presumably a pandas
# DataFrame (samples x genes) at this point -- confirm against the loader.
samples = numpy.array(list(mutations.index))
mutations = numpy.array(mutations)
# Binarise: every value above 1 is clipped to 1.
mutations[mutations > 1] = 1

# Genes of the reference list `genes` that are absent from `thisgenes`.
_thisgene_position = {gene: pos for pos, gene in enumerate(thisgenes)}
toadd = [gene for gene in genes if gene not in _thisgene_position]

# Column pairing between this data set and the reference gene order.
# Both lists are derived from the same mapping so that entry n of
# `tokeep_thisindex` and entry n of `tokeep_geneindex` always refer to the
# SAME gene. Building them with two independent scans (as before) only lines
# up when the shared genes occur in the same relative order in `thisgenes`
# and `genes`; otherwise columns were copied to the wrong gene.
# If a gene name is repeated in `thisgenes`, its last position is used.
tokeep_geneindex = [pos for pos, gene in enumerate(genes)
                    if gene in _thisgene_position]
tokeep_thisindex = [_thisgene_position[genes[pos]] for pos in tokeep_geneindex]

# Drop rows whose (binarised) row sum does not exceed `mutations_min`.
minflag = (mutations.sum(axis=1) > mutations_min)
mutations = mutations[minflag, :]
samples = samples[minflag]

# Re-express the rows over the full `genes` list; genes without a matching
# column stay at zero.
mutations_temp = numpy.zeros((len(mutations), len(genes)))
mutations_temp[:, tokeep_geneindex] = mutations[:, tokeep_thisindex]
mutations = sp.csr_matrix(mutations_temp)

# Smooth over data['adj'] with utils.diffuse, then quantile-normalise the
# densified result and store it sparse again with the same shape.
mutation_smooth = utils.diffuse(mutations, data['adj'], alpha, diff_thresh)
mutation_smooth_norm = sp.csr_matrix(
    utils.quantile_normalization(numpy.array(mutation_smooth.todense())),
    shape=mutation_smooth.shape)

# Single-run alternative to the consensus below, kept for reference:
#U,V = utils.gnmf(mutation_smooth,data['knn'],nclust, gamma, maxiter, tolerance)
#labels = numpy.array(V.todense().argmax(axis=1))[:,0]


def gnmfsingle(X, W, nclust, gamma, maxiter, tolerance):
    """Run ``utils.gnmf`` once and return one label per row of ``V``.

    All arguments are forwarded unchanged to ``utils.gnmf``. The label of a
    row is the column index of its largest entry in the second factor ``V``;
    the result is a 1-D array.
    """
    U, V = utils.gnmf(X, W, nclust, gamma, maxiter, tolerance)
    return numpy.array(V.todense().argmax(axis=1))[:, 0]


# Consensus over repeated gnmfsingle runs on the normalised matrix.
# NOTE(review): bootstrap=0.8 / rep=100 presumably mean "80% subsample,
# 100 repetitions" -- confirm against utils.consensus.
cons = utils.consensus(gnmfsingle, mutation_smooth_norm,
                       [data['knn'], nclust, gamma, maxiter, tolerance],
                       bootstrap=0.8, rep=100)

######take from stratipy modules
# NOTE(review): scipy's linkage treats a 2-D input as observation vectors
# (one per row), not as a distance matrix. If `cons` is a square consensus
# matrix, confirm that clustering its rows is what is intended.
zmatrix = linkage(cons, method='average')
clusters = fcluster(zmatrix, 1)
dend = dendrogram(zmatrix, count_sort='ascending')
# Sorted list of every node name that occurs in either endpoint column.
genes = sorted(set(net[:, 0]) | set(net[:, 1]))

## create adjacency matrix
# Replace the endpoint names in `net` by their position in `genes` (in place).
# A list is built explicitly: under Python 3 `map` returns a lazy iterator,
# which cannot be assigned into an array column.
indices = {gene: pos for pos, gene in enumerate(genes)}
net[:, 0] = [indices[x] for x in net[:, 0]]
net[:, 1] = [indices[x] for x in net[:, 1]]
mat = sp.csr_matrix((net[:, 2], (net[:, 0], net[:, 1])),
                    shape=(len(genes), len(genes)))
mat = mat + mat.T  ### only if there are no reverse edges present, check this
mat = mat.tocsr()
## check if symmetric

## find influence
## borrowed and modified from stratipy
# Diffuse an identity matrix over the network: one unit source per gene.
raw = sp.dia_matrix((numpy.ones(len(genes)), [0]),
                    shape=(len(genes), len(genes)))
influence = utils.diffuse(raw, mat, alpha, thresh)
# Keep, for each pair (i, j), the smaller of influence[i, j] and
# influence[j, i].
# NOTE(review): both comparisons are strict, so where the two directions are
# exactly equal (including the diagonal) the result is 0, not the shared
# value -- confirm that dropping ties is intended.
influence = ((influence < influence.T).multiply(influence)
             + (influence.T < influence).multiply(influence.T))
influence = influence.multiply(mat)
## imo, this limits the influence to direct connections only, i.e. first
## degree, and eliminates the effect of a node on secondary connections.
## This is effectively identifying the best k direct connections of a node.
## Need to read this:
## Vandin, F., Upfal, E., & Raphael, B. J. (2011). Algorithms for Detecting Significantly Mutated Pathways in Cancer. Journal of Computational Biology, 18(3), 507–522. http://doi.org/10.1089/cmb.2010.0265
## Vanunu O, Magger O, Ruppin E, Shlomi T, Sharan R (2010) Associating Genes and Protein Complexes with Disease via Network Propagation. PLoS Comput Biol 6(1): e1000641. doi:10.1371/journal.pcbi.1000641

# Per row i, keep the entries whose descending rank (ties share the highest
# rank number, method='max') is at most k. The rank is computed for all rows
# in one vectorised groupby instead of a per-group apply with the removed
# `.ix` indexer; the order of the surviving rows does not matter because
# they are only used as coordinates of a sparse matrix.
vals = sp.find(influence)
vals = pandas.DataFrame({'i': vals[0], 'j': vals[1], 'v': vals[2]})
rank_in_row = vals.groupby('i')['v'].rank(method='max', ascending=False)
vals = vals[rank_in_row <= k].reset_index(drop=True)
vals['v'] = 1
# 0/1 mask of the selected entries, then put the edge values of `mat` back.
knn = sp.csr_matrix((vals['v'], (vals['i'], vals['j'])), shape=mat.shape)
knn = mat.multiply(knn)