def calcH2Continuous_twotails(XXT, phe, keepArr, prev, h2coeff):
    print('computing h2 for a two-tails ascertained study...')
    XXT = XXT[np.ix_(keepArr, keepArr)]
    phe = phe[keepArr]

    t1 = stats.norm(0,1).ppf(prev)
    t2 = stats.norm(0,1).isf(prev)
    phit1 = stats.norm(0,1).pdf(t1)
    phit2 = stats.norm(0,1).pdf(t2)

    K1 = prev
    K2 = prev

    xCoeff = ((phit2*t2 - phit1*t1 + K1 + K2)**2 * (K1+K2)**2 - (phit2-phit1)**4) / (K1 + K2)**4
    intersect = ((phit2-phit1) / (K1+K2))**2

    pheMean = 0
    pheVar = 1
    x = (xCoeff * h2coeff) * XXT
    y = np.outer((phe-pheMean)/np.sqrt(pheVar), (phe-pheMean)/np.sqrt(pheVar))
    y -= intersect

    y = y[np.triu_indices(y.shape[0], 1)]
    x = x[np.triu_indices(x.shape[0], 1)]
    slope, intercept, rValue, pValue, stdErr = stats.linregress(x, y)
    return slope
def test_syrk(k, n, dtype, rng):
    tols = tolerances[dtype]

    A = np.zeros((n, k), dtype=dtype)
    C = np.zeros((n, n), dtype=dtype)
    D = np.zeros((k, k), dtype=dtype)
    A[...] = rng.uniform(-1, 1, size=A.shape)
    C[...] = rng.uniform(-1, 1, size=C.shape)
    D[...] = rng.uniform(-1, 1, size=D.shape)
    clA, clC, clD = map(to_ocl, [A, C, D])
    a = 0.9
    b = 0.5

    try:
        blas.setup()

        # normal syrk
        up = np.triu_indices(n)
        event = blas.syrk(queue, clA, clC, alpha=a, beta=b)
        assert np.allclose(clC.get()[up], (a*np.dot(A, A.T) + b*C)[up], **tols)
        assert isinstance(event, cl.Event)

        # transposed syrk
        up = np.triu_indices(k)
        blas.syrk(queue, clA, clD, transA=True, alpha=a, beta=b)
        assert np.allclose(clD.get()[up], (a*np.dot(A.T, A) + b*D)[up], **tols)
    finally:
        blas.teardown()
def calcH2Binary(XXT, phe, probs, thresholds, keepArr, prev, h2coeff):
    K = prev
    P = np.sum(phe > 0) / float(phe.shape[0])

    XXT = XXT[np.ix_(keepArr, keepArr)]
    phe = phe[keepArr]

    if thresholds is None:
        t = stats.norm(0,1).isf(K)
        phit = stats.norm(0,1).pdf(t)
        xCoeff = P*(1-P) / (K**2 * (1-K)**2) * phit**2 * h2coeff
        y = np.outer((phe-P) / np.sqrt(P*(1-P)), (phe-P) / np.sqrt(P*(1-P)))
        x = xCoeff * XXT
    else:
        probs = probs[keepArr]
        thresholds = thresholds[keepArr]
        Ki = K*(1-P) / (P*(1-K)) * probs / (1 + K*(1-P) / (P*(1-K))*probs - probs)
        phit = stats.norm(0,1).pdf(thresholds)
        probsInvOuter = np.outer(probs*(1-probs), probs*(1-probs))
        y = np.outer(phe-probs, phe-probs) / np.sqrt(probsInvOuter)
        sumProbs = np.tile(np.column_stack(probs).T, (1, probs.shape[0])) + np.tile(probs, (probs.shape[0], 1))
        Atag0 = np.outer(phit, phit) * (1 - (sumProbs)*(P-K)/(P*(1-K)) + np.outer(probs, probs)*(((P-K)/(P*(1-K)))**2)) / np.sqrt(probsInvOuter)
        B0 = np.outer(Ki + (1-Ki)*(K*(1-P))/(P*(1-K)), Ki + (1-Ki)*(K*(1-P))/(P*(1-K)))
        x = (Atag0 / B0 * h2coeff) * XXT

    y = y[np.triu_indices(y.shape[0], 1)]
    x = x[np.triu_indices(x.shape[0], 1)]
    slope, intercept, rValue, pValue, stdErr = stats.linregress(x, y)
    return slope
def test_triu_indices(self):
    iu1 = triu_indices(4)
    iu2 = triu_indices(4, 2)

    a = np.array([[1, 2, 3, 4],
                  [5, 6, 7, 8],
                  [9, 10, 11, 12],
                  [13, 14, 15, 16]])

    # Both for indexing:
    yield (assert_array_equal, a[iu1],
           array([1, 2, 3, 4, 6, 7, 8, 11, 12, 16]))

    # And for assigning values:
    a[iu1] = -1
    yield (assert_array_equal, a,
           array([[-1, -1, -1, -1],
                  [ 5, -1, -1, -1],
                  [ 9, 10, -1, -1],
                  [13, 14, 15, -1]]))

    # These cover almost the whole array (two diagonals right of the main one):
    a[iu2] = -10
    yield (assert_array_equal, a,
           array([[ -1,  -1, -10, -10],
                  [  5,  -1,  -1, -10],
                  [  9,  10,  -1,  -1],
                  [ 13,  14,  15,  -1]]))
def __init__(self, eta=None, n=None, p=None, transform='interval', *args, **kwargs):
    if (p is not None) and (n is not None) and (eta is None):
        warnings.warn('Parameters to LKJCorr have changed: shape parameter n -> eta, '
                      'dimension parameter p -> n. Please update your code. '
                      'Automatically re-assigning parameters for backwards compatibility.',
                      DeprecationWarning)
        self.n = p
        self.eta = n
        eta = self.eta
        n = self.n
    elif (n is not None) and (eta is not None) and (p is None):
        self.n = n
        self.eta = eta
    else:
        raise ValueError('Invalid parameter: please use eta as the shape parameter and '
                         'n as the dimension parameter.')

    n_elem = int(n * (n - 1) / 2)
    self.mean = np.zeros(n_elem, dtype=theano.config.floatX)

    if transform == 'interval':
        transform = transforms.interval(-1, 1)
    super(LKJCorr, self).__init__(shape=n_elem, transform=transform, *args, **kwargs)
    warnings.warn('Parameters in LKJCorr have been renamed: shape parameter n -> eta, '
                  'dimension parameter p -> n. Please double check your initialization.',
                  DeprecationWarning)

    self.tri_index = np.zeros([n, n], dtype='int32')
    self.tri_index[np.triu_indices(n, k=1)] = np.arange(n_elem)
    self.tri_index[np.triu_indices(n, k=1)[::-1]] = np.arange(n_elem)
def mvn_msr(self, corrDS=None, abstol=1e-12, reltol=1e-12, intLb=-10, intUb=10):
    systype = self.systype
    beta = self.beta
    nls = len(self.comps)
    if corrDS is None:
        correl = self.syscorrDS[np.triu_indices(nls, 1)]
    else:
        correl = corrDS[np.triu_indices(nls, 1)]
    if corrDS is None:
        corrDS = self.syscorrDS
    i = 1
    n = 10000
    syspf0 = 0.0
    dpf = 1.0
    # while i != 0:
    #     n += 10000
    #     v, res, i = stats.mvn.mvndst(intLb*np.ones(nls), beta, np.zeros(nls, dtype=int),
    #                                  correl, [nls*n, 1e-12, 1e-12])
    while i != 0:
        n += 10000
        res, i = stats.mvn.mvnun(-10 * np.ones(nls), beta, np.zeros(nls), corrDS,
                                 [nls * n, abstol, reltol])
    # if abs(res-res1)/(0.5*(res+res1)) > 1e-3:
    #     print('warning: abnormal difference between mvnun and mvndst results')
    if systype.lower() == "series":
        syspf = 1.0 - res
        sysbeta = -stats.norm.ppf(syspf)
        results = ReliabilityResults(sysbeta, syspf)
    elif systype.lower() == "parallel":
        syspf = res
        sysbeta = -stats.norm.ppf(syspf)
        results = ReliabilityResults(sysbeta, syspf)
    else:
        print("mvn_msr only supports series or parallel system")
        sys.exit(0)
    return results
def scoring2B_behavior():
    t_clusters = np.zeros((600, 3))
    t_clusters[0:200, 0] = 1
    t_clusters[200:400, 1] = 1
    t_clusters[400:, 2] = 1
    t_ccm = np.dot(t_clusters, t_clusters.T)
    n_uniq = len(np.triu_indices(t_ccm.shape[0], k=1)[0])
    res = []
    concentrations = [1000, 100, 50, 25, 10, 5, 3, 1]
    for c in concentrations:
        for i in range(50):
            ccm = np.copy(t_ccm)
            ccm[np.triu_indices(t_ccm.shape[0], k=1)] -= np.random.beta(1, c, n_uniq)
            #ccm[np.tril_indices(t_ccm.shape[0], k=-1)] = ccm[np.triu_indices(t_ccm.shape[0], k=1)]
            ccm[np.tril_indices(t_ccm.shape[0], k=-1)] = 0
            ccm = ccm + ccm.T
            np.fill_diagonal(ccm, 1)
            ccm = np.abs(ccm)
            res.append([c, calculate2(ccm, t_ccm)])
    res = [map(str, x) for x in res]
    res = ['\t'.join(x) for x in res]
    f = open('scoring2B_beta.tsv', 'w')
    f.write('\n'.join(res))
    f.close()
def insertCartesainData(gt_row, xyzuvw_mean, xyzuvw_cov):
    dim = 6
    # CART_COL_NAMES = ['X', 'Y', 'Z', 'U', 'V', 'W',
    #                   'dX', 'dY', 'dZ', 'dU', 'dV', 'dW',
    #                   'c_XY', 'c_XZ', 'c_XU', 'c_XV', 'c_XW',
    #                   'c_YZ', 'c_YU', 'c_YV', 'c_YW',
    #                   'c_ZU', 'c_ZV', 'c_ZW',
    #                   'c_UV', 'c_UW',
    #                   'c_VW']

    # fill in cartesian mean
    try:
        for col_ix, col_name in enumerate(CART_COL_NAMES[:6]):
            gt_row[col_name] = xyzuvw_mean[col_ix]
    except IndexError:
        import pdb; pdb.set_trace()

    # fill in standard deviations
    xyzuvw_stds = np.sqrt(xyzuvw_cov[np.diag_indices(dim)])
    for col_ix, col_name in enumerate(CART_COL_NAMES[6:12]):
        gt_row[col_name] = xyzuvw_stds[col_ix]

    correl_matrix = xyzuvw_cov / xyzuvw_stds / xyzuvw_stds.reshape(6, 1)
    # fill in correlations
    for col_ix, col_name in enumerate(CART_COL_NAMES[12:]):
        gt_row[col_name] = correl_matrix[
            np.triu_indices(dim, k=1)[0][col_ix],
            np.triu_indices(dim, k=1)[1][col_ix]
        ]
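# Minimal standalone illustration (added, not part of the original module) of the
# normalization and upper-triangle extraction used above: divide a covariance
# matrix by the outer product of its standard deviations, then read off the
# strictly upper-triangular correlations in row-major order.
import numpy as np

cov = np.array([[4.0, 1.2, 0.6],
                [1.2, 9.0, 2.1],
                [0.6, 2.1, 1.0]])
stds = np.sqrt(np.diag(cov))
correl = cov / stds / stds.reshape(-1, 1)   # correlation matrix
iu = np.triu_indices(3, k=1)
print(correl[iu])                           # [c_01, c_02, c_12]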
def loss_function(mapping12):
    """Computes the loss function of a given mapping.
    Using the graph kernel of two sets of distance.
    """
    global tractography1, tractography2
    global dm1_all, dm1_all_small, dm2_all, dm2_all_small
    global kdt1, kdt2
    k = 10
    radius = 150
    loss = 0.0
    for sid in np.arange(len(tractography1)):
        #idx1 = kdt1.query_radius(dm1_all_small[sid], radius)[0]
        idx1 = kdt1.query(dm1_all_small[sid], k)[1][0]
        dm_small1 = dm1_all[idx1][:, idx1]
        e1 = dm_small1[np.triu_indices(dm_small1.shape[0], 1)]

        #idx2 = kdt2.query_radius(dm2_all_small[mapping12[sid]], radius)[0]
        idx2 = kdt2.query(dm2_all_small[mapping12[sid]], k)[1][0]
        dm_small2 = dm2_all[idx2][:, idx2]
        e2 = dm_small2[np.triu_indices(dm_small2.shape[0], 1)]

        #loss = loss + Graph_KN(e1, e2, weight=1., num_bins=128)
        #similarity = similarity + Pyramid_KN(e1, e2, weight=1., num_bins=128)
        loss = loss + Pyramid_KN(e1, e2, weight=1., num_bins=128)
    return loss
def find_stationary_var(amat=None, bmat=None, cmat=None):
    """Find fixed point of H = CC' + AHA' + BHB' given A, B, C.

    Parameters
    ----------
    amat, bmat, cmat : (nstocks, nstocks) arrays
        Parameter matrices

    Returns
    -------
    (nstocks, nstocks) array
        Unconditional variance matrix

    """
    nstocks = amat.shape[0]
    kwargs = {'amat': amat, 'bmat': bmat, 'ccmat': cmat.dot(cmat.T)}
    fun = partial(ParamGeneric.fixed_point, **kwargs)
    try:
        with np.errstate(divide='ignore', invalid='ignore'):
            hvar = np.eye(nstocks)
            sol = sco.fixed_point(fun, hvar[np.tril_indices(nstocks)])
            hvar[np.tril_indices(nstocks)] = sol
            hvar[np.triu_indices(nstocks, 1)] \
                = hvar.T[np.triu_indices(nstocks, 1)]
            return hvar
    except RuntimeError:
        # warnings.warn('Could not find stationary variance!')
        return None
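# Quick standalone illustration (added) of the symmetrization idiom used above:
# after writing a half-vectorized solution into the lower triangle, the strict
# upper triangle is mirrored from the transpose.
import numpy as np

n = 3
h = np.eye(n)
h[np.tril_indices(n)] = [1.0, 0.2, 2.0, 0.3, 0.4, 3.0]   # arbitrary vech values
h[np.triu_indices(n, 1)] = h.T[np.triu_indices(n, 1)]    # mirror lower -> upper
assert np.allclose(h, h.T)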
def get_query_clusters(points, k):
    '''
    points [n,m] - array for n points with dimension m - encoded query
    '''
    # normalize input
    points = normalize(points.astype(float))
    # get similarity matrix (cosine distance)
    dist = points.dot(points.T)
    # initialize variables
    n_pt = len(points)
    cluster_old, cluster_new = np.ones(n_pt), np.zeros(n_pt)
    # special case, no clustering
    if k == 1 or n_pt == 1:
        return np.zeros(n_pt), 1 if n_pt == 1 else np.mean(dist[np.triu_indices(n_pt, k=1)])
    # randomly choose k starting centroids
    centroids = points[np.random.permutation(n_pt)[:k]]
    while not np.array_equal(cluster_old, cluster_new):
        cluster_old = cluster_new
        # get cluster index for each point
        cluster_new = np.argmax(points.dot(centroids.T), axis=1)
        # get new centroids, and within class mean distance/similarity
        centroids, in_dist = [], []
        for c in np.unique(cluster_new):
            pid = cluster_new == c
            # set new centroid as the one who has minimum total distance to rest of the points in the cluster
            cid = np.argmax(np.sum(dist[np.ix_(pid, pid)], axis=1))
            centroids.append(points[pid][cid])
            in_dist.append(1 if sum(pid) == 1 else np.mean(dist[np.ix_(pid, pid)][np.triu_indices(sum(pid), k=1)]))
        centroids = np.array(centroids)
        # traditional way to get new centroid, not working well for cosine distance
        # centroids = normalize([np.mean(points[cluster_new==c], axis=0) for c in np.unique(cluster_new)])
    return cluster_new, np.mean(in_dist), centroids
def dynamically_bin(hic1, hic2, chrom, binbounds): unbinned1, map1 = hic1.cis_heatmap(chrom, start=binbounds[0, 0], stop=binbounds[-1, 1], datatype='fend', arraytype='full', returnmapping=True) unbinned2, map2 = hic2.cis_heatmap(chrom, start=binbounds[0, 0], stop=binbounds[-1, 1], datatype='fend', arraytype='full', returnmapping=True) map1[:, 2] = (map1[:, 0] + map1[:, 1]) map2[:, 2] = (map2[:, 0] + map2[:, 1]) allmap = numpy.vstack((map1, map2)) allmap = allmap[numpy.argsort(allmap[:, 2]), :] indices1 = numpy.searchsorted(allmap[:, 2], map1[:, 2]) indices1_1 = (indices1.reshape(-1, 1) * allmap.shape[0] + indices1.reshape(1, -1)).ravel() indices2 = numpy.searchsorted(allmap[:, 2], map2[:, 2]) indices2_1 = (indices2.reshape(-1, 1) * allmap.shape[0] + indices2.reshape(1, -1)).ravel() unbinned = numpy.zeros((allmap.shape[0], allmap.shape[0], 2), dtype=numpy.float32) unbinned[:, :, 0] += numpy.bincount(indices1_1, minlength=allmap.shape[0] ** 2, weights=unbinned1[:, :, 0].ravel()).reshape(allmap.shape[0], -1) unbinned[:, :, 1] += numpy.bincount(indices1_1, minlength=allmap.shape[0] ** 2, weights=unbinned1[:, :, 1].ravel()).reshape(allmap.shape[0], -1) unbinned[:, :, 0] += numpy.bincount(indices2_1, minlength=allmap.shape[0] ** 2, weights=unbinned2[:, :, 0].ravel()).reshape(allmap.shape[0], -1) unbinned[:, :, 1] += numpy.bincount(indices2_1, minlength=allmap.shape[0] ** 2, weights=unbinned2[:, :, 1].ravel()).reshape(allmap.shape[0], -1) indices = numpy.triu_indices(allmap.shape[0], 1) unbinned = unbinned[indices[0], indices[1], :] binned, binmap = hic1.cis_heatmap(chrom, binbounds=binbounds, datatype='fend', arraytype='full', returnmapping=True) binned += hic2.cis_heatmap(chrom, binbounds=binbounds, datatype='fend', arraytype='full') indices = numpy.triu_indices(binbounds.shape[0], 1) upper = binned[indices[0], indices[1], :] hifive.hic_binning.dynamically_bin_cis_array(unbinned, allmap, upper, binmap, expansion_binsize=0, minobservations=25) binned[indices[0], indices[1], :] = upper binned[indices[1], indices[0], :] = upper return binned
def loss_function2(mapping12):
    """Computes the loss function of a given mapping.
    Using the graph kernel of two sets of distance.
    """
    global dis_1, dis_2
    global kdt_1, kdt_2
    global dm1, dm2, dm1_all, dm2_all
    k = 15
    radius = 100
    similarity = 0.0
    for sid in np.arange(len(pro_1)):
        idx1 = kdt_1.query_radius(dm1[sid], radius)[0]
        #idx1 = kdt_1.query(dm1[sid], k)[1][0]
        dm_small1 = dm1_all[idx1][:, idx1]
        e1 = dm_small1[np.triu_indices(dm_small1.shape[0], 1)]

        idx2 = kdt_2.query_radius(dis_2[mapping12[sid]], radius)[0]
        #idx2 = kdt_2.query(dis_2[mapping12[sid]], k)[1][0]
        dm_small2 = dm2_all[idx2][:, idx2]
        e2 = dm_small2[np.triu_indices(dm_small2.shape[0], 1)]

        #loss = loss + Graph_KN(e1, e2, weight=1., num_bins=128)
        similarity = similarity + Pyramid_KN(e1, e2, weight=1., num_bins=128)
    return 1. / similarity
def sim_matrix_within_group_means(matrix, n1):
    """
    Computes the mean of the upper triangle (k=1) for the blocks
    (0,n-1)*(0,n-1) and (n,2n-1)*(n,2n-1), and their difference
    (for convenience).

    Parameters
    ----------
    matrix : 2D symmetric numpy array
        the n1 first indices in the zeroth axis of the array should
        correspond to the values of the first group; the value of
        ``matrix[i][j]`` should correspond to the similarity between
        elements i and j
    n1 : int
        the number of elements in the first group

    Returns
    -------
    mean1 : float
        the average similarity between members in the first group
    mean2 : float
        the average similarity between members in the second group
    mean1-mean2 : float
        just mean1-mean2 (as a convenience for stat. testing)
    """
    n2 = matrix.shape[0] - n1
    indices1 = np.triu_indices(n1, k=1)
    indices2base = np.triu_indices(n2, k=1)
    indices2I = indices2base[0].copy() + n1
    indices2J = indices2base[1].copy() + n1
    indices2 = (indices2I, indices2J)
    mean1 = np.average(matrix[indices1])
    mean2 = np.average(matrix[indices2])
    return mean1, mean2, mean1 - mean2
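# Hypothetical usage sketch (added): a 4x4 similarity matrix where the first two
# elements form group 1 and the last two form group 2.
import numpy as np

sim = np.array([[1.0, 0.9, 0.1, 0.2],
                [0.9, 1.0, 0.3, 0.1],
                [0.1, 0.3, 1.0, 0.8],
                [0.2, 0.1, 0.8, 1.0]])
m1, m2, diff = sim_matrix_within_group_means(sim, n1=2)
print(m1, m2, diff)   # 0.9, 0.8, ~0.1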
def example_one():
    """
    Generates a set of sample data for the
    examples page of the hetaira web tool.
    """
    np.random.seed(5)
    ids = ['Pr'] + list(ascii_lowercase) + ['Sp']
    # make some data where all activities are the same
    data = np.ones((26, 26))
    # make some random activities to pull from
    y = np.random.uniform(1000, 2500, (26, 26))
    # this will replace the ones with numbers from the uniform
    # distribution, increasing by one at each column
    # using the upper triangular matrix
    data[np.triu_indices(26)] = y[np.triu_indices(26)]
    # stack a perfectly promiscuous and a perfectly (almost)
    # specific column on either side of the data
    data = np.hstack((np.full((26, 1), 1e-10), data, np.ones((26, 1))))
    data[0, 0] = 100
    descriptors = None
    example = Promiscuity(ids, np.fliplr(data), descriptors)
    return example.hetaira_results()
def normalization(self):
    """
    Normalize the equilibrium steady state correlations according to
    Eq 76 in Lorenzo's writeup
    """
    N = self.latsize
    # First disconnect
    self.disconnect(self.steady_state)
    norm_1 = N + np.sum(self.steady_state[2*N:3*N])
    sxxpsyy = self.steady_state[3*N:].reshape(3,3,N,N)[0,0,:,:] + \
        self.steady_state[3*N:].reshape(3,3,N,N)[1,1,:,:]
    sxymsyx = self.steady_state[3*N:].reshape(3,3,N,N)[0,1,:,:] - \
        self.steady_state[3*N:].reshape(3,3,N,N)[1,0,:,:]
    norms = []
    for kvec in self.kvecs:
        argmat = np.zeros((N,N))
        for (m, n) in combinations(np.arange(N), 2):
            argmat[m, n] = kvec.dot(self.atoms[m].coords - self.atoms[n].coords)
        norm_2 = np.sum(
            np.cos(argmat[np.triu_indices(N, k=1)]) *
            sxxpsyy[np.triu_indices(N, k=1)] +
            np.sin(argmat[np.triu_indices(N, k=1)]) *
            sxymsyx[np.triu_indices(N, k=1)])
        norms.append(0.5*(norm_1 + norm_2))
    # Reconnect before exit
    self.reconnect(self.steady_state)
    return np.array(norms).flatten()
def calcH2Continuous(XXT, phe, keepArr, prev, h2coeff):
    t = stats.norm(0,1).isf(prev)
    phit = stats.norm(0,1).pdf(t)

    K1 = 1 - prev
    K2 = 1 - K1

    P = np.sum(phe < t) / float(phe.shape[0])
    P2 = 1.0
    P1 = K2*P2*P / (K1*(1-P))
    R = P2 / P1

    XXT = XXT[np.ix_(keepArr, keepArr)]
    phe = phe[keepArr]

    xCoeff = (((R-1)*phit*t + K1 + R*K2)**2 * (K1+R*K2)**2 - ((R-1)*phit)**4) / (K1 + R*K2)**4
    x = (xCoeff * h2coeff) * XXT

    pheMean = 0
    pheVar = 1
    y = np.outer((phe-pheMean) / np.sqrt(pheVar), (phe-pheMean)/np.sqrt(pheVar))
    y -= ((R-1)*phit / (K1+R*K2))**2

    y = y[np.triu_indices(y.shape[0], 1)]
    x = x[np.triu_indices(x.shape[0], 1)]
    slope, intercept, rValue, pValue, stdErr = stats.linregress(x, y)
    return slope
def __init__(self, endmembers, alphas, energy_interaction,
             volume_interaction=None, entropy_interaction=None):
    self.n_endmembers = len(endmembers)

    # Create array of van Laar parameters
    self.alphas = np.array(alphas)

    # Create 2D arrays of interaction parameters
    self.We = np.triu(2. / (self.alphas[:, np.newaxis] + self.alphas), 1)
    self.We[np.triu_indices(self.n_endmembers, 1)] *= np.array(
        [i for row in energy_interaction for i in row])

    if entropy_interaction is not None:
        self.Ws = np.triu(2. / (self.alphas[:, np.newaxis] + self.alphas), 1)
        self.Ws[np.triu_indices(self.n_endmembers, 1)] *= np.array(
            [i for row in entropy_interaction for i in row])
    else:
        self.Ws = np.zeros((self.n_endmembers, self.n_endmembers))

    if volume_interaction is not None:
        self.Wv = np.triu(2. / (self.alphas[:, np.newaxis] + self.alphas), 1)
        self.Wv[np.triu_indices(self.n_endmembers, 1)] *= np.array(
            [i for row in volume_interaction for i in row])
    else:
        self.Wv = np.zeros((self.n_endmembers, self.n_endmembers))

    # initialize ideal solution model
    IdealSolution.__init__(self, endmembers)
def angles_and_contour_lengths(spline,deriv, min_change_px=0,max_change_px=np.inf): """ gets Cos(Theta(i)) and L(i), where i runs along the spline order given, and L is the contour length between segments chosen at index i Args: spline: tuple of x_spline,y_spline -- x and y values of the line, size N deriv: the continuous derivative of spline, size N <min/max>_change_px: the minimum and maximum pixel changes Returns: tuple of angle_info object, L0_px """ # get the x and y coordinates of the spline x_spline,y_spline = spline x_deriv,y_deriv = deriv deriv_unit_vector = np.array((x_deriv,y_deriv)) deriv_unit_vector /= np.sqrt(np.sum(np.abs(deriv_unit_vector**2),axis=0)) assert ((np.sum(deriv_unit_vector**2,axis=0) -1) < 1e-6).all() , \ "Unit vectors not correct" # POST: unit vector are normalized, |v| = 1 dx_spline = np.array([0] + list(np.diff(x_spline))) dy_spline = np.array([0] + list(np.diff(y_spline))) # d_spline(i) is the change from i-i to i (zero if i=0) d_spline = np.sqrt(dx_spline**2 + dy_spline**2) assert (dx_spline <= d_spline).all() contour_lengths = np.cumsum(d_spline) L0 = contour_lengths[-1] n = x_spline.shape[0] contour_length_matrix = _difference_matrix(contour_lengths,contour_lengths) dx_deriv = deriv_unit_vector[0, :] dy_deriv = deriv_unit_vector[1, :] angle2 = np.arctan2(dy_deriv, dx_deriv) angle_diff_matrix = _difference_matrix(angle2.T, angle2.T) # normalize to 0 to 2*pi where_le_0 = np.where(angle_diff_matrix < 0) angle_diff_matrix[where_le_0] += 2 * np.pi assert ((angle_diff_matrix >= 0) & (angle_diff_matrix <= 2*np.pi)).all() # POST: angles calculated correctly... # only look at the upper triangular part idx_upper_tri = np.triu_indices(n) idx_upper_tri_no_diag =np.triu_indices(n,k=1) # upper diagonal should have >0 contour length assert (contour_length_matrix[idx_upper_tri_no_diag] > 0).all() , \ "Contour lengths should be positive" # POST: contour lengths and angles make sense; we only want upper triangular # (*including* the trivial 0,0 point along the diagonal) contour_length_matrix_check_valid = contour_length_matrix[idx_upper_tri] # POST: matrix is filled in, determine where the value are valid ok_idx = np.where( (contour_length_matrix_check_valid > min_change_px) & (contour_length_matrix_check_valid < max_change_px)) sanit = lambda x: x[idx_upper_tri][ok_idx].flatten() sort_idx = np.argsort(sanit(contour_length_matrix)) sanit_and_sort = lambda x: sanit(x)[sort_idx] # return everything sorted as per sort_idx flat_L = sanit_and_sort(contour_length_matrix) flat_angle = np.arccos(np.cos(sanit_and_sort(angle_diff_matrix))) to_ret = angle_info(theta=flat_angle, L_px=flat_L) return to_ret,L0
def simple_neighbors_1d(n):
    """
    Return connectivity for simple 1D neighbors.
    """
    c = np.zeros((n, n))
    c[np.triu_indices(n, 1)] = 1
    c[np.triu_indices(n, 2)] = 0
    return c
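# Small check (added for illustration): only the first superdiagonal survives,
# so each node is connected to its immediate right-hand neighbour.
import numpy as np

print(simple_neighbors_1d(4))
# [[0. 1. 0. 0.]
#  [0. 0. 1. 0.]
#  [0. 0. 0. 1.]
#  [0. 0. 0. 0.]]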
def coefs2mats(coefs, n=8):
    const = coefs[0]
    jac = coefs[1:n+1]
    hes = np.zeros((n, n))
    hes[np.triu_indices(n)] = hes.T[np.triu_indices(n)] = coefs[n+1:]
    hes[np.diag_indices(n)] *= 2
    return const, jac, hes
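# Worked example (added) with n=2: the coefficient vector is laid out as
# [const, j0, j1, h00, h01, h11]; both triangles get the same values and the
# diagonal is doubled, matching d^2/dx_i^2 of c_ii * x_i^2.
import numpy as np

const, jac, hes = coefs2mats(np.array([1., 2., 3., 4., 5., 6.]), n=2)
print(const)   # 1.0
print(jac)     # [2. 3.]
print(hes)     # [[ 8.  5.]
               #  [ 5. 12.]]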
def from_integral(self, integral):
    Z = integral[0]
    m = integral[1: (self.dim + 1)] / Z
    V = np.zeros((self.dim, self.dim))
    idx = np.triu_indices(self.dim)
    V[idx] = integral[(self.dim + 1):] / Z
    V.T[np.triu_indices(self.dim)] = V[idx]
    V -= np.dot(m.reshape(m.size, 1), m.reshape(1, m.size))
    return Gaussian(m, V, Z=Z)
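# Illustration (added; assumes the integral vector packs the symmetric
# second-moment block as E[x_i x_j] for i <= j in row-major order). This shows
# only the unpacking of that block into a full symmetric matrix.
import numpy as np

dim = 2
second_moments = np.array([2.0, 0.5, 3.0])   # E[x0 x0], E[x0 x1], E[x1 x1]
V = np.zeros((dim, dim))
idx = np.triu_indices(dim)
V[idx] = second_moments
V.T[idx] = V[idx]                            # mirror into the lower triangle
print(V)
# [[2.  0.5]
#  [0.5 3. ]]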
def untri(vec, k=0, fill=0):
    # solution of n (n + 1) / 2 = len(vec); cast to int so the shape is valid
    n = int(round((np.sqrt(1 + 8 * len(vec)) - 1) / 2))
    n += k
    m = np.empty((n, n))
    m.fill(fill)
    m[np.triu_indices(n, k=k)] = vec
    m.T[np.triu_indices(n, k=k)] = vec
    return m
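# Round-trip check (added): pack the upper triangle of a symmetric matrix and
# rebuild it with untri.
import numpy as np

s = np.array([[1., 2., 3.],
              [2., 4., 5.],
              [3., 5., 6.]])
vec = s[np.triu_indices(3)]   # [1, 2, 3, 4, 5, 6]
print(untri(vec))             # recovers s exactly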
def corrComp(dmatA, dmatB, method):
    n = dmatB.shape[0]
    if method == 'pearson':
        rho, p = stats.pearsonr(dmatA[np.triu_indices(n, k=1)],
                                dmatB[np.triu_indices(n, k=1)])
    elif method == 'spearman':
        rho, p = stats.spearmanr(dmatA[np.triu_indices(n, k=1)],
                                 dmatB[np.triu_indices(n, k=1)])
    else:
        raise ValueError('Must specify method as "pearson" or "spearman"')
    return rho
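# Usage sketch (added): correlate the off-diagonal entries of two symmetric
# distance matrices, a simple Mantel-style comparison. Assumes the same
# module-level scipy.stats import that corrComp itself relies on.
import numpy as np

rng = np.random.default_rng(0)
a = rng.random((4, 4))
dmatA = a + a.T
np.fill_diagonal(dmatA, 0)
noise = rng.normal(scale=0.05, size=(4, 4))
dmatB = dmatA + noise + noise.T
print(corrComp(dmatA, dmatB, 'spearman'))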
def __init__(self, n, p, *args, **kwargs):
    self.n = n
    self.p = p
    n_elem = p * (p - 1) // 2
    self.mean = np.zeros(n_elem)
    super(LKJCorr, self).__init__(shape=n_elem, *args, **kwargs)
    self.tri_index = np.zeros([p, p], dtype=int)
    self.tri_index[np.triu_indices(p, k=1)] = np.arange(n_elem)
    self.tri_index[np.triu_indices(p, k=1)[::-1]] = np.arange(n_elem)
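# Illustration (added): for p=3 the lookup table maps each (i, j) pair, in both
# orders, to the position of the corresponding correlation in the flat vector.
import numpy as np

p, n_elem = 3, 3
tri_index = np.zeros([p, p], dtype=int)
tri_index[np.triu_indices(p, k=1)] = np.arange(n_elem)
tri_index[np.triu_indices(p, k=1)[::-1]] = np.arange(n_elem)
print(tri_index)
# [[0 0 1]
#  [0 0 2]
#  [1 2 0]]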
def prior(value, n_pix=n_pix, max_value=K, hyper_params=Lambda[np.triu_indices(n_pix)]):
    """2nd order prior for object maps"""
    #num_pairs = hyper_params.size
    if (np.min(value) < 1) or (np.max(value) > max_value):
        return -np.Inf
    else:
        on_offs = outer_map(value, n_pix, max_value)
        on_offs = on_offs[np.triu_indices(n_pix)].astype('int')
        return pm.bernoulli_like(on_offs, hyper_params.ravel())
def scrape_args(self, records, scale=1, guide_tree=None, niters=10, keep_topology=False): # local lists distances = [] variances = [] headers = [] for rec in records: distances.append(rec.parameters.partitions.distances) variances.append(rec.parameters.partitions.variances) headers.append(rec.get_names()) num_matrices = len(records) label_set = reduce(lambda x, y: x.union(y), (set(l) for l in headers)) labels_len = len(label_set) # labels string can be built straight away labels_string = '{0}\n{1}\n'.format(labels_len, ' '.join(label_set)) # distvar and genome_map need to be built up distvar_list = [str(num_matrices)] genome_map_list = ['{0} {1}'.format(num_matrices, labels_len)] # build up lists to turn into strings for i in range(num_matrices): labels = headers[i] dim = len(labels) dmatrix = np.array(distances[i]) vmatrix = np.array(variances[i]) matrix = np.zeros(dmatrix.shape) matrix[np.triu_indices(len(dmatrix), 1)] = dmatrix[np.triu_indices(len(dmatrix), 1)] matrix[np.tril_indices(len(vmatrix), -1)] = vmatrix[np.tril_indices(len(vmatrix), -1)] if scale: matrix[np.triu_indices(dim, 1)] *= scale matrix[np.tril_indices(dim, -1)] *= scale * scale if isinstance(matrix, np.ndarray): matrix_string = '\n'.join([' '.join(str(x) for x in row) for row in matrix]) + '\n' else: matrix_string = matrix distvar_list.append('{0} {0} {1}\n{2}'.format(dim, i + 1, matrix_string)) genome_map_entry = ' '.join((str(labels.index(lab) + 1) if lab in labels else '-1') for lab in label_set) genome_map_list.append(genome_map_entry) distvar_string = '\n'.join(distvar_list) genome_map_string = '\n'.join(genome_map_list) if guide_tree is None: guide_tree = Tree.new_iterative_rtree(labels_len, names=label_set, rooted=True) tree_string = guide_tree.scale(scale).newick.replace('\'', '') return distvar_string, genome_map_string, labels_string, tree_string, niters, keep_topology
def corrcoef(matrix):
    r = np.corrcoef(matrix)
    rf = r[np.triu_indices(r.shape[0], 1)]
    df = matrix.shape[1] - 2
    ts = rf * rf * (df / (1 - rf * rf))
    pf = betai(0.5 * df, 0.5, df / (df + ts))
    p = np.zeros(shape=r.shape)
    p[np.triu_indices(p.shape[0], 1)] = pf
    # mirror through the transpose so p stays symmetric; assigning pf directly
    # to np.tril_indices would scramble the pair ordering for matrices larger than 3x3
    p.T[np.triu_indices(p.shape[0], 1)] = pf
    p[np.diag_indices(p.shape[0])] = np.ones(p.shape[0])
    return r, p
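# Usage sketch (added). `betai` is assumed to be the regularized incomplete beta
# function, e.g. scipy.special.betainc on modern SciPy.
import numpy as np
from scipy.special import betainc as betai

rng = np.random.default_rng(1)
data = rng.normal(size=(4, 50))   # 4 variables, 50 observations
r, p = corrcoef(data)
print(np.allclose(p, p.T))        # True: the p-value matrix is symmetric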
def __init__(self, n, p, s=2, structure=(), *args, **kwargs):
    self.n = n
    self.p = p
    self.s = s
    n_elem = structure[0]  # int(p * (p - 1) / 2)
    self.mean = np.zeros((s, n_elem))
    super(LKJCorr_mult_2_structure, self).__init__(shape=(s, n_elem), *args, **kwargs)
    self.tri_index = np.zeros([p, p], dtype=int)
    self.tri_index[np.triu_indices(p, k=1)] = structure[1]
    self.tri_index[np.triu_indices(p, k=1)[::-1]] = structure[1]
def _get_lvec(label_vals, pivots, scales, derivs): """ Constructs a label vector for an arbitrary number of labels Assumes that our model is quadratic in the labels Comment: this is really slow, but we will only have to compute it once! Parameters ---------- label_vals: numpy ndarray, shape (nstars, nlabels) labels pivots: numpy ndarray, shape (nlabels, ) offset we subtract from the label_vals scales: numpy ndarray, shape (nlabels, ) scale we divide out of the label_vals derivs: return also the derivatives of the vector wrt the labels Returns ------- lvec: numpy ndarray label vector dlvec_dl: numpy ndarray (if derivs) label vector derivatives Notes -------- lvec_derivs and lvec is now in units of the scaled labels! """ if len(label_vals.shape) == 1: label_vals = np.array([label_vals]) nlabels = label_vals.shape[1] nstars = label_vals.shape[0] # specialized to second-order model linear_offsets = (label_vals - pivots[None, :]) / scales[None, :] quadratic_offsets = np.array([np.outer(m, m)[np.triu_indices(nlabels)] for m in (linear_offsets)]) ones = np.ones((nstars, 1)) lvec = np.hstack((ones, linear_offsets, quadratic_offsets)) if not derivs: return lvec ones_derivs = np.zeros((nstars, 1, nlabels)) linear_derivs = np.zeros((nstars, nlabels, nlabels)) for i in range(nstars): linear_derivs[i] = np.eye(nlabels) quadratic_derivs = np.zeros((nstars, len(quadratic_offsets[1]), nlabels)) for n in range(nstars): for k in range(nlabels): foo = np.zeros((nlabels, nlabels)) foo[k, :] = linear_offsets[n] foo[:, k] = linear_offsets[n] quadratic_derivs[n, :, k] = np.array(foo[np.triu_indices(nlabels)]) lvec_derivs = np.hstack((ones_derivs, linear_derivs, quadratic_derivs)) return lvec, lvec_derivs
def linkage_tree( X, connectivity=None, n_clusters=None, linkage="complete", affinity="euclidean", return_distance=False, ): """Linkage agglomerative clustering based on a Feature matrix. The inertia matrix uses a Heapq-based representation. This is the structured version, that takes into account some topological structure between samples. Read more in the :ref:`User Guide <hierarchical_clustering>`. Parameters ---------- X : array-like of shape (n_samples, n_features) Feature matrix representing `n_samples` samples to be clustered. connectivity : sparse matrix, default=None Connectivity matrix. Defines for each sample the neighboring samples following a given structure of the data. The matrix is assumed to be symmetric and only the upper triangular half is used. Default is `None`, i.e, the Ward algorithm is unstructured. n_clusters : int, default=None Stop early the construction of the tree at `n_clusters`. This is useful to decrease computation time if the number of clusters is not small compared to the number of samples. In this case, the complete tree is not computed, thus the 'children' output is of limited use, and the 'parents' output should rather be used. This option is valid only when specifying a connectivity matrix. linkage : {"average", "complete", "single"}, default="complete" Which linkage criteria to use. The linkage criterion determines which distance to use between sets of observation. - "average" uses the average of the distances of each observation of the two sets. - "complete" or maximum linkage uses the maximum distances between all observations of the two sets. - "single" uses the minimum of the distances between all observations of the two sets. affinity : str or callable, default='euclidean' Which metric to use. Can be 'euclidean', 'manhattan', or any distance known to paired distance (see metric.pairwise). return_distance : bool, default=False Whether or not to return the distances between the clusters. Returns ------- children : ndarray of shape (n_nodes-1, 2) The children of each non-leaf node. Values less than `n_samples` correspond to leaves of the tree which are the original samples. A node `i` greater than or equal to `n_samples` is a non-leaf node and has children `children_[i - n_samples]`. Alternatively at the i-th iteration, children[i][0] and children[i][1] are merged to form node `n_samples + i`. n_connected_components : int The number of connected components in the graph. n_leaves : int The number of leaves in the tree. parents : ndarray of shape (n_nodes, ) or None The parent of each node. Only returned when a connectivity matrix is specified, elsewhere 'None' is returned. distances : ndarray of shape (n_nodes-1,) Returned when `return_distance` is set to `True`. distances[i] refers to the distance between children[i][0] and children[i][1] when they are merged. See Also -------- ward_tree : Hierarchical clustering with ward linkage. 
""" X = np.asarray(X) if X.ndim == 1: X = np.reshape(X, (-1, 1)) n_samples, n_features = X.shape linkage_choices = { "complete": _hierarchical.max_merge, "average": _hierarchical.average_merge, "single": None, } # Single linkage is handled differently try: join_func = linkage_choices[linkage] except KeyError as e: raise ValueError( "Unknown linkage option, linkage should be one of %s, but %s was given" % (linkage_choices.keys(), linkage) ) from e if affinity == "cosine" and np.any(~np.any(X, axis=1)): raise ValueError("Cosine affinity cannot be used when X contains zero vectors") if connectivity is None: from scipy.cluster import hierarchy # imports PIL if n_clusters is not None: warnings.warn( "Partial build of the tree is implemented " "only for structured clustering (i.e. with " "explicit connectivity). The algorithm " "will build the full tree and only " "retain the lower branches required " "for the specified number of clusters", stacklevel=2, ) if affinity == "precomputed": # for the linkage function of hierarchy to work on precomputed # data, provide as first argument an ndarray of the shape returned # by sklearn.metrics.pairwise_distances. if X.shape[0] != X.shape[1]: raise ValueError( f"Distance matrix should be square, got matrix of shape {X.shape}" ) i, j = np.triu_indices(X.shape[0], k=1) X = X[i, j] elif affinity == "l2": # Translate to something understood by scipy affinity = "euclidean" elif affinity in ("l1", "manhattan"): affinity = "cityblock" elif callable(affinity): X = affinity(X) i, j = np.triu_indices(X.shape[0], k=1) X = X[i, j] if ( linkage == "single" and affinity != "precomputed" and not callable(affinity) and affinity in METRIC_MAPPING ): # We need the fast cythonized metric from neighbors dist_metric = DistanceMetric.get_metric(affinity) # The Cython routines used require contiguous arrays X = np.ascontiguousarray(X, dtype=np.double) mst = _hierarchical.mst_linkage_core(X, dist_metric) # Sort edges of the min_spanning_tree by weight mst = mst[np.argsort(mst.T[2], kind="mergesort"), :] # Convert edge list into standard hierarchical clustering format out = _hierarchical.single_linkage_label(mst) else: out = hierarchy.linkage(X, method=linkage, metric=affinity) children_ = out[:, :2].astype(int, copy=False) if return_distance: distances = out[:, 2] return children_, 1, n_samples, None, distances return children_, 1, n_samples, None connectivity, n_connected_components = _fix_connectivity( X, connectivity, affinity=affinity ) connectivity = connectivity.tocoo() # Put the diagonal to zero diag_mask = connectivity.row != connectivity.col connectivity.row = connectivity.row[diag_mask] connectivity.col = connectivity.col[diag_mask] connectivity.data = connectivity.data[diag_mask] del diag_mask if affinity == "precomputed": distances = X[connectivity.row, connectivity.col].astype(np.float64, copy=False) else: # FIXME We compute all the distances, while we could have only computed # the "interesting" distances distances = paired_distances( X[connectivity.row], X[connectivity.col], metric=affinity ) connectivity.data = distances if n_clusters is None: n_nodes = 2 * n_samples - 1 else: assert n_clusters <= n_samples n_nodes = 2 * n_samples - n_clusters if linkage == "single": return _single_linkage_tree( connectivity, n_samples, n_nodes, n_clusters, n_connected_components, return_distance, ) if return_distance: distances = np.empty(n_nodes - n_samples) # create inertia heap and connection matrix A = np.empty(n_nodes, dtype=object) inertia = list() # LIL seems to the best 
format to access the rows quickly, # without the numpy overhead of slicing CSR indices and data. connectivity = connectivity.tolil() # We are storing the graph in a list of IntFloatDict for ind, (data, row) in enumerate(zip(connectivity.data, connectivity.rows)): A[ind] = IntFloatDict( np.asarray(row, dtype=np.intp), np.asarray(data, dtype=np.float64) ) # We keep only the upper triangular for the heap # Generator expressions are faster than arrays on the following inertia.extend( _hierarchical.WeightedEdge(d, ind, r) for r, d in zip(row, data) if r < ind ) del connectivity heapify(inertia) # prepare the main fields parent = np.arange(n_nodes, dtype=np.intp) used_node = np.ones(n_nodes, dtype=np.intp) children = [] # recursive merge loop for k in range(n_samples, n_nodes): # identify the merge while True: edge = heappop(inertia) if used_node[edge.a] and used_node[edge.b]: break i = edge.a j = edge.b if return_distance: # store distances distances[k - n_samples] = edge.weight parent[i] = parent[j] = k children.append((i, j)) # Keep track of the number of elements per cluster n_i = used_node[i] n_j = used_node[j] used_node[k] = n_i + n_j used_node[i] = used_node[j] = False # update the structure matrix A and the inertia matrix # a clever 'min', or 'max' operation between A[i] and A[j] coord_col = join_func(A[i], A[j], used_node, n_i, n_j) for col, d in coord_col: A[col].append(k, d) # Here we use the information from coord_col (containing the # distances) to update the heap heappush(inertia, _hierarchical.WeightedEdge(d, k, col)) A[k] = coord_col # Clear A[i] and A[j] to save memory A[i] = A[j] = 0 # Separate leaves in children (empty lists up to now) n_leaves = n_samples # # return numpy array for efficient caching children = np.array(children)[:, ::-1] if return_distance: return children, n_connected_components, n_leaves, parent, distances return children, n_connected_components, n_leaves, parent
def build_eom_matrices(self, excitations_list, q_commutators, w_commutators, m_commutators, v_commutators, available_entry, wave_fn, quantum_instance=None): """Compute M, V, Q and W matrices. Args: excitations_list (list): single excitations list + double excitation list q_commutators (dict): key: a string of matrix indices; value: the commutators for Q matrix w_commutators (dict): key: a string of matrix indices; value: the commutators for W matrix m_commutators (dict): key: a string of matrix indices; value: the commutators for M matrix v_commutators (dict): key: a string of matrix indices; value: the commutators for V matrix available_entry (int): number of entries in the matrix wave_fn (QuantumCircuit or numpy.ndarray): the circuit generated wave function for the ground state energy quantum_instance (QuantumInstance): a quantum instance with configured settings Returns: numpy.ndarray: M matrix numpy.ndarray: V matrix numpy.ndarray: Q matrix numpy.ndarray: W matrix Raises: AquaError: wrong setting for wave_fn and quantum_instance """ if isinstance(wave_fn, QuantumCircuit) and quantum_instance is None: raise AquaError( "quantum_instance is required when wavn_fn is a QuantumCircuit." ) size = len(excitations_list) logger.info('EoM matrix size is {}x{}.'.format(size, size)) # get all to-be-processed index if self._is_eom_matrix_symmetric: mus, nus = np.triu_indices(size) else: mus, nus = np.indices((size, size)) mus = np.asarray(mus.flat) nus = np.asarray(nus.flat) m_mat = np.zeros((size, size), dtype=complex) v_mat = np.zeros((size, size), dtype=complex) q_mat = np.zeros((size, size), dtype=complex) w_mat = np.zeros((size, size), dtype=complex) m_mat_std, v_mat_std, q_mat_std, w_mat_std = 0, 0, 0, 0 if quantum_instance is not None: circuit_names = [] circuits = [] for idx in range(len(mus)): mu = mus[idx] nu = nus[idx] for op in [ q_commutators[mu][nu], w_commutators[mu][nu], m_commutators[mu][nu], v_commutators[mu][nu] ]: if op is not None and not op.is_empty(): curr_circuits = op.construct_evaluation_circuit( wave_function=wave_fn, statevector_mode=quantum_instance.is_statevector) for c in curr_circuits: if c.name not in circuit_names: circuits.append(c) circuit_names.append(c.name) result = quantum_instance.execute(circuits) # evaluate results for idx in range(len(mus)): mu = mus[idx] nu = nus[idx] def _get_result(op): mean, std = 0.0, 0.0 if op is not None and not op.is_empty(): mean, std = op.evaluate_with_result( result=result, statevector_mode=quantum_instance.is_statevector) return mean, std q_mean, q_std = _get_result(q_commutators[mu][nu]) w_mean, w_std = _get_result(w_commutators[mu][nu]) m_mean, m_std = _get_result(m_commutators[mu][nu]) v_mean, v_std = _get_result(v_commutators[mu][nu]) q_mat[mu][nu] = q_mean if q_mean != 0.0 else q_mat[mu][nu] w_mat[mu][nu] = w_mean if w_mean != 0.0 else w_mat[mu][nu] m_mat[mu][nu] = m_mean if m_mean != 0.0 else m_mat[mu][nu] v_mat[mu][nu] = v_mean if v_mean != 0.0 else v_mat[mu][nu] q_mat_std += q_std w_mat_std += w_std m_mat_std += m_std v_mat_std += v_std else: for idx in range(len(mus)): mu = mus[idx] nu = nus[idx] q_mean, q_std = q_commutators[mu][nu].evaluate_with_statevector(wave_fn) \ if q_commutators[mu][nu] is not None else (0.0, 0.0) w_mean, w_std = w_commutators[mu][nu].evaluate_with_statevector(wave_fn) \ if w_commutators[mu][nu] is not None else (0.0, 0.0) m_mean, m_std = m_commutators[mu][nu].evaluate_with_statevector(wave_fn) \ if m_commutators[mu][nu] is not None else (0.0, 0.0) v_mean, v_std = 
v_commutators[mu][nu].evaluate_with_statevector(wave_fn) \ if v_commutators[mu][nu] is not None else (0.0, 0.0) q_mat[mu][nu] = q_mean if q_mean != 0.0 else q_mat[mu][nu] w_mat[mu][nu] = w_mean if w_mean != 0.0 else w_mat[mu][nu] m_mat[mu][nu] = m_mean if m_mean != 0.0 else m_mat[mu][nu] v_mat[mu][nu] = v_mean if v_mean != 0.0 else v_mat[mu][nu] if self._is_eom_matrix_symmetric: q_mat = q_mat + q_mat.T - np.identity(q_mat.shape[0]) * q_mat w_mat = w_mat + w_mat.T - np.identity(w_mat.shape[0]) * w_mat m_mat = m_mat + m_mat.T - np.identity(m_mat.shape[0]) * m_mat v_mat = v_mat + v_mat.T - np.identity(v_mat.shape[0]) * v_mat q_mat = np.real(q_mat) w_mat = np.real(w_mat) m_mat = np.real(m_mat) v_mat = np.real(v_mat) q_mat_std = q_mat_std / float(available_entry) w_mat_std = w_mat_std / float(available_entry) m_mat_std = m_mat_std / float(available_entry) v_mat_std = v_mat_std / float(available_entry) logger.debug("\nQ:=========================\n{}".format(q_mat)) logger.debug("\nW:=========================\n{}".format(w_mat)) logger.debug("\nM:=========================\n{}".format(m_mat)) logger.debug("\nV:=========================\n{}".format(v_mat)) return m_mat, v_mat, q_mat, w_mat, m_mat_std, v_mat_std, q_mat_std, w_mat_std
init_vel_disp = 3
perc_error = 0.001

xycorr = 0.0
xzcorr = 0.0
yzcorr = 0.0

corrs = [xycorr, xzcorr, yzcorr]
stdevs = [
    init_pos_disp, init_pos_disp, init_pos_disp,
    init_vel_disp, init_vel_disp, init_vel_disp
]

cov = np.eye(6)
cov[np.tril_indices(3, -1)] = corrs
cov[np.triu_indices(3, 1)] = corrs

for i in range(3):
    cov[:3, i] *= stdevs[:3]
    cov[i, :3] *= stdevs[:3]
for i in range(3, 6):
    cov[3:6, i] *= stdevs[3:]
    cov[i, 3:6] *= stdevs[3:]

print(cov)

np.random.seed(0)
nstars = 30

# generate initial stars from an arbitrary covariance matrix
aparc = mne.read_labels_from_annot(subject, subjects_dir=subjects_dir,
                                   parc='aparc')

# nodes in one hemisphere can be plotted as well
aparc_lh = [lab for lab in aparc if lab.hemi == 'lh']

coords = []

# plot 10 nodes from left hemisphere only for better viz
for lab in aparc_lh[:10]:
    if lab.name == 'unknown-lh':
        continue
    # get the center of mass
    com = lab.center_of_mass('fsaverage')
    # obtain mni coordinates of the vertex from left hemi
    coords_ = mne.vertex_to_mni(com, hemis=0, subject=subject,
                                subjects_dir=subjects_dir)[0]
    coords.append(coords_)

n_nodes = np.array(coords).shape[0]

# make a random connectivity matrix
con = np.random.random((n_nodes, n_nodes))
con[np.diag_indices(5)] = 0.
con[np.triu_indices(5, k=1)] = 0.
con += con.T
con[con < 0.6] = 0.

# plot the connectome on a glass brain background
plotting.plot_connectome(con, coords)
plt.show()
switch = nib.load('./Neurosynth/switching_pAgF_z.nii')  # anti rt
switch = np.array(switch.dataobj)

switch2 = nib.load('./Neurosynth/switch_pAgF_z.nii')  # anti rt
affine = switch2.affine
switch2 = np.array(switch2.dataobj)

nogo = nib.load('./Neurosynth/nogo_pAgF_z.nii')  # anti(?)
nogo = np.array(nogo.dataobj)

#############################################
# LOADING CORRELATION MATRIX FROM OUR TASKS #
#############################################

model = np.loadtxt('task6_corr.csv', delimiter=',')
model = model[np.triu_indices(6, k=1)]  # extracting the upper triangle
#print(model)
model = np.reshape(model, (model.shape[0], 1))
model = pd.DataFrame(model)  # transform it into DataFrame type to correlate

# Initialize an array to add up the correlations
brain_corr = np.zeros((91, 109, 91))
# Initialize an array to count how many times a region was scoped for correlations
brain_count = np.zeros((91, 109, 91))

######################
#      SETTINGS      #
######################

# Steps taken per iteration
stride = 1
def _triu_indices(n):
    rows, cols = np.triu_indices(n)
    return rows * n + cols
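# Illustration (added): the returned values are flat (raveled) indices into an
# n*n matrix, i.e. the positions of the upper triangle in a flattened array.
import numpy as np

n = 3
print(_triu_indices(n))                                     # [0 1 2 4 5 8]
print(np.arange(n * n).reshape(n, n)[np.triu_indices(n)])   # same values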
def plot_scatter(self, plot_filename, plot_title='', image_format=None, log1p=False, xRange=None, yRange=None): """ Plot the scatter plots of a matrix in which each row is a sample """ num_samples = self.matrix.shape[1] corr_matrix = self.compute_correlation() grids = gridspec.GridSpec(num_samples, num_samples) grids.update(wspace=0, hspace=0) fig = plt.figure(figsize=(2 * num_samples, 2 * num_samples)) plt.rcParams['font.size'] = 8.0 plt.suptitle(plot_title) if log1p is True: self.matrix = np.log1p(self.matrix) min_xvalue = self.matrix.min() max_xvalue = self.matrix.max() min_yvalue = min_xvalue max_yvalue = max_xvalue if xRange is not None: min_xvalue = xRange[0] max_xvalue = xRange[1] if yRange is not None: min_yvalue = yRange[0] max_yvalue = yRange[1] if (min_xvalue % 2 == 0 and max_xvalue % 2 == 0) or \ (min_xvalue % 1 == 0 and max_xvalue % 2 == 1): # make one value odd and the other even max_xvalue += 1 if (min_yvalue % 2 == 0 and max_yvalue % 2 == 0) or \ (min_yvalue % 1 == 0 and max_yvalue % 2 == 1): # make one value odd and the other even max_yvalue += 1 # plotly output if image_format == 'plotly': self.plotly_scatter(plot_filename, corr_matrix, plot_title=plot_title, minXVal=min_xvalue, maxXVal=max_xvalue, minYVal=min_yvalue, maxYVal=max_yvalue) return rows, cols = np.triu_indices(num_samples) for index in range(len(rows)): row = rows[index] col = cols[index] if row == col: # add titles as # empty plot in the diagonal ax = fig.add_subplot(grids[row, col]) ax.text(0.5, 0.5, self.labels[row], verticalalignment='center', horizontalalignment='center', fontsize=10, fontweight='bold', transform=ax.transAxes) ax.set_axis_off() continue ax = fig.add_subplot(grids[row, col]) vector1 = self.matrix[:, row] vector2 = self.matrix[:, col] ax.text(0.2, 0.8, "{}={:.2f}".format(self.corr_method, corr_matrix[row, col]), horizontalalignment='left', transform=ax.transAxes) ax.get_yaxis().set_tick_params(which='both', left='off', right='off', direction='out') ax.get_xaxis().set_tick_params(which='both', top='off', bottom='off', direction='out') for tick in ax.xaxis.get_major_ticks(): tick.label.set_rotation('45') if col != num_samples - 1: ax.set_yticklabels([]) else: ax.yaxis.tick_right() ax.get_yaxis().set_tick_params(which='both', left='off', right='on', direction='out') if col - row == 1: ax.xaxis.tick_bottom() ax.get_xaxis().set_tick_params(which='both', top='off', bottom='on', direction='out') for tick in ax.xaxis.get_major_ticks(): tick.label.set_rotation('45') else: ax.set_xticklabels([]) ax.hist2d(vector1, vector2, bins=200, cmin=0.1) if xRange is not None: ax.set_xlim(xRange) else: ax.set_xlim(min_xvalue, ax.get_xlim()[1]) if yRange is not None: ax.set_ylim(min_yvalue, min(yRange, ax.get_ylim()[1])) else: ax.set_ylim(min_yvalue, ax.get_ylim()[1]) plt.savefig(plot_filename, format=image_format) plt.close()
ax1.scatter(pos[:, 0], pos[:, 1], c=noisy_signal[0].tolist(), cmap=plt.cm.jet)
ax2.scatter(pos[:, 0], pos[:, 1], c=gl_denoised_signal[0].tolist(), cmap=plt.cm.jet)
ax1.axis('off')
ax2.axis('off')
ax1.set_title('Noisy Signal')
ax2.set_title('GL Signal')
plt.tight_layout()
plt.show()

test_item = np.random.normal(size=dimension)
true_payoff = np.dot(true_user_features, test_item)
gl_payoff = np.dot(gl_user_f, test_item)

pos = true_user_features
graph = create_networkx_graph(user_num, true_adj)
edge_color = true_adj[np.triu_indices(user_num, 1)]
plt.figure(figsize=(5, 5))
nodes = nx.draw_networkx_nodes(graph, pos, node_color=true_payoff, node_size=100, cmap=plt.cm.jet)
edges = nx.draw_networkx_edges(graph, pos, width=1.0, alpha=0.1, edge_color='grey')
plt.axis('off')
plt.title('True Graph', fontsize=12)
plt.show()

pos = true_user_features
graph = create_networkx_graph(user_num, gl_adj)
edge_color = gl_adj[np.triu_indices(user_num, 1)]
plt.figure(figsize=(5, 5))
nodes = nx.draw_networkx_nodes(graph, pos, node_color=gl_payoff, node_size=100, cmap=plt.cm.jet)
edges = nx.draw_networkx_edges(graph, pos, width=1.0, alpha=0.1, edge_color='grey')
plt.axis('off')
def complexity_fd_higushi(signal, k_max): """ Computes Higuchi Fractal Dimension of a signal. Based on the `pyrem <https://github.com/gilestrolab/pyrem>`_ repo by Quentin Geissmann. Parameters ---------- signal : list or array List or array of values. k_max : int The maximal value of k. The point at which the FD plateaus is considered a saturation point and that kmax value should be selected (Gómez, 2009). Some studies use a value of 8 or 16 for ECG signal and other 48 for MEG. Returns ---------- fd_higushi : float The Higushi Fractal Dimension as float value. Example ---------- >>> import neurokit as nk >>> >>> signal = np.sin(np.log(np.random.sample(666))) >>> fd_higushi = nk.complexity_fd_higushi(signal, 8) Notes ---------- *Details* - **Higushi Fractal Dimension**: Higuchi proposed in 1988 an efficient algorithm for measuring the FD of discrete time sequences. As the reconstruction of the attractor phase space is not necessary, this algorithm is simpler and faster than D2 and other classical measures derived from chaos theory. FD can be used to quantify the complexity and self-similarity of a signal. HFD has already been used to analyse the complexity of brain recordings and other biological signals. *Authors* - Quentin Geissmann (https://github.com/qgeissmann) *Dependencies* - numpy *See Also* - pyrem package: https://github.com/gilestrolab/pyrem References ----------- - Accardo, A., Affinito, M., Carrozzi, M., & Bouquet, F. (1997). Use of the fractal dimension for the analysis of electroencephalographic time series. Biological cybernetics, 77(5), 339-350. - Gómez, C., Mediavilla, Á., Hornero, R., Abásolo, D., & Fernández, A. (2009). Use of the Higuchi's fractal dimension for the analysis of MEG recordings from Alzheimer's disease patients. Medical engineering & physics, 31(3), 306-313. """ signal = np.array(signal) L = [] x = [] N = signal.size km_idxs = np.triu_indices(k_max - 1) km_idxs = k_max - np.flipud(np.column_stack(km_idxs)) -1 km_idxs[:,1] -= 1 for k in range(1, k_max): Lk = 0 for m in range(0, k): #we pregenerate all idxs idxs = np.arange(1,int(np.floor((N-m)/k))) Lmk = np.sum(np.abs(signal[m+idxs*k] - signal[m+k*(idxs-1)])) Lmk = (Lmk*(N - 1)/(((N - m)/ k)* k)) / k Lk += Lmk if Lk != 0: L.append(np.log(Lk/(m+1))) x.append([np.log(1.0/ k), 1]) (p, r1, r2, s)=np.linalg.lstsq(x, L) fd_higushi = p[0] return (fd_higushi)
def cluster_compartments(cf, k, chrlist, eig_dim=None, contact_thr=1, max_sample_size=50000, outlier_pctl=90, corr_outlier_pctl=[5, 95], balance_corr_median=False, coeffs=None, coeffs_gw=None, seed=None, max_resampling_attempts=10, rearrange_clusters=False, use_ice=False, algorithm='eigh-kmeans', outdir='.', out_allchr='clusters_all.txt'): if algorithm not in ['eigh-gmix', 'eigh-kmeans', 'spec-kmeans']: print "error: algorithm must be either 'eigh-gmix', 'eigh-kmeans' or 'spec-kmeans'" return print "[intrachromosomal_clusters] k={}, outdir={}, algorithm={}".format( k, outdir, algorithm) if not use_ice: if coeffs is None and coeffs_gw is None: print 'computing normalization coeffs (local masked OE)...' coeffs = oe_coeffs_mask(cf, cf.chromnames) elif coeffs is None and coeffs_gw is not None: print 'using user-provided global OE coeffs' else: print 'using ICE balancing coeffs from cooler file' if eig_dim == None: eig_dim = k clusters = {} sample_idx = {} clusters_idx = {} for chr in chrlist: if os.path.isfile('{}/clusters_{}.txt'.format(outdir, chr)): print "Warning: {} clusters ({}/clusters_{}.txt) already exist. Skipping chromosome.".format( chr, outdir, chr) continue print "[{}] balancing matrix...".format(chr) if not use_ice: m = cf.matrix(balance=False).fetch(chr) # Threshold contacts m[np.where(m < contact_thr)] = 0 if coeffs_gw is not None: m_oe = oe_apply(m, coeffs_gw).toarray() else: m_oe = oe_apply(m, coeffs[chr]).toarray() else: m_oe = cf.matrix(balance=True).fetch(chr) # Get idx of high quality regions (measured in raw matrix). samp_idx = matrix_mask_idx(m_oe) sample_idx[chr] = samp_idx print "[{}] removing low-quality regions (matrix rows: {}, sample rows: {})...".format( chr, m.shape[0], samp_idx.shape[0]) # High-quality matrix size l = len(samp_idx) ssize = min(l, max_sample_size) # Sample iteration (keep sampling while clustering fails). np.random.seed(seed) successful = False cnt = 0 while not successful and cnt < max_resampling_attempts: cnt += 1 # Get sample if ssize < l: s = np.sort(np.random.choice(samp_idx, ssize, replace=False)) else: s = np.array(samp_idx) m_samp = m_oe[s, :][:, s] # Relax outliers m_max = np.percentile(m_samp[np.where(m_samp > 0)], outlier_pctl) m_samp[np.where(m_samp > m_max)] = m_max if (~m_samp.any(axis=1)).any(): print "[{}] sample contains empty rows (singular matrix). 
resampling ({})...".format( chr, cnt) continue # Remove diagonals before correlation (DISABLED) ''' if pre_corr_diags > 0: m_cor = np.corrcoef(np.triu(m_samp,pre_corr_diags) + np.tril(m_samp,-pre_corr_diags)) else: m_cor = np.corrcoef(m_samp) # Remove diagonals after correlation if corr_diags > 1: m_cor = np.triu(m_cor,corr_diags) + np.tril(m_cor,-corr_diags) else: np.fill_diagonal(m_cor,0) ''' # Compute correlation and remove diagonal print "[{}] computing correlation matrix and balancing...".format( chr) m_cor = np.corrcoef(m_samp) np.fill_diagonal(m_cor, 0) # Increase correlation contrast (5-95 percentiles by default) if balance_corr_median: m_cor = m_cor - np.median(m_cor[np.triu_indices(ssize, 1)]) min_cor_val = np.percentile(m_cor[np.triu_indices(ssize, 1)], corr_outlier_pctl[0]) max_cor_val = np.percentile(m_cor[np.triu_indices(ssize, 1)], corr_outlier_pctl[1]) m_cor[np.where(m_cor < min_cor_val)] = min_cor_val m_cor[np.where(m_cor > max_cor_val)] = max_cor_val N = m_cor.shape[0] eig_dim = min(N, eig_dim) try: print "[{}] computing clusters, algorithm {}...".format( chr, algorithm) if algorithm == 'spec-kmeans': # some chromosomes crash when using precomputed similarity matrices. # however using RBF seems to give meaningful clustering. spect_clu = SpectralClustering(n_clusters=k, eigen_solver='arpack', affinity='precomputed', assign_labels='kmeans', n_jobs=8) hic_clust = spect_clu.fit_predict(m_cor) else: print "[{}] computing eigh...".format(chr) w, v = scipy.linalg.eigh(m_cor, eigvals=(N - eig_dim, N - 1)) if algorithm == 'eigh-gmix': # Cluster eigenvectors using Gaussian Mixture gmix = mixture.GaussianMixture(n_components=k, covariance_type='full', tol=1e-4, max_iter=1000) gmix.fit(v) hic_clust = gmix.predict(v) elif algorithm == 'eigh-kmeans': # Cluster eigenvalue/eigenvector products with kmeans. print "[{}] computing clusters (k-means)...".format( chr) km = KMeans(n_clusters=k, n_jobs=8) weig = w * v hic_clust = km.fit_predict(weig) # Write weighted eigenvectors with open('{}/clusters_{}.weig'.format(outdir, chr), 'w') as outdata: for i in xrange(0, len(hic_clust)): outdata.write( str(sample_idx[chr][i]) + '\t' + str(hic_clust[i]) + '\t' + '\t'.join([str(x) for x in weig[i][::-1]]) + '\n') except Exception, e: print "[{}] error while clustering: {}".format( chr, cnt, str(e)) cnt = max_resampling_attempts break successful = True if cnt >= max_resampling_attempts: print "[{}] max reampling attempts reached, skipping chromosome.".format( chr) continue # Rearrange clusters for visualization # Make cluster index list clu_idx = [list() for _ in xrange(k)] for i, c in enumerate(hic_clust): clu_idx[c].append(i) if not rearrange_clusters: # Map again to matrix indices clusters_idx[chr] = [sample_idx[chr][x] for x in clu_idx] else: print "[{}] rearranging clusters by similarity...".format(chr) for i in xrange(k): clu_idx[i] = np.array(clu_idx[i]) clusters[chr] = list() # Find most distant blocks l_r = (0, 0) val = np.inf d_sum = np.zeros((k, k)) for i in xrange(k): l_i = len(clu_idx[i]) for j in xrange(i + 1, k): l_j = len(clu_idx[j]) d_sum[i, j] = np.sum(m_cor[clu_idx[i], :][:, clu_idx[j]]) d = float(d_sum[i, j]) / (l_i * l_j) if d < val: l_r = (i, j) val = d # Pop left and right blocks (important to do it in this order for index consistency). r_idx = clu_idx.pop(l_r[1]) l_idx = clu_idx.pop(l_r[0]) r_clusters = [ r_idx.copy(), ] l_clusters = [ l_idx.copy(), ] iters = len(clu_idx) / 2 + len(clu_idx) % 2 for i in xrange(iters): # Find nearest blocks to L/R. 
len_l = len(l_idx) len_r = len(r_idx) min_d = np.inf max_d = -np.inf min_idx = 0 max_idx = 0 for i in xrange(len(clu_idx)): len_block = len(clu_idx[i]) d_l = float(np.sum(m_cor[l_idx, :][:, clu_idx[i]])) / ( len_l * len_block) - val d_r = float(np.sum(m_cor[r_idx, :][:, clu_idx[i]])) / ( len_r * len_block) - val r = d_l / d_r if r < min_d: min_idx = i min_d = r if r >= max_d: max_idx = i max_d = r # Pop from idx and add to L/R. if min_idx > max_idx: r_clusters.append(clu_idx[min_idx].copy()) l_clusters.append(clu_idx[max_idx].copy()) r_idx = np.append(clu_idx.pop(min_idx), r_idx) l_idx = np.append(l_idx, clu_idx.pop(max_idx)) elif min_idx < max_idx: r_clusters.append(clu_idx[min_idx].copy()) l_clusters.append(clu_idx[max_idx].copy()) l_idx = np.append(l_idx, clu_idx.pop(max_idx)) r_idx = np.append(clu_idx.pop(min_idx), r_idx) else: l_clusters.append(clu_idx[max_idx].copy()) l_idx = np.append(l_idx, clu_idx.pop(max_idx)) # Make final index list. clu_idx = np.append(l_idx, r_idx) # Make final cluster index list. clusters[chr] = l_clusters + list(reversed(r_clusters)) # Map again to matrix indices clusters_idx[chr] = [sample_idx[chr][x] for x in clusters[chr]] # Store in disk print "[{}] writing clusters to {}/clusters_{}.txt...".format( chr, outdir, chr) fout = open('{}/clusters_{}.txt'.format(outdir, chr), 'w+') for c in clusters_idx[chr]: fout.write("{}\t".format(chr)) fout.write(','.join([str(i) for i in c])) fout.write('\n') fout.close() fall = open('{}/{}'.format(outdir, out_allchr), "a") for c in clusters_idx[chr]: fall.write("{}\t".format(chr)) fall.write(','.join([str(i) for i in c])) fall.write('\n') fall.close() '''
def _from_rdkit(cls, mol, rdkit_config): if rdkit_config.set_hydrogen_explicit: mol = Chem.AddHs(mol) g = nx.Graph() # For single heavy-atom molecules, such as water, methane and metalic ion. # A ghost atom is created and bond to it, because there must be at least # two nodes and one edge in graph kernel. if mol.GetNumBonds() == 0: for i, atom in enumerate(mol.GetAtoms()): assert (atom.GetIdx() == i) g.add_node(i) rdkit_config.set_node(g.nodes[i], atom, mol) if mol.GetNumAtoms() == 1: ij = (0, 0) g.add_edge(*ij) rdkit_config.set_ghost_edge(g.edges[ij]) else: I, J = np.triu_indices(mol.GetNumAtoms(), k=1) for i in range(len(I)): ij = (I[i], J[i]) g.add_edge(*ij) rdkit_config.set_ghost_edge(g.edges[ij]) else: for i, atom in enumerate(mol.GetAtoms()): assert (atom.GetIdx() == i) g.add_node(i) rdkit_config.set_node(g.nodes[i], atom, mol) for bond in mol.GetBonds(): ij = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()) g.add_edge(*ij) rdkit_config.set_edge(g.edges[ij], bond) # set ring stereo if rdkit_config.set_ring_stereo: bond_orientation_dict = get_bond_orientation_dict(mol) for ring_idx in mol.GetRingInfo().AtomRings(): atom_updown = [] for idx in ring_idx: if g.nodes[idx]['Ring_count'] != 1: atom_updown.append(0) else: atom = mol.GetAtomWithIdx(idx) atom_updown.append( get_atom_ring_stereo( mol, atom, ring_idx, depth=rdkit_config.depth, bond_orientation_dict=bond_orientation_dict)) atom_updown = np.array(atom_updown) for j in range(len(ring_idx)): b = j e = j + 1 if j != len(ring_idx) - 1 else 0 StereoOfRingBond = float(atom_updown[b] * atom_updown[e] * len(ring_idx)) if ring_idx[b] < ring_idx[e]: ij = (ring_idx[b], ring_idx[e]) else: ij = (ring_idx[e], ring_idx[b]) if g.edges[ij]['RingStereo'] != 0.: raise Exception(ij, g.edges[ij]['RingStereo'], StereoOfRingBond) else: g.edges[ij]['RingStereo'] = StereoOfRingBond # rdkit_config.set_node_propogation(g, mol, 'Chiral', depth=1) rdkit_config.set_node_propogation(g, mol, 'AtomicNumber', depth=5, sum=False, usehash=False) rdkit_config.set_node_propogation(g, mol, 'Hcount', depth=1, sum=True, usehash=False) # rdkit_config.set_node_propogation(g, mol, 'FirstNeighbors', depth=4) # rdkit_config.set_node_propogation(g, mol, 'Aromatic', depth=4) return _from_networkx(cls, g)
def linkage_tree(X, connectivity=None, n_clusters=None, linkage='complete', affinity="euclidean", return_distance=False): """Linkage agglomerative clustering based on a Feature matrix. The inertia matrix uses a Heapq-based representation. This is the structured version, that takes into account some topological structure between samples. Read more in the :ref:`User Guide <hierarchical_clustering>`. Parameters ---------- X : array, shape (n_samples, n_features) feature matrix representing n_samples samples to be clustered connectivity : sparse matrix (optional). connectivity matrix. Defines for each sample the neighboring samples following a given structure of the data. The matrix is assumed to be symmetric and only the upper triangular half is used. Default is None, i.e, the Ward algorithm is unstructured. n_clusters : int (optional) Stop early the construction of the tree at n_clusters. This is useful to decrease computation time if the number of clusters is not small compared to the number of samples. In this case, the complete tree is not computed, thus the 'children' output is of limited use, and the 'parents' output should rather be used. This option is valid only when specifying a connectivity matrix. linkage : {"average", "complete", "single"}, optional, default: "complete" Which linkage criteria to use. The linkage criterion determines which distance to use between sets of observation. - average uses the average of the distances of each observation of the two sets - complete or maximum linkage uses the maximum distances between all observations of the two sets. - single uses the minimum of the distances between all observations of the two sets. affinity : string or callable, optional, default: "euclidean". which metric to use. Can be "euclidean", "manhattan", or any distance know to paired distance (see metric.pairwise) return_distance : bool, default False whether or not to return the distances between the clusters. Returns ------- children : 2D array, shape (n_nodes-1, 2) The children of each non-leaf node. Values less than `n_samples` correspond to leaves of the tree which are the original samples. A node `i` greater than or equal to `n_samples` is a non-leaf node and has children `children_[i - n_samples]`. Alternatively at the i-th iteration, children[i][0] and children[i][1] are merged to form node `n_samples + i` n_connected_components : int The number of connected components in the graph. n_leaves : int The number of leaves in the tree. parents : 1D array, shape (n_nodes, ) or None The parent of each node. Only returned when a connectivity matrix is specified, elsewhere 'None' is returned. distances : ndarray, shape (n_nodes-1,) Returned when return_distance is set to True. distances[i] refers to the distance between children[i][0] and children[i][1] when they are merged. See also -------- ward_tree : hierarchical clustering with ward linkage """ X = np.asarray(X) if X.ndim == 1: X = np.reshape(X, (-1, 1)) n_samples, n_features = X.shape linkage_choices = { 'complete': _hierarchical.max_merge, 'average': _hierarchical.average_merge, 'single': None } # Single linkage is handled differently try: join_func = linkage_choices[linkage] except KeyError: raise ValueError('Unknown linkage option, linkage should be one ' 'of %s, but %s was given' % (linkage_choices.keys(), linkage)) if connectivity is None: from scipy.cluster import hierarchy # imports PIL if n_clusters is not None: warnings.warn( 'Partial build of the tree is implemented ' 'only for structured clustering (i.e. 
with ' 'explicit connectivity). The algorithm ' 'will build the full tree and only ' 'retain the lower branches required ' 'for the specified number of clusters', stacklevel=2) if affinity == 'precomputed': # for the linkage function of hierarchy to work on precomputed # data, provide as first argument an ndarray of the shape returned # by pdist: it is a flat array containing the upper triangular of # the distance matrix. i, j = np.triu_indices(X.shape[0], k=1) X = X[i, j] elif affinity == 'l2': # Translate to something understood by scipy affinity = 'euclidean' elif affinity in ('l1', 'manhattan'): affinity = 'cityblock' elif callable(affinity): X = affinity(X) i, j = np.triu_indices(X.shape[0], k=1) X = X[i, j] out = hierarchy.linkage(X, method=linkage, metric=affinity) children_ = out[:, :2].astype(np.int, copy=False) if return_distance: distances = out[:, 2] return children_, 1, n_samples, None, distances return children_, 1, n_samples, None connectivity, n_connected_components = _fix_connectivity(X, connectivity, affinity=affinity) connectivity = connectivity.tocoo() # Put the diagonal to zero diag_mask = (connectivity.row != connectivity.col) connectivity.row = connectivity.row[diag_mask] connectivity.col = connectivity.col[diag_mask] connectivity.data = connectivity.data[diag_mask] del diag_mask if affinity == 'precomputed': distances = X[connectivity.row, connectivity.col].astype('float64', **_astype_copy_false(X)) else: # FIXME We compute all the distances, while we could have only computed # the "interesting" distances distances = paired_distances(X[connectivity.row], X[connectivity.col], metric=affinity) connectivity.data = distances if n_clusters is None: n_nodes = 2 * n_samples - 1 else: assert n_clusters <= n_samples n_nodes = 2 * n_samples - n_clusters if linkage == 'single': return _single_linkage_tree(connectivity, n_samples, n_nodes, n_clusters, n_connected_components, return_distance) if return_distance: distances = np.empty(n_nodes - n_samples) # create inertia heap and connection matrix A = np.empty(n_nodes, dtype=object) inertia = list() # LIL seems to the best format to access the rows quickly, # without the numpy overhead of slicing CSR indices and data. 
connectivity = connectivity.tolil() # We are storing the graph in a list of IntFloatDict for ind, (data, row) in enumerate(zip(connectivity.data, connectivity.rows)): A[ind] = IntFloatDict(np.asarray(row, dtype=np.intp), np.asarray(data, dtype=np.float64)) # We keep only the upper triangular for the heap # Generator expressions are faster than arrays on the following inertia.extend( _hierarchical.WeightedEdge(d, ind, r) for r, d in zip(row, data) if r < ind) del connectivity heapify(inertia) # prepare the main fields parent = np.arange(n_nodes, dtype=np.intp) used_node = np.ones(n_nodes, dtype=np.intp) children = [] # recursive merge loop for k in range(n_samples, n_nodes): # identify the merge while True: edge = heappop(inertia) if used_node[edge.a] and used_node[edge.b]: break i = edge.a j = edge.b if return_distance: # store distances distances[k - n_samples] = edge.weight parent[i] = parent[j] = k children.append((i, j)) # Keep track of the number of elements per cluster n_i = used_node[i] n_j = used_node[j] used_node[k] = n_i + n_j used_node[i] = used_node[j] = False # update the structure matrix A and the inertia matrix # a clever 'min', or 'max' operation between A[i] and A[j] coord_col = join_func(A[i], A[j], used_node, n_i, n_j) for l, d in coord_col: A[l].append(k, d) # Here we use the information from coord_col (containing the # distances) to update the heap heappush(inertia, _hierarchical.WeightedEdge(d, k, l)) A[k] = coord_col # Clear A[i] and A[j] to save memory A[i] = A[j] = 0 # Separate leaves in children (empty lists up to now) n_leaves = n_samples # # return numpy array for efficient caching children = np.array(children)[:, ::-1] if return_distance: return children, n_connected_components, n_leaves, parent, distances return children, n_connected_components, n_leaves, parent
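# --- Hedged side example (not part of linkage_tree; toy data made up here) ---
# The unstructured/precomputed branch above relies on the condensed-distance
# idiom: scipy's `hierarchy.linkage` expects the flat upper triangle of a
# square distance matrix, which is exactly what `np.triu_indices(n, k=1)`
# selects.
import numpy as np
from scipy.cluster import hierarchy
from scipy.spatial.distance import squareform

pts = np.random.default_rng(0).normal(size=(6, 2))
D = np.sqrt(((pts[:, None, :] - pts[None, :, :]) ** 2).sum(-1))  # square, symmetric

i, j = np.triu_indices(D.shape[0], k=1)
condensed = D[i, j]                                   # pdist-style flat layout
assert np.allclose(condensed, squareform(D, checks=False))

Z = hierarchy.linkage(condensed, method='complete')   # (n-1, 4) merge table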
def distance_matrix(s, max_dist=None, max_length_diff=None, window=None, max_step=None, penalty=None, psi=None, block=None, parallel=False, use_c=False, use_nogil=False, show_progress=False): """Distance matrix for all sequences in s. :param s: Iterable of series :param window: see :meth:`distance` :param max_dist: see :meth:`distance` :param max_step: see :meth:`distance` :param max_length_diff: see :meth:`distance` :param penalty: see :meth:`distance` :param psi: see :meth:`distance` :param block: Only compute block in matrix. Expects tuple with begin and end, e.g. ((0,10),(20,25)) will only compare rows 0:10 with rows 20:25. :param parallel: Use parallel operations :param use_c: Use c compiled Python functions (it is recommended to use use_nogil) :param use_nogil: Use pure c functions :param show_progress: Show progress using the tqdm library """ if parallel and (not use_c or not use_nogil): try: import multiprocessing as mp logger.info('Using multiprocessing') except ImportError: parallel = False mp = None else: mp = None dist_opts = { 'max_dist': max_dist, 'max_step': max_step, 'window': window, 'max_length_diff': max_length_diff, 'penalty': penalty, 'psi': psi } s = SeriesContainer.wrap(s) dists = None if max_length_diff is None: max_length_diff = np.inf large_value = np.inf logger.info('Computing distances') if use_c: for k, v in dist_opts.items(): if v is None: dist_opts[k] = 0.0 if use_c and use_nogil: logger.info("Compute distances in pure C") dist_opts['block'] = block if parallel: logger.info("Use parallel computation") dists = dtw_c.distance_matrix_nogil_p(s, **dist_opts) else: logger.info("Use serial computation") dists = dtw_c.distance_matrix_nogil(s, **dist_opts) if use_c and not use_nogil: logger.info("Compute distances in Python compiled C") if parallel: logger.info("Use parallel computation") dists = np.zeros((len(s), len(s))) + large_value if block is None: idxs = np.triu_indices(len(s), k=1) else: idxsl_r = [] idxsl_c = [] for r in range(block[0][0], block[0][1]): for c in range(max(r + 1, block[1][0]), min(len(s), block[1][1])): idxsl_r.append(r) idxsl_c.append(c) idxs = (np.array(idxsl_r), np.array(idxsl_c)) with mp.Pool() as p: dists[idxs] = p.map(_distance_c_with_params, [(s[r], s[c], dist_opts) for c, r in zip(*idxs)]) # pbar = tqdm(total=int((len(s)*(len(s)-1)/2))) # for r in range(len(s)): # dists[r,r+1:len(s)] = p.map(distance, [(s[r],s[c], dist_opts) for c in range(r+1,len(cur))]) # pbar.update(len(s) - r - 1) # pbar.close() else: logger.info("Use serial computation") dist_opts['block'] = block dists = dtw_c.distance_matrix(s, **dist_opts) if not use_c: logger.info("Compute distances in Python") if parallel: logger.info("Use parallel computation") dists = np.zeros((len(s), len(s))) + large_value if block is None: idxs = np.triu_indices(len(s), k=1) else: idxsl_r = [] idxsl_c = [] for r in range(block[0][0], block[0][1]): for c in range(max(r + 1, block[1][0]), min(len(s), block[1][1])): idxsl_r.append(r) idxsl_c.append(c) idxs = (np.array(idxsl_r), np.array(idxsl_c)) with mp.Pool() as p: dists[idxs] = p.map(_distance_with_params, [(s[r], s[c], dist_opts) for c, r in zip(*idxs)]) # pbar = tqdm(total=int((len(s)*(len(s)-1)/2))) # for r in range(len(s)): # dists[r,r+1:len(s)] = p.map(distance, [(s[r],s[c], dist_opts) for c in range(r+1,len(cur))]) # pbar.update(len(s) - r - 1) # pbar.close() else: logger.info("Use serial computation") dists = np.zeros((len(s), len(s))) + large_value if block is None: it_r = range(len(s)) else: it_r = range(block[0][0], 
block[0][1]) if show_progress: it_r = tqdm(it_r) for r in it_r: if block is None: it_c = range(r + 1, len(s)) else: it_c = range(max(r + 1, block[1][0]), min(len(s), block[1][1])) for c in it_c: if abs(len(s[r]) - len(s[c])) <= max_length_diff: dists[r, c] = distance(s[r], s[c], **dist_opts) return dists
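# --- Hedged side example (standalone, toy values) ---
# Index bookkeeping used in distance_matrix above: with block=None every
# unordered pair of series is enumerated once via the strict upper triangle;
# with a block ((rb, re), (cb, ce)) only rows rb:re are compared against
# columns cb:ce, still keeping c > r so no pair is computed twice.
import numpy as np

n = 5
full_idxs = np.triu_indices(n, k=1)                  # 10 pairs for 5 series

block = ((0, 2), (2, 5))
rows, cols = [], []
for r in range(block[0][0], block[0][1]):
    for c in range(max(r + 1, block[1][0]), min(n, block[1][1])):
        rows.append(r)
        cols.append(c)
block_idxs = (np.array(rows), np.array(cols))        # pairs (0,2..4) and (1,2..4)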
def Fst_predict(vector_lib,m_coeff,b,n_comp= 5,pop_max= 8,Iter= 20,bias_range= [20,300],Eigen= False, Scale= False,Centre= True,ploidy= 1): ### Select pre and post processing measures. length_haps= vector_lib.shape[1] print('length haps: {}, N iterations: {}, range pops: {}'.format(length_haps,Iter,pop_max)) #### Predict predicted= [] #def controled_fsts(vector_lib,Eigen,length_haps,Scale,Center,N_pops,n_comp,Iter,N_sims,MixL,MixP,Pairs): lengths_vector= [] ### store distances between centroids biased_pairwise= [] ### store PC projection: dist_PC_corrected= {x:[] for x in range(n_comp)} ### store fsts fst_store= [] ### proceed. for rep in range(Iter): N_pops= np.random.choice(range(3,pop_max),1,replace= False)[0] ## Population Sizes and labels bias_scheme= np.random.choice(range(bias_range[0],bias_range[1]),N_pops,replace= False) bias_labels= np.repeat(np.array([x for x in range(N_pops)]),bias_scheme) ### triangular matrices extract. iu1= np.triu_indices(N_pops,1) # for centroid comparison iu_bias= np.triu_indices(sum(bias_scheme),1) iu_control= np.triu_indices(2,1) Pops= np.random.choice(vector_lib.shape[0],N_pops,replace= False) #print('Iter: {}, vectors selected: {}, hap length: {}'.format(rep,Pops,length_haps)) ########## FST freqs_selected= vector_lib[Pops,:length_haps] Pairwise= Ste.return_fsts2(freqs_selected) #fsts_compare = scale(Pairwise.fst) fsts_compare= Pairwise.fst fst_store.extend(fsts_compare) ## lengths lengths_vector.extend([length_haps] * len(fsts_compare)) #### generate data and perform PCA data= [] for k in range(N_pops): probs= vector_lib[Pops[k],:] m= bias_scheme[k] Haps= [[np.random.choice([ploidy,0],p= [1-probs[x],probs[x]]) for x in range(length_haps)] for acc in range(m)] data.extend(Haps) data2= np.array(data) if Scale: data2= scale(data2) pca = PCA(n_components=n_comp, whiten=False,svd_solver='randomized').fit(data2) feat_bias= pca.transform(data2) if Eigen: feat_bias= feat_bias * pca.explained_variance_ratio_ #### Centroid distances bias_centroids= [np.mean(feat_bias[[y for y in range(feat_bias.shape[0]) if bias_labels[y] == z],:],axis= 0) for z in range(N_pops)] bias_centroids= np.array(bias_centroids) bias_pair_dist= pairwise_distances(bias_centroids,metric= 'euclidean') bias_pair_dist= bias_pair_dist[iu1] #bias_pair_dist= scale(bias_pair_dist) fst_pred= [np.exp(m_coeff*np.log(x) + b) for x in bias_pair_dist] predicted.extend(fst_pred) fig= [go.Scatter( x= fst_store, y= predicted, mode= 'markers' )] layout = go.Layout( title= 'test of prediction', yaxis=dict( title='predicted Fst'), xaxis=dict( title='observed Fst') ) fig= go.Figure(data=fig, layout=layout) iplot(fig)
def disthist_match(self, calls, pos): """ """ coll = [] for _ in range(calls): bigb_data_idxs, bigb_data_classes = self.spc_batchfinder( self.bigbs) bigb_dict = {} for i, bigb_cls in enumerate(bigb_data_classes): if bigb_cls not in bigb_dict: bigb_dict[bigb_cls] = [] bigb_dict[bigb_cls].append(i) bigbatch = self.storage[bigb_data_idxs] if self.low_proj_dim > 0: low_dim_proj = nn.Linear(bigbatch.shape[-1], self.low_proj_dim, bias=False) with torch.no_grad(): bigbatch = low_dim_proj(bigbatch) bigbatch = bigbatch.numpy() bigb_distmat_triu_idxs = np.triu_indices(len(bigbatch), 1) bigb_distvals = self.get_distmat(bigbatch)[bigb_distmat_triu_idxs] bigb_disthist_range, bigb_disthist_bins = ( np.min(bigb_distvals), np.max(bigb_distvals)), 50 bigb_disthist, _ = np.histogram(bigb_distvals, bins=bigb_disthist_bins, range=bigb_disthist_range) bigb_disthist = bigb_disthist / np.sum(bigb_disthist) bigb_mu = np.mean(bigbatch, axis=0) bigb_std = np.std(bigbatch, axis=0) cost_collect, bigb_idxs = [], [] for _ in range(self.num_batch_comps): subset_idxs = [ np.random.choice(bigb_dict[np.random.choice( list(bigb_dict.keys()))], self.samples_per_class, replace=False) for _ in range(self.batch_size // self.samples_per_class) ] subset_idxs = [x for y in subset_idxs for x in y] # subset_idxs = sorted(np.random.choice(len(bigbatch), batch_size, replace=False)) bigb_idxs.append(subset_idxs) subset = bigbatch[subset_idxs, :] subset_distmat = self.get_distmat(subset) subset_distmat_triu_idxs = np.triu_indices( len(subset_distmat), 1) subset_distvals = self.get_distmat( subset)[subset_distmat_triu_idxs] subset_disthist_range, subset_disthist_bins = ( np.min(subset_distvals), np.max(subset_distvals)), 50 subset_disthist, _ = np.histogram(subset_distvals, bins=bigb_disthist_bins, range=bigb_disthist_range) subset_disthist = subset_disthist / np.sum(subset_disthist) subset_mu = np.mean(subset, axis=0) subset_std = np.std(subset, axis=0) dist_wd = wasserstein_distance( bigb_disthist, subset_disthist) + wasserstein_distance( subset_disthist, bigb_disthist) cost = np.linalg.norm(bigb_mu - subset_mu) + np.linalg.norm( bigb_std - subset_std) + 75 * dist_wd cost_collect.append(cost) bigb_ix = bigb_idxs[np.argmin(cost_collect)] bigb_data_ix = bigb_data_idxs[bigb_ix] coll.append(bigb_data_ix) return coll
def classification(Train_Graphs, Test_Graphs, Train_Labels, Test_Labels,
                   FS_strategy=None, K=None):
    """
    Classify (regular) unimodal connectomes and get performance scores
    (i.e., accuracy, sensitivity, specificity)

    Parameters:
    ----------
    Train_Graphs : 3-D array with shape (~ (Fold-1)*N_m/Fold, n_m, n_m)
    Test_Graphs : 3-D array with shape (~ N_m/Fold, n_m, n_m)
    Train_Labels : 1-D label array with length ~ (Fold-1)*N_m/Fold
    Test_Labels : 1-D label array with length ~ N_m/Fold
    FS_strategy : "SNF", "Averaging" or None (default),
        Feature selection method to calculate the representative graphs of
        each class that are used to identify the most discriminative
        connectomic features.
        If "SNF", the representative graphs are created with a graph fusion
        process (for more information, see "Similarity Network Fusion").
        If "Averaging", the representative graphs are created by simply
        averaging the graphs of each class.
        If None, no feature selection is applied and all connectomic features
        of the graphs are used in classification (all upper off-diagonal
        elements of the graphs).
    K : int or None (default),
        Number of most discriminative features, set by the user, to be used
        in the later classification.
        If "FS_strategy" is "SNF" or "Averaging", "K" should be a positive
        integer no larger than n*(n-1)/2.
        If "FS_strategy" is None, then K should be None too.

    Return:
    -------
    out : array of 3 performance metrics (i.e., accuracy, sensitivity,
        specificity)
    """
    if Train_Graphs.shape[1:] == Test_Graphs.shape[1:]:
        n = Train_Graphs.shape[1]
    else:
        raise ValueError(
            'Shapes of connectomes in "Train_Graphs" and "Test_Graphs" are different'
        )

    if FS_strategy in ['SNF', 'Averaging']:
        if K is None:
            raise ValueError('Provide a proper K for feature selection (FS)')
        elif 2 * K > n * (n - 1):
            raise ValueError('K is too large for the current graph size (n)')
    elif (FS_strategy is None) and (K is not None):
        raise ValueError(
            'K can only be used with a feature selection (FS) strategy')
    elif FS_strategy is not None:
        raise ValueError('Invalid "FS_strategy", use "SNF", "Averaging" or None')

    if FS_strategy is not None:
        print(f'\n\nFS: {FS_strategy}')
        indices = determine_features_to_select(Train_Graphs, Train_Labels,
                                               FS_strategy=FS_strategy, K=K)
        TR_for_SVM = np.array(
            [graph[indices[:, 0], indices[:, 1]] for graph in Train_Graphs])
        TST_for_SVM = np.array(
            [graph[indices[:, 0], indices[:, 1]] for graph in Test_Graphs])
    else:
        TR_for_SVM = np.array(
            [graph[np.triu_indices(n, 1)] for graph in Train_Graphs])
        TST_for_SVM = np.array(
            [graph[np.triu_indices(n, 1)] for graph in Test_Graphs])

    Test_Labels_Pred = predict_test_labels(TR_for_SVM, TST_for_SVM, Train_Labels)
    Scores = calculate_scores(Test_Labels, Test_Labels_Pred)

    return Scores  # Acc, Sens, Spec
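# --- Hedged side example (synthetic data) ---
# The FS_strategy=None branch above reduces each symmetric connectome to its
# n*(n-1)/2 upper off-diagonal entries, one feature row per subject.
import numpy as np

rng = np.random.default_rng(0)
n = 4
graphs = rng.random((3, n, n))
graphs = (graphs + graphs.transpose(0, 2, 1)) / 2    # force symmetry

features = np.array([g[np.triu_indices(n, 1)] for g in graphs])
assert features.shape == (3, n * (n - 1) // 2)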
#highlight = np.zeros(X_full.shape[0]) #highlight[sorted(dense_idx)] = 1 # #adata = AnnData(X=X_full) #adata.obs['highlight'] = highlight #sc.pp.neighbors(adata) #sc.tl.umap(adata) #sc.pl.scatter( # adata, color='highlight', basis='umap', # save='_{}_highlight_dense_all.png'.format(NAMESPACE) #) #exit() n_features = Xs[0].shape[0] n_correlations = int(comb(n_features, 2) + n_features) triu_idx = np.triu_indices(n_features) print(len(nonzero_idx)) nonzero_tup = ([ ni[0] for ni in sorted(nonzero_idx) ], [ ni[1] for ni in sorted(nonzero_idx) ]) Xs_dimred = [ X[nonzero_tup].A.flatten() for X in Xs ] #analyze_dense(Xs, Xs_dimred, sparsities, node_sizes) #srp = SparseRandomProjection( # eps=0.1, random_state=69 #).fit(ss.csr_matrix((len(Xs), n_correlations)))
def triu_indices(*args, **kwargs): return tuple(map(torch.from_numpy, _np.triu_indices(*args, **kwargs)))
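# --- Hedged usage sketch for the wrapper above (torch assumed available) ---
# It keeps numpy's signature but returns torch index tensors, so the result
# can be used directly for fancy indexing on tensors.
import torch

rows, cols = triu_indices(4, k=1)        # two torch.int64 tensors
m = torch.arange(16.).reshape(4, 4)
upper = m[rows, cols]                    # tensor([ 1.,  2.,  3.,  6.,  7., 11.])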
def setUp(self): # Let's consider a 100x100x10 km grid with an homogeneous velocity Vp=6 # km/s self.Vp = 6 # Seismic stations are all at z=0km, and placed every 2 km in x and y # directions, from 0 to 10 km positive z axis upwards self.sta = {} self.sta[1] = {'x': 0, 'y': 0, 'depth': 0, 'elev': 0, 'station': 1} self.sta[2] = {'x': 40, 'y': 0, 'depth': 0, 'elev': 0, 'station': 2} self.sta[3] = {'x': 80, 'y': 0, 'depth': 0, 'elev': 0, 'station': 3} self.sta[4] = {'x': 20, 'y': 20, 'depth': 0, 'elev': 0, 'station': 4} self.sta[5] = {'x': 60, 'y': 20, 'depth': 0, 'elev': 0, 'station': 5} self.sta[6] = {'x': 100, 'y': 20, 'depth': 0, 'elev': 0, 'station': 6} self.sta[7] = {'x': 0, 'y': 40, 'depth': 0, 'elev': 0, 'station': 7} self.sta[8] = {'x': 40, 'y': 40, 'depth': 0, 'elev': 0, 'station': 8} self.sta[9] = {'x': 80, 'y': 40, 'depth': 0, 'elev': 0, 'station': 9} self.sta[10] = {'x': 20, 'y': 60, 'depth': 0, 'elev': 0, 'station': 10} self.sta[11] = {'x': 60, 'y': 60, 'depth': 0, 'elev': 0, 'station': 11} self.sta[12] = { 'x': 100, 'y': 60, 'depth': 0, 'elev': 0, 'station': 12 } self.sta[13] = {'x': 0, 'y': 80, 'depth': 0, 'elev': 0, 'station': 13} self.sta[14] = {'x': 40, 'y': 80, 'depth': 0, 'elev': 0, 'station': 14} self.sta[15] = {'x': 80, 'y': 80, 'depth': 0, 'elev': 0, 'station': 15} self.sta[16] = { 'x': 20, 'y': 100, 'depth': 0, 'elev': 0, 'station': 16 } self.sta[17] = { 'x': 60, 'y': 100, 'depth': 0, 'elev': 0, 'station': 17 } self.sta[18] = { 'x': 100, 'y': 100, 'depth': 0, 'elev': 0, 'station': 18 } self.area = [0, 100, 0, 100, -10, 0] # Let's assume 5 seismic events occurring at the same place # (x=50,y=50,z=-5) but not at the same time self.cluster = [1, 2, 3, 4, 5] self.N = len(self.cluster) # Define true hypocentral parameters # positive z axis downwards self.locs_true = [] self.locs_true.append({ 'x_mean': 50.2, 'y_mean': 49.7, 'z_mean': 4.5, 'o_time': utcdatetime.UTCDateTime('2010-01-01T12: 00: 00.0000Z') }) self.locs_true.append({ 'x_mean': 50.3, 'y_mean': 49.9, 'z_mean': 4.75, 'o_time': utcdatetime.UTCDateTime('2010-01-01T12: 01: 00.0000Z') }) self.locs_true.append({ 'x_mean': 49.8, 'y_mean': 50.1, 'z_mean': 5.25, 'o_time': utcdatetime.UTCDateTime('2010-01-01T12: 02: 00.0000Z') }) self.locs_true.append({ 'x_mean': 49.7, 'y_mean': 50.4, 'z_mean': 5.5, 'o_time': utcdatetime.UTCDateTime('2010-01-01T12: 03: 00.0000Z') }) self.locs_true.append({ 'x_mean': 50.0, 'y_mean': 49.9, 'z_mean': 5, 'o_time': utcdatetime.UTCDateTime('2010-01-01T12: 04: 00.0000Z') }) centroid_x_true = np.mean([loc['x_mean'] for loc in self.locs_true]) centroid_y_true = np.mean([loc['y_mean'] for loc in self.locs_true]) centroid_z_true = np.mean([loc['z_mean'] for loc in self.locs_true]) # Measured hypocentral parameters # positive z-axis downwards err_x = [0, 0, 0, 0, 0] err_y = [0, 0, 0, 0, 0] err_z = [0, 0, 0, 0, 0] err_to = [0, 0, 0, 0, 0] err_x = [0.2, 0.3, -0.2, -0.3, 0] err_y = [-0.3, -0.1, 0.1, 0.4, -0.1] err_z = [-0.5, -0.25, 0.25, 0.5, 0] err_to = [2, 4, -2, 1, -4] self.locs_mes = [] for i in range(len(self.locs_true)): self.locs_mes.append({ 'x_mean': self.locs_true[i]['x_mean'] + err_x[i], 'y_mean': self.locs_true[i]['y_mean'] + err_y[i], 'z_mean': self.locs_true[i]['z_mean'] + err_z[i], 'o_time': self.locs_true[i]['o_time'] + err_to[i] }) centroid_x_mes = np.mean([loc['x_mean'] for loc in self.locs_mes]) centroid_y_mes = np.mean([loc['y_mean'] for loc in self.locs_mes]) centroid_z_mes = np.mean([loc['z_mean'] for loc in self.locs_mes]) # Input parameters self.threshold = 0.8 self.nbmin = 3 # 
Compute the traveltimes and arrival times self.ttimes_true = {} self.atimes_true = {} self.ttimes_mes = {} self.atimes_mes = {} for staname in self.sta.keys(): xsta = self.sta[staname]['x'] ysta = self.sta[staname]['y'] zsta = -self.sta[staname]['elev'] # positive z-axis downwards self.ttimes_true[staname] = [] self.atimes_true[staname] = [] self.ttimes_mes[staname] = [] self.atimes_mes[staname] = [] for j in range(self.N): d_true = np.sqrt((xsta - self.locs_true[j]['x_mean'])**2 + (ysta - self.locs_true[j]['y_mean'])**2 + (zsta - self.locs_true[j]['z_mean'])**2) self.ttimes_true[staname].append(d_true / self.Vp) self.atimes_true[staname].append(self.locs_true[j]['o_time'] + self.ttimes_true[staname][j]) d_mes = np.sqrt((xsta - self.locs_mes[j]['x_mean'])**2 + (ysta - self.locs_mes[j]['y_mean'])**2 + (zsta - self.locs_mes[j]['z_mean'])**2) self.ttimes_mes[staname].append(d_mes / self.Vp) self.atimes_mes[staname].append(self.locs_mes[j]['o_time'] + self.ttimes_mes[staname][j]) self.coeff = {} self.delay = {} for staname in self.sta.keys(): self.coeff[staname] = np.zeros((self.N, self.N)) up_tr = np.triu_indices(self.N) self.coeff[staname][up_tr] = 1 self.delay[staname] = np.zeros((self.N, self.N)) for i in range(self.N): for j in range(i + 1, self.N): self.delay[staname][i][j] = \ self.ttimes_true[staname][i] - \ self.ttimes_true[staname][j] + err_to[j]-err_to[i] self.locs_expected = [] for i in range(len(self.locs_true)): self.locs_expected.append({ 'x_mean': self.locs_true[i]['x_mean'] + (centroid_x_mes - centroid_x_true), 'y_mean': self.locs_true[i]['y_mean'] + (centroid_y_mes - centroid_y_true), 'z_mean': self.locs_true[i]['z_mean'] + (centroid_z_mes - centroid_z_true), 'o_time': self.locs_true[i]['o_time'] + np.mean(err_to) })
def stat_cb(self, stat_msg): data_t = stat_msg.data.copy().reshape(self.datagram_size, 1) # Rotate AUV trajectory to place wrt odom in the image data_t[2:5] = self.m2o_mat[0:3, 0:3].dot(data_t[2:5]) data_t[5:8] = self.m2o_mat[0:3, 0:3].dot(data_t[5:8]) data_t[8:11] = self.m2o_mat[0:3, 0:3].dot(data_t[8:11]) # Reconstruct 3x3 covariance matrix # Not account for z values atm cov_mat = np.zeros((3, 3)) cov_mat[np.triu_indices(3, 0)] = np.asarray(data_t[11:17]).reshape(1, 6) cov_mat[1, 0] = cov_mat[0, 1] cov_mat = (self.m2o_mat[0:3, 0:3].transpose().dot(cov_mat)).dot( self.m2o_mat[0:3, 0:3]) data_t[11:17] = cov_mat[np.triu_indices(3)].reshape(6, 1) self.cov_traces.append(np.trace(cov_mat)) self.filt_vec = np.hstack((self.filt_vec, data_t)) self.filter_cnt += 1 if self.filter_cnt > 0: plt.gcf().canvas.mpl_connect( 'key_release_event', lambda event: [exit(0) if event.key == 'escape' else None]) # Plot x,y from GT, odom and PF if False: plt.cla() # Center image on odom frame plt.imshow(self.img, extent=[ -647 - self.m2o_mat[0, 3], 1081 - self.m2o_mat[0, 3], -1190 - self.m2o_mat[1, 3], 523 - self.m2o_mat[1, 3] ]) # plt.imshow(self.img, extent=[-740, 980, -690, 1023]) plt.plot(self.filt_vec[2, :], self.filt_vec[3, :], "-k") plt.plot(self.filt_vec[5, :], self.filt_vec[6, :], "-b") plt.plot(self.filt_vec[8, :], self.filt_vec[9, :], "-r") self.plot_covariance_ellipse(self.filt_vec[5:7, -1], self.filt_vec[11:17, -1]) # Plot error between DR PF and GT if False: plt.subplot(3, 1, 1) plt.cla() plt.plot( np.linspace(0, self.filter_cnt, self.filter_cnt), np.sqrt( np.sum((self.filt_vec[2:4, :] - self.filt_vec[8:10, :])**2, axis=0)), "-k") plt.grid(True) # Error between PF and GT plt.subplot(3, 1, 2) plt.cla() plt.plot( np.linspace(0, self.filter_cnt, self.filter_cnt), np.sqrt( np.sum( (self.filt_vec[2:4, :] - self.filt_vec[5:7, :])**2, axis=0)), "-b") plt.grid(True) # Plot trace of cov matrix plt.subplot(3, 1, 3) plt.cla() plt.plot(np.linspace(0, self.filter_cnt, self.filter_cnt), np.asarray(self.cov_traces), "-k") plt.grid(True) # Plot real pings vs expected meas if True: plt.subplot(1, 1, 1) plt.cla() plt.plot(self.pings_vec[:, 1], self.pings_vec[:, 2], "-k") plt.plot(self.pings_vec[:, 4], self.pings_vec[:, 5], "-b") # For debugging # print (self.pings_vec[:, 2]) # print (self.pings_vec[:, 5]) # print (self.pings_vec[:, 2] - self.pings_vec[:, 5]) # print (np.linalg.norm(self.pings_vec[:, 2] - self.pings_vec[:, 5])) # print(np.gradient(exp_mbes_ranges) - np.gradient(real_mbes_ranges)) # print(self.meas_cov) # print (np.linalg.norm(exp_mbes_ranges - real_mbes_ranges)) # print (np.linalg.norm(np.gradient(real_mbes_ranges) # - np.gradient(exp_mbes_ranges))) plt.grid(True) plt.pause(0.0001) if self.survey_finished: plt.savefig(self.survey_name + "_tracks.png")
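# --- Hedged standalone sketch (arbitrary numbers) ---
# Covariance round-trip used in the callback above: 6 upper-triangular values
# -> full symmetric 3x3 -> rotate -> back to 6 values. The callback itself only
# mirrors the (0, 1) term because the z row is not used there.
import numpy as np

upper6 = np.array([1.0, 0.2, 0.1, 2.0, 0.3, 3.0])    # xx, xy, xz, yy, yz, zz
cov = np.zeros((3, 3))
cov[np.triu_indices(3)] = upper6
cov = cov + np.triu(cov, k=1).T                      # symmetrize

R = np.eye(3)                                        # stand-in for the m2o rotation
cov_rot = R.T @ cov @ R
packed = cov_rot[np.triu_indices(3)]                 # back to the 6-vector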
def parse_roa(self): """ Parse the :class:`~exatomic.core.tensor.Polarizability` dataframe. This will parse the output from the Raman Optical Activity outputs. Note: We generate a 3D tensor with the 2D tensor code. 3D tensors will have 3 rows labeled with the same name. """ _reroa = 'roa begin' _reare = 'alpha real' _reaim = 'alpha im' # _reombre = 'beta real' # _reombim = 'beta im' _reombre = 'omega beta(real)' _reombim = 'omega beta(imag)' _redqre = 'dipole-quadrupole real (Cartesian)' _redqim = 'dipole-quadrupole imag (Cartesian)' if not self.find(_reroa): return found_2d = self.find(_reare, _reaim, _reombre, _reombim, keys_only=True) found_3d = self.find(_redqre, _redqim, keys_only=True) data = {} start = np.array(list(found_2d.values())).reshape(4, ) + 1 end = np.array(list(found_2d.values())).reshape(4, ) + 10 columns = ['x', 'val'] data = [ self.pandas_dataframe(s, e, columns) for s, e in zip(start, end) ] df = pd.concat([dat for dat in data]).reset_index(drop=True) df['grp'] = [i for i in range(4) for j in range(9)] df = df[['val', 'grp']] df = pd.DataFrame( df.groupby('grp').apply( lambda x: x.unstack().values[:-9]).values.tolist(), columns=['xx', 'xy', 'xz', 'yx', 'yy', 'yz', 'zx', 'zy', 'zz']) # find the electric dipole-quadrupole polarizability # NWChem gives this as a list of 18 values assuming the matrix to be symmetric # for our implementation we need to extend it to 27 elements # TODO: check that NWChem does assume that the 3D tensors are symmetric start = np.sort(np.array(list(found_3d.values())).reshape(2, )) + 1 end = np.sort(np.array(list(found_3d.values())).reshape(2, )) + 19 data = [ self.pandas_dataframe(s, e, columns) for s, e in zip(start, end) ] df3 = pd.concat([dat for dat in data]).reset_index(drop=True) vals = df3['val'].values.reshape(2, 3, 6) adx = np.triu_indices(3) mat = np.zeros((2, 3, 3, 3)) for i in range(2): for j in range(3): mat[i][j][adx] = vals[i][j] mat[i][j] = mat[i][j] + np.transpose( mat[i][j]) - np.identity(3) * mat[i][j] mat = mat.reshape(18, 3) df3 = pd.DataFrame(mat, columns=['x', 'y', 'z']) df3['grp1'] = [i for i in range(2) for j in range(9)] df3['grp2'] = [j for i in range(2) for j in range(3) for n in range(3)] df3 = pd.DataFrame( df3.groupby([ 'grp1', 'grp2' ]).apply(lambda x: x.unstack().values[:-6]).values.tolist(), columns=['xx', 'xy', 'xz', 'yx', 'yy', 'yz', 'zx', 'zy', 'zz'], index=[ 'Ax_real', 'Ay_real', 'Az_real', 'Ax_imag', 'Ay_imag', 'Az_imag' ]) split_label = np.transpose([i.split('_') for i in df3.index.values]) label = split_label[0] types = split_label[1] df['label'] = found_2d.keys() df['label'].replace( [_reare, _reombre, _reaim, _reombim], ['alpha-real', 'g_prime-real', 'alpha-imag', 'g_prime-imag'], inplace=True) df['type'] = [i.split('-')[-1] for i in df['label'].values] df['label'] = [i.split('-')[0] for i in df['label'].values] df['frame'] = np.repeat([0], len(df.index)) df3['label'] = label df3['type'] = types df3['frame'] = np.repeat([0], len(df3.index)) self.roa = pd.concat([df, df3], ignore_index=True)
def symmetry(self) -> Tuple[Real, ...]: indices = zip(*np.triu_indices(self.dimension, 1)) return tuple( self._graph.edges.get(i, {"weight": 2})["weight"] for i in indices)
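# --- Side note (standalone) ---
# zip(*np.triu_indices(n, 1)) is the pair-iteration idiom used by `symmetry`
# above: it yields every unordered node pair (i, j) with i < j exactly once.
import numpy as np

pairs = list(zip(*np.triu_indices(4, 1)))
# pairs == [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]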
def _bs_three_point(cols): """deconvolution function for three_point. Parameters ---------- cols : int width of the image """ # function Eq. (7) for j >= i def I0diag(i, j): return np.log((np.sqrt((2 * j + 1)**2 - 4 * i**2) + 2 * j + 1) / (2 * j)) / (2 * np.pi) # j > i def I0(i, j): return np.log( ((np.sqrt((2 * j + 1)**2 - 4 * i**2) + 2 * j + 1)) / (np.sqrt((2 * j - 1)**2 - 4 * i**2) + 2 * j - 1)) / (2 * np.pi) # i = j NB minus -2I_ij typo in Dasch paper def I1diag(i, j): return np.sqrt( (2 * j + 1)**2 - 4 * i**2) / (2 * np.pi) - 2 * j * I0diag(i, j) # j > i def I1(i, j): return (np.sqrt((2 * j + 1)**2 - 4 * i**2) - np.sqrt( (2 * j - 1)**2 - 4 * i**2)) / (2 * np.pi) - 2 * j * I0(i, j) D = np.zeros((cols, cols)) # matrix indices ------------------ # i = j I, J = np.diag_indices(cols) I = I[1:] J = J[1:] # drop special cases (0,0), (0,1) # j = i - 1 Ib, Jb = I, J - 1 # j = i + 1 Iu, Ju = I - 1, J Iu = Iu[1:] # drop special case (0, 1) Ju = Ju[1:] # j > i + 1 Iut, Jut = np.triu_indices(cols, k=2) Iut = Iut[1:] # drop special case (0, 2) Jut = Jut[1:] # D operator matrix ------------------ # j = i - 1 D[Ib, Jb] = I0diag(Ib, Jb + 1) - I1diag(Ib, Jb + 1) # j = i D[I, J] = I0(I, J + 1) - I1(I, J + 1) + 2 * I1diag(I, J) # j = i + 1 D[Iu, Ju] = I0(Iu, Ju+1) - I1(Iu, Ju+1) + 2*I1(Iu, Ju) -\ I0diag(Iu, Ju-1) - I1diag(Iu, Ju-1) # j > i + 1 D[Iut, Jut] = I0(Iut, Jut+1) - I1(Iut, Jut+1) + 2*I1(Iut, Jut) -\ I0(Iut, Jut-1) - I1(Iut, Jut-1) # special cases (that switch between I0, I1 cases) D[0, 2] = I0(0, 3) - I1(0, 3) + 2 * I1(0, 2) - I0(0, 1) - I1(0, 1) D[0, 1] = I0(0, 2) - I1(0, 2) + 2 * I1(0, 1) - 1 / np.pi D[0, 0] = I0(0, 1) - I1(0, 1) + 1 / np.pi return D
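# --- Hedged usage sketch (calling convention assumed, not taken from this module) ---
# In Dasch-style three-point inversion the operator built above is applied
# row-by-row to the right half of the projection image; written as a matrix
# product here on toy data.
import numpy as np

D = _bs_three_point(8)        # 8 x 8 deconvolution operator
row = np.ones(8)              # one toy projection row
radial = D @ row              # reconstructed radial profile for that row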
def base_angles(self) -> np.ndarray: return self._primal_gramian[np.triu_indices(self.dimension, 1)].copy()
def test_cov_estimation_on_raw(method, tmpdir): """Test estimation from raw (typically empty room).""" if method == 'shrunk': try: import sklearn # noqa: F401 except Exception as exp: pytest.skip('sklearn is required, got %s' % (exp, )) raw = read_raw_fif(raw_fname, preload=True) cov_mne = read_cov(erm_cov_fname) method_params = dict(shrunk=dict(shrinkage=[0])) # The pure-string uses the more efficient numpy-based method, the # the list gets triaged to compute_covariance (should be equivalent # but use more memory) with pytest.warns(None): # can warn about EEG ref cov = compute_raw_covariance(raw, tstep=None, method=method, rank='full', method_params=method_params) assert_equal(cov.ch_names, cov_mne.ch_names) assert_equal(cov.nfree, cov_mne.nfree) assert_snr(cov.data, cov_mne.data, 1e6) # test equivalence with np.cov cov_np = np.cov(raw.copy().pick_channels(cov['names']).get_data(), ddof=1) if method != 'shrunk': # can check all off_diag = np.triu_indices(cov_np.shape[0]) else: # We explicitly zero out off-diag entries between channel types, # so let's just check MEG off-diag entries off_diag = np.triu_indices( len(pick_types(raw.info, meg=True, exclude=()))) for other in (cov_mne, cov): assert_allclose(np.diag(cov_np), np.diag(other.data), rtol=5e-6) assert_allclose(cov_np[off_diag], other.data[off_diag], rtol=4e-3) assert_snr(cov.data, other.data, 1e6) # tstep=0.2 (default) with pytest.warns(None): # can warn about EEG ref cov = compute_raw_covariance(raw, method=method, rank='full', method_params=method_params) assert_equal(cov.nfree, cov_mne.nfree - 120) # cutoff some samples assert_snr(cov.data, cov_mne.data, 170) # test IO when computation done in Python cov.save(tmpdir.join('test-cov.fif')) # test saving cov_read = read_cov(tmpdir.join('test-cov.fif')) assert cov_read.ch_names == cov.ch_names assert cov_read.nfree == cov.nfree assert_array_almost_equal(cov.data, cov_read.data) # test with a subset of channels raw_pick = raw.copy().pick_channels(raw.ch_names[:5]) raw_pick.info.normalize_proj() cov = compute_raw_covariance(raw_pick, tstep=None, method=method, rank='full', method_params=method_params) assert cov_mne.ch_names[:5] == cov.ch_names assert_snr(cov.data, cov_mne.data[:5, :5], 5e6) cov = compute_raw_covariance(raw_pick, method=method, rank='full', method_params=method_params) assert_snr(cov.data, cov_mne.data[:5, :5], 90) # cutoff samps # make sure we get a warning with too short a segment raw_2 = read_raw_fif(raw_fname).crop(0, 1) with pytest.warns(RuntimeWarning, match='Too few samples'): cov = compute_raw_covariance(raw_2, method=method, method_params=method_params) # no epochs found due to rejection pytest.raises(ValueError, compute_raw_covariance, raw, tstep=None, method='empirical', reject=dict(eog=200e-6)) # but this should work with pytest.warns(None): # sklearn cov = compute_raw_covariance(raw.copy().crop(0, 10.), tstep=None, method=method, reject=dict(eog=1000e-6), method_params=method_params, verbose='error')
def computeParameters(neighbors, potential_config): # First we need a list of every unique combination of # two neighbors, not considering [0, 1] to be unique # compared to [1, 0]. More specifically, a different # order does not make the pair unique. length = neighbors.shape[0] grid = np.mgrid[0:length, 0:length] grid = grid.swapaxes(0, 2).swapaxes(0, 1) m = grid.shape[0] r, c = np.triu_indices(m, 1) combinations = grid[r, c] left_array = neighbors[combinations[:, 0]] right_array = neighbors[combinations[:, 1]] # Now we use these pairs of vectors to compute and array of # cos(theta) values. Strangely enough, this appears to be the # fastest way to do this. dot_products = np.einsum('ij,ij->i', left_array, right_array) # This is the magnitude of all of the vectors in the left array. left_magnitudes = np.linalg.norm(left_array, axis=1) # This is the magnitude of all of the vectors in the right array. right_magnitudes = np.linalg.norm(right_array, axis=1) # The following two lines are essentially computing # (r_i * r_j) / (|r_i||r_j|) where '*' denotes the dot product. magnitude_products = left_magnitudes * right_magnitudes angular_values = dot_products / magnitude_products # Here we skip some steps and just add an array of 1.0 onto # the array of cos(theta) values. This is for all cases where # i = j, so we know for a fact that theta = 0 and cos(theta) = 1.0 dupl_indices = np.arange(0, length, 1) dupl_magnitudes = np.linalg.norm(neighbors[dupl_indices], axis=1) angular_values = np.concatenate((angular_values, np.tile([1.0], length))) # angular values now holds an array of cos(theta_ijk) for all unique i, j. # Next, we need to compute and array of radial terms for each r0 value. s2 = 1.0 / (potential_config.gi_sigma**2) # This is an array of all radial terms for # all values of r0. radial_terms = [] # These operations are not done inside of the subsequent loop, because # their values do not vary with respect to r0. It is worth noting that # you could do this inside of the loop without any slowdown, but that is # because numpy will cache the values and does not compute them again when # it doesn't need to. # The computation involving tanh at the end of the cutoff function # terms is just a mathematical way of making fc be zero if r > rc. # Adding an if statement would require numpy to jump out of c code and # in to python code in order to evaluate it. This would significantly # slow down the operation. (During testing slowdown was 50 - 100 times) # see https://www.desmos.com/calculator/puz9hpi090 # This has been thoroughly tested against some c code that uses an if # statement. The results are bitwise identical for 9 large test cases. d4 = np.square(np.square(potential_config.truncation_distance)) left_r_rc_unmodified = left_magnitudes - potential_config.cutoff_distance left_r_rc_terms = np.square(np.square(left_r_rc_unmodified)) left_fc = (left_r_rc_terms / (d4 + left_r_rc_terms)) left_fc *= (0.5 * np.tanh(-1e6 * (left_r_rc_unmodified)) + 0.5) right_r_rc_unmodified = right_magnitudes - potential_config.cutoff_distance right_r_rc_terms = np.square(np.square(right_r_rc_unmodified)) right_fc = (right_r_rc_terms / (d4 + right_r_rc_terms)) right_fc *= (0.5 * np.tanh(-1e6 * (right_r_rc_unmodified)) + 0.5) r_rc_unmodified = dupl_magnitudes - potential_config.cutoff_distance r_rc_terms = np.square(np.square(r_rc_unmodified)) fc = (r_rc_terms / (d4 + r_rc_terms)) fc *= (0.5 * np.tanh(-1e6 * (r_rc_unmodified)) + 0.5) # Here we calculate the radial term for all values of r0. 
for r0n in potential_config.r0: # The left_* and right_* arrays correspond to cases where # r_i != r_j. In these cases, we need to calculate both of # the functions (f) independently. left_term = np.exp(-s2 * np.square(left_magnitudes - r0n)) full_left_term = left_term * left_fc right_term = np.exp(-s2 * np.square(right_magnitudes - r0n)) full_right_term = right_term * right_fc # These two arrays correspond to cases where r_i = r_j and we # know that we just need to square the value of the function # (f) after computing it once. term = np.exp(-s2 * np.square(dupl_magnitudes - r0n)) full_term = term * fc # In this statement, we multiply the radial term by 2, because # cases where r_i != r_j are supposed to be repeated, with the # vectors swapped. Since the function we are computing on them # is commutative, we can just compute one case of r_i != r_j # and double it to account for the case where r_i is swapped with # r_j. This cuts the computation time in half. to_add = np.concatenate( (2 * full_right_term * full_left_term, np.square(full_term))) radial_terms.append(to_add) # Now radial_terms is an array where each first index corresponds # to an r0 value and each second index corresponds to the product # of the radial terms for a unique combination of neighbors. # For each r0 and for each combination of neigbors, we now # Need to compute the m-th Legendre polynomial of the cosine # of the angle between the two. # This uses the recursive definition of the Legendre Polynomials # in order to generalize to any specified order in the nn file. max_pm = max(potential_config.legendre_orders) legendre_polynomials = np.zeros((max_pm + 1, len(angular_values))) legendre_polynomials[0] = np.ones(len(angular_values)) legendre_polynomials[1] = angular_values for order in range(1, max_pm): current_pm = (2 * order + 1) * angular_values * legendre_polynomials[order] current_pm -= order * legendre_polynomials[order - 1] current_pm /= (order + 1) legendre_polynomials[order + 1] = current_pm # Now we multiply the Legendre Polynomial terms by the radial terms and # sum them. This also selects the desired legendre polynomials from the # list of those computed. Since the recursive definition is used, legendre # polynomials may be computed that aren't actually used in the final # result. len_pm = len(potential_config.legendre_orders) structural_parameters = np.zeros(len_pm * len(potential_config.r0)) idx = 0 for order in potential_config.legendre_orders: for r0n in radial_terms: current_param = np.sum(legendre_polynomials[order] * r0n) structural_parameters[idx] = current_param idx += 1 # The following lines exist to adhere to a combination of the # hyperparameter definition in the network potential file and the # configuration values specified in the config file. sp = structural_parameters sp /= np.square(np.tile(potential_config.r0, len_pm)) if potential_config.gi_mode == 5: return np.arcsinh(sp) else: return sp
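# --- Side sketch (equivalence check) ---
# The mgrid + triu_indices pair construction at the top of computeParameters is
# equivalent to stacking the strict upper-triangular indices directly, which
# avoids materialising the (length, length, 2) grid.
import numpy as np

length = 5
grid = np.mgrid[0:length, 0:length].swapaxes(0, 2).swapaxes(0, 1)
r, c = np.triu_indices(length, 1)
assert np.array_equal(grid[r, c], np.stack([r, c], axis=1))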
def build_all_commutators(self, excitations_list, hopping_operators, type_of_commutativities): """Building all commutators for Q, W, M, V matrices. Args: excitations_list (list): single excitations list + double excitation list hopping_operators (dict): all hopping operators based on excitations_list, key is the string of single/double excitation; value is corresponding operator. type_of_commutativities: if tapering is used, it records the commutativities of hopping operators with the Z2 symmetries found in the original operator. Returns: dict: key: a string of matrix indices; value: the commutators for Q matrix dict: key: a string of matrix indices; value: the commutators for W matrix dict: key: a string of matrix indices; value: the commutators for M matrix dict: key: a string of matrix indices; value: the commutators for V matrix int: number of entries in the matrix """ size = len(excitations_list) m_commutators = np.empty((size, size), dtype=object) v_commutators = np.empty((size, size), dtype=object) q_commutators = np.empty((size, size), dtype=object) w_commutators = np.empty((size, size), dtype=object) # get all to-be-processed index if self._is_eom_matrix_symmetric: mus, nus = np.triu_indices(size) else: mus, nus = np.indices((size, size)) mus = np.asarray(mus.flat) nus = np.asarray(nus.flat) def _build_one_sector(available_hopping_ops): to_be_computed_list = [] for idx in range(len(mus)): mu = mus[idx] nu = nus[idx] left_op = available_hopping_ops.get( '_'.join([str(x) for x in excitations_list[mu]]), None) right_op_1 = available_hopping_ops.get( '_'.join([str(x) for x in excitations_list[nu]]), None) right_op_2 = available_hopping_ops.get( '_'.join([str(x) for x in reversed(excitations_list[nu])]), None) to_be_computed_list.append( (mu, nu, left_op, right_op_1, right_op_2)) if logger.isEnabledFor(logging.INFO): logger.info("Building all commutators:") TextProgressBar(sys.stderr) results = parallel_map( QEquationOfMotion._build_commutator_rountine, to_be_computed_list, task_args=(self._untapered_op, self._z2_symmetries)) for result in results: mu, nu, q_mat_op, w_mat_op, m_mat_op, v_mat_op = result q_commutators[mu][ nu] = op_converter.to_tpb_grouped_weighted_pauli_operator( q_mat_op, TPBGroupedWeightedPauliOperator.sorted_grouping ) if q_mat_op is not None else q_commutators[mu][nu] w_commutators[mu][ nu] = op_converter.to_tpb_grouped_weighted_pauli_operator( w_mat_op, TPBGroupedWeightedPauliOperator.sorted_grouping ) if w_mat_op is not None else w_commutators[mu][nu] m_commutators[mu][ nu] = op_converter.to_tpb_grouped_weighted_pauli_operator( m_mat_op, TPBGroupedWeightedPauliOperator.sorted_grouping ) if m_mat_op is not None else m_commutators[mu][nu] v_commutators[mu][ nu] = op_converter.to_tpb_grouped_weighted_pauli_operator( v_mat_op, TPBGroupedWeightedPauliOperator.sorted_grouping ) if v_mat_op is not None else v_commutators[mu][nu] available_entry = 0 if not self._z2_symmetries.is_empty(): for targeted_tapering_values in itertools.product( [1, -1], repeat=len(self._z2_symmetries.symmetries)): logger.info("In sector: ({})".format(','.join( [str(x) for x in targeted_tapering_values]))) # remove the excited operators which are not suitable for the sector available_hopping_ops = {} targeted_sector = (np.asarray(targeted_tapering_values) == 1) for key, value in type_of_commutativities.items(): value = np.asarray(value) if np.all(value == targeted_sector): available_hopping_ops[key] = hopping_operators[key] _build_one_sector(available_hopping_ops) available_entry += 
len(available_hopping_ops) * len( available_hopping_ops) else: available_hopping_ops = hopping_operators _build_one_sector(available_hopping_ops) available_entry = len(available_hopping_ops) * len( available_hopping_ops) return q_commutators, w_commutators, m_commutators, v_commutators, available_entry
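# --- Side illustration (standalone) ---
# Index enumeration used in build_all_commutators above: when the EOM matrix is
# symmetric only the upper triangle (diagonal included) is evaluated, otherwise
# every (mu, nu) entry is.
import numpy as np

size = 4
mus_sym, nus_sym = np.triu_indices(size)             # size*(size+1)//2 == 10 entries
mus_all, nus_all = np.indices((size, size))
mus_all = np.asarray(mus_all.flat)                   # 16 entries
nus_all = np.asarray(nus_all.flat)
assert len(mus_sym) == size * (size + 1) // 2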
def local_variation(data): r""" Calculates the local variaiont of inter-contact times. [LV-1]_, [LV-2]_ Parameters ---------- data : array, dict This is either (1) temporal network input (graphlet or contact) with nettype: 'bu', 'bd'. (2) dictionary of ICTs (output of *intercontacttimes*). Returns ------- LV : array Local variation per edge. Notes ------ The local variation is like the bursty coefficient and quantifies if a series of inter-contact times are periodic, random or Poisson distributed or bursty. It is defined as: .. math:: LV = {3 \over {n-1}}\sum_{i=1}^{n-1}{{{\iota_i - \iota_{i+1}} \over {\iota_i + \iota_{i+1}}}^2} Where :math:`\iota` are inter-contact times and i is the index of the inter-contact time (not a node index). n is the number of events, making n-1 the number of inter-contact times. The possible range is: :math:`0 \geq LV \gt 3`. When periodic, LV=0, Poisson, LV=1 Larger LVs indicate bursty process. Examples --------- First import all necessary packages >>> import teneto >>> import numpy as np Now create 2 temporal network of 2 nodes and 60 time points. The first has periodict edges, repeating every other time-point: >>> G_periodic = np.zeros([2, 2, 60]) >>> ts_periodic = np.arange(0, 60, 2) >>> G_periodic[:,:,ts_periodic] = 1 The second has a more bursty pattern of edges: >>> ts_bursty = [1, 8, 9, 32, 33, 34, 39, 40, 50, 51, 52, 55] >>> G_bursty = np.zeros([2, 2, 60]) >>> G_bursty[:,:,ts_bursty] = 1 Now we call local variation for each edge. >>> LV_periodic = teneto.networkmeasures.local_variation(G_periodic) >>> LV_periodic array([[nan, 0.], [ 0., nan]]) Above we can see that between node 0 and 1, LV=0 (the diagonal is nan). This is indicative of a periodic contacts (which is what we defined). Doing the same for the second example: >>> LV_bursty = teneto.networkmeasures.local_variation(G_bursty) >>> LV_bursty array([[ nan, 1.28748748], [1.28748748, nan]]) When the value is greater than 1, it indicates a bursty process. nans are returned if there are no intercontacttimes References ---------- .. [LV-1] Shinomoto et al (2003) Differences in spiking patterns among cortical neurons. Neural Computation 15.12 [`Link <https://www.mitpressjournals.org/doi/abs/10.1162/089976603322518759>`_] .. [LV-2] Followed eq., 4.34 in Masuda N & Lambiotte (2016) A guide to temporal networks. World Scientific. Series on Complex Networks. 
Vol 4 [`Link <https://www.worldscientific.com/doi/abs/10.1142/9781786341150_0001>`_] """ ict = 0 # are ict present if isinstance(data, dict): # This could be done better if [k for k in list(data.keys()) if k == 'intercontacttimes'] == ['intercontacttimes']: ict = 1 # if shortest paths are not calculated, calculate them if ict == 0: data = intercontacttimes(data) if data['nettype'][1] == 'u': ind = np.triu_indices(data['intercontacttimes'].shape[0], k=1) if data['nettype'][1] == 'd': triu = np.triu_indices(data['intercontacttimes'].shape[0], k=1) tril = np.tril_indices(data['intercontacttimes'].shape[0], k=-1) ind = [[], []] ind[0] = np.concatenate([tril[0], triu[0]]) ind[1] = np.concatenate([tril[1], triu[1]]) ind = tuple(ind) ict_shape = data['intercontacttimes'].shape lv = np.zeros(ict_shape) for n in range(len(ind[0])): icts = data['intercontacttimes'][ind[0][n], ind[1][n]] # make sure there is some contact if icts is not None: lv_nonnorm = np.sum( np.power((icts[:-1] - icts[1:]) / (icts[:-1] + icts[1:]), 2)) lv[ind[0][n], ind[1][n]] = (3 / len(icts)) * lv_nonnorm else: lv[ind[0][n], ind[1][n]] = np.nan # Make symetric if undirected if data['nettype'][1] == 'u': lv = lv + lv.transpose() for n in range(lv.shape[0]): lv[n, n] = np.nan return lv
def Euc_to_fst(vector_lib,n_comp= 5,pop_max= 8,Iter= 20,bias_range= [20,300],Eigen= False, Scale= False,Centre= True,ploidy= 1): ### Select pre and post processing measures. length_haps= vector_lib.shape[1] Iter= 20 # repeats #### Predict predicted= [] #def controled_fsts(vector_lib,Eigen,length_haps,Scale,Center,N_pops,n_comp,Iter,N_sims,MixL,MixP,Pairs): lengths_vector= [] ### store distances between centroids biased_pairwise= [] ### store PC projection: dist_PC_corrected= {x:[] for x in range(n_comp)} ### store fsts fst_store= [] ### proceed. for rep in range(Iter): clear_output() N_pops= np.random.choice(range(3,pop_max),1,replace= False)[0] ## Population Sizes and labels bias_scheme= np.random.choice(range(bias_range[0],bias_range[1]),N_pops,replace= False) bias_labels= np.repeat(np.array([x for x in range(N_pops)]),bias_scheme) ### triangular matrices extract. iu1= np.triu_indices(N_pops,1) # for centroid comparison iu_bias= np.triu_indices(sum(bias_scheme),1) iu_control= np.triu_indices(2,1) Pops= np.random.choice(vector_lib.shape[0],N_pops,replace= False) print('Iter: {}, vectors selected: {}, hap length: {}'.format(rep,Pops,length_haps)) ########## FST freqs_selected= vector_lib[Pops,:length_haps] Pairwise= Ste.return_fsts2(freqs_selected) #fsts_compare = scale(Pairwise.fst) fsts_compare= Pairwise.fst fst_store.extend(fsts_compare) ## lengths lengths_vector.extend([length_haps] * len(fsts_compare)) #### generate data and perform PCA data= [] for k in range(N_pops): probs= vector_lib[Pops[k],:] m= bias_scheme[k] Haps= [[np.random.choice([ploidy,0],p= [1-probs[x],probs[x]]) for x in range(length_haps)] for acc in range(m)] data.extend(Haps) data2= np.array(data) if Scale: data2= scale(data2) pca = PCA(n_components=n_comp, whiten=False,svd_solver='randomized').fit(data2) feat_bias= pca.transform(data2) if Eigen: feat_bias= feat_bias * pca.explained_variance_ratio_ #### Centroid distances bias_centroids= [np.mean(feat_bias[[y for y in range(feat_bias.shape[0]) if bias_labels[y] == z],:],axis= 0) for z in range(N_pops)] bias_centroids= np.array(bias_centroids) bias_pair_dist= pairwise_distances(bias_centroids,metric= 'euclidean') bias_pair_dist= bias_pair_dist[iu1] #bias_pair_dist= scale(bias_pair_dist) biased_pairwise.extend(bias_pair_dist) Size= length_haps fst_lm_range= [0,.3] Lindexes= [x for x in range(len(lengths_vector)) if lengths_vector[x] == Size and fst_store[x] >= fst_lm_range[0] and fst_store[x] <= fst_lm_range[1]] y_true= [np.log(biased_pairwise[x]) for x in Lindexes] fst_x= [np.log(fst_store[x]) for x in Lindexes] m_coeff,b= np.polyfit(y_true,fst_x,1) return m_coeff, b, biased_pairwise, fst_x, y_true
train_label = train_label["all_label"] train_label = np.reshape(train_label, (-1, )) # Calculate edges for all parcellations for training data num_sub = np.shape(train_mats_aal)[2] num_node_aal = np.shape(train_mats_aal)[0] num_edge_aal = num_node_aal * (num_node_aal - 1) // 2 num_node_ho = np.shape(train_mats_ho)[0] num_edge_ho = num_node_ho * (num_node_ho - 1) // 2 num_node_cc = np.shape(train_mats_cc)[0] num_edge_cc = num_node_cc * (num_node_cc - 1) // 2 train_edges = np.zeros([num_edge_aal + num_edge_ho + num_edge_cc, num_sub]) for i_sub in range(num_sub): iu_aal = np.triu_indices(num_node_aal, 1) iu_ho = np.triu_indices(num_node_ho, 1) iu_cc = np.triu_indices(num_node_cc, 1) train_edges[0:num_edge_aal, i_sub] = train_mats_aal[iu_aal[0], iu_aal[1], i_sub] train_edges[num_edge_aal:(num_edge_aal + num_edge_ho), i_sub] = train_mats_ho[iu_ho[0], iu_ho[1], i_sub] train_edges[(num_edge_aal + num_edge_ho):(num_edge_aal + num_edge_ho + num_edge_cc), i_sub] = train_mats_cc[iu_cc[0], iu_cc[1], i_sub] train_edges_aal = train_edges[0:num_edge_aal, ] train_edges_ho = train_edges[num_edge_aal:(num_edge_aal + num_edge_ho), ] train_edges_cc = train_edges[(num_edge_aal + num_edge_ho):(num_edge_aal + num_edge_ho + num_edge_cc), ]
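# --- Optional refactor sketch (same result; helper name is hypothetical) ---
# The triu index pairs above do not depend on i_sub, so each atlas's edges can
# be extracted for all subjects at once instead of recomputing the indices
# inside the subject loop.
import numpy as np

def stack_upper_edges(mats):
    """mats: (n_node, n_node, n_sub) -> (n_edge, n_sub) upper-triangular edges."""
    iu = np.triu_indices(mats.shape[0], 1)
    return mats[iu[0], iu[1], :]

# train_edges = np.concatenate([stack_upper_edges(train_mats_aal),
#                               stack_upper_edges(train_mats_ho),
#                               stack_upper_edges(train_mats_cc)], axis=0)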