def association(gene, context, return_snps=False):
    """Compute the gene-level association statistics for ``gene``.

    Arguments
    - gene: identifier handed to ``context`` and echoed back in the result.
    - context: object providing ``provide_calculation(gene)`` (unpacked here
      as a 4-tuple: SNP count in the model, a keyed data table, a covariance
      matrix and a fourth value that is not used) and
      ``get_n_in_covariance(gene)``.
    - return_snps: when true, the set of SNPs used is returned as well.

    Returns
    The tuple ``(gene, zscore, effect_size, sigma_g_2, n_snps_in_model,
    n_snps_in_cov, n_snps_used)``; or ``(that_tuple, set_of_snps_used)``
    when ``return_snps`` is true. ``zscore`` and ``effect_size`` stay NaN
    when no SNP is used or when ``sigma_g_2`` is not positive.
    """
    model_snp_count, data, covariance, _ = context.provide_calculation(gene)

    # Bookkeeping that is reported back regardless of the outcome.
    used = data[Constants.SNP]
    used_count = len(used)
    covariance_snp_count = context.get_n_in_covariance(gene)

    z = beta_hat = gene_variance = numpy.nan
    if used_count > 0:
        weights = data[WDBQF.K_WEIGHT]
        gwas_z = data[Constants.ZSCORE]
        gwas_beta = data[Constants.BETA]

        # Per-SNP standard deviations come from the covariance diagonal.
        snp_sd = numpy.sqrt(numpy.diag(covariance))

        # Quadratic form w' * cov * w.
        gene_variance = float(d(d(weights, covariance), weights))
        if gene_variance > 0:
            try:
                z = numpy.sum(weights * gwas_z * snp_sd) / numpy.sqrt(gene_variance)
                beta_hat = numpy.sum(weights * gwas_beta * (snp_sd ** 2)) / gene_variance
            except Exception as e:
                # Best effort: log at numeric level 9 (below DEBUG) and keep
                # whatever was computed so far.
                logging.log(9, "Unexpected exception when calculating zscore: %s, %s", gene, str(e))

    result = (gene, z, beta_hat, gene_variance,
              model_snp_count, covariance_snp_count, used_count)
    return (result, set(used)) if return_snps else result
def _get(variants, ids, cutoff, regularization, f=True):
    """Derive weights and a scale for the last entry of ``ids`` from the others.

    The rows ``variants[x]`` for each ``x`` in ``ids`` are fed to
    ``numpy.cov``. The covariance among all rows but the last forms
    ``sigma``; the covariance of the last row with the others forms ``rho``.

    Arguments
    - variants: mapping indexed by the entries of ``ids``.
    - ids: keys into ``variants``; the last one is treated separately.
    - cutoff, regularization: forwarded unchanged to ``Math.crpinv``.
    - f: selects the scale formula: ``sqrt(rho . w)`` when true,
      ``sqrt(w . sigma . w)`` otherwise.

    Returns
    ``(w, s, n_indep, eigen)`` where ``w = rho . sigma_inv``, ``s`` is the
    scale above, and ``n_indep`` / ``eigen`` are passed through from
    ``Math.crpinv``.
    """
    rows = [variants[key] for key in ids]
    full_cov = numpy.cov(rows)

    n_predictors = len(ids) - 1
    sigma = full_cov[:n_predictors, :n_predictors]
    rho = full_cov[-1, :-1]

    sigma_inv, n_indep, eigen = Math.crpinv(sigma, cutoff, regularization)
    w = d(rho, sigma_inv)

    if f:
        squared_scale = d(rho, w)
    else:
        squared_scale = d(w, d(sigma, w))
    s = math.sqrt(squared_scale)

    return w, s, n_indep, eigen
def _get_multi(geno, typed, cutoff, regularization):
    """Compute z-scores and variances for several rows of ``geno`` at once.

    The first ``typed.shape[0]`` rows of ``geno`` form the reference block
    (``sigma_tt``); every remaining row is a target whose covariance with
    the reference block is ``sigma_it``.
    NOTE(review): the names suggest "typed" vs "imputed" variants; confirm
    against the caller.

    Arguments
    - geno: 2-D data accepted by ``numpy.cov`` (one variable per row).
    - typed: object exposing ``shape[0]`` and a ``zscore`` attribute
      (presumably a pandas DataFrame; verify against the caller).
    - cutoff, regularization: forwarded unchanged to ``Math.crpinv``.

    Returns
    ``(zscore, variance, n_typed, n_indep)`` where ``zscore = w . typed.zscore``
    with ``w = sigma_it . sigma_inv``, ``variance`` is the row-wise sum of
    ``sigma_it * w``, ``n_typed`` is the size of the reference block and
    ``n_indep`` is passed through from ``Math.crpinv``.
    """
    n_typed = typed.shape[0]

    cov = numpy.cov(geno)
    sigma_tt = cov[:n_typed, :n_typed]
    sigma_it = cov[n_typed:, :n_typed]

    # The third value returned by crpinv is not needed here.
    sigma_inv, n_indep, _ = Math.crpinv(sigma_tt, cutoff, regularization)

    # Computed once and reused for both outputs (the product used to be
    # evaluated twice).
    w = d(sigma_it, sigma_inv)
    zscore = d(w, typed.zscore)
    variance = numpy.sum(numpy.multiply(sigma_it, w), axis=1)

    return zscore, variance, sigma_tt.shape[0], n_indep
def get_Boley_undirected(tp):
    """Return the Boley et al "undirected" transition matrix for ``tp``.

    Boley et al associate a directed graph with an undirected one whose
    adjacency matrix is G**s = (Pi * P + P' * Pi) / 2, with Pi the
    steady-state distribution laid out on a diagonal and P the transition
    probability matrix. This function returns the corresponding transition
    probability matrix instead:

        P**s = (P + inv(Pi) * P.T * Pi) / 2

    The result is not necessarily symmetric.
    """
    # Steady-state probabilities on the diagonal, and the inverse of that.
    steady = np.diag(get_steady_state(tp))
    steady_inv = np.linalg.inv(steady)

    # inv(Pi) * P.T * Pi
    reversed_chain = np.dot(np.dot(steady_inv, tp.T), steady)
    return (tp + reversed_chain) / 2.0
def association(gene, context, return_snps=False):
    """Compute gene-level association statistics, with an optional diagnostic.

    When the root logger's effective level is below 10, the eigenvalues of
    the covariance matrix are inspected and a message is logged if any of
    them is smaller than 1e-6.

    Arguments
    - gene: identifier handed to ``context`` and echoed back in the result.
    - context: object providing ``provide_calculation(gene)`` (unpacked as
      SNP count in the model, a keyed data table, a covariance matrix and
      an unused fourth value) and ``get_n_in_covariance(gene)``.
    - return_snps: when true, also return the set of SNPs used.

    Returns
    ``(gene, zscore, effect_size, sigma_g_2, n_snps_in_model, n_snps_in_cov,
    n_snps_used)``, optionally paired with the set of SNPs used. ``zscore``
    and ``effect_size`` remain NaN when there are no SNPs or ``sigma_g_2``
    is not positive.
    """
    n_snps_in_model, table, cov, _unused = context.provide_calculation(gene)

    # Diagnostic only runs at very verbose logging levels.
    if logging.getLogger().getEffectiveLevel() < 10:
        eigenvalues, _vectors = numpy.linalg.eig(cov)
        if numpy.any(eigenvalues < 1e-6):
            logging.info("Gene %s has covariance close to singular", gene)

    snp_ids = table[Constants.SNP]
    n_snps_used = len(snp_ids)
    n_snps_in_cov = context.get_n_in_covariance(gene)

    zscore = numpy.nan
    effect_size = numpy.nan
    sigma_g_2 = numpy.nan

    if n_snps_used > 0:
        w = table[WDBQF.K_WEIGHT]
        z_in = table[Constants.ZSCORE]
        beta_in = table[Constants.BETA]

        # Standard deviations taken from the covariance diagonal.
        sd = numpy.sqrt(numpy.diag(cov))

        # w' * cov * w
        sigma_g_2 = float(d(d(w, cov), w))
        if sigma_g_2 > 0:
            try:
                z_numerator = numpy.sum(w * z_in * sd)
                zscore = z_numerator / numpy.sqrt(sigma_g_2)
                beta_numerator = numpy.sum(w * beta_in * (sd**2))
                effect_size = beta_numerator / sigma_g_2
            except Exception as e:
                # Best effort: report at numeric level 9 and carry on.
                logging.log(
                    9, "Unexpected exception when calculating zscore: %s, %s",
                    gene, str(e))

    r = (gene, zscore, effect_size, sigma_g_2,
         n_snps_in_model, n_snps_in_cov, n_snps_used)
    if not return_snps:
        return r
    return r, set(snp_ids)
def _get_z(variants, ids, gwas_slice, cutoff, regularization, f=True):
    """Combine the weights from ``_get`` with ``gwas_slice.zscore``.

    Arguments
    - variants, ids, cutoff, regularization, f: forwarded to ``_get``.
    - gwas_slice: object whose ``zscore`` attribute is dotted with the
      weights (only read when the scale is positive).

    Returns
    ``(z, s, n_indep, eigen)`` where ``s``, ``n_indep`` and ``eigen`` come
    from ``_get`` and ``z`` is ``(w . zscore) / s``, or ``None`` when ``s``
    is not strictly positive.
    """
    weights, scale, n_indep, eigen = _get(
        variants, ids, cutoff, regularization, f=f)

    if scale > 0:
        z = d(weights, gwas_slice.zscore) / scale
    else:
        # No usable scale: signal "no z-score" to the caller.
        z = None

    return z, scale, n_indep, eigen
def RSP_and_FE_distances(A, beta, C=None):
    r"""Calculate the randomised shortest path and free-energy distances.

    Both are defined in "Developments in the theory of randomized shortest
    paths with a comparison of graph node distances", Kivim\"{a}ki, Shimbo,
    and Saerens, Physica A, 2013.

    Arguments
    - A: Adjacency matrix, whose elements represent affinities between
      nodes, which define the reference transition probabilities. A can
      be asymmetric (for directed graphs). Entries below 1e-8 are treated
      as zero. The caller's object is not modified (a float copy is made).
      Distances between nodes that are not strongly connected are Inf.
    - beta: must lie between 1e-8 and 20.0. The useful range depends on
      the size of the graph and the magnitude of the costs. When
      beta --> 0, we obtain the commute cost distances. When
      beta --> infinity, we obtain the shortest path (lowest cost)
      distances.
    - C: Cost matrix, whose elements represent the cost of traversing an
      edge of the graph. Infinite costs can be marked as zeros (zero
      costs are anyway not allowed). If C is not provided, the costs
      default to c_ij = 1/a_ij, with the largest finite float standing
      in for the cost of a missing edge.

    Returns
    D_RSP: the RSP dissimilarity matrix
    D_FE: the free energy distance matrix

    Raises
    ValueError: if A is not square, or beta is outside [1e-8, 20.0].

    NumPy's global floating-point error settings are left as the caller
    configured them.

    Original Matlab code and comments (c) Ilkka Kivim\"{a}ki 2013

    Transliterated to Python/Numpy by James McDermott
    <*****@*****.**>. Helpful guides to this type of transliteration:
    http://mathesaurus.sourceforge.net/matlab-numpy.html,
    http://wiki.scipy.org/NumPy_for_Matlab_Users,
    http://wiki.scipy.org/Tentative_NumPy_Tutorial
    """
    realmax = np.finfo('d').max
    eps = 0.00000001

    # Work on a float ndarray copy: integer input can break the matrix
    # inversion, and this also normalises matrix-like input. The builtin
    # float is used because the np.float alias no longer exists in
    # current NumPy.
    A = np.array(A, dtype=float)
    A[A < eps] = 0.0
    n, m = A.shape
    if n != m:
        raise ValueError("The input matrix A must be square")

    if C is None:
        has_edge = A >= eps
        C = A.copy()
        C[has_edge] = 1.0 / A[has_edge]
        C[~has_edge] = realmax
    else:
        # Guarantee element-wise semantics for the products below.
        C = np.asarray(C, dtype=float)

    if beta < eps or beta > 20.0:
        raise ValueError("The value for beta is outside the expected range, 0 to 20.0")

    I = np.eye(n)

    # Computation of Pref, the reference transition probability matrix:
    # each row of A divided by its row sum.
    s = np.sum(A, 1)
    s[s == 0] = 1  # avoid zero-division
    Pref = A / s[:, np.newaxis]

    # Computation of the W and Z matrices. -beta * C can overflow to -inf
    # where C holds the largest finite float; exp() then yields exactly 0,
    # which is the intended value, so the warning is suppressed.
    with np.errstate(over='ignore', under='ignore'):
        W = np.exp(-beta * C) * Pref
    Z = linalg.inv(I - W)

    # Computation of Z*(C.*W)*Z avoiding zero-division errors:
    numerator = d(d(Z, (C * W)), Z)
    indx = (numerator > 0) & (Z > 0)
    D_nonabs = np.full((n, n), np.inf)
    D_nonabs[indx] = numerator[indx] / Z[indx]
    # D_nonabs above actually gives the expected costs of non-hitting paths
    # from i to j.

    # Expected costs of hitting paths -- avoid a possible inf-inf
    # which can arise with isolated nodes and would give a NaN -- we
    # prefer to have inf in that case.
    diag_D = np.tile(np.diag(D_nonabs), (n, 1))  # diag_D[i, j] = D_nonabs[j, j]
    indx = ~np.isinf(diag_D)
    C_RSP = np.full((n, n), np.inf)
    C_RSP[indx] = D_nonabs[indx] - diag_D[indx]

    # symmetrization
    D_RSP = 0.5 * (C_RSP + C_RSP.T)

    # Free energies and symmetrization:
    Dh_1 = np.diag(1.0 / np.diag(Z))
    Zh = d(Z, Dh_1)

    # Zh can contain zeros (isolated nodes); log(0) = -inf is the value we
    # want there. errstate scopes the suppression to this one expression
    # and restores the caller's settings afterwards.
    with np.errstate(divide='ignore'):
        FE = -np.log(Zh) / beta
    D_FE = 0.5 * (FE + FE.T)

    # Just in case, set diagonals to zero:
    np.fill_diagonal(D_RSP, 0.0)
    np.fill_diagonal(D_FE, 0.0)

    return D_RSP, D_FE