def association(gene, context, return_snps=False):
    """Compute the gene-level association statistics for ``gene``.

    The inputs come from ``context.provide_calculation(gene)``, which yields
    the number of SNPs in the model, a per-SNP table, a covariance matrix and
    a SNP collection (the last one is not used here).

    Returns the tuple ``(gene, zscore, effect_size, sigma_g_2,
    n_snps_in_model, n_snps_in_cov, n_snps_used)``. All three statistics stay
    ``numpy.nan`` when no SNP is usable; ``zscore`` and ``effect_size`` also
    stay ``numpy.nan`` when ``sigma_g_2`` is not positive. When
    ``return_snps`` is true, the result is ``(that_tuple, set(snps_used))``.
    """
    n_snps_in_model, table, cov, _snps = context.provide_calculation(gene)

    # Bookkeeping that is reported alongside the statistics.
    snps_used = table[Constants.SNP]
    n_snps_used = len(snps_used)
    n_snps_in_cov = context.get_n_in_covariance(gene)

    zscore = effect_size = sigma_g_2 = numpy.nan
    if n_snps_used > 0:
        weights = table[WDBQF.K_WEIGHT]
        snp_zscores = table[Constants.ZSCORE]
        snp_betas = table[Constants.BETA]
        # Per-SNP standard deviations from the covariance diagonal.
        snp_sd = numpy.sqrt(numpy.diag(cov))

        # weights . cov . weights (``d`` is presumably numpy.dot -- confirm).
        sigma_g_2 = float(d(d(weights, cov), weights))
        if sigma_g_2 > 0:
            try:
                weighted_z = weights * snp_zscores * snp_sd
                zscore = numpy.sum(weighted_z) / numpy.sqrt(sigma_g_2)
                weighted_beta = weights * snp_betas * (snp_sd ** 2)
                effect_size = numpy.sum(weighted_beta) / sigma_g_2
            except Exception as e:
                # Best effort: keep whatever was computed and leave the rest NaN.
                logging.log(9, "Unexpected exception when calculating zscore: %s, %s", gene, str(e))

    result = (gene, zscore, effect_size, sigma_g_2,
              n_snps_in_model, n_snps_in_cov, n_snps_used)
    return (result, set(snps_used)) if return_snps else result
# Example #2
# 0
def _get(variants, ids, cutoff, regularization, f=True):
    """Derive weights and a scale for the last id from the ids before it.

    The covariance of the rows ``variants[x]`` (for ``x`` in ``ids``) is split
    into ``sigma`` (the leading ``len(ids) - 1`` square block) and ``rho`` (the
    last row without its final entry). ``sigma`` is inverted through
    ``Math.crpinv(sigma, cutoff, regularization)``.

    Returns ``(w, s, n_indep, eigen)`` where ``w = rho . sigma_inv`` and ``s``
    is ``sqrt(rho . w)`` when ``f`` is true, otherwise
    ``sqrt(w . sigma . w)``; ``n_indep`` and ``eigen`` are passed through from
    ``Math.crpinv``.
    """
    genotypes = [variants[snp_id] for snp_id in ids]
    covariance = numpy.cov(genotypes)

    n_leading = len(ids) - 1
    sigma = covariance[:n_leading, :n_leading]
    rho = covariance[-1, :-1]

    sigma_inv, n_indep, eigen = Math.crpinv(sigma, cutoff, regularization)
    w = d(rho, sigma_inv)

    if f:
        s = math.sqrt(d(rho, w))
    else:
        s = math.sqrt(d(w, d(sigma, w)))
    return w, s, n_indep, eigen
# Example #3
# 0
def _get_multi(geno, typed, cutoff, regularization):
    """Compute z-scores and variances for the rows that follow the typed ones.

    ``numpy.cov(geno)`` is split using ``typed.shape[0]``: the leading square
    block (``sigma_tt``) covers the typed rows and ``sigma_it`` holds the
    covariances of the remaining rows with the typed ones. ``sigma_tt`` is
    inverted through ``Math.crpinv(sigma_tt, cutoff, regularization)``.

    Returns ``(zscore, variance, n_typed_in_cov, n_indep)`` where
    ``zscore = w . typed.zscore`` with ``w = sigma_it . sigma_inv``,
    ``variance`` is the row-wise sum of ``sigma_it * w``,
    ``n_typed_in_cov`` is ``sigma_tt.shape[0]`` and ``n_indep`` comes from
    ``Math.crpinv``.
    """
    n_typed = typed.shape[0]
    cov = numpy.cov(geno)
    sigma_tt = cov[:n_typed, :n_typed]
    sigma_it = cov[n_typed:, :n_typed]
    sigma_inv, n_indep, _eigen = Math.crpinv(sigma_tt, cutoff, regularization)

    # The weights serve both results; the original computed this same
    # product twice.
    w = d(sigma_it, sigma_inv)
    zscore = d(w, typed.zscore)
    variance = numpy.sum(numpy.multiply(sigma_it, w), axis=1)
    return zscore, variance, sigma_tt.shape[0], n_indep
# Example #4
# 0
def get_Boley_undirected(tp):
    """Return the transition matrix of Boley et al's "undirected" graph.

    Boley et al associate a directed graph with an undirected one whose
    adjacency matrix is G**s = (Pi * P + P' * Pi) / 2. Here P is the
    transition probability matrix ``tp`` and Pi is the diagonal matrix
    built from ``get_steady_state(tp)``. This function returns the
    corresponding transition probability matrix instead:

    P**s = (P + inv(Pi) * P.T * Pi) / 2

    The returned matrix is not necessarily symmetric.
    """
    steady_state = np.diag(get_steady_state(tp))
    reversed_chain = np.dot(np.dot(np.linalg.inv(steady_state), tp.T),
                            steady_state)
    return (tp + reversed_chain) / 2.0
def association(gene, context, return_snps=False):
    """Compute the gene-level association statistics for ``gene``.

    The inputs come from ``context.provide_calculation(gene)``, which yields
    the number of SNPs in the model, a per-SNP table, a covariance matrix and
    a SNP collection (the last one is not used here). When the root logger's
    effective level is below 10, the covariance eigenvalues are inspected and
    an info message is logged if any of them is below 1e-6.

    Returns the tuple ``(gene, zscore, effect_size, sigma_g_2,
    n_snps_in_model, n_snps_in_cov, n_snps_used)``. All three statistics stay
    ``numpy.nan`` when no SNP is usable; ``zscore`` and ``effect_size`` also
    stay ``numpy.nan`` when ``sigma_g_2`` is not positive. When
    ``return_snps`` is true, the result is ``(that_tuple, set(snps_used))``.
    """
    n_snps_in_model, table, cov, _snps = context.provide_calculation(gene)

    # Diagnostic only: runs at very verbose logging levels.
    if logging.getLogger().getEffectiveLevel() < 10:
        eigenvalues = numpy.linalg.eig(cov)[0]
        if numpy.any(eigenvalues < 1e-6):
            logging.info("Gene %s has covariance close to singular", gene)

    # Bookkeeping that is reported alongside the statistics.
    snps_used = table[Constants.SNP]
    n_snps_used = len(snps_used)
    n_snps_in_cov = context.get_n_in_covariance(gene)

    zscore = effect_size = sigma_g_2 = numpy.nan
    if n_snps_used > 0:
        weights = table[WDBQF.K_WEIGHT]
        snp_zscores = table[Constants.ZSCORE]
        snp_betas = table[Constants.BETA]
        # Per-SNP standard deviations from the covariance diagonal.
        snp_sd = numpy.sqrt(numpy.diag(cov))

        # weights . cov . weights (``d`` is presumably numpy.dot -- confirm).
        sigma_g_2 = float(d(d(weights, cov), weights))
        if sigma_g_2 > 0:
            try:
                weighted_z = weights * snp_zscores * snp_sd
                zscore = numpy.sum(weighted_z) / numpy.sqrt(sigma_g_2)
                weighted_beta = weights * snp_betas * (snp_sd ** 2)
                effect_size = numpy.sum(weighted_beta) / sigma_g_2
            except Exception as e:
                # Best effort: keep whatever was computed and leave the rest NaN.
                logging.log(
                    9, "Unexpected exception when calculating zscore: %s, %s",
                    gene, str(e))

    result = (gene, zscore, effect_size, sigma_g_2,
              n_snps_in_model, n_snps_in_cov, n_snps_used)
    return (result, set(snps_used)) if return_snps else result
# Example #6
# 0
def _get_z(variants, ids, gwas_slice, cutoff, regularization, f=True):
    """Combine the weights from ``_get`` with ``gwas_slice.zscore``.

    Returns ``(z, s, n_indep, eigen)``: ``s``, ``n_indep`` and ``eigen`` come
    straight from ``_get``; ``z`` is ``(w . gwas_slice.zscore) / s`` when
    ``s`` is positive and ``None`` otherwise.
    """
    w, s, n_indep, eigen = _get(variants, ids, cutoff, regularization, f=f)
    if s > 0:
        z = d(w, gwas_slice.zscore) / s
    else:
        # A non-positive scale cannot be used as a divisor.
        z = None
    return z, s, n_indep, eigen
# Example #7
# 0
def RSP_and_FE_distances(A, beta, C=None):
    """Calculate the randomised shortest path distance and free-energy
    distance, as defined in "Developments in the theory of randomized
    shortest paths with a comparison of graph node distances",
    Kivim\"{a}ki, Shimbo, and Saerens, Physica A, 2013.

    Arguments

    - A:     Adjacency matrix, whose elements represent affinities between
             nodes, which define the reference transition
             probabilities. A can be asymmetric (for directed graphs).
             Distances between nodes that are not strongly connected
             are Inf.

    - beta   beta should lie more or less between 10^-8 and 20, but this
             depends on the size of the graph and the magnitude of the
             costs. When beta --> 0, we obtain the commute cost distances.
             When beta --> infinity, we obtain the shortest path (lowest
             cost) distances.

    - C      Cost matrix, whose elements represent the cost of traversing
             an edge of the graph. Infinite costs can be marked as zeros
             (zero costs are anyway not allowed). If C is not provided,
             then the costs will be set by default as c_ij = 1/a_ij.

    Returns D_RSP: the RSP dissimilarity matrix
            D_FE:  the free energy distance matrix

    Raises ValueError if A is not square or if beta is below 1e-8 or
    above 20.0.

    The caller's NumPy floating-point error settings are left untouched.

    Original Matlab code and comments (c) Ilkka Kivim\"{a}ki 2013

    Transliterated to Python/Numpy by James McDermott
    <*****@*****.**>. Helpful guides to this type of
    transliteration:
    http://mathesaurus.sourceforge.net/matlab-numpy.html,
    http://wiki.scipy.org/NumPy_for_Matlab_Users,
    http://wiki.scipy.org/Tentative_NumPy_Tutorial
    """

    # Largest finite double: stands in for the cost of a missing edge.
    max_cost = np.finfo('d').max
    eps = 0.00000001

    # If A is integer-valued, and beta is floating-point, can get an
    # error in the matrix inversion, so convert A to float here. This
    # also converts in case it was a matrix. The builtin ``float`` is
    # used because the ``np.float`` alias was removed from NumPy.
    A = np.array(A, dtype=float)

    A[A < eps] = 0.0
    n, m = A.shape
    if n != m:
        raise ValueError("The input matrix A must be square")

    if C is None:
        has_edge = A >= eps
        C = A.copy()
        C[has_edge] = 1.0 / A[has_edge]
        C[A < eps] = max_cost
    else:
        # Same normalisation as for A, so that ``*`` below is always
        # element-wise (it would be a matrix product for np.matrix).
        C = np.asarray(C, dtype=float)

    # check beta value?
    if beta < eps or beta > 20.0:
        raise ValueError("The value for beta is outside the expected range, 0 to 20.0")

    # Computation of Pref, the reference transition probability matrix:
    # each row of A divided by its row sum.
    row_sums = np.sum(A, 1)
    row_sums[row_sums == 0] = 1  # avoid zero-division
    Pref = A / row_sums[:, np.newaxis]

    # Computation of the W and Z matrices
    W = np.exp(-beta * C) * Pref
    Z = np.linalg.inv(np.eye(n) - W)

    # Computation of Z*(C.*W)*Z avoiding zero-division errors:
    numerator = np.dot(np.dot(Z, C * W), Z)
    usable = (numerator > 0) & (Z > 0)
    D_nonabs = np.full((n, n), np.inf)
    D_nonabs[usable] = numerator[usable] / Z[usable]
    # D_nonabs above actually gives the expected costs of non-hitting paths
    # from i to j.

    # Expected costs of hitting paths -- avoid a possible inf-inf
    # which can arise with isolated nodes and would give a NaN -- we
    # prefer to have inf in that case.
    diag_D = np.tile(np.diag(D_nonabs), (n, 1))  # every row is diag(D_nonabs)
    finite = ~np.isinf(diag_D)
    C_RSP = np.full((n, n), np.inf)
    C_RSP[finite] = D_nonabs[finite] - diag_D[finite]

    # symmetrization
    D_RSP = 0.5 * (C_RSP + C_RSP.T)

    # Free energies and symmetrization: column j of Z is scaled by
    # 1/Z[j, j], which is what multiplying by diag(1/diag(Z)) does.
    Zh = Z * (1.0 / np.diag(Z))

    # If there are any 0 values in Zh (because of isolated nodes), taking
    # log would signal a divide-by-zero -- ignore it for this statement
    # only, then restore whatever error state the caller had.
    with np.errstate(divide='ignore'):
        FE = -np.log(Zh) / beta
    D_FE = 0.5 * (FE + FE.T)

    # Just in case, set diagonals to zero:
    np.fill_diagonal(D_RSP, 0.0)
    np.fill_diagonal(D_FE, 0.0)

    return D_RSP, D_FE