Пример #1
0
def snpWeights(L, Pi, f, K, t):
    """Return SNP weights from a truncated SVD of the standardized dosages.

    A float32 matrix with the same shape as ``Pi`` is filled by
    ``covariance_cy.updatePCAngsd`` (from ``L`` and ``Pi``) and then passed
    to ``covariance_cy.standardizeE`` together with ``f``.  ``t`` is forwarded
    unchanged to both helpers.  ``K`` is the number of components requested
    from ``svds``.

    The result has one row per column of ``Pi`` and ``K`` columns: the right
    singular vectors in reversed component order, scaled by the squared
    singular values divided by the number of columns of ``Pi``.
    """
    nInd, nSites = Pi.shape  # Dimensions
    dosage = np.empty((nInd, nSites), dtype=np.float32)

    # Fill the dosage matrix and standardize it in place
    covariance_cy.updatePCAngsd(L, Pi, dosage, t)
    covariance_cy.standardizeE(dosage, f, t)

    # Truncated SVD - only singular values and right singular vectors are used
    _, singVals, rightVecs = svds(dosage, k=K)

    # Reverse the component order returned by svds for both factors
    # (presumably so the leading component comes first - confirm with svds docs)
    flippedVecs = rightVecs[::-1, :]
    flippedVals = singVals[::-1]

    # Scaling by eigenvalues (PC-scores)
    weights = flippedVecs.T * (flippedVals**2)
    return weights / nSites
Пример #2
0
def selectionScan(L, Pi, f, K, t):
    """Compute the per-site selection statistics for ``K`` components.

    A float32 matrix shaped like ``Pi`` is filled by
    ``covariance_cy.updatePCAngsd`` and standardized by
    ``covariance_cy.standardizeE`` (``t`` is forwarded to both).  The right
    singular vectors of its rank-``K`` SVD, in reversed component order, are
    handed to ``shared.computeD`` together with a preallocated float32 output
    of shape (columns of ``Pi``, ``K``), which is returned.
    """
    nInd, nSites = Pi.shape  # Dimensions

    # Containers: dosages and the output statistics
    dosage = np.empty((nInd, nSites), dtype=np.float32)
    stats = np.empty((nSites, K), dtype=np.float32)

    # Fill the dosage matrix and standardize it in place
    covariance_cy.updatePCAngsd(L, Pi, dosage, t)
    covariance_cy.standardizeE(dosage, f, t)

    # Truncated SVD - keep only the right singular vectors, reversed in order
    rightVecs = svds(dosage, k=K)[2]
    rightVecs = rightVecs[::-1, :]

    # NOTE(review): computeD is assumed to write its result into `stats`
    shared.computeD(rightVecs, stats)
    return stats
Пример #3
0
def kinshipConomos(L, Pi, t):
    """Build a kinship matrix as a ratio of two Gram matrices.

    With ``n, m = Pi.shape`` the function returns a float32 ``(n, n)`` matrix:
    ``E_num @ E_num.T`` divided element-wise by ``4 * E_den @ E_den.T``, with
    the diagonal replaced by the values produced by
    ``kinship_cy.diagKinship``.  ``E_num`` / ``E_den`` are the states of the
    working matrix after ``kinship_cy.numeratorKin`` and
    ``kinship_cy.denominatorKin`` respectively (both are assumed to transform
    it in place - confirm against the Cython sources).  ``t`` is forwarded
    unchanged to every helper.
    """
    nInd, nSites = Pi.shape

    # Working buffers
    dosage = np.empty((nInd, nSites), dtype=np.float32)
    diagKin = np.empty(nInd, dtype=np.float32)
    kinship = np.empty((nInd, nInd), dtype=np.float32)
    denom = np.empty((nInd, nInd), dtype=np.float32)

    # Dosages
    covariance_cy.updatePCAngsd(L, Pi, dosage, t)

    # Diagonal entries are computed separately
    kinship_cy.diagKinship(L, Pi, diagKin, t)

    # Numerator: Gram matrix of the working matrix after numeratorKin
    kinship_cy.numeratorKin(dosage, Pi, t)
    np.dot(dosage, dosage.T, out=kinship)

    # Denominator: Gram matrix after denominatorKin, times four
    kinship_cy.denominatorKin(dosage, Pi, t)
    np.dot(dosage, dosage.T, out=denom)
    np.multiply(denom, 4, out=denom)

    # Element-wise ratio, then insert the correct diagonal
    np.divide(kinship, denom, out=kinship)
    np.fill_diagonal(kinship, diagKin)
    return kinship
Пример #4
0
def pcaEM(L, e, f, m_iter, m_tole, no_std, t):
    """Iteratively estimate individual allele frequencies and the covariance.

    Parameters
    ----------
    L : 2-D array whose first dimension is three times the number of
        individuals (``n = L.shape[0] // 3``) and whose second dimension is
        the number of sites ``m``.
    e : number of principal components to use; ``0`` selects it with
        Velicer's Minimum Average Partial (MAP) test on an initial
        covariance matrix.
    f : passed to the ``covariance_cy`` helpers (the output code elsewhere
        calls it the population allele frequencies).
    m_iter : upper bound on the iteration counter.  With ``e == 0`` and
        ``m_iter == 0`` the initial covariance matrix is returned directly.
    m_tole : the iteration stops once the RMSE between successive ``W``
        (as returned by ``estimateSVD``) drops below this value.
    no_std : if truthy, the final covariance uses ``covPCAngsdNoStd`` and
        centering instead of ``covPCAngsd`` and standardization.
    t : forwarded unchanged to every ``covariance_cy`` helper.

    Returns
    -------
    (C, Pi, K) : the ``(n, n)`` covariance matrix, the ``(n, m)`` float32
        matrix of individual allele frequencies (``None`` on the early
        return), and the number of principal components used.
    """
    n, m = L.shape  # Dimension of likelihood matrix
    n //= 3  # Number of individuals
    K = e

    # Initiate matrices
    E = np.empty((n, m), dtype=np.float32)
    dCov = np.zeros(n, dtype=np.float32)

    # Estimate covariance matrix (Fumagalli) and infer number of PCs
    if K == 0:
        # Prepare dosages and diagonal
        covariance_cy.covFumagalli(L, f, E, dCov, t)
        covariance_cy.standardizeE(E, f, t)
        C = np.dot(E, E.T) / m
        np.fill_diagonal(C, dCov)

        if m_iter == 0:
            print("Returning with ngsTools covariance matrix!")
            return C, None, K

        # Velicer's Minimum Average Partial (MAP) Test
        # C is (n, n), so the number of variables in the test is n (the
        # number of individuals), not the number of sites m.
        nEig = min(n - 1, 15)  # Number of candidate components
        eigVals, eigVecs = eigsh(
            C, k=nEig)  # Eigendecomposition (Symmetric) - ARPACK
        eigVals = eigVals[::-1]  # Sorted eigenvalues
        eigVals[eigVals < 0] = 0  # Guard the square root below
        eigVecs = eigVecs[:, ::-1]  # Sorted eigenvectors
        loadings = eigVecs * np.sqrt(eigVals)
        mapTest = np.empty(nEig, dtype=np.float32)

        # Loop over up to n-1 eigenvalues for MAP test (Shriner implementation)
        for eig in range(nEig):
            # Covariance left after partialling out the first eig+1 components
            partcov = C - (np.dot(loadings[:, 0:(eig + 1)],
                                  loadings[:, 0:(eig + 1)].T))
            d = np.diag(partcov)

            if (np.sum(np.isnan(d)) > 0) or (np.sum(d == 0) >
                                             0) or (np.sum(d < 0) > 0):
                # Degenerate diagonal: partial correlations are undefined
                mapTest[eig] = 1
            else:
                d = np.diagflat(1 / np.sqrt(d))
                pr = np.dot(d, np.dot(partcov, d))  # Partial correlations
                # Average squared off-diagonal entry: pr is (n, n) with a
                # unit diagonal, leaving n*(n-1) off-diagonal terms.
                mapTest[eig] = (np.sum(pr**2) - n) / (n * (n - 1))

        K = max([1, np.argmin(mapTest) + 1
                 ])  # Number of principal components retained
        print("Using " + str(K) + " principal components (MAP test)")

        # Release memory
        del eigVals, eigVecs, loadings, partcov, mapTest

    else:
        print("Using " + str(K) + " principal components (manually selected)")
    covariance_cy.updateFumagalli(L, f, E, t)

    # Estimate individual allele frequencies
    Pi = np.empty((n, m), dtype=np.float32)
    covariance_cy.centerE(E, f, t)
    # NOTE(review): estimateSVD is defined elsewhere; it is assumed to fill
    # Pi and return the matrix W that is compared between iterations.
    W = estimateSVD(E, K, Pi)
    covariance_cy.updatePi(Pi, f, t)
    prevW = np.copy(W)
    print("Individual allele frequencies estimated (1)")

    # Iterative estimation
    for iteration in range(2, m_iter + 1):
        covariance_cy.updatePCAngsd(L, Pi, E, t)

        # Estimate individual allele frequencies
        covariance_cy.centerE(E, f, t)
        W = estimateSVD(E, K, Pi)
        covariance_cy.updatePi(Pi, f, t)

        # Break iterative update if converged
        diff = covariance_cy.rmse2d_eig(W, prevW)
        print("Individual allele frequencies estimated (" + str(iteration) +
              "). RMSE=" + str(diff))
        if diff < m_tole:
            print("Estimation of individual allele frequencies has converged.")
            break
        prevW = np.copy(W)
    del W, prevW

    # Estimate covariance matrix (PCAngsd)
    if no_std:
        covariance_cy.covPCAngsdNoStd(L, f, Pi, E, dCov, t)
        covariance_cy.centerE(E, f, t)
    else:
        covariance_cy.covPCAngsd(L, f, Pi, E, dCov, t)
        covariance_cy.standardizeE(E, f, t)
    C = np.dot(E, E.T) / m
    np.fill_diagonal(C, dCov)
    del E

    return C, Pi, K
Пример #5
0
# Optional outputs: each flag on `args` triggers one np.save call using
# `args.o` as the output prefix.  The printed messages name the files with a
# ".npy" suffix, which np.save appends to the given name.

# Population allele frequencies, cast with astype(float) before saving
if args.maf_save:
    np.save(args.o + ".maf", f.astype(float))
    print("Saved population allele frequencies as " + str(args.o) +
          ".maf.npy (Binary)\n")

# Individual allele frequencies
# NOTE(review): assumes Pi is an array here - confirm it cannot be None
if args.indf_save:
    np.save(args.o + ".indf", Pi.astype(float))
    print("Saved individual allele frequencies as " + str(args.o) +
          ".indf.npy (Binary)\n")

# Genotype dosages: recomputed from L and Pi into a temporary float32 matrix
# with L.shape[0] // 3 rows and L.shape[1] columns
if args.dosage_save:
    import covariance_cy
    E = np.empty((L.shape[0] // 3, L.shape[1]),
                 dtype=np.float32)  # Dosage matrix
    covariance_cy.updatePCAngsd(L, Pi, E, args.threads)
    np.save(args.o + ".dosage", E.astype(float))
    print("Saved genotype dosages as " + str(args.o) +
          ".dosage.npy (Binary)\n")
    del E  # Release the temporary dosage matrix

# Site information is only loaded when one of these two outputs is requested
if args.post_save or args.sites_save:
    print("Loading site information")
    import pandas as pd
    if args.beagle is not None:
        # Read the first three columns of the gzip-compressed, tab-separated
        # file as strings; the first line is treated as the header
        infoDF = pd.read_csv(args.beagle,
                             sep="\t",
                             header=0,
                             usecols=[0, 1, 2],
                             compression="gzip",
                             dtype=str)