Exemplo n.º 1
0
def calculate_kinship_old(genotype_matrix, temp_data=None):
    """
    Compute a kinship matrix from a SNP genotype matrix.

    genotype_matrix is an n x m matrix (n individuals in rows, m SNPs in
    columns) encoding SNP minor alleles; missing calls are encoded as NaN.

    For every SNP column the missing values are replaced by the mean of the
    column's non-missing values, and the column is then standardized
    (mean subtracted, divided by the standard deviation).  Columns with zero
    variance, and columns in which every value is missing, are dropped.

    NOTE: genotype_matrix is modified in place (imputation and
    standardization are written back into the caller's array).

    temp_data - optional object with a ``store(key, value)`` method; when
        given, progress is reported under "percent_complete" (0..45).

    Returns a tuple (kinship_matrix, genotype_matrix) where genotype_matrix
    is the standardized matrix restricted to the kept columns and
    kinship_matrix is ``genotype_matrix . genotype_matrix.T / m``.  The
    divisor is the original SNP count m, including dropped columns.
    """
    print("call calculate_kinship_old")
    n = genotype_matrix.shape[0]
    m = genotype_matrix.shape[1]
    print("genotype 2D matrix n (inds) is:", n)
    print("genotype 2D matrix m (snps) is:", m)
    assert m > n, "m should be larger than n (snps>inds)"

    keep = []
    for counter in range(m):
        # Mask of missing (NaN) calls for this SNP.
        not_number = np.isnan(genotype_matrix[:, counter])

        # Observed (non-missing) values.  `~` is used for the negation
        # because boolean subtraction (`True - mask`) is rejected by NumPy.
        marker_values = genotype_matrix[~not_number, counter]
        if marker_values.size == 0:
            # Nothing observed for this SNP: its mean would be NaN and would
            # propagate into every entry of the kinship matrix.
            continue

        # Impute missing calls with the mean of the observed values.
        values_mean = marker_values.mean()
        genotype_matrix[not_number, counter] = values_mean

        vr = genotype_matrix[:, counter].var()
        if vr == 0:
            # Monomorphic SNP: cannot be standardized and carries no signal.
            continue
        keep.append(counter)
        genotype_matrix[:, counter] = (genotype_matrix[:, counter] - values_mean) / np.sqrt(vr)

        if temp_data is not None:
            # float() keeps the fraction from truncating to 0 under
            # integer division.
            percent_complete = int(round((counter / float(m)) * 45))
            temp_data.store("percent_complete", percent_complete)

    genotype_matrix = genotype_matrix[:, keep]
    print("After kinship (old) genotype_matrix: ", pf(genotype_matrix))
    kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0 / float(m)
    return kinship_matrix, genotype_matrix
Exemplo n.º 2
0
def GWAS(pheno_vector,
         genotype_matrix,
         kinship_matrix,
         kinship_eigen_vals=None,
         kinship_eigen_vectors=None,
         covariate_matrix=None,
         restricted_max_likelihood=True,
         refit=False,
         temp_data=None):
    """
    Performs a basic GWAS scan using the LMM.  This function
    uses the LMM module to assess association at each SNP and
    does some simple cleanup, such as removing missing individuals
    per SNP and re-computing the eigen-decomp

    pheno_vector - n x 1 phenotype vector
    genotype_matrix - n x m SNP matrix
    kinship_matrix - n x n kinship matrix
    kinship_eigen_vals, kinship_eigen_vectors = linalg.eigh(K) - or the eigen vectors and values for K
    covariate_matrix - n x q covariate matrix (defaults to a column of ones)
    restricted_max_likelihood - use restricted maximum likelihood
    refit - refit the variance component for each SNP
    temp_data - optional object with a ``store(key, value)`` method; when
        given, progress is reported under "percent_complete" (45..100)

    Returns a tuple (t_statistics, p_values) of lists with one entry per
    SNP, in column order.  SNPs whose non-missing values have zero variance
    get a t statistic of NaN and a p value of 0; all other entries are the
    values returned by the LMM object's ``association`` call.
    """
    # Identity checks: the arguments may be NumPy arrays, for which
    # `== None` is an elementwise comparison with an ambiguous truth value.
    if kinship_eigen_vals is None:
        kinship_eigen_vals = []
    if kinship_eigen_vectors is None:
        kinship_eigen_vectors = []

    n = genotype_matrix.shape[0]
    m = genotype_matrix.shape[1]

    if covariate_matrix is None:
        covariate_matrix = np.ones((n, 1))

    # Remove missing values in pheno_vector and adjust associated parameters
    v = np.isnan(pheno_vector)
    if v.sum():
        keep = ~v
        pheno_vector = pheno_vector[keep]
        # NOTE(review): only pheno_vector is filtered here; genotype_matrix,
        # covariate_matrix and kinship_matrix keep their full size, as in the
        # original code where that filtering was disabled.  Confirm that LMM
        # handles the resulting size difference.
        # A supplied eigen-decomposition no longer matches; drop it.
        kinship_eigen_vals = []
        kinship_eigen_vectors = []

    lmm_ob = LMM(pheno_vector,
                 kinship_matrix,
                 kinship_eigen_vals,
                 kinship_eigen_vectors,
                 covariate_matrix)
    if not refit:
        lmm_ob.fit()

    p_values = []
    t_statistics = []

    for counter in range(m):
        x = genotype_matrix[:, counter].reshape((n, 1))
        missing = np.isnan(x).reshape((-1,))
        if missing.any():
            present = ~missing
            xs = x[present, :]
            if xs.var() == 0:
                # NOTE(review): a p value of 0 for an uninformative SNP is
                # kept from the original; confirm this is what callers expect.
                p_values.append(0)
                t_statistics.append(np.nan)
                continue

            # Build the per-SNP model from local subsets.  The full-size
            # inputs must stay untouched so later SNPs (each with its own
            # missingness pattern) are subset from the complete data.
            snp_pheno = pheno_vector[present]
            snp_covariates = covariate_matrix[present, :]
            snp_kinship = kinship_matrix[present, :][:, present]
            lmm_ob_2 = LMM(snp_pheno,
                           snp_kinship,
                           X0=snp_covariates)
            if refit:
                lmm_ob_2.fit(X=xs)
            else:
                lmm_ob_2.fit()
            ts, ps, beta, betaVar = lmm_ob_2.association(xs, REML=restricted_max_likelihood)
        else:
            if x.var() == 0:
                p_values.append(0)
                t_statistics.append(np.nan)
                continue

            if refit:
                lmm_ob.fit(X=x)
            ts, ps, beta, betaVar = lmm_ob.association(x, REML=restricted_max_likelihood)

        if temp_data is not None:
            # float() keeps the fraction from truncating to 0 under
            # integer division.
            percent_complete = 45 + int(round((counter / float(m)) * 55))
            temp_data.store("percent_complete", percent_complete)

        p_values.append(ps)
        t_statistics.append(ts)

    return t_statistics, p_values