def calculate_kinship_old(genotype_matrix, temp_data=None):
    """
    Compute the RRM (kinship) matrix from a SNP genotype matrix.

    genotype_matrix is an n x m matrix encoding SNP minor alleles
    (n individuals in rows, m SNPs in columns); missing calls are NaN.

    For every SNP column the missing values are imputed with the mean of
    the observed values in that column, and the column is then centered on
    that mean and scaled by its standard deviation. Columns with zero
    variance, or with no observed value at all, are dropped.

    NOTE: imputation and normalization are written into the caller's
    genotype_matrix in place; the returned matrix is the subset of kept
    columns.
    NOTE(review): assumes genotype_matrix has a float dtype (it must be
    able to hold NaN and the normalized values) -- confirm against callers.

    temp_data - optional object exposing store(key, value); when given it
                receives "percent_complete" updates in the range 0..45.

    Returns (kinship_matrix, genotype_matrix) where kinship_matrix is the
    n x n product W * W.T / m (W being the normalized matrix of kept
    columns and m the ORIGINAL number of SNP columns, dropped ones
    included) and genotype_matrix is W itself.
    """
    print("call calculate_kinship_old")
    n = genotype_matrix.shape[0]
    m = genotype_matrix.shape[1]
    print("genotype 2D matrix n (inds) is:", n)
    print("genotype 2D matrix m (snps) is:", m)
    assert m > n, "m should be larger than n (snps>inds)"

    keep = []
    for counter in range(m):
        # Boolean mask of the missing (NaN) calls in this SNP column
        not_number = np.isnan(genotype_matrix[:, counter])

        # Observed values only. `~` is the boolean negation; the older
        # `True - mask` spelling raises TypeError on current NumPy.
        marker_values = genotype_matrix[~not_number, counter]

        # A column without a single observed value cannot be imputed; its
        # mean would be NaN and would turn the whole kinship matrix to NaN.
        if marker_values.size == 0:
            continue

        # Impute the missing calls with the mean of the observed ones
        values_mean = marker_values.mean()
        genotype_matrix[not_number, counter] = values_mean

        # Monomorphic SNPs carry no information and cannot be scaled
        vr = genotype_matrix[:, counter].var()
        if vr == 0:
            continue
        keep.append(counter)

        genotype_matrix[:, counter] = (genotype_matrix[:, counter] - values_mean) / np.sqrt(vr)

        # float() keeps the ratio fractional even under integer division
        percent_complete = int(round((float(counter) / m) * 45))
        if temp_data is not None:
            temp_data.store("percent_complete", percent_complete)

    genotype_matrix = genotype_matrix[:, keep]
    print("After kinship (old) genotype_matrix: ", pf(genotype_matrix))
    kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0 / float(m)
    return kinship_matrix, genotype_matrix
def GWAS(pheno_vector,
         genotype_matrix,
         kinship_matrix,
         kinship_eigen_vals=None,
         kinship_eigen_vectors=None,
         covariate_matrix=None,
         restricted_max_likelihood=True,
         refit=False,
         temp_data=None):
    """
    Performs a basic GWAS scan using the LMM.

    This function uses the LMM module to assess association at each SNP
    and does some simple cleanup, such as removing missing individuals per
    SNP and re-computing the eigen-decomp.

    pheno_vector - n x 1 phenotype vector
    genotype_matrix - n x m SNP matrix
    kinship_matrix - n x n kinship matrix
    kinship_eigen_vals, kinship_eigen_vectors = linalg.eigh(K) - or the
        eigen vectors and values for K
    covariate_matrix - n x q covariate matrix (defaults to a column of ones)
    restricted_max_likelihood - use restricted maximum likelihood
    refit - refit the variance component for each SNP
    temp_data - optional object exposing store(key, value); when given it
        receives "percent_complete" updates in the range 45..100

    Returns (t_statistics, p_values), two lists with one entry per SNP
    column. SNPs whose observed genotypes have zero variance get a
    t-statistic of NaN and a p-value of 0.
    """
    # `is None` rather than `== None`: these arguments may be NumPy arrays,
    # for which `==` is an elementwise comparison, not a None check.
    if kinship_eigen_vals is None:
        kinship_eigen_vals = []
    if kinship_eigen_vectors is None:
        kinship_eigen_vectors = []

    n = genotype_matrix.shape[0]
    m = genotype_matrix.shape[1]

    if covariate_matrix is None:
        covariate_matrix = np.ones((n, 1))

    # Remove missing values in pheno_vector and adjust associated parameters
    missing_pheno = np.isnan(pheno_vector)
    if missing_pheno.sum():
        pheno_vector = pheno_vector[~missing_pheno]
        # NOTE(review): the matching row filters on genotype_matrix,
        # covariate_matrix and kinship_matrix were commented out in the
        # original code and are still not applied here. Confirm that
        # callers pre-filter missing phenotypes or that LMM copes with it.
        # Any eigen-decomposition passed in is discarded in this case.
        kinship_eigen_vals = []
        kinship_eigen_vectors = []

    lmm_ob = LMM(pheno_vector,
                 kinship_matrix,
                 kinship_eigen_vals,
                 kinship_eigen_vectors,
                 covariate_matrix)
    if not refit:
        lmm_ob.fit()

    p_values = []
    t_statistics = []

    for counter in range(m):
        x = genotype_matrix[:, counter].reshape((n, 1))
        missing = np.isnan(x).reshape((-1,))
        if missing.sum():
            # Some individuals lack a genotype for this SNP: fit a
            # dedicated model on the individuals that have one.
            keep = ~missing
            xs = x[keep, :]
            if xs.var() == 0:
                # NOTE(review): a p-value of 0 for an uninformative SNP
                # is kept from the original -- confirm it is intended.
                p_values.append(0)
                t_statistics.append(np.nan)
                continue

            # Per-SNP subsets live in their own names so the full-size
            # inputs stay intact for the following SNPs.
            pheno_subset = pheno_vector[keep]
            covariate_subset = covariate_matrix[keep, :]
            kinship_subset = kinship_matrix[keep, :][:, keep]
            lmm_ob_2 = LMM(pheno_subset, kinship_subset, X0=covariate_subset)
            if refit:
                lmm_ob_2.fit(X=xs)
            else:
                lmm_ob_2.fit()
            ts, ps, beta, betaVar = lmm_ob_2.association(xs, REML=restricted_max_likelihood)
        else:
            if x.var() == 0:
                p_values.append(0)
                t_statistics.append(np.nan)
                continue

            if refit:
                lmm_ob.fit(X=x)
            ts, ps, beta, betaVar = lmm_ob.association(x, REML=restricted_max_likelihood)

        # The kinship step reports 0..45, the scan covers the remaining 55.
        # temp_data is optional, so only report when one was supplied.
        if temp_data is not None:
            percent_complete = 45 + int(round((float(counter) / m) * 55))
            temp_data.store("percent_complete", percent_complete)

        p_values.append(ps)
        t_statistics.append(ts)

    return t_statistics, p_values