def run_other_new(n, m, pheno_vector, geno, restricted_max_likelihood=True, refit=False):
    """Run a GWAS with the new LMM2 engine and return p-values and t-statistics.

    Individuals with a missing phenotype are removed from both the phenotype
    vector and the genotype matrix, a kinship matrix is computed from the
    remaining genotypes, and ``gwas.gwas`` is run on the result.

    Args:
        n: Number of individuals; replaced by the value returned from
            ``phenotype.remove_missing_new``.
        m: Not used by this function; kept for interface compatibility.
        pheno_vector: Phenotype vector.
        geno: Genotype matrix; its columns are filtered with the ``keep``
            selector returned by ``phenotype.remove_missing_new``.
        restricted_max_likelihood: Whether to use restricted max likelihood.
        refit: Whether to refit the variance component for each marker.

    Returns:
        Tuple ``(p_values, t_stats)`` as produced by ``gwas.gwas``.
    """
    print("Running the new LMM2 engine in run_other_new")
    print("REML=", restricted_max_likelihood, " REFIT=", refit)

    # Adjust phenotypes: drop individuals without a phenotype value.
    n, Y, keep = phenotype.remove_missing_new(n, pheno_vector)

    # NOTE: MAF replacement and genotype normalization are not applied here.
    geno = geno[:, keep]

    with Bench("Calculate Kinship"):
        K, G = calculate_kinship_new(geno)

    print("kinship_matrix: ", pf(K))
    print("kinship_matrix.shape: ", pf(K.shape))

    print("run_other_new genotype_matrix: ", G.shape)
    print(G)

    with Bench("Doing GWAS"):
        # Forward the caller's settings; they were previously hard-coded to
        # True/False here, which silently ignored both arguments.
        t_stats, p_values = gwas.gwas(
            Y,
            G,
            K,
            restricted_max_likelihood=restricted_max_likelihood,
            refit=refit,
            verbose=True,
        )
    Bench().report()
    return p_values, t_stats
def test_list_delete_top():
    """Benchmark emptying the list 1..m by removing values in ascending order.

    Each ``remove`` call therefore deletes the current first element.
    """
    items = list(range(1, m + 1))
    with Bench():
        for value in range(1, m + 1):
            items.remove(value)
    assert items == []
def test_list_multiple_cast():
    """Benchmark building 1..n in chunks of 100 via ``list(range(...))``.

    The result is compared against the module-level ``ref``.
    """
    built = []
    divisor = 100
    with Bench():
        block_count = n // divisor
        for block_index in range(1, block_count + 1):
            start = (block_index - 1) * divisor + 1
            stop = block_index * divisor + 1
            built += list(range(start, stop))
    assert ref == built
def test_list_multiple_comprehension():
    """Benchmark building 1..n in chunks of 100 via list comprehensions.

    The result is compared against the module-level ``ref``.
    """
    built = []
    divisor = 100
    with Bench():
        block_count = n // divisor
        for block_index in range(1, block_count + 1):
            start = (block_index - 1) * divisor + 1
            stop = block_index * divisor + 1
            built += [value for value in range(start, stop)]
    assert ref == built
def test_list_delete_back_slice():
    """Benchmark emptying the list 1..m from the back by repeated slicing."""
    items = list(range(1, m + 1))
    # The iterator stays bound to the original list object even though
    # ``items`` is rebound to a new, shorter list on every pass.
    descending = reversed(items)
    with Bench():
        # This produces a worst case: the last element of the list is
        # dropped first, again and again, until the list is empty.
        for value in descending:
            items = items[:value - 1]
    assert items == []
def run_other_old(pheno_vector, genotype_matrix, restricted_max_likelihood=True, refit=False):
    """Run a GWAS with the original LMM engine and return p-values and t-statistics.

    A kinship matrix is computed from the genotypes with
    ``calculate_kinship_new`` and ``GWAS`` is then run on the transposed
    genotype matrix it returns.

    Args:
        pheno_vector: Phenotype vector.
        genotype_matrix: Genotype matrix; replaced by the matrix returned
            from ``calculate_kinship_new``.
        restricted_max_likelihood: Whether to use restricted max likelihood.
        refit: Whether to refit the variance component for each marker.

    Returns:
        Tuple ``(p_values, t_stats)`` as produced by ``GWAS``.
    """
    print("Running the original LMM engine in run_other (old)")
    print("REML=", restricted_max_likelihood, " REFIT=", refit)

    with Bench("Calculate Kinship"):
        kinship_matrix, genotype_matrix = calculate_kinship_new(genotype_matrix)

    print("kinship_matrix: ", pf(kinship_matrix))
    print("kinship_matrix.shape: ", pf(kinship_matrix.shape))

    print("run_other_old genotype_matrix: ", genotype_matrix.shape)
    print(genotype_matrix)

    with Bench("Doing GWAS"):
        # Forward the caller's settings; they were previously hard-coded to
        # True/False here, which silently ignored both arguments.
        t_stats, p_values = GWAS(
            pheno_vector,
            genotype_matrix.T,
            kinship_matrix,
            restricted_max_likelihood=restricted_max_likelihood,
            refit=refit,
        )
    Bench().report()
    return p_values, t_stats
def test_list_numpy_arange():
    """Benchmark building 1..n with ``numpy.arange(...).tolist()``.

    The result is compared against the module-level ``ref``.
    """
    import numpy

    built = []
    with Bench():
        built = numpy.arange(1, n + 1).tolist()
    assert ref == built
def test_list_cast():
    """Benchmark building 1..n with a single ``list(range(...))`` call.

    The result is compared against the module-level ``ref``.
    """
    built = []
    with Bench():
        built = list(range(1, n + 1))
    assert ref == built
def test_list_for_comprehension():
    """Benchmark building 1..n with a list comprehension.

    The result is compared against the module-level ``ref``.
    """
    built = []
    with Bench():
        built = [value for value in range(1, n + 1)]
    assert ref == built
def test_list_plus():
    """Benchmark building 1..n by extending with one-element lists (``+=``).

    The result is compared against the module-level ``ref``.
    """
    built = []
    with Bench():
        for value in range(1, n + 1):
            built += [value]
    assert ref == built
def test_list_append():
    """Benchmark building 1..n with repeated ``list.append`` calls.

    The result is compared against the module-level ``ref``.
    """
    built = []
    with Bench():
        for value in range(1, n + 1):
            built.append(value)
    assert ref == built
def run_human(pheno_vector, covariate_matrix, plink_input_file, kinship_matrix, refit=False):
    """Run a per-SNP association scan on a gzipped, pickled PLINK input.

    Individuals whose phenotype is NaN are dropped from the phenotype vector,
    the covariate matrix and the kinship matrix. An ``LMM`` is fitted on the
    remaining data, the SNP inputs are split into chunks that are pickled,
    zlib-compressed and pushed onto the Redis list ``"plink_inputs"``, and
    finally ``human_association`` is called for every SNP.

    Args:
        pheno_vector: Phenotype values; NaN marks a missing phenotype.
        covariate_matrix: Covariates, one row per individual.
        plink_input_file: Path to a gzip-compressed pickle holding a dict
            with the keys ``'plink_input'`` and ``'numSNPs'``.
        kinship_matrix: Square kinship matrix over the individuals.
        refit: Passed through to ``human_association``.

    Returns:
        Tuple ``(p_values, t_stats)`` of lists with one entry per SNP.
    """
    v = np.isnan(pheno_vector)
    # Boolean mask of individuals to keep. ``True - v`` (boolean subtraction)
    # raises TypeError on current NumPy, so negate the mask explicitly.
    keep = np.logical_not(v)
    # Flatten to 1-D so the mask can index rows and columns below.
    keep = keep.reshape((len(keep), ))

    identifier = str(uuid.uuid4())

    if v.sum():
        pheno_vector = pheno_vector[keep]
        print("pheno_vector shape is now: ", pf(pheno_vector.shape))
        covariate_matrix = covariate_matrix[keep, :]
        print("kinship_matrix shape is: ", pf(kinship_matrix.shape))
        print("keep is: ", pf(keep.shape))
        kinship_matrix = kinship_matrix[keep, :][:, keep]

    print("kinship_matrix:", pf(kinship_matrix))

    n = kinship_matrix.shape[0]
    print("n is:", n)
    lmm_ob = LMM(pheno_vector, kinship_matrix, covariate_matrix)
    lmm_ob.fit()

    # Buffers for pvalues and t-stats
    p_values = []
    t_stats = []

    with Bench("Opening and loading pickle file"):
        with gzip.open(plink_input_file, "rb") as input_file:
            # NOTE(security): unpickling can execute arbitrary code;
            # plink_input_file must come from a trusted source.
            data = pickle.load(input_file)

    plink_input = data['plink_input']

    with Bench("Calculating numSNPs"):
        total_snps = data['numSNPs']

    with Bench("snp iterator loop"):
        count = 0

        with Bench("Create list of inputs"):
            inputs = list(plink_input)

        with Bench("Divide into chunks"):
            results = chunks.divide_into_chunks(inputs, 64)

        key = "plink_inputs"

        # Todo: Delete below line when done testing
        Redis.delete(key)

        timestamp = datetime.datetime.utcnow().isoformat()

        # Pickle chunks of input SNPs (from the Plink iterator) and compress them
        for part, result in enumerate(results):
            holder = pickle.dumps(
                dict(identifier=identifier,
                     part=part,
                     timestamp=timestamp,
                     result=result),
                pickle.HIGHEST_PROTOCOL)
            Redis.rpush(key, zlib.compress(holder))

        # NOTE(review): plink_input was already consumed by list() above; if
        # it is a one-shot iterator this loop sees nothing -- confirm that it
        # can be iterated a second time.
        for snp, this_id in plink_input:
            count += 1
            progress("human", count, total_snps)

            ps, ts = human_association(snp,
                                       n,
                                       keep,
                                       lmm_ob,
                                       pheno_vector,
                                       covariate_matrix,
                                       kinship_matrix,
                                       refit)

            p_values.append(ps)
            t_stats.append(ts)

    return p_values, t_stats