def compute_snp(collect):
    """Run the association test for a single SNP.

    Args:
        collect: Indexable pair; ``collect[0]`` is the genotype vector and
            ``collect[1]`` is the SNP identifier passed to ``formatResult``.

    Returns:
        Whatever ``formatResult`` returns. The statistics are all ``np.nan``
        when the SNP has at most one non-missing value or (near-)zero
        variance.

    Relies on names defined outside this function: ``keep``, ``n``, ``Y``,
    ``X0``, ``K``, ``K2``, ``L``, ``options``, ``LMM``, ``LMM_withK2`` and
    ``formatResult``.
    """
    snp = collect[0]
    snp_name = collect[1]

    # Check SNPs for missing values
    x = snp[keep].reshape((n, 1))  # all the SNPs
    missing = np.isnan(x).reshape((-1,))

    if missing.sum():
        # Boolean negation. The former ``True - missing`` raises TypeError in
        # NumPy, which does not support ``-`` on boolean arrays.
        keeps = ~missing
        xs = x[keeps, :]
        if keeps.sum() <= 1 or xs.var() <= 1e-6:
            return formatResult(snp_name, np.nan, np.nan, np.nan, np.nan)

        # Its ok to center the genotype - I used options.normalizeGenotype to
        # force the removal of missing genotypes as opposed to replacing them
        # with MAF.
        if not options.normalizeGenotype:
            xs = (xs - xs.mean()) / np.sqrt(xs.var())

        # Restrict phenotype, covariates and kinship to the observed rows and
        # build a dedicated model for this SNP.
        Ys = Y[keeps]
        X0s = X0[keeps, :]
        Ks = K[keeps, :][:, keeps]
        if options.kfile2:
            K2s = K2[keeps, :][:, keeps]
            Ls = LMM_withK2(Ys, Ks, X0=X0s, verbose=options.verbose, K2=K2s)
        else:
            Ls = LMM(Ys, Ks, X0=X0s, verbose=options.verbose)

        if options.refit:
            Ls.fit(X=xs, REML=options.REML)
        else:
            Ls.fit(REML=options.REML)
        ts, ps, beta, betaVar = Ls.association(
            xs, REML=options.REML, returnBeta=True)
    else:
        if x.var() == 0:
            # Constant genotype: report nan values.
            return formatResult(snp_name, np.nan, np.nan, np.nan, np.nan)

        if options.refit:
            L.fit(X=x, REML=options.REML)
        # This is where it happens
        ts, ps, beta, betaVar = L.association(
            x, REML=options.REML, returnBeta=True)

    return formatResult(snp_name, beta, np.sqrt(betaVar).sum(), ts, ps)
def fitTwo(y, K1, K2, X0=None, wgrids=100):
    """Fit a model with two variance components by a grid search.

    The standard pylmm fit is run once per grid point. At each point a new
    kinship is formed as the linear combination ``w*K1 + (1 - w)*K2`` with
    ``w`` taken from the uniform grid ``0, 1/wgrids, ..., (wgrids-1)/wgrids``.
    The fit whose last element is largest is selected (the first one on ties,
    with a warning on stderr).

    Returns:
        Tuple ``(h1, h2, e, beta, sigma, LL)`` where ``hmax, beta, sigma, LL``
        is the selected fit, ``h1 = w*hmax``, ``h2 = (1 - w)*hmax`` and
        ``e = 1 - hmax``.
    """
    # Create a uniform grid
    grid = np.array(range(wgrids)) / float(wgrids)

    fits = []
    scores = []
    for weight in grid:
        # heritability will be estimated for linear combo of kinships
        combined = weight * K1 + (1.0 - weight) * K2
        sys.stderr.write("Fitting weight %0.2f\n" % weight)
        fit = LMM(y, combined, X0=X0).fit()
        fits.append(fit)
        scores.append(fit[-1])
        del combined

    scores = np.array(scores)
    best = np.where(scores == scores.max())[0]
    if len(best) > 1:
        sys.stderr.write("WARNING: Found multiple maxes using first one\n")
    best = best[0]

    hmax, beta, sigma, LL = fits[best]
    w = grid[best]
    return w * hmax, (1.0 - w) * hmax, (1.0 - hmax), beta, sigma, LL
def fitTwo(y, K1, K2, X0=None, wgrids=100):
    """Grid-search fit of a model with two variance components.

    Runs the standard pylmm algorithm in a loop. Each iteration builds a
    kinship as a linear combination of ``K1`` and ``K2`` with weight ``w``
    drawn from ``wgrids`` evenly spaced values in ``[0, 1)``. The iteration
    with the largest final fit element wins; ties are resolved by taking the
    first and a warning is written to stderr.

    Returns:
        ``(h1, h2, e, beta, sigma, LL)``: the winning weight splits the fitted
        ``hmax`` into ``h1 = w*hmax`` and ``h2 = (1 - w)*hmax``, and
        ``e = 1 - hmax``. ``beta``, ``sigma`` and ``LL`` are taken unchanged
        from the winning fit.
    """
    # Create a uniform grid
    weights = np.array(range(wgrids)) / float(wgrids)

    results, likelihoods = [], []
    for w_try in weights:
        # heritability will be estimated for linear combo of kinships
        mix = w_try * K1 + (1.0 - w_try) * K2
        sys.stderr.write("Fitting weight %0.2f\n" % (w_try))
        model = LMM(y, mix, X0=X0)
        outcome = model.fit()
        results.append(outcome)
        likelihoods.append(outcome[-1])
        del mix

    ll_arr = np.array(likelihoods)
    winners = np.where(ll_arr == ll_arr.max())[0]
    if winners.size > 1:
        sys.stderr.write("WARNING: Found multiple maxes using first one\n")
    top = winners[0]

    hmax, beta, sigma, LL = results[top]
    w = weights[top]

    h1 = w * hmax
    h2 = (1.0 - w) * hmax
    e = 1.0 - hmax
    return h1, h2, e, beta, sigma, LL
def compute_snp(j, snp_ids, q=None):
    """Run the association test for a batch of SNPs and queue the results.

    Args:
        j: Batch identifier; it is queued together with the results and
            returned unchanged.
        snp_ids: Iterable of ``(snp, id)`` pairs. ``snp`` is indexed with the
            module-level ``keep`` selector and reshaped to ``(n, 1)``.
        q: Object with a ``put`` method that receives ``[j, result]``. When
            omitted, ``compute_snp.q`` is used instead.

    Returns:
        ``j``.

    Relies on names defined outside this function: ``keep``, ``n``, ``Y``,
    ``X0``, ``K``, ``K2``, ``L``, ``options``, ``LMM``, ``LMM_withK2`` and
    ``formatResult``.
    """
    result = []
    for snp, snp_name in snp_ids:
        # Check SNPs for missing values
        x = snp[keep].reshape((n, 1))  # all the SNPs
        missing = np.isnan(x).reshape((-1,))

        if missing.sum():
            # NOTE: this code appears to be unreachable!
            if options.verbose:
                sys.stderr.write("Found missing values in " + str(x))
            # Boolean negation. The former ``True - missing`` raises
            # TypeError in NumPy, which does not support ``-`` on boolean
            # arrays.
            keeps = ~missing
            xs = x[keeps, :]
            if keeps.sum() <= 1 or xs.var() <= 1e-6:
                result.append(
                    formatResult(snp_name, np.nan, np.nan, np.nan, np.nan))
                continue

            # Its ok to center the genotype - I used
            # options.normalizeGenotype to force the removal of missing
            # genotypes as opposed to replacing them with MAF.
            if not options.normalizeGenotype:
                xs = (xs - xs.mean()) / np.sqrt(xs.var())

            # Restrict phenotype, covariates and kinship to the observed
            # rows and build a dedicated model for this SNP.
            Ys = Y[keeps]
            X0s = X0[keeps, :]
            Ks = K[keeps, :][:, keeps]
            if options.kfile2:
                K2s = K2[keeps, :][:, keeps]
                Ls = LMM_withK2(Ys, Ks, X0=X0s, verbose=options.verbose,
                                K2=K2s)
            else:
                Ls = LMM(Ys, Ks, X0=X0s, verbose=options.verbose)

            if options.refit:
                Ls.fit(X=xs, REML=options.REML)
            else:
                Ls.fit(REML=options.REML)
            ts, ps, beta, betaVar = Ls.association(
                xs, REML=options.REML, returnBeta=True)
        else:
            if x.var() == 0:
                # Note: this code appears to be unreachable!
                # Constant genotype: report nan values.
                result.append(
                    formatResult(snp_name, np.nan, np.nan, np.nan, np.nan))
                continue

            if options.refit:
                L.fit(X=x, REML=options.REML)
            # This is where it happens
            ts, ps, beta, betaVar = L.association(
                x, REML=options.REML, returnBeta=True)

        result.append(
            formatResult(snp_name, beta, np.sqrt(betaVar).sum(), ts, ps))

    # Fall back to the queue attached to the function object when the caller
    # did not supply one.
    if q is None:
        q = compute_snp.q
    q.put([j, result])
    return j
sys.stderr.write("Loading pre-computed eigendecomposition...\n") Kva = np.load(options.eigenfile + ".Kva") Kve = np.load(options.eigenfile + ".Kve") else: Kva = [] Kve = [] # CREATE LMM object for association n = K.shape[0] if not options.kfile2: L = LMM(Y, K, Kva, Kve, X0, verbose=options.verbose) else: L = LMM_withK2(Y, K, Kva, Kve, X0, verbose=options.verbose, K2=K2) # Fit the null model -- if refit is true we will refit for each SNP, so no reason to run here if not options.refit: if options.verbose: sys.stderr.write("Computing fit for null model\n") L.fit() if options.verbose and not options.kfile2: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (L.optH, L.optSigma)) if options.verbose and options.kfile2: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f, w=%0.3f\n" % (L.optH, L.optSigma, L.optW)) def compute_snp(j, snp_ids, q=None): # print(j,len(snp_ids),"\n") result = [] for snp_id in snp_ids: # j,snp_id = collect snp, id = snp_id # id = collect[1]
if options.verbose: sys.stderr.write("Loading pre-computed eigendecomposition...\n") Kva = np.load(options.eigenfile + ".Kva") Kve = np.load(options.eigenfile + ".Kve") else: Kva = [] Kve = [] # CREATE LMM object for association n = K.shape[0] if not options.kfile2: L = LMM(Y,K,Kva,Kve,X0,verbose=options.verbose) else: L = LMM_withK2(Y,K,Kva,Kve,X0,verbose=options.verbose,K2=K2) # Fit the null model -- if refit is true we will refit for each SNP, so no reason to run here if not options.refit: if options.verbose: sys.stderr.write("Computing fit for null model\n") L.fit() if options.verbose and not options.kfile2: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (L.optH,L.optSigma)) if options.verbose and options.kfile2: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f, w=%0.3f\n" % (L.optH,L.optSigma,L.optW)) # Buffers for pvalues and t-stats PS = [] TS = [] count = 0 out = open(outFile,'w') printOutHead() for snp,id in IN: count += 1 if options.verbose and count % 1000 == 0: sys.stderr.write("At SNP %d\n" % count)
def compute_snp(j, snp_ids, q=None):
    """Compute association statistics for each SNP in a batch.

    Args:
        j: Batch identifier; queued with the results and returned as-is.
        snp_ids: Iterable of ``(snp, id)`` pairs. Each ``snp`` is indexed
            with the module-level ``keep`` selector and reshaped to
            ``(n, 1)``.
        q: Object exposing ``put``; it receives ``[j, result]``. Defaults to
            ``compute_snp.q`` when not given.

    Returns:
        ``j``.

    Uses names defined outside this function: ``keep``, ``n``, ``Y``, ``X0``,
    ``K``, ``K2``, ``L``, ``options``, ``LMM``, ``LMM_withK2`` and
    ``formatResult``.
    """
    result = []
    for snp, snp_name in snp_ids:
        # Check SNPs for missing values
        x = snp[keep].reshape((n, 1))  # all the SNPs
        is_nan = np.isnan(x).reshape((-1,))

        if not is_nan.sum():
            # Complete genotype: reuse the shared model ``L``.
            if x.var() == 0:
                # Note: this code appears to be unreachable!
                # Constant genotype: report nan values.
                result.append(
                    formatResult(snp_name, np.nan, np.nan, np.nan, np.nan))
                continue
            if options.refit:
                L.fit(X=x, REML=options.REML)
            # This is where it happens
            ts, ps, beta, betaVar = L.association(
                x, REML=options.REML, returnBeta=True)
        else:
            # NOTE: this code appears to be unreachable!
            if options.verbose:
                sys.stderr.write("Found missing values in " + str(x))
            # Invert the mask with ``~``. ``True - is_nan`` is a TypeError in
            # NumPy because ``-`` is not supported on boolean arrays.
            keeps = ~is_nan
            xs = x[keeps, :]
            if keeps.sum() <= 1 or xs.var() <= 1e-6:
                result.append(
                    formatResult(snp_name, np.nan, np.nan, np.nan, np.nan))
                continue

            # Its ok to center the genotype - I used
            # options.normalizeGenotype to force the removal of missing
            # genotypes as opposed to replacing them with MAF.
            if not options.normalizeGenotype:
                xs = (xs - xs.mean()) / np.sqrt(xs.var())

            # Subset everything to the observed rows and fit a model that is
            # specific to this SNP.
            Ys = Y[keeps]
            X0s = X0[keeps, :]
            Ks = K[keeps, :][:, keeps]
            if options.kfile2:
                K2s = K2[keeps, :][:, keeps]
                Ls = LMM_withK2(Ys, Ks, X0=X0s, verbose=options.verbose,
                                K2=K2s)
            else:
                Ls = LMM(Ys, Ks, X0=X0s, verbose=options.verbose)

            if options.refit:
                Ls.fit(X=xs, REML=options.REML)
            else:
                Ls.fit(REML=options.REML)
            ts, ps, beta, betaVar = Ls.association(
                xs, REML=options.REML, returnBeta=True)

        result.append(
            formatResult(snp_name, beta, np.sqrt(betaVar).sum(), ts, ps))

    # Use the queue stored on the function object unless one was passed in.
    if q is None:
        q = compute_snp.q
    q.put([j, result])
    return j
if options.verbose: sys.stderr.write("Loading pre-computed eigendecomposition...\n") Kva = np.load(options.eigenfile + ".Kva") Kve = np.load(options.eigenfile + ".Kve") else: Kva = [] Kve = [] # CREATE LMM object for association n = K.shape[0] if not options.kfile2: L = LMM(Y,K,Kva,Kve,X0,verbose=options.verbose) else: L = LMM_withK2(Y,K,Kva,Kve,X0,verbose=options.verbose,K2=K2) # Fit the null model -- if refit is true we will refit for each SNP, so no reason to run here if not options.refit: if options.verbose: sys.stderr.write("Computing fit for null model\n") L.fit() if options.verbose and not options.kfile2: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (L.optH,L.optSigma)) if options.verbose and options.kfile2: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f, w=%0.3f\n" % (L.optH,L.optSigma,L.optW)) def compute_snp(j,snp_ids,q = None): # print(j,len(snp_ids),"\n") result = [] for snp_id in snp_ids: # j,snp_id = collect snp,id = snp_id # id = collect[1] # result = [] # Check SNPs for missing values x = snp[keep].reshape((n,1)) # all the SNPs v = np.isnan(x).reshape((-1,)) if v.sum():
if options.verbose: sys.stderr.write("Loading pre-computed eigendecomposition...\n") Kva = np.load(options.eigenfile + ".Kva") Kve = np.load(options.eigenfile + ".Kve") else: Kva = [] Kve = [] # CREATE LMM object for association n = K.shape[0] if not options.kfile2: L = LMM(Y,K,Kva,Kve,X0,verbose=options.verbose) else: L = LMM_withK2(Y,K,Kva,Kve,X0,verbose=options.verbose,K2=K2) # Fit the null model -- if refit is true we will refit for each SNP, so no reason to run here #out = open(outFile,'w') ### Joo Change start if not options.refit: if options.verbose: sys.stderr.write("Computing fit for null model\n") L.fit() if options.verbose and not options.kfile2: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (L.optH,L.optSigma)) #out.write("%0.5f\t%0.5f\t%0.5f\n" % (L.optH, L.optH, (1-L.optH))) out.write("%0.5f\t%0.5f\t%0.5f\n" % (L.optH, L.optH*L.optSigma, L.optSigma*(1-L.optH))) #out.write("heritability=%0.5f, sigma=%0.5f\n" % (L.optH,L.optSigma)) #out.write("varG=%0.5f, varE=%0.5f\n" %(L.optH*L.optSigma, L.optSigma*(1-L.optH))) if options.verbose and options.kfile2: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f, w=%0.3f\n" % (L.optH,L.optSigma,L.optW)) #out.write("%0.5f\t%0.5f\t%0.5f\n" % (L.optH, L.optH, (1-L.optH))) out.write("%0.5f\t%0.5f\t%0.5f\n" % (L.optH, L.optH*L.optSigma, L.optSigma*(1-L.optH))) #out.write("heritability=%0.5f, sigma=%0.5f\n" % (L.optH,L.optSigma)) #out.write("varG=%0.5f, varE=%0.5f\n" %(L.optH*L.optSigma, L.optSigma*(1-L.optH))) # Buffers for pvalues and t-stats PS = []