def loadData(filename="", sz=100, useRand=False):
    """Load labels and data, then split the rows into a selection and a remainder.

    The data is read with ``getData(filename)``; an empty ``filename`` falls
    back to ``"../../GWAS/cleaned"``.  The selection holds the first ``sz`` row
    indices whose label equals 1 followed by the first ``sz`` row indices whose
    label does not; every other row goes to the remainder.

    Args:
        filename: Path handed to ``getData``.
        sz: Maximum number of rows taken from each of the two label groups.
        useRand: When true, ``y`` is shuffled in place with ``rand.shuffle``
            before the split (``sFil`` keeps its loaded order).

    Returns:
        ``[[y1, sFil1], [y2, sFil2]]`` where the first pair is the selection
        and the second pair is the remainder.
    """
    if len(filename) == 0:
        filename = "../../GWAS/cleaned"
    y, sFil = getData(filename)
    if useRand:
        rand.shuffle(y)

    # Partition the row indices by label: equal to 1 versus everything else.
    ones, others = [], []
    for idx, label in enumerate(y):
        (ones if label == 1 else others).append(idx)

    chosen = ones[:sz] + others[:sz]
    rest = ones[sz:] + others[sz:]

    y1 = [y[idx] for idx in chosen]
    sFil1 = sFil[chosen, :]
    y2 = [y[idx] for idx in rest]
    sFil2 = sFil[rest, :]
    return [[y1, sFil1], [y2, sFil2]]
def NoisySig(mret, eps, filename, savename=""):
    """Average the overlap between a reference SNP pick and four noisy picks.

    The reference pick is ``pt(y, EIGN, mret, -1, algor="noise")``.  For each
    of the ten budgets ``eps * 1 .. eps * 10`` the algorithms ``"DPE"``,
    ``"score"``, ``"noise"`` and ``"neighbor"`` are each run ``reps`` (20)
    times, and ``inter(reference, pick)`` is averaged per budget and
    algorithm.

    Args:
        mret: Passed to ``pt`` as its third argument.
        eps: Base value; the budgets tested are ``eps * i`` for i in 1..10.
        filename: Path handed to ``ld.getData``.
        savename: Output path; defaults to
            ``"Output/res_top_<eps>_<mret>.txt"`` when empty.

    Returns:
        None.
    """
    if len(savename) == 0:
        savename = "Output/res_top_" + str(eps) + "_" + str(mret) + ".txt"
    # NOTE(review): savename is computed but nothing below writes to it, so
    # the averages are discarded when the function returns -- confirm intent.
    epsilons = [eps * i for i in range(1, 11)]

    print("load Data")
    y, BED = ld.getData(filename)
    print("calc EIGN!")
    EIGN = EIGN_STRAT(BED, 2)
    # Sorted absolute scores; not read again below.
    # NOTE(review): kept in case EIGN.prod has side effects -- confirm.
    sc = sorted((abs(s) for s in EIGN.prod(y)), reverse=True)
    print("get True!")
    truSNPs = pt(y, EIGN, mret, -1, algor="noise")

    our = [0.0] * 10
    score = [0.0] * 10
    noise = [0.0] * 10
    neighbor = [0.0] * 10
    reps = 20
    for i, e in enumerate(epsilons):
        print(e)
        for j in range(reps):
            print(j)
            # The result must be bound to the same name that is read on the
            # next line (the original assigned a misspelled variable).
            noisySNPs = pt(y, EIGN, mret, e, algor="DPE", reuse=True)
            our[i] += inter(truSNPs, noisySNPs) / float(reps)
            # EIGN is the only matrix built in this function; the original
            # referenced an undefined name here.
            noisySNPs = pt(y, EIGN, mret, e, algor="score")
            score[i] += inter(truSNPs, noisySNPs) / float(reps)
            noisySNPs = pt(y, EIGN, mret, e, algor="noise")
            noise[i] += inter(truSNPs, noisySNPs) / float(reps)
            noisySNPs = pt(y, EIGN, mret, e, algor="neighbor")
            # Accumulate into slot i instead of rebinding the whole list.
            neighbor[i] += inter(truSNPs, noisySNPs) / float(reps)
def plotTop(mret, eps, filename, savename=""):
    """Compare three SNP-picking algorithms against a reference pick (LMM).

    The reference pick is ``pt(y, MU, mret, -1, algor="noise")`` with
    ``MU = MU_LMM(BED, [1, -1.0])``.  For each of the ten budgets
    ``eps * 1 .. eps * 10`` the algorithms ``"neighbor"``, ``"score"`` and
    ``"noise"`` are each run ``reps`` (20) times on a freshly built
    ``MU_LMM(BED, [10, e / (mret + 1)])`` with the budget
    ``e * mret / (mret + 1)``; ``inter(reference, pick)`` is averaged per
    budget and the averages are written to ``savename``.

    Args:
        mret: Passed to ``pt`` as its third argument; also sets how each
            budget is divided between ``MU_LMM`` and ``pt``.
        eps: Base value; the budgets tested are ``eps * i`` for i in 1..10.
        filename: Path handed to ``ld.getData``; echoed in the output header.
        savename: Output path; defaults to
            ``"OutputDir/LMM_top_<eps>_<mret>.txt"`` when empty.

    Returns:
        None.  The results are written to ``savename``.
    """
    if len(savename) == 0:
        savename = "OutputDir/LMM_top_" + str(eps) + "_" + str(mret) + ".txt"
    epsilons = [eps * i for i in range(1, 11)]

    print("load Data")
    y, BED = ld.getData(filename)
    print("calc MU!")
    MU = MU_LMM(BED, [1, -1.0])
    # Sorted absolute scores; not read again below.
    # NOTE(review): kept in case MU.prod has side effects -- confirm.
    sc = sorted((abs(s) for s in MU.prod(y)), reverse=True)
    print("get True!")
    tru = pt(y, MU, mret, -1, algor="noise")

    neighs = [0.0] * 10
    score = [0.0] * 10
    noise = [0.0] * 10
    reps = 20
    for i, e in enumerate(epsilons):
        print(e)
        for j in range(reps):
            print(j)
            # Divide e: e / (mret + 1) goes to MU_LMM, the rest to pt.
            e1 = e * mret / float(mret + 1)
            MU = MU_LMM(BED, [10, e / float(mret + 1)])
            gs = pt(y, MU, mret, e1, algor="neighbor", reuse=True)
            neighs[i] += inter(tru, gs) / float(reps)
            gs = pt(y, MU, mret, e1, algor="score")
            score[i] += inter(tru, gs) / float(reps)
            gs = pt(y, MU, mret, e1, algor="noise")
            noise[i] += inter(tru, gs) / float(reps)

    # The context manager closes the file even if a write raises.
    rows = (
        ("Epsilon", epsilons),
        ("Noise", noise),
        ("Score", score),
        ("Neighbor", neighs),
    )
    with open(savename, "w") as fil:
        fil.write("Testing Top SNPs with DP, " + filename)
        for label, values in rows:
            fil.write("\n" + label + ":")
            fil.write("".join(" " + str(v) for v in values))
def plotTop(mret, eps, filename, savename=""):
    """Measure how well three algorithms reproduce a reference SNP pick (LMM).

    A reference pick is obtained from ``pt(..., -1, algor="noise")`` on
    ``MU_LMM(BED, [1, -1.0])``.  Ten budgets ``eps, 2*eps, ..., 10*eps`` are
    then tested.  For each budget ``e`` and each of 20 repetitions a new
    ``MU_LMM(BED, [10, e / (mret + 1)])`` is built and ``pt`` is called with
    budget ``e * mret / (mret + 1)`` for the algorithms ``"neighbor"``,
    ``"score"`` and ``"noise"``.  The per-budget mean of
    ``inter(reference, pick)`` is saved to a text file.

    Args:
        mret: Forwarded to ``pt``; also used to divide each budget.
        eps: Step between consecutive budgets.
        filename: Input path for ``ld.getData``; also written in the header.
        savename: Destination file.  An empty string selects
            ``"OutputDir/LMM_top_<eps>_<mret>.txt"``.

    Returns:
        None.
    """
    if len(savename) == 0:
        savename = "OutputDir/LMM_top_" + str(eps) + "_" + str(mret) + ".txt"
    epsilons = [eps * i for i in range(1, 11)]

    print("load Data")
    y, BED = ld.getData(filename)
    print("calc MU!")
    MU = MU_LMM(BED, [1, -1.0])
    # Sorted absolute scores; nothing below reads them.
    # NOTE(review): retained in case MU.prod has side effects -- confirm.
    sc = sorted([abs(s) for s in MU.prod(y)], reverse=True)
    print("get True!")
    tru = pt(y, MU, mret, -1, algor="noise")

    count = len(epsilons)
    neighs = [0.0] * count
    score = [0.0] * count
    noise = [0.0] * count
    reps = 20
    weight = float(reps)
    for i in range(count):
        e = epsilons[i]
        print(e)
        for j in range(reps):
            print(j)
            e1 = e * mret / float(mret + 1)
            # MU is rebuilt on every repetition with its part of the budget.
            MU = MU_LMM(BED, [10, e / float(mret + 1)])
            gs = pt(y, MU, mret, e1, algor="neighbor", reuse=True)
            neighs[i] = neighs[i] + inter(tru, gs) / weight
            gs = pt(y, MU, mret, e1, algor="score")
            score[i] = score[i] + inter(tru, gs) / weight
            gs = pt(y, MU, mret, e1, algor="noise")
            noise[i] = noise[i] + inter(tru, gs) / weight

    # "with" guarantees the handle is released even when a write fails.
    with open(savename, "w") as fil:

        def write_row(title, values):
            # One output line: "<title>:" followed by " <value>" per entry.
            fil.write("\n" + title + ":")
            for value in values:
                fil.write(" " + str(value))

        fil.write("Testing Top SNPs with DP, " + filename)
        write_row("Epsilon", epsilons)
        write_row("Noise", noise)
        write_row("Score", score)
        write_row("Neighbor", neighs)
def plotTop(mret, eps, filename, savename=""):
    """Compare three SNP-picking algorithms against a reference pick (MU_STRAT).

    ``MU = MU_STRAT(BED, 10)`` is built once.  The reference pick is
    ``pt(y, MU, mret, -1, algor="noise")``.  For each of the ten budgets
    ``eps * 1 .. eps * 10`` the algorithms ``"neighbor"``, ``"score"`` and
    ``"noise"`` are each run ``reps`` (20) times; ``inter(reference, pick)``
    is averaged per budget and the averages are written to ``savename``.

    Args:
        mret: Passed to ``pt`` as its third argument.
        eps: Base value; the budgets tested are ``eps * i`` for i in 1..10.
        filename: Path handed to ``ld.getData``; echoed in the output header.
        savename: Output path; defaults to
            ``"OutputDir/res_top_<eps>_<mret>.txt"`` when empty.

    Returns:
        None.  The results are written to ``savename``.
    """
    if len(savename) == 0:
        savename = "OutputDir/res_top_" + str(eps) + "_" + str(mret) + ".txt"
    epsilons = [eps * i for i in range(1, 11)]

    print("load Data")
    y, BED = ld.getData(filename)
    print("calc MU!")
    MU = MU_STRAT(BED, 10)
    # Sorted absolute scores; not read again below.
    # NOTE(review): kept in case MU.prod has side effects -- confirm.
    sc = sorted((abs(s) for s in MU.prod(y)), reverse=True)
    print("get True!")
    tru = pt(y, MU, mret, -1, algor="noise")

    neighs = [0.0] * 10
    score = [0.0] * 10
    noise = [0.0] * 10
    reps = 20
    for i, e in enumerate(epsilons):
        print(e)
        for j in range(reps):
            print(j)
            gs = pt(y, MU, mret, e, algor="neighbor", reuse=True)
            neighs[i] += inter(tru, gs) / float(reps)
            gs = pt(y, MU, mret, e, algor="score")
            score[i] += inter(tru, gs) / float(reps)
            gs = pt(y, MU, mret, e, algor="noise")
            noise[i] += inter(tru, gs) / float(reps)

    # The context manager closes the file even if a write raises.
    with open(savename, "w") as fil:
        fil.write("Testing Top SNPs with DP, " + filename)
        for label, values in (
            ("Epsilon", epsilons),
            ("Noise", noise),
            ("Score", score),
            ("Neighbor", neighs),
        ):
            fil.write("\n" + label + ":")
            fil.write("".join(" " + str(v) for v in values))
def plotWald(eps, filename, savename="", k=5):
    """Write quartiles of the scaled error of ``wt`` for ten budgets.

    ``tru = wt(y, MU, -1, snps=[], forFigs=False)`` is the reference, with
    ``MU = MU_STRAT(BED, k)``.  For each of the first ten entries ``e`` of
    ``eps``, ``res = wt(y, MU, e, snps=[], forFigs=True)`` is computed and
    the errors ``(n - k - 1) * |res[s] - tru[s]|`` are sorted, where ``n`` is
    ``len(y)``.  One line ``"<e> <lower quartile> <median> <upper quartile>"``
    is written per budget.

    Args:
        eps: Indexable sequence with at least ten budgets; ``eps[0]`` is also
            used in the default output name.
        filename: Path handed to ``ld.getData``.
        savename: Output path; defaults to
            ``"OutputDir/res_wald_<eps[0]>_<k>.txt"`` when empty.
        k: Passed to ``MU_STRAT``; also part of the error scale factor.

    Returns:
        None.  The results are written to ``savename``.
    """
    if len(savename) == 0:
        savename = "OutputDir/res_wald_" + str(eps[0]) + "_" + str(k) + ".txt"

    print("load Data")
    y, BED = ld.getData(filename)
    print("calc MU!")
    MU = MU_STRAT(BED, k)
    print("get True!")
    tru = wt(y, MU, -1, snps=[], forFigs=False)

    n = len(y)
    scale = float(n - k - 1)
    # The file stays open across the wt() calls; "with" makes sure it is
    # closed (and the lines written so far flushed) if one of them raises.
    with open(savename, "w") as fil:
        for i in range(10):
            e = eps[i]
            print(e)
            res = wt(y, MU, e, snps=[], forFigs=True)
            # "s" is a separate index so the outer loop variable is not
            # shadowed inside the comprehension.
            err = sorted(
                [scale * abs(res[s] - tru[s]) for s in range(len(tru))]
            )
            m = len(err)
            med = err[int(.5 * m)]
            up = err[int(.75 * m)]
            down = err[int(.25 * m)]
            print(med)
            fil.write(
                str(e) + " " + str(down) + " " + str(med) + " " + str(up) + "\n"
            )
# Echo the settings this run will use.
print("As it stands: ")
print("BedFile: " + bedFil)
print("epsilon: " + str(epsilon))
print("Type: " + typ)
if typ != "Count":
    print("Aglorithm: " + algor)
# At most one of the following type-specific settings is printed.
if typ == "Top":
    print("mret: " + str(mret))
elif typ == "Wald":
    print("SNPs: " + str(snps))
elif typ == "Count":
    print("pvals: " + str(pval))
print("\n\n\n")

print("Load Data!")
y, BED = getData(bedFil)

print("Calculating MU matrix")
# num < 1 selects the (se2, sg2) parameter pair; otherwise [num, epsilon].
MU = MU_LMM(BED, (se2, sg2)) if num < 1 else MU_LMM(BED, [num, epsilon])

# Dispatch on the requested analysis type.
if typ == "Top":
    PrivGWAS.Top(MU, y, epsilon, mret, algor, savename)
elif typ == "Count":
    PrivGWAS.count(MU, y, epsilon, pval, savename)
elif typ == "Wald":
    PrivGWAS.wald(MU, y, epsilon, snps, savename)
elif typ == "Herit":
    print("The estimated heritability:")
    print("Sigma_e^2 is " + str(MU.se2))
# Echo the settings this run will use.
print("BedFile: " + bedFil)
print("epsilon: " + str(epsilon))
print("Type: " + typ)
if typ != "Count":
    print("Aglorithm: " + algor)
# At most one of the following type-specific settings is printed.
if typ == "Top":
    print("mret: " + str(mret))
elif typ == "Wald":
    print("SNPs: " + str(snps))
elif typ == "Count":
    print("threshold: " + str(pval))
if exact:
    print("Use exact method!")
print("\n\n\n")

print("Load Data!")
y, BED = getData(bedFil)

print("Calculating MU matrix")
MU = MU_STRAT(BED, k)
if exact:
    # Recompute with the exact option when requested.
    MU.calcMU(k, exact=True)
n = len(y)

# Dispatch on the requested analysis type.
if typ == "Top":
    PrivGWAS.Top(MU, y, epsilon, mret, algor, savename, snpList)
elif typ == "Count":
    PrivGWAS.count(MU, y, epsilon, pval, savename)
elif typ == "Wald":
    PrivGWAS.wald(MU, y, epsilon, snps, savename, coeff=float(n - k - 1))
# Echo the settings this run will use.
print("Type: " + typ)
if typ != "Count":
    print("Aglorithm: " + algor)
# At most one of the following type-specific settings is printed.
if typ == "Top":
    print("mret: " + str(mret))
elif typ == "Wald":
    print("SNPs: " + str(snps))
elif typ == "Count":
    print("pvals: " + str(pval))
print("\n\n\n")

print("Load Data!")
# Start from empty placeholders; getData replaces all four on success.
y, X, sFil, Q = [], [], [], []
y, X, Q, sFil = getData(bedFil, useCov)

print("Calculating MU matrix")
MU = DP.getMU(y, X, Q=Q, se2=se2, sg2=sg2, k=k, meth=meth)

if typ == "Top":
    picks = DP.PickTopSNP(
        y,
        X,
        mret,
        epsilon=epsilon,
        k=k,
        se2=se2,
        sg2=sg2,
        MU=MU,
        meth=meth,
        algor=algor,
    )
    # An empty result is reported as a bad argument.
    if len(picks) == 0:
        print("Bad argument!")
print "Method: "+meth; print "BedFile: "+bedFil; print "epsilon: "+str(epsilon); print "Type: "+typ; if typ!="Count": print "Aglorithm: "+algor; if typ=="Top": print "mret: "+str(mret); if typ=="Wald": print "SNPs: "+str(snps); if typ=="Count": print "pvals: "+str(pval); print "\n\n\n"; print "Load Data!" y=[];X=[];sFil=[];Q=[]; [y,X,Q,sFil]=getData(bedFil,useCov); print "Calculating MU matrix" MU=DP.getMU(y,X,Q=Q,se2=se2,sg2=sg2,k=k,meth=meth); if typ=="Top": picks=DP.PickTopSNP(y,X,mret,epsilon=epsilon,k=k,se2=se2,sg2=sg2,MU=MU,meth=meth,algor=algor) if len(picks)==0: print "Bad argument!" return; print "The mret top scoring SNPs:" for i in picks: print sFil.sid[i]; print sFil.pos[i] elif typ=="Wald": picks=sFil.sid_to_index(snps);#[snps.index(i) for i in snps]; Scores=DP.estWald(y,X,epsilon,k=k,snps=picks,MU=MU,meth=meth,algor=algor); print "The estimated Wald scores are:";