def plotSurf():
    """Draw a cubic-interpolated 3-D surface for one site and show it.

    Reads ``real/real.maxLikelihoods.df`` and ``real/real.replicates.df``
    under ``utl.outpath``, picks the index label that sorts last by
    ``|s| * |h| * (alt - null)``, evaluates ``mkv.computeLikelihoodReal``
    over the ``dta.getSH()`` grid for that site, and then plots a surface
    built from the module-level object ``b``.

    NOTE(review): ``b`` is not defined in this function, so it must exist at
    module level when this runs.  ``likelihoods`` is computed but never read
    below -- possibly ``b`` was meant to be derived from it; confirm.
    """
    from scipy import interpolate

    # Select the site whose score sorts last (ascending sort).
    max_like = pd.read_pickle(utl.outpath + 'real/real.maxLikelihoods.df')
    score = max_like.s.abs() * max_like.h.abs() * (max_like.alt - max_like.null)
    top_site = score.sort_values().index[-1]

    # One-row frame holding the replicate data of the selected site.
    replicates = pd.read_pickle(utl.outpath + 'real/real.replicates.df')
    site_row = pd.DataFrame(replicates.loc[top_site]).T

    # Likelihood of the site for every entry of the grid.
    grid = dta.getSH()
    jobs = [(site_row,) + sh for sh in grid]
    likelihoods = pd.concat(
        [mkv.computeLikelihoodReal(job) for job in jobs], axis=1)
    likelihoods.columns.names = ['s', 'h']

    fig = plt.figure()
    ax = fig.gca(projection='3d')

    # One column per level-0 key of `b`, indexed by the remaining level(s).
    columns = []
    for key, group in b.groupby(level=0):
        section = group.loc[key]
        columns.append(pd.Series(section.values, index=section.index, name=key))
    surface = pd.concat(columns, axis=1)

    Z = surface.values
    X = np.tile(surface.index.values[:, None], Z.shape[1])
    Y = np.tile(surface.columns.values[:, None], Z.shape[0]).T

    # Results are discarded; the calls are kept so behaviour is unchanged.
    Z.min()
    Z.max()

    # Interpolate onto a 10 x 401 evaluation grid and plot it.
    xs = np.linspace(-1.0, 2.0, 10)
    ys = np.linspace(-0.5, 0.5, 401)
    interpolator = interpolate.interp2d(X, Y, Z, kind='cubic')
    zi = interpolator(xs, ys)
    mesh_x, mesh_y = np.meshgrid(xs, ys)

    surf = ax.plot_surface(mesh_x, mesh_y, zi, cmap=mpl.cm.autumn)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    plt.show()
def computeBaseSFS(recompute=False):
    """Return the table cached at ``utl.outpath + 'real/SFS.F0.df'``.

    If the pickle exists and ``recompute`` is false it is loaded and
    returned.  Otherwise the table is rebuilt from ``dta.getBaseFreq()`` by
    running ``est.Estimate.getEstimate`` through ``utl.scanGenome``, keeping
    element ``[0]`` of each scanned row expanded into a ``pd.Series``; the
    result is pickled to the cache path and returned.

    :param recompute: when true, rebuild even if the cache file exists.
    :return: the cached or freshly computed table.
    """
    cache_file = utl.outpath + 'real/SFS.F0.df'
    if os.path.exists(cache_file) and not recompute:
        return pd.read_pickle(cache_file)

    base_freq = dta.getBaseFreq()
    import popgen.Estimate as est

    def estimate(x):
        # Callback handed to utl.scanGenome.
        return est.Estimate.getEstimate(
            x=x, n=1000, method='all', selectionPredictor=True)

    scanned = utl.scanGenome(base_freq, estimate)
    sfs = scanned.apply(lambda row: pd.Series(row[0]), axis=1)
    sfs.to_pickle(cache_file)
    return sfs
def SNPscan(R, regAlpha, numProcess):
    """Evaluate ``mkv.computeLikelihoodReal`` over the sparse grid for ``R``.

    Each entry of ``dta.getSH(sparse=True)`` is appended to ``(R,)`` to form
    one argument tuple.  The per-entry results are concatenated column-wise,
    the column levels are named ``'s'`` and ``'h'``, and the frame is reduced
    with ``mkv.maxLikelihood``.

    :param R: data passed as the first element of every argument tuple.
    :param regAlpha: forwarded to ``mkv.maxLikelihood`` as ``regAlpha``.
    :param numProcess: ``1`` runs serially in this process; any other value
        is used as the size of a worker ``Pool``.
    :return: whatever ``mkv.maxLikelihood`` returns for the likelihood frame.
    """
    # Python 2 builtin reload: picks up edits to `dta` before the grid is read.
    reload(dta)
    SH = dta.getSH(sparse=True)
    ARGS = [(R,) + sh for sh in SH]
    # Single-argument print(...) gives the same output as the print statement.
    print(pd.DataFrame(SH))
    print(R)

    if numProcess == 1:
        results = [mkv.computeLikelihoodReal(args) for args in ARGS]
    else:
        pool = Pool(numProcess)
        try:
            results = pool.map(mkv.computeLikelihoodReal, ARGS)
        finally:
            # Always release the workers, even if a task raised; previously
            # an exception in pool.map left the pool running.
            pool.terminate()
            pool.join()

    likelihoods = pd.concat(results, axis=1)
    likelihoods.columns.names = ['s', 'h']

    # Drop the large intermediates before the reduction step.
    del ARGS, results
    gc.collect()
    likelihoods = mkv.maxLikelihood(likelihoods, regAlpha=regAlpha)
    gc.collect()
    return likelihoods
def likelihoodWithDifferentN(N=1000, s=0):
    """Compute the likelihood of the module-level site ``cdi`` for a given ``N``.

    Calls ``mkv.computePowerForSandSaveRealData((s, 0.5), N=N, save=False)``,
    precomputes the CD/emission pair for ``cdi`` with the same ``N``, passes
    the three results to ``computeLikelihoodReal`` and renames the result
    to ``N``.

    NOTE(review): ``cdi`` and the unqualified ``computeLikelihoodReal`` are
    not defined in this function and must exist at module level; other call
    sites in this file use ``mkv.computeLikelihoodReal`` -- confirm which
    one is intended here.

    :param N: forwarded as ``N`` to both helper calls; also the new name of
        the returned object.
    :param s: first element of the ``(s, 0.5)`` pair given to
        ``mkv.computePowerForSandSaveRealData``.
    :return: the ``computeLikelihoodReal`` result renamed to ``N``.
    """
    power_for_s = mkv.computePowerForSandSaveRealData((s, 0.5), N=N, save=False)
    site_frame = pd.DataFrame(cdi).T
    cd, emissions = dta.precomputeCDandEmissionsFor(site_frame, N=N)
    result = computeLikelihoodReal((cd, emissions, power_for_s))
    return result.rename(N)
# Exploratory top-level statements (scans, plots, per-site likelihood checks).
# They rely on names that are not defined in the visible code: `s`, `scores`,
# `y`, `cdAll`, `S` and `res`.  Several bare expressions discard their result.

# Put utl.scanGenome and utl.scanGenomeSNP of utl.zpvalgenome(s) side by side
# as columns 'win' and 'snp', then plot them.
scan=pd.concat([utl.scanGenome(utl.zpvalgenome(s)).rename('win'),utl.scanGenomeSNP(utl.zpvalgenome(s)).rename('snp')],1)
pplt.Manhattan(scan)
# Chromosome-wise plots of several transformations of `s` and `scores`.
pplt.GenomeChromosomewise(utl.zpvalgenome(utl.scanGenome(utl.zpvalgenome(s))))
pplt.GenomeChromosomewise(utl.zpvalgenome(utl.scanGenome(scores.abs())))
reload(utl)
pplt.GenomeChromosomewise(utl.scanGenomeSNP(utl.zpvalgenome2tail(s)))
# Result is not stored.
scores.sort_values()
# The callback sums the values at or above the median of each group it receives.
pplt.GenomeChromosomewise(utl.scanGenomeSNP(scores.abs(),lambda x: x[x>=x.quantile(0.5)].sum()))
# Values and their ranks under the keys 'val' and 'rank', sorted by ('val','s').
df=pd.concat([scores,s],1);df=pd.concat([df,df.rank()],1,keys=['val','rank']).sort_values(('val','s'))
dfy=pd.concat([df,y],1).dropna()
# Result is not stored.
dfy.sort_values(0)
# Take the last label of the sorted frame, look it up in cdAll, print and plot it.
i=df.index[-1]; cdi=cdAll.loc[i];print cdi.unstack('REP');pplt.plotSiteReal(cdi)
# cdiun and h are assigned here but not read again in the visible code.
cdiun=cdi.unstack('REP')
CD,E=dta.precomputeCDandEmissionsFor(pd.DataFrame(cdi).T)
h=0.5
reload(mkv)
# Single evaluation with third element 0; result is not stored.
mkv.computeLikelihoodReal((CD, E, 0, 0.5))
# One evaluation per element of S (fourth element fixed at 0.5), reduced to a
# Series indexed by the concat key ('level_0').
likes=pd.concat(map(lambda x:mkv.computeLikelihoodReal((CD, E, x, 0.5)),S),keys=S).reset_index().iloc[:,[0,-1]].set_index('level_0')[0]
likes[0]
reload(pplt)
# Two-panel figure: `likes` on the left, the site plot on the right.
plt.figure(figsize=(6,3),dpi=150);plt.subplot(1,2,1);pd.DataFrame(likes).plot(ax=plt.gca());plt.subplot(1,2,2);pplt.plotSiteReal(cdi,ax=plt.gca());print cdi.unstack('REP')
# Keep columns 0 and 3 of the reset frame; index by the first, keep the second as a Series.
res=res.reset_index().iloc[:,[0,3]];res=res.set_index(res.columns[0]).iloc[:,0]
NN=np.arange(100,1500,100)
# NOTE(review): this definition may continue beyond the visible code; left untouched.
def likelihoodWithDifferentN(N=1000,s=0): T=mkv.computePowerForSandSaveRealData((s,0.5),N=N,save=False)