def runTrradeoffRepTime():
    """Run the replicates-vs-sampling-window trade-off experiments.

    For every number of replicates in (3, 5, 10, 15, 20) and every sampling
    window in (10, 30, 70, 100), ``runOne`` is applied to the parameter sets
    returned by ``getParamsForExperiments`` for each ``(nu0, s)`` pair in
    ``Nu x S``.  The most recently generated parameter sets are then re-run
    with ``s=0`` and ``nu0=0.005``.  All results are concatenated, tagged
    with ``rep``/``win`` columns and pickled to
    ``<utl.outpath>ROC/COMALERep<rep>Win<win>.df``.

    Takes no arguments and returns nothing; the output is the pickle files.
    """
    method = 'COMALE'
    L = 50000
    numExperiments = 500
    numThreads = 2
    Nu = [0.005, 0.1]
    S = [0.05]
    param = {'numExperiments': numExperiments, 'method': method,
             'numThreads': numThreads, 'ModelName': 'TimeSeries', 'L': L}
    if method == 'COMALE':
        param['T'] = tsutl.loadTransitions()
    # Single-argument print(...) gives the same output as the Python 2
    # statement `print Nu,S,'numThreads=',numThreads`.
    print(' '.join(str(v) for v in (Nu, S, 'numThreads=', numThreads)))
    for numReplicates in [3, 5, 10, 15, 20]:
        param['numReplicates'] = numReplicates
        for samplingWindow in [10, 30, 70, 100]:
            param['samplingWindow'] = samplingWindow
            results = []
            for nu0 in Nu:
                param['nu0'] = nu0
                for s in S:
                    param['s'] = s
                    params = getParamsForExperiments(param)
                    if numThreads == 1:
                        a = [runOne(p) for p in params]
                    else:
                        pool = Pool(numThreads)
                        try:
                            a = pool.map(runOne, params)
                        finally:
                            # Tear the workers down even if runOne raises.
                            pool.terminate()
                    results.append(pd.concat(a))
                    print('\nMethod={}\tR={}\twin={}\tnu0={}\ts={}'.format(
                        method, numReplicates, samplingWindow, nu0, s))
            # NOTE(review): the original indentation was lost; running this
            # once per (replicates, window) after all (nu0, s) runs is a
            # reconstruction -- confirm against the original file.
            # The loop variable is deliberately NOT called `param`: the
            # original rebound the outer base-configuration dict here, so
            # later iterations built on the last per-experiment dict.
            for experimentParam in params:
                experimentParam['s'] = 0
                experimentParam['nu0'] = 0.005
            results.append(pd.concat([runOne(p) for p in params]))
            df = pd.concat(results)
            df.sortlevel(inplace=True)
            df.dropna(axis=1, how='all', inplace=True)
            df['rep'] = numReplicates
            df['win'] = samplingWindow
            df.to_pickle('{}ROC/COMALERep{}Win{}.df'.format(
                utl.outpath, numReplicates, samplingWindow))
# Draw a 5x4 grid of 'SeasonalSNPs' simulations (one Simulation per subplot,
# experimentID 0..19) and save the figure as simulationModel.png.
mpl.rc('font', family='serif', serif=['Computer Modern'], size=20)
mpl.rc('text', usetex=True)
reload(Simulation)
reload(utl)
reload(est)

# Original author's note on nu0: if zero is given, selection is set on a site
# with minimum allele frequency, i.e. 1/F.
s = 1
nu0 = -1
FounderLines = 150
numReplicates = 4
PopulationSize = 1000
L = 50000
ModelName = 'SeasonalSNPs'
initialNeutralGenerations = 10
maxGeneration = 10

# The msms file name is built once, with index 0, before the loop; every
# simulation below therefore receives the same msmsFile.
i = 0
filename = '{}/{}/msms/L{:.0f}K.{:04.0f}.msms'.format(
    popgen.Utils.simoutpath, ModelName, L / 1000, i)

# Arguments shared by all simulations; only experimentID changes per subplot.
simulationSettings = dict(
    initialNeutralGenerations=initialNeutralGenerations,
    s=s,
    L=L,
    numReplicates=numReplicates,
    initialCarrierFreq=nu0,
    msmsFile=filename,
    F=FounderLines,
    maxGeneration=maxGeneration,
    generationStep=1,
    N=PopulationSize,
    ModelName=ModelName,
    ignoreInitialNeutralGenerations=False,
    foldInitialAFs=False,
)

fig = plt.figure(figsize=(30, 35), dpi=60)
for i in range(20):
    plt.subplot(5, 4, i + 1)
    sim = Simulation.Simulation(experimentID=i, **simulationSettings)
    utl.plotNu(sim, fig=fig, show=False)

# Title uses theta of the last simulation times sum_{k=1}^{149} 1/k,
# floored to a multiple of 100.
harmonicSum = sum(1. / np.arange(1, 150))
roughVariants = round(sim.theta * harmonicSum, 0) // 100 * 100
plt.suptitle('Four simulations window of {:.0f}Kbp (roughly {:.0f} variants)'.format(
    L / 1000, roughVariants))
plt.savefig('/home/arya/Dropbox/SeasonalSNPs/notes/simulationModel.png')
# plt.show()
# Part 1: LD of every site with the site at sim.posUnderSelection, for each
# entry of `measures`, plotted per measure and against the column means of H0.
# NOTE(review): this segment uses the name `estt` (other segments use `est`);
# confirm it is bound elsewhere in the file.
plt.figure()
a = pd.Series(
    [estt.Estimate.LD(sim.H0.astype('float'), measure=measure) for measure in measures],
    index=measures)
a['Rho'] = a['Rho'] ** 2

# Label of the first row attaining the maximum in the column at
# sim.posUnderSelection; `hap` holds the column labels where that row equals 1.
# (idxmax is now evaluated once instead of twice.)
rowOfMax = sim.H0[sim.posUnderSelection].idxmax()
hap = sim.H0.loc[rowOfMax, sim.H0.loc[rowOfMax] == 1].index

# One column per measure: each LD result restricted to sim.posUnderSelection.
# Built once and reused; the original recomputed it for the first plot and
# again on every loop iteration.  The dead statement `df = a.apply` (it only
# bound a method object and was overwritten before use) has been removed.
ldAtSite = a.apply(lambda x: x.loc[sim.posUnderSelection]).T
ldAtSite.plot(subplots=True, ax=plt.gca())

plt.figure()
# The unnamed mean Series becomes column 0, used as the scatter x-axis below.
df = pd.concat([ldAtSite, sim.H0.mean()], axis=1)
for i, m in enumerate(measures):
    plt.subplot(2, 2, i + 1)
    df.plot.scatter(x=0, y=m, ax=plt.gca())
    # Overlay the `hap` sites in red on the same axes.
    df.loc[hap].plot.scatter(x=0, y=m, ax=plt.gca(), color='r')

# Part 2: load precomputed results and rank the scores.
TRANSITIONS = tsutl.loadTransitions()
TRANSITIONSEXP = tsutl.loadTransitions(path=utl.outpath + 'transition/simulation/exp/')
s = 0.025
i = 0
depth = 100
HMM = splt.loadHMMAllDepths()
GP = splt.loadGP().LR
FIT = pd.read_pickle(utl.outpath + 'ROC/FIT.df').iloc[:, 0]
CMH = pd.concat([pd.read_pickle(utl.outpath + 'ROC/CMH.30.df'),
                 pd.read_pickle(utl.outpath + 'ROC/CMH.100.df')]).iloc[:, 0]
EMISSIONS = pd.read_pickle(utl.outpath + 'markov/Emissions.df')

# Stack the scores (HMM: alt - null, multiplied by the sign of s), then keep
# the (depth, 0.005) slice and the rows whose 'label' level equals 1.
scores = pd.concat([(HMM.alt - HMM.null) * HMM.s.apply(np.sign), GP, FIT, CMH])
df = scores.sort_index().loc[(depth, 0.005)].xs(1, level='label')
# Descending rank within each group of the first three index levels.
r = df.groupby(level=range(3)).apply(lambda x: x.rank(ascending=False))
df = pd.concat([df, r], axis=1)
import matplotlib as mpl
import popgen.Estimate as est
import popgen.TimeSeries.RNN.Utils as utl
import popgen.Utils
from popgen.Utils import Simulation

# Run a single 'SeasonalSNPs' simulation and plot it with utl.plotNu.
mpl.rc('font', family='serif', serif=['Computer Modern'], size=20)
mpl.rc('text', usetex=True)
reload(Simulation)
reload(utl)
reload(est)

# Original author's note on nu0: if zero is given, selection is set on a site
# with minimum allele frequency, i.e. 1/F.
s = 1
nu0 = -1
FounderLines = 150
numReplicates = 4
PopulationSize = 1000
L = 50000
ModelName = 'SeasonalSNPs'
i = 0
filename = '{}/{}/msms/L{:.0f}K.{:04.0f}.msms'.format(
    popgen.Utils.simoutpath, ModelName, L / 1000, i)
initialNeutralGenerations = 10
maxGeneration = 10

sim = Simulation.Simulation(
    initialNeutralGenerations=initialNeutralGenerations,
    s=s,
    L=L,
    numReplicates=numReplicates,
    initialCarrierFreq=nu0,
    msmsFile=filename,
    F=FounderLines,
    maxGeneration=maxGeneration,
    generationStep=1,
    N=PopulationSize,
    experimentID=i,
    ModelName=ModelName,
    ignoreInitialNeutralGenerations=True,
)
utl.plotNu(sim)
import numpy as np
import pandas as pd
import pylab as plt
import popgen.Utils
import popgen.TimeSeries.RNN.Utils as utl

np.set_printoptions(linewidth=40, precision=5, suppress=True)
pd.options.display.max_rows = 80
pd.options.display.expand_frame_repr = False
pd.options.display.max_columns = 20


def _harmonic(n):
    """Return the sum of 1/k over the values of ``np.arange(1, n)``."""
    return (1. / np.arange(1, n)).sum()


def D(nu, n=200, theta=200):
    """Return ``(1 - nu**2) - H((1 - nu)*n + 1) / H(n)`` element-wise.

    ``H`` is ``_harmonic``.  ``nu`` must be an iterable array of values;
    ``theta`` is unused and kept only so existing callers keep working.

    The per-element sums are collected into an explicit array before the
    division, matching ``D2``; the original divided the raw ``map(...)``
    result, which only works when ``map`` returns a list.
    """
    sizes = (1 - nu) * n + 1
    return (1 - nu ** 2) - np.array([_harmonic(m) for m in sizes]) / _harmonic(n)


def D2(nu, n=2000):
    """Return ``-H((1 - nu)*n + 1) / H(n)`` element-wise (``H`` = ``_harmonic``)."""
    sizes = (1 - nu) * n + 1
    return -np.array([_harmonic(m) for m in sizes]) / _harmonic(n)


# Grid of nu values in (0, 1), excluding 0 itself.
step = 0.0005
nut = np.arange(0, 1, step)[1:]
N = 1000
s = 0.1
t = 2 / s * (utl.logit(nut) - utl.logit(0.0005)) + 1

# The two terms and their sum, one column each, indexed by nu.
logTerm = -np.log(1 - nut) / np.log(2 * N)
squareTerm = -(nut ** 2)
df = pd.DataFrame(
    [logTerm, squareTerm, logTerm + squareTerm],
    index=[r'$-\frac{\log(1-\nu_t)}{\log(2N)}$', r'$-\nu_t^2$', r'$D_t$'],
    columns=nut).T

# Left panel: full range.  Right panel: same curves zoomed near the origin.
plt.figure(figsize=(20, 8))
plt.subplot(1, 2, 1)
df.plot(linewidth=2, grid=True, ax=plt.gca())
plt.xlabel(r'$\nu$')
plt.subplot(1, 2, 2)
df.plot(linewidth=2, ax=plt.gca(), grid=True)
plt.xlim([0, 0.2])
plt.ylim([-0.1, 0.1])
plt.xlabel(r'$\nu$')
plt.savefig(popgen.Utils.paperpath + 'tdterms.png')
# D=est.Estimate.LD(M).round(2)
# E=D.copy(True)
# D
# E[E<0]=None
# E
# F=E.applymap(lambda x: x**200).sum(1)
# F2=D.applymap(lambda x: x**200).sum(1)
# F.rolling(window=1000,center=True).mean().plot();plt.axvline(M.columns[ba_ind],color='r');
# G=pd.concat([D.abs().sum(1),M.mean()],axis=1)
# G.sort_values([0])

# N keeps only the columns of M whose mean exceeds 0.1; ba_indn is the
# integer position, within N, of the column labelled 500000.
idx = (M.mean() > 0.1)
N = M.iloc[:, idx.values]
ba_indn = np.where(N.columns == 500000)[0][0]


def plotSite(M, site, ba_ind):
    """Plot three rolling-mean views of the LD values of ``site``.

    Rows, top to bottom: values raised to the 200th power, absolute values,
    and positive values only.  Each is smoothed with a centred rolling mean
    whose width comes from the module-level ``window``, and each row gets a
    red vertical line at x = 500000.  The title reads 'True' when ``site``
    equals ``ba_ind`` and 'FP' otherwise.
    """
    plt.figure()
    d = est.Estimate.LD(M, site=site).round(2)
    views = (
        lambda ld: ld.apply(lambda x: x ** 200),
        lambda ld: ld.abs(),
        lambda ld: ld[ld > 0],
    )
    for row, view in enumerate(views, 1):
        plt.subplot(3, 1, row)
        view(d).rolling(window=window, center=True).mean().plot()
        plt.ylim([0, 1.1])
        plt.axvline(5 * 1e5, color='r')
    label = 'True' if site == ba_ind else 'FP'
    plt.suptitle('{} site={} freq={}'.format(label, site, M.mean().iloc[site]))


def _plotAndCompare(H, trueInd):
    """Plot ``trueInd`` and the last site of the sorted sliding scores.

    Also prints the mean of both sites, their LD and the difference between
    their column labels.  Returns ``(site, ld)``.
    """
    plotSite(H, trueInd, trueInd)
    # Last index label after an ascending sort of get_sliding_p's second output.
    lastSite = get_sliding_p(H.values)[1].sort_values().index[-1]
    plotSite(H, lastSite, trueInd)
    ld = est.Estimate.LD(H, sites=np.array([trueInd, lastSite]))
    # Single-argument print(...) reproduces the Python 2 print-statement output.
    print(H.mean().loc[ld.index])
    print(ld)
    distance = pd.Series(H.columns[np.array([trueInd, lastSite])]).diff().iloc[1]
    print(' '.join(['distance: ', str(distance)]))
    return lastSite, ld


site, d = _plotAndCompare(M, ba_ind)
site, d = _plotAndCompare(N, ba_indn)

import popgen.TimeSeries.RNN.Utils as utl
s = 0.05
# Notebook-style bare expressions: evaluated, but the values are discarded
# when this runs as a script.
2 * np.log(20000 * s) / s
2 * 2 / s * (utl.logit(0.5) - utl.logit(1. / 20000))
# 3R:16576189..16576198
# 3R:16576189..24755000