Пример #1
0
def runTrradeoffRepTime():
    """Run the replicate-count / sampling-window grid and pickle one frame per cell.

    For every (numReplicates, samplingWindow) pair this:

    1. runs ``runOne`` over the parameter sets built by
       ``getParamsForExperiments`` for each (nu0, s) combination,
    2. re-runs the most recently generated parameter sets after forcing
       ``s=0`` and ``nu0=0.005`` (presumably the neutral/null runs -- confirm),
    3. concatenates everything, tags it with ``rep`` and ``win`` columns and
       writes it to ``<utl.outpath>ROC/COMALERep<R>Win<W>.df``.

    Returns nothing; the pickled files are the output.
    """
    method = 'COMALE'
    L = 50000
    numExperiments = 500
    numThreads = 2
    Nu = [0.005, 0.1]
    S = [0.05]
    param = {'numExperiments': numExperiments, 'method': method, 'numThreads': numThreads,
             'ModelName': 'TimeSeries', 'L': L}
    if method == 'COMALE':
        param['T'] = tsutl.loadTransitions()
    # Single-argument print: emits the same text as the Python 2 statement
    # `print Nu,S,'numThreads=',numThreads`.
    print('{!s} {!s} numThreads= {!s}'.format(Nu, S, numThreads))
    for numReplicates in [3, 5, 10, 15, 20]:
        param['numReplicates'] = numReplicates
        for samplingWindow in [10, 30, 70, 100]:
            param['samplingWindow'] = samplingWindow
            df = []
            for nu0 in Nu:
                param['nu0'] = nu0
                for s in S:
                    param['s'] = s
                    params = getParamsForExperiments(param)
                    if numThreads == 1:
                        a = [runOne(p) for p in params]
                    else:
                        pool = Pool(numThreads)
                        try:
                            a = pool.map(runOne, params)
                        finally:
                            # Always release the worker processes, even if a run fails.
                            pool.terminate()
                    df += [pd.concat(a)]
                    print('\nMethod={}\tR={}\twin={}\tnu0={}\ts={}'.format(method, numReplicates, samplingWindow, nu0, s))
            # The loop variable must NOT be called `param`: that would rebind the
            # shared configuration dict above to the last generated experiment, and
            # every later grid cell would then be configured through the wrong dict.
            for experimentParam in params:
                experimentParam['s'] = 0
                experimentParam['nu0'] = 0.005
            df += [pd.concat([runOne(p) for p in params])]
            df = pd.concat(df)
            # NOTE(review): DataFrame.sortlevel is deprecated in newer pandas in
            # favour of sort_index; kept because this file targets the older API.
            df.sortlevel(inplace=True)
            df.dropna(axis=1, how='all', inplace=True)
            df['rep'] = numReplicates
            df['win'] = samplingWindow
            df.to_pickle('{}ROC/COMALERep{}Win{}.df'.format(utl.outpath, numReplicates, samplingWindow))
Пример #2
0
# Global matplotlib styling: serif Computer Modern at size 20, text rendered via LaTeX.
mpl.rc('font', family='serif', serif=['Computer Modern'], size=20)
mpl.rc('text', usetex=True)

# Pick up any edits made to the project modules since they were first imported.
reload(Simulation)
reload(utl)
reload(est)


# Simulation settings.
# Original remark on nu0: if zero is given, selection is placed on a site with the
# minimum allele frequency, i.e. 1/F.
s = 1
nu0 = -1
FounderLines = 150
numReplicates = 4
PopulationSize = 1000
L = 50000
ModelName = 'SeasonalSNPs'
i = 0
# The msms file name is built once (with i == 0) and shared by every simulation below.
filename = '{}/{}/msms/L{:.0f}K.{:04.0f}.msms'.format(popgen.Utils.simoutpath, ModelName, L / 1000, i)
initialNeutralGenerations = 10
maxGeneration = 10

# 5 x 4 grid: one simulation (distinguished by experimentID) per panel.
fig = plt.figure(figsize=(30, 35), dpi=60)
for i in range(20):
    plt.subplot(5, 4, i + 1)
    sim = Simulation.Simulation(
        ModelName=ModelName,
        msmsFile=filename,
        experimentID=i,
        L=L,
        N=PopulationSize,
        F=FounderLines,
        s=s,
        initialCarrierFreq=nu0,
        numReplicates=numReplicates,
        initialNeutralGenerations=initialNeutralGenerations,
        ignoreInitialNeutralGenerations=False,
        maxGeneration=maxGeneration,
        generationStep=1,
        foldInitialAFs=False)
    utl.plotNu(sim, fig=fig, show=False)

# The title uses the last simulation's theta, scaled by sum(1/k for k in 1..149)
# and floored to a multiple of 100.
plt.suptitle(
    'Four simulations window of {:.0f}Kbp (roughly {:.0f} variants)'.format(
        L / 1000,
        round(sim.theta * sum(1. / np.arange(1, 150)), 0) // 100 * 100))
plt.savefig('/home/arya/Dropbox/SeasonalSNPs/notes/simulationModel.png')
# plt.show()

Пример #3
0
plt.figure()
# One LD result per entry of `measures`, computed on the float-cast sim.H0
# and keyed by measure name.
a = pd.Series(
    [estt.Estimate.LD(sim.H0.astype('float'), measure=measure) for measure in measures],
    index=measures)
a['Rho'] = a['Rho'] ** 2
# NOTE(review): this binds the `apply` method itself (it is never called); `df` is
# reassigned before it is read, so this looks like a leftover.
df = a.apply
# Column labels equal to 1 in the row of sim.H0 at which the sim.posUnderSelection
# column attains its maximum.
hap = sim.H0.loc[
    sim.H0[sim.posUnderSelection].idxmax(),
    sim.H0.loc[sim.H0[sim.posUnderSelection].idxmax()] == 1
].index

# Per-measure LD against sim.posUnderSelection, one subplot per measure.
a.apply(lambda x: x.loc[sim.posUnderSelection]).T.plot(subplots=True, ax=plt.gca())
plt.figure()
for i, m in enumerate(measures):
    plt.subplot(2, 2, i + 1)
    # Columns: one per measure, plus sim.H0.mean() (named 0 by concat), used as x.
    df = pd.concat(
        [a.apply(lambda x: x.loc[sim.posUnderSelection]).T, sim.H0.mean()],
        axis=1)
    df.plot.scatter(x=0, y=m, ax=plt.gca())
    # Overlay the `hap` rows in red.
    df.loc[hap].plot.scatter(x=0, y=m, ax=plt.gca(), color='r')


# Precomputed inputs loaded from disk / project helpers.
TRANSITIONS = tsutl.loadTransitions()
TRANSITIONSEXP = tsutl.loadTransitions(path=utl.outpath + 'transition/simulation/exp/')
s = 0.025
i = 0
depth = 100
HMM = splt.loadHMMAllDepths()
GP = splt.loadGP().LR
FIT = pd.read_pickle(utl.outpath + 'ROC/FIT.df').iloc[:, 0]
CMH = pd.concat([
    pd.read_pickle(utl.outpath + 'ROC/CMH.30.df'),
    pd.read_pickle(utl.outpath + 'ROC/CMH.100.df'),
]).iloc[:, 0]
EMISSIONS = pd.read_pickle(utl.outpath + 'markov/Emissions.df')

# Stack the method scores (the HMM term is (alt - null) signed by s), keep the
# (depth, 0.005) slice, then the cross-section where level 'label' == 1.
df = pd.concat(
    [(HMM.alt - HMM.null) * HMM.s.apply(np.sign), GP, FIT, CMH]
).sort_index().loc[(depth, 0.005)].xs(1, level='label')
# Descending rank within each group defined by the first three index levels.
r = df.groupby(level=[0, 1, 2]).apply(lambda x: x.rank(ascending=False))
df = pd.concat([df, r], axis=1)
Пример #4
0
import matplotlib as mpl;

import  popgen.Estimate as est
import popgen.TimeSeries.RNN.Utils as utl
import popgen.Utils
from popgen.Utils import Simulation

# Global matplotlib styling: serif Computer Modern at size 20, text rendered via LaTeX.
mpl.rc('font', family='serif', serif=['Computer Modern'], size=20)
mpl.rc('text', usetex=True)

# Pick up any edits made to the project modules since they were first imported.
reload(Simulation)
reload(utl)
reload(est)


# Simulation settings.
# Original remark on nu0: if zero is given, selection is placed on a site with the
# minimum allele frequency, i.e. 1/F.
s = 1
nu0 = -1
FounderLines = 150
numReplicates = 4
PopulationSize = 1000
L = 50000
ModelName = 'SeasonalSNPs'
i = 0
filename = '{}/{}/msms/L{:.0f}K.{:04.0f}.msms'.format(popgen.Utils.simoutpath, ModelName, L / 1000, i)
initialNeutralGenerations = 10
maxGeneration = 10

# Build a single simulation from the settings above and plot it.
sim = Simulation.Simulation(
    ModelName=ModelName,
    msmsFile=filename,
    experimentID=i,
    L=L,
    N=PopulationSize,
    F=FounderLines,
    s=s,
    initialCarrierFreq=nu0,
    numReplicates=numReplicates,
    initialNeutralGenerations=initialNeutralGenerations,
    ignoreInitialNeutralGenerations=True,
    maxGeneration=maxGeneration,
    generationStep=1)
utl.plotNu(sim)
Пример #5
0
import numpy as np;

import popgen.Utils

np.set_printoptions(linewidth=40, precision=5, suppress=True)
import pandas as pd;  pd.options.display.max_rows=80;pd.options.display.expand_frame_repr=False;pd.options.display.max_columns=20
import pylab as plt;
import popgen.TimeSeries.RNN.Utils as utl


def D(nu,n=200,theta=200):
    """Return ``(1 - nu**2) - h((1 - nu)*n + 1) / h(n)`` element-wise.

    ``h(m)`` is the partial harmonic sum ``sum(1/k for k in np.arange(1, m))``.

    Parameters
    ----------
    nu : array-like of float, or scalar
        Values at which to evaluate (a scalar yields a one-element array).
    n : int
        Scale used inside the harmonic sums.
    theta : unused
        Accepted for interface compatibility only; it does not affect the result.

    Returns
    -------
    Array-like with one value per entry of ``nu``.
    """
    def logn(m):
        return (1. / np.arange(1, m)).sum()

    # Build the array explicitly instead of dividing the result of map(): that only
    # worked when map() returned a list (Python 2), not a lazy iterator.
    harmonic = np.array([logn(m) for m in np.atleast_1d((1 - nu) * n + 1)])
    return (1 - nu ** 2) - harmonic / logn(n)
def D2(nu,n=2000):
    """Return ``-h((1 - nu)*n + 1) / h(n)`` element-wise.

    ``h(m)`` is the partial harmonic sum ``sum(1/k for k in np.arange(1, m))``.

    Parameters
    ----------
    nu : array-like of float, or scalar
        Values at which to evaluate (a scalar yields a one-element array).
    n : int
        Scale used inside the harmonic sums.

    Returns
    -------
    numpy.ndarray with one value per entry of ``nu``.
    """
    def logn(m):
        return (1. / np.arange(1, m)).sum()

    # A list comprehension keeps this correct when map() is lazy (Python 3), where
    # np.array(map(...)) would produce a 0-d object array instead of numbers.
    harmonic = np.array([logn(m) for m in np.atleast_1d((1 - nu) * n + 1)])
    return -harmonic / logn(n)
# Frequency grid on (0, 1): multiples of `step`, excluding 0 itself.
step = 0.0005
nut = np.arange(0, 1, step)[1:]
N = 1000
s = 0.1
# Time axis derived from the logit of the frequency, relative to logit(0.0005).
t = 2 / s * (utl.logit(nut) - utl.logit(0.0005)) + 1

# Three curves over nut: the log term, the quadratic term, and their combination.
df = pd.DataFrame(
    [
        -np.log(1 - nut) / np.log(2 * N),
        -(nut ** 2),
        -np.log(1 - nut) / np.log(2 * N) - (nut ** 2),
    ],
    index=[r'$-\frac{\log(1-\nu_t)}{\log(2N)}$', r'$-\nu_t^2$', r'$D_t$'],
    columns=nut).T

plt.figure(figsize=(20, 8))

# Left panel: full frequency range.
plt.subplot(1, 2, 1)
df.plot(linewidth=2, grid=True, ax=plt.gca())
plt.xlabel(r'$\nu$')

# Right panel: same curves, zoomed to nu in [0, 0.2] and y in [-0.1, 0.1].
plt.subplot(1, 2, 2)
df.plot(linewidth=2, ax=plt.gca(), grid=True)
plt.xlim([0, 0.2])
plt.ylim([-0.1, 0.1])
plt.xlabel(r'$\nu$')

plt.savefig(popgen.Utils.paperpath + 'tdterms.png')
Пример #6
0
# D=est.Estimate.LD(M).round(2)
# E=D.copy(True)
# D
# E[E<0]=None
# E
# F=E.applymap(lambda x: x**200).sum(1)
# F2=D.applymap(lambda x: x**200).sum(1)
# F.rolling(window=1000,center=True).mean().plot();plt.axvline(M.columns[ba_ind],color='r');
# G=pd.concat([D.abs().sum(1),M.mean()],axis=1)
# G.sort_values([0])
# Keep only the columns of M whose mean exceeds 0.1.
idx = M.mean() > 0.1
N = M.iloc[:, idx.values]
# Positional index, within the filtered frame, of the column labelled 500000.
ba_indn = np.flatnonzero(N.columns == 500000)[0]


def plotSite(M,site,ba_ind):
    """Plot three smoothed views of the LD values for one site in a new figure.

    ``est.Estimate.LD(M, site=site)`` is rounded to two decimals and shown in
    three stacked panels, each a centered rolling mean (width taken from the
    module-level ``window``) with y-limits [0, 1.1] and a red vertical line at
    x = 500000:

    1. the values raised to the 200th power,
    2. their absolute values,
    3. only the positive values.

    The figure title reads 'True' when ``site == ba_ind`` and 'FP' otherwise,
    followed by the site and ``M.mean().iloc[site]``.
    """
    plt.figure()
    d = est.Estimate.LD(M, site=site).round(2)

    def draw_panel(row, values, marker):
        # One row of the 3x1 grid: smoothed curve plus the red reference line.
        plt.subplot(3, 1, row)
        values.rolling(window=window, center=True).mean().plot()
        plt.ylim([0, 1.1])
        plt.axvline(marker, color='r')

    draw_panel(1, d.apply(lambda x: x ** 200), 5 * 1e5)
    draw_panel(2, d.abs(), 500000)
    draw_panel(3, d[d > 0], 5 * 1e5)

    verdict = 'True' if site == ba_ind else 'FP'
    plt.suptitle('{} site={}  freq={}'.format(verdict, site, M.mean().iloc[site]))

# Full matrix M: plot the reference site, then the site ranked last by the second
# output of get_sliding_p, and report the LD between the two and their distance.
plotSite(M, ba_ind, ba_ind)
site = get_sliding_p(M.values)[1].sort_values().index[-1]
plotSite(M, site, ba_ind)
d = est.Estimate.LD(M, sites=np.array([ba_ind, site]))
print(M.mean().loc[d.index])
print(d)
# Same text as the Python 2 statement `print 'distance: ', x` (which inserts a space).
print('distance:  ' + str(pd.Series(M.columns[np.array([ba_ind, site])]).diff().iloc[1]))

# Same procedure on the filtered matrix N.
plotSite(N, ba_indn, ba_indn)
site = get_sliding_p(N.values)[1].sort_values().index[-1]
plotSite(N, site, ba_indn)
d = est.Estimate.LD(N, sites=np.array([ba_indn, site]))
print(N.mean().loc[d.index])
print(d)
print('distance:  ' + str(pd.Series(N.columns[np.array([ba_indn, site])]).diff().iloc[1]))

import popgen.TimeSeries.RNN.Utils as utl
s = 0.05
# Scratch evaluations; the results are not stored.
2 * np.log(20000 * s) / s
2 * 2 / s * (utl.logit(0.5) - utl.logit(1. / 20000))


# 3R:16576189..16576198
# 3R:16576189..24755000