def parse_slim_organization(lines, mtypes, elements, mutrate):
    start = lines.index("#CHROMOSOME ORGANIZATION")
    end = get_next_block_starts(lines, start)
    #need to get sum of all weights per mutation type
    ttlweights = {}
    for key1 in elements:
        ttlweights[key1] = 0.
        for key2 in elements[key1]:
            ttlweights[key1] = ttlweights[key1] + elements[key1][key2]
    nregions = []
    sregions = []
    mun = 0
    mus = 0
    for i in lines[start + 1:start + 1 + end]:
        t = i.split()
        ebeg = float(t[1])
        eend = float(t[2])
        for key in elements[t[0]]:
            mt = mtypes[key]
            weight = elements[t[0]][key] / ttlweights[t[0]]
            ##In this block, we halve s or mean s,
            ##and double h, to convert from SLiM's
            ##fitness model of 1,1+hs,1+s to the
            ##1,1+sh,1+2s used here.
            if mt[1] == 'f':
                if mt[2] == 0.:  #is a neutral mutation
                    mun = mun + mutrate * weight * (eend - ebeg + 1.)
                    nregions.append(
                        fwdpy.Region(ebeg - 1., eend, mutrate * weight))
                else:
                    mus = mus + mutrate * weight * (eend - ebeg + 1.)
                    sregions.append(
                        fwdpy.ConstantS(ebeg - 1., eend, mutrate * weight,
                                        0.5 * mt[2], 2 * mt[0]))
            elif mt[1] == 'e':
                mus = mus + mutrate * weight * (eend - ebeg + 1.)
                sregions.append(
                    fwdpy.ExpS(ebeg - 1., eend, mutrate * weight,
                               0.5 * mt[2], 2 * mt[0]))
            elif mt[1] == 'g':
                mus = mus + mutrate * weight * (eend - ebeg + 1.)
                sregions.append(
                    fwdpy.GammaS(ebeg - 1., eend, mutrate * weight,
                                 0.5 * mt[2], mt[3], 2 * mt[0]))
            else:
                raise RuntimeError("invalid DFE encountered")
    return {
        'nregions': nregions,
        'sregions': sregions,
        'mu_neutral': mun,
        'mu_selected': mus
    }
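#A quick sanity check (not part of the parser) on the s/h conversion noted in
#the comments above: halving s and doubling h reproduces SLiM's genotype
#fitnesses. The example values below are arbitrary illustrations.
s_slim, h_slim = -0.1, 0.5
s_here, h_here = 0.5 * s_slim, 2 * h_slim
assert 1 + h_slim * s_slim == 1 + s_here * h_here  #heterozygote: 1+hs == 1+sh
assert 1 + s_slim == 1 + 2 * s_here                #homozygote:   1+s  == 1+2s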
def make_buried_rec_region(littler):
    """
    For a region of size littler in a larger 50cM region,
    set up the boundaries, assuming littler corresponds to
    recombination along the interval [0,1)
    """
    rest = 0.5 - littler
    ratio = rest / littler
    return {'region': [fp.Region(-ratio, 1 + ratio, 1)],
            'beg': -ratio,
            'end': (1 + ratio)}
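#A minimal usage sketch (littler = 0.05 is an arbitrary illustrative value):
#rest = 0.5 - 0.05 = 0.45 and ratio = 0.45 / 0.05 = 9.0, so the single
#recombination Region spans [-9.0, 10.0) with weight 1, and the focal
#interval [0,1) sits buried in the middle of it.
res = make_buried_rec_region(0.05)
print(res['beg'])  # -9.0
print(res['end'])  # 10.0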
def parse_slim_recrates(lines):
    start = lines.index("#RECOMBINATION RATE")
    end = get_next_block_starts(lines, start)
    regions = []
    recrate = 0.
    laststart = float(0.0)
    for i in lines[start + 1:start + 1 + end]:
        t = i.split()
        stop = float(t[0])
        weight = float(t[1])
        if weight > 0.:
            ##NEED TO DOCUMENT THE + 1
            regions.append(fwdpy.Region(laststart, stop, weight))
            recrate = recrate + weight * (stop - laststart)
        laststart = float(stop)
    return {'recrate': recrate, 'recregions': regions}
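#The input layout assumed by the parser above, inferred from the split() calls;
#this is a hypothetical fragment, not taken from a real SLiM input file. Each
#data line holds an end position followed by a recombination weight, and each
#interval runs from the previous end position to the current one.
example_lines = [
    "#RECOMBINATION RATE",
    "10000 1e-8",   #interval [0, 10000) at weight 1e-8
    "20000 5e-8",   #interval [10000, 20000) at weight 5e-8
]
#Given a get_next_block_starts() that reports the block length (2 here),
#parse_slim_recrates(example_lines) would return two fwdpy.Region objects
#plus the summed recrate.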
from __future__ import print_function
#Import fwdpy.  Give it a shorter name
import fwdpy as fp
##Other libs we need
import numpy as np
import pandas
import math

# ### Establishing 'regions' for mutation and recombination

# In[2]:

# Where neutral mutations occur:
nregions = [fp.Region(beg=0, end=1, weight=1)]

# In[3]:

# Where selected mutations occur:
sregions = [fp.ConstantS(beg=-1, end=0, weight=1, s=-0.05, h=1),
            fp.ConstantS(beg=1, end=2, weight=1, s=-0.05, h=1)]

# In[4]:

# Recombination:
recregions = [fp.Region(beg=-1, end=2, weight=1)]
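# In[5]:

# A minimal sketch of plugging these regions into fp.evolve_regions, following
# the call signature used elsewhere in this collection (rng, number of
# replicates, N, size history, neutral mutation rate, selected mutation rate,
# recombination rate, then the three region lists). The seed, replicate count,
# and rates below are placeholder values for illustration only.
rng = fp.GSLrng(42)
N = 1000
popsizes = np.array([N] * 10 * N, dtype=np.uint32)  # constant N for 10N generations
pops = fp.evolve_regions(rng, 4, N, popsizes[0:], 0.001, 0.0001, 0.001,
                         nregions, sregions, recregions)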
#over time, to show relation of simulation
#params to HoC approximation
#This script varies mu with VS=1 and varies sigma_mu
#such that mu*(sigma_mu^2) is constant
import fwdpy as fp
import fwdpy.qtrait as qt
import pandas as pd
import numpy as np
import math, sys
import matplotlib
import matplotlib.pyplot as plt

nregions = []
rregions = [fp.Region(0, 1, 1)]
reference_mu = 1e-3
recrate = 0.5
N = 1000
simlen = 10 * N
Nlist = np.array([N] * (simlen), dtype=np.uint32)
#grid of VS values.  1 is our reference value
relative_mu = [0.25, 0.5, 1.0, 5.0, 10.0]
reference_sigma = math.sqrt(0.05)
reference_vm = 2.0 * reference_mu * (math.pow(reference_sigma, 2.0))
reference_vg = 4 * reference_mu * 1.0
rng = fp.GSLrng(1525152)
#plot VG, ebar, tbar, max_expl over time
#The next three lines process and compile our custom fitness module:
import pyximport
pyximport.install()
import test_fwdpy_extensions.test_custom_fitness as tfp
#import fwdpy and numpy as usual
import fwdpy as fp
import numpy as np

rng = fp.GSLrng(101)
rngs = fp.GSLrng(202)
p = fp.SpopVec(3, 1000)
s = fp.NothingSampler(len(p))
n = np.array([1000] * 1000, dtype=np.uint32)
nr = [fp.Region(0, 1, 1)]
sr = [fp.ExpS(0, 1, 1, 0.1)]

#Now, let's do some evolution with our 'custom' fitness functions:
fitness = tfp.AdditiveFitnessTesting()
fp.evolve_regions_sampler_fitness(rng, p, s, fitness, n, 0.001, 0.001, 0.001,
                                  nr, sr, nr, 1)

fitness = tfp.AaOnlyTesting()
fp.evolve_regions_sampler_fitness(rng, p, s, fitness, n, 0.001, 0.001, 0.001,
                                  nr, sr, nr, 1)

fitness = tfp.GBRFitness()
fp.evolve_regions_sampler_fitness(rng, p, s, fitness, n, 0.001, 0.001, 0.001,
                                  nr, sr, nr, 1)
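#A small follow-up sketch: the evolved populations can be inspected with the
#same view functions used in the unit tests below. For example, gamete counts
#in each population should sum to twice the deme size (2 * 1000 here).
gams = fp.view_gametes(p[0])
total = 0
for g in gams:
    total += g['n']
assert total == 2 * 1000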
def main():
    try:
        opts, args = getopt.getopt(sys.argv[1:], "m:e:H:S:O:N:s:r:",
                                   ["fixed=", "ages=", "traj="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(err)  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)
    #set up default params
    N = 1000    # pop size
    e = 0.25    # s.d. of effect sizes
    S = 1       # V(S)
    H = None    # desired b-sense H^2
    m = None    # Mutation rate (per gamete, per generation) to alleles affecting trait value
    r = 0.5     # rec. rate (per diploid, per gen)
    Opt = 0.0   # Value of optimum after 10N gens
    fixationsFile = None
    lostFile = None
    trajFile = None
    seed = 0
    for o, a in opts:
        if o == '-m':
            m = float(a)
        elif o == '-e':
            e = float(a)
        elif o == '-H':
            H = float(a)
        elif o == '-S':
            S = float(a)
        elif o == '-O':
            Opt = float(a)
        elif o == '-N':
            N = int(a)
        elif o == '-s':
            seed = int(a)
        elif o == '-r':
            r = float(a)
        elif o == '--fixed':
            fixationsFile = a
        elif o == '--ages':
            lostFile = a
        elif o == '--traj':
            trajFile = a
    if H is None:
        usage()
        sys.exit(2)
    if m is None:
        usage()
        sys.exit(2)
    if fixationsFile is None or lostFile is None or trajFile is None:
        usage()
        sys.exit(2)

    rng = fp.GSLrng(seed)
    hdf_fixed = pd.HDFStore(fixationsFile, 'w', complevel=6, complib='zlib')
    hdf_fixed.open()
    hdf_lost = pd.HDFStore(lostFile, 'w', complevel=6, complib='zlib')
    hdf_lost.open()
    hdf_traj = pd.HDFStore(trajFile, 'w', complevel=6, complib='zlib')
    hdf_traj.open()
    sigE = get_sigE_additive(m, S, H)

    nregions = []
    recregions = [fp.Region(0, 1, 1)]
    sregions = [fp.GaussianS(0, 1, 1, e)]
    #population size over time -- constant & we re-use this over and over
    nlist = np.array([N] * (10 * N), dtype=np.uint32)
    REPLICATE = 0
    #16 batches of 64 runs = 1024 replicates
    for i in range(16):
        #set up populations
        pops = fp.SpopVec(64, N)
        #Evolve to equilibrium
        sampler = fp.FreqSampler(len(pops))
        qt.evolve_regions_qtrait_sampler(rng, pops, sampler, nlist[0:],
                                         0, m, r,
                                         nregions, sregions, recregions,
                                         sigmaE=sigE,
                                         sample=1,
                                         VS=S)  ##Do not track popstats during "burn-in"
        traj1 = sampler.get()
        sampler = fp.FreqSampler(len(pops))
        #evolve for another 10N generations at new optimum
        qt.evolve_regions_qtrait_sampler(rng, pops, sampler, nlist[0:],
                                         0, m, r,
                                         nregions, sregions, recregions,
                                         sigmaE=sigE,
                                         VS=S, optimum=Opt, sample=1)
        traj2 = sampler.get()
        AGES = []
        FIXATIONS = []
        #merge trajectories and get allele ages (parallelized via OpenMP)
        traj1 = fp.merge_trajectories(traj1, traj2)
        ages = fp.allele_ages(traj1)
        REPTEMP = REPLICATE
        for ai in range(len(ages)):
            dfi = pd.DataFrame(ages[ai])
            dfi['rep'] = [REPTEMP] * len(dfi.index)
            FIXATIONS.append(dfi[dfi['max_freq'] == 1.0])
            AGES.append(dfi[dfi['max_freq'] < 1.0])
            REPTEMP += 1
        # for j in range(len(pops)):
        #     #Merge all trajectories for this replicate
        #     df = pd.concat([pd.DataFrame(traj1[j]),
        #                     pd.DataFrame(traj2[j])])
        #     for name, group in df.groupby(['pos', 'esize']):
        #         if group.freq.max() < 1:  #mutation did not reach fixation...
        #             if group.generation.max() - group.generation.min() > 1:  #... and it lived > 1 generation ...
        #                 AGES.append({'rep': REPLICATE,
        #                              'esize': name[1],
        #                              'origin': group.generation.min(),
        #                              'final_g': group.generation.max(),
        #                              'max_q': group.freq.max(),
        #                              'last_q': group.freq.iloc[-1]})
        #         else:  #mutation did reach fixation!
        #             FIXATIONS.append({'rep': REPLICATE,
        #                               'esize': name[1],
        #                               'origin': group.generation.min(),
        #                               'final_g': group.generation.max()})
        #     REPLICATE += 1
        ##Add more info into the trajectories
        for t in traj1:
            LD = []
            for i in t:
                I = int(0)
                for j in i[1]:
                    x = copy.deepcopy(i[0])
                    x['freq'] = j
                    x['generation'] = i[0]['origin'] + I
                    I += 1
                    LD.append(x)
            d = pd.DataFrame(LD)
            d['rep'] = [REPLICATE] * len(d.index)
            REPLICATE += 1
            hdf_traj.append('trajectories', d)
        hdf_fixed.append('fixations', pd.concat(FIXATIONS))
        hdf_lost.append('allele_ages', pd.concat(AGES))
    hdf_fixed.close()
    hdf_lost.close()
    hdf_traj.close()
def make_neutral_region():
    return [fp.Region(0, 1, 1)]
import unittest
import fwdpy
import numpy as np

nregions = [fwdpy.Region(0, 1, 1), fwdpy.Region(2, 3, 1)]
sregions = [fwdpy.ExpS(1, 2, 1, -0.1), fwdpy.ExpS(1, 2, 0.01, 0.001)]
rregions = [fwdpy.Region(0, 3, 1)]
rng = fwdpy.GSLrng(100)
N = 1000
NGENS = 100
popsizes = np.array([N], dtype=np.uint32)
popsizes = np.tile(popsizes, NGENS)
pops = fwdpy.evolve_regions(rng, 1, N, popsizes[0:], 0.001, 0.0001, 0.001,
                            nregions, sregions, rregions)
#The sum of the gamete counts must be 2*(deme size):
#mpops = fwdpy.evolve_regions_split(rng,pops,popsizes[0:],popsizes[0:],0.001,0.0001,0.001,nregions,sregions,rregions,[0]*2)


class test_singlepop_views(unittest.TestCase):
    def testNumGametes(self):
        gams = fwdpy.view_gametes(pops[0])
        nsingle = 0
        for i in gams:
            nsingle += i['n']
        self.assertEqual(nsingle, 2000)

    def testDipsize(self):
        dips_single = fwdpy.view_diploids(pops[0], [0, 1, 2])
        self.assertEqual(len(dips_single), 3)
import fwdpy as fp
import numpy as np

rng = fp.GSLrng(101)
N = 1000
nlist = np.array([N] * 10 * N, dtype=np.uint32)
p = fp.evolve_regions(rng, 64, N, nlist, 0.25, 0.0, 0.25,
                      [fp.Region(0, 1, 1)], [], [fp.Region(0, 1, 1)])
#p.append(p2)
#print len(p)
#v = fp.view_diploids_pd(p,range(0,N,1))
#v=[fp.view_diploids(i,range(0,N,1)) for i in p]
#b = fp.view_diploids_pd(p,range(0,N,1))
#print b
#for i in v:
#    print i
#g = [i.gen() for i in p]
#print g
#d = fp.view_diploids(p,[0,1])
#print d
NB = 50
NCORES = 40
NPIK = NCORES * NB
f = gzip.open(PIK, "wb")
N = 1000
scaled_sigmaMU = 250.0
sigMU = scaled_sigmaMU / N
theta_n = 100.0
rho_n = 100.0
mun = 0.0  #theta_n/(4*N)
littler = rho_n / (4 * N)
rest = r - littler
ratio = rest / r
sample_interval = 0.01  #In units of N generations
print sigMU, " ", mun, " ", int(sample_interval * N)
neutmutregions = [fp.Region(0, 0.1, 1)]
#selmutregions=[fp.GaussianS(-ratio,1+ratio,1,sigMU)]
#recregions= [fp.Region(-ratio,1+ratio,1)]
selmutregions = [fp.GaussianS(0, 0.1, 1, sigMU)]
recregions = [fp.Region(0, 0.1, 1)]
pickle.dump(NPIK, f)
REP = 0
for i in range(NB):
    nlist = np.array([N] * (10 * N), dtype=np.uint32)
    #Evolve to equilibrium
    pops = fp.popvec(NCORES, N)
    samples = qt.evolve_qtrait_track(rng, pops, nlist[0:], mun,
def main():
    try:
        opts, args = getopt.getopt(sys.argv[1:], "m:e:H:S:O:N:t:s:F:r:",
                                   ["cores=", "batches="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(err)  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)
    #set up default params
    N = 1000      # pop size
    t = None      # 0.1N
    e = 0.25      # s.d. of effect sizes
    S = 1         # V(S)
    H = None      # desired b-sense H^2
    m = None      # Mutation rate (per gamete, per generation) to alleles affecting trait value
    r = 0.5       # rec. rate (per diploid, per gen)
    Opt = 0.0     # Value of optimum after 10N gens
    ofile = None
    seed = 0
    ncores = 64
    nbatches = 16
    for o, a in opts:
        if o == '-m':
            m = float(a)
        elif o == '-e':
            e = float(a)
        elif o == '-H':
            H = float(a)
        elif o == '-S':
            S = float(a)
        elif o == '-O':
            Opt = float(a)
        elif o == '-N':
            N = int(a)
        elif o == '-t':
            t = int(a)
        elif o == '-s':
            seed = int(a)
        elif o == '-F':
            ofile = a
        elif o == '-r':
            r = float(a)
        elif o == '--cores':
            ncores = int(a)
        elif o == '--batches':
            nbatches = int(a)
    if t is None:
        t = int(0.1 * float(N))
    if H is None:
        usage()
        sys.exit(2)
    if m is None:
        usage()
        sys.exit(2)
    if ofile is None:
        usage()
        sys.exit(2)

    rng = fp.GSLrng(seed)
    hdf = pd.HDFStore(ofile, 'w', complevel=6, complib='zlib')
    hdf.open()
    sigE = get_sigE_additive(m, S, H)

    nregions = []
    recregions = [fp.Region(0, 1, 1)]
    sregions = [fp.GaussianS(0, 1, 1, e)]
    #population size over time -- constant & we re-use this over and over
    nlist = np.array([N] * (10 * N), dtype=np.uint32)
    #16 batches of 64 runs = 1024 replicates
    fitness = qt.SpopAdditiveTrait()
    REPLICATE = 0
    for i in range(nbatches):
        pops = fp.SpopVec(ncores, N)
        sampler = fp.QtraitStatsSampler(len(pops), 0.0)
        #Evolve to equilibrium, tracking along the way
        qt.evolve_regions_qtrait_sampler_fitness(rng, pops, sampler, fitness,
                                                 nlist[0:],
                                                 0, m, r,
                                                 nregions, sregions, recregions,
                                                 sigmaE=sigE,
                                                 sample=t,
                                                 VS=S, optimum=0)
        stats = sampler.get()
        RTEMP = REPLICATE
        for si in stats:
            ti = pd.DataFrame(si)
            ti['rep'] = [RTEMP] * len(ti.index)
            RTEMP += 1
            hdf.append('popstats', ti)
        #simulate another 10*N generations, sampling stats every 't' generations
        sampler = fp.QtraitStatsSampler(len(pops), Opt)
        qt.evolve_regions_qtrait_sampler_fitness(rng, pops, sampler, fitness,
                                                 nlist[0:],
                                                 0, m, r,
                                                 nregions, sregions, recregions,
                                                 sigmaE=sigE,
                                                 sample=t,
                                                 VS=S, optimum=Opt)
        stats = sampler.get()
        for si in stats:
            ti = pd.DataFrame(si)
            ti['rep'] = [REPLICATE] * len(ti.index)
            hdf.append('popstats', ti)
            REPLICATE += 1
    hdf.close()
def main():
    parser = make_parser()
    args = parser.parse_args(sys.argv[1:])
    if args.verbose:
        print(args)
    ##Figure out sigma_E from params
    sigE = get_sigE_additive(args.mutrate, args.VS, args.H2)
    trait = get_trait_model(args.trait)
    nlist = np.array([args.popsize] * (10 * args.popsize), dtype=np.uint32)
    rng = fp.GSLrng(args.seed)
    mu_neutral = 0.0
    nregions = []
    sregions = [fp.GaussianS(0, 1, 1, args.sigmu, args.dominance)]
    recregions = [fp.Region(0, 1, 1)]
    if args.sampler == 'stats':
        output = pd.HDFStore(args.outfile, 'w', complevel=6, complib='zlib')
        output.close()
    REPID = 0
    for BATCH in range(args.nbatches):
        pops = fp.SpopVec(args.ncores, args.popsize)
        sampler = get_sampler_type(args.sampler, args.trait, len(pops), 0.0)
        qt.evolve_regions_qtrait_sampler_fitness(rng, pops, sampler, trait,
                                                 nlist,
                                                 0.0,
                                                 args.mutrate,
                                                 args.recrate,
                                                 nregions, sregions, recregions,
                                                 args.tsample,
                                                 sigE,
                                                 optimum=0.0,
                                                 VS=args.VS)
        if args.sampler != 'freq':
            if args.sampler == 'freq':
                dummy = write_output(sampler, args, REPID, BATCH, 'w')
            elif args.sampler == 'stats':
                dummy = write_output(sampler, args, REPID, BATCH, 'a')
            else:
                dummy = write_output(sampler, args, REPID, BATCH, 'a')
        sampler = get_sampler_type(args.sampler, args.trait, len(pops),
                                   args.optimum)
        qt.evolve_regions_qtrait_sampler_fitness(rng, pops, sampler, trait,
                                                 nlist,
                                                 0.0,
                                                 args.mutrate,
                                                 args.recrate,
                                                 nregions, sregions, recregions,
                                                 args.tsample,
                                                 sigE,
                                                 optimum=args.optimum,
                                                 VS=args.VS)
        if args.sampler == 'freq':
            #Append this time!
            REPID = write_output(sampler, args, REPID, BATCH, 'w')
        elif args.sampler == 'stats':
            REPID = write_output(sampler, args, REPID, BATCH, 'a')
        else:
            REPID = write_output(sampler, args, REPID, BATCH, 'a')
import datetime
import time

# In[26]:

##Info
dt = datetime.datetime.now()
print("This example was processed using ", fp.pkg_version(), "on", dt.month,
      "/", dt.day, "/", dt.year)
print("The dependency versions are", fp.pkg_dependencies())

# In[27]:

#set up our sim
rng = fp.GSLrng(101)
nregions = [fp.Region(0, 1, 1), fp.Region(2, 3, 1)]
sregions = [fp.ExpS(1, 2, 1, -0.1), fp.ExpS(1, 2, 0.1, 0.001)]
rregions = [fp.Region(0, 3, 1)]
popsizes = np.array([1000] * 10000, dtype=np.uint32)

# In[28]:

#Run the sim
pops = fp.evolve_regions(rng, 4, 1000, popsizes[0:], 0.001, 0.0001, 0.001,
                         nregions, sregions, rregions)

# In[29]:

#Take samples from the simulation
samples = [fp.get_samples(rng, i, 20) for i in pops]
def run_batch(argtuple):
    args, repid, batch = argtuple
    print("seed for batch = ", args.seed)
    nstub = "neutral.mu" + str(args.mu) + ".opt" + str(args.opt)
    sstub = "selected.mu" + str(args.mu) + ".opt" + str(args.opt)
    rnge = fp.GSLrng(args.seed)

    NANC = 7310
    locus_boundaries = [(float(i + i * 11), float(i + i * 11 + 11))
                        for i in range(args.nloci)]
    nregions = [
        fp.Region(j[0], j[1], args.theta / (4. * float(NANC)), coupled=True)
        for i, j in zip(range(args.nloci), locus_boundaries)
    ]
    recregions = [
        fp.Region(j[0], j[1], args.rho / (4. * float(NANC)), coupled=True)
        for i, j in zip(range(args.nloci), locus_boundaries)
    ]
    sregions = [
        fp.GaussianS(j[0] + 5., j[0] + 6., args.mu, args.sigmu, coupled=False)
        for i, j in zip(range(args.nloci), locus_boundaries)
    ]
    f = qtm.MlocusAdditiveTrait()

    nlist = np.array(get_nlist1(), dtype=np.uint32)
    pops = fp.MlocusPopVec(args.ncores, nlist[0], args.nloci)
    sampler = fp.NothingSampler(len(pops))

    d = datetime.datetime.now()
    print("starting batch, ", batch, "at ", d.now())
    qtm.evolve_qtraits_mloc_regions_sample_fitness(rnge, pops, sampler, f,
                                                   nlist[0:],
                                                   nregions, sregions, recregions,
                                                   [0.5] * (args.nloci - 1),
                                                   0, 0, 0.)
    d = datetime.datetime.now()
    print(d.now())
    nlist = np.array(get_nlist2(), dtype=np.uint32)
    qtm.evolve_qtraits_mloc_regions_sample_fitness(rnge, pops, sampler, f,
                                                   nlist[0:],
                                                   nregions, sregions, recregions,
                                                   [0.5] * (args.nloci - 1),
                                                   0, args.opt)
    d = datetime.datetime.now()
    print(d.now())

    if args.statfile is not None:
        sched = lsp.scheduler_init(args.TBB)
    for pi in pops:
        #Apply the sampler 1 population at a time.
        #This saves a fair bit of RAM.
        neutralFile = nstub + '.rep' + str(repid) + '.gz'
        selectedFile = sstub + '.rep' + str(repid) + '.gz'
        BIGsampler = fp.PopSampler(1, 6000, rnge, False, neutralFile,
                                   selectedFile, recordSamples=True,
                                   boundaries=locus_boundaries)
        fp.apply_sampler_single(pi, BIGsampler)
        if args.statfile is not None:
            for di in BIGsampler:
                process_samples((di, args.statfile, locus_boundaries, repid))
        repid += 1
    pops.clear()
    pops = None