def get_mean_r2(Ne, S, n_loci, gens, n_subpops, initial_frequencies, m):
    """Return the mean pairwise r2 of a sample drawn from subpopulation 0.

    A population of ``n_subpops`` subpopulations of size ``Ne`` is evolved for
    ``gens`` generations under random mating with migration, then ``S``
    individuals are sampled from subpopulation 0 only (every other
    subpopulation is given a sample size of zero).

    Args:
        Ne: size of each subpopulation.
        S: number of individuals sampled from subpopulation 0.
        n_loci: number of loci (passed as ``loci=[1] * n_loci``).
        gens: number of generations to evolve.
        n_subpops: number of subpopulations.
        initial_frequencies: starting frequency of allele 0; allele 1 starts
            at ``1 - initial_frequencies``.
        m: migration parameter handed to ``get_migration_matrix``.

    Returns:
        A one-element list containing the mean r2 over all pairs of
        segregating loci in the sample from subpopulation 0. Because only
        subpopulation 0 is sampled, the list does NOT have ``n_subpops``
        entries.

    Raises:
        ValueError: if fewer than two loci are segregating in the sample.
    """
    # Pairwise migration matrix between the subpopulations.
    M = get_migration_matrix(m, n_subpops)

    # Biallelic population; alleles are named "0" and "1".
    n_alleles = 2
    pop = sim.Population(size=[Ne] * n_subpops,
                         ploidy=2,
                         loci=[1] * n_loci,
                         alleleNames=[str(i) for i in range(n_alleles)],
                         infoFields='migrate_to')
    sim.initGenotype(pop, freq=[initial_frequencies, 1 - initial_frequencies])
    sim.initSex(pop)
    print(M)

    # Burn-in: migrate before mating in every generation.
    pop.evolve(initOps=[],
               preOps=sim.Migrator(M, mode=sim.BY_PROBABILITY),
               matingScheme=sim.RandomMating(),
               gen=gens)

    # Sample S individuals from subpopulation 0 and none from the others.
    sample_pop = drawRandomSample(pop, sizes=[S] + [0] * (n_subpops - 1))

    # Per-subpopulation allele frequencies and pairwise r2 for every locus pair.
    sim.stat(sample_pop, alleleFreq=range(0, n_loci), vars=['alleleFreq_sp'])
    sim.stat(sample_pop,
             LD=list(itertools.combinations(list(range(n_loci)), r=2)),
             vars=['R2_sp'])

    r2s = []
    for sp in [0]:
        sp_vars = sample_pop.dvars(sp)
        allele_freqs = sp_vars.alleleFreq
        # A locus counts as segregating when the frequency of allele 0 lies
        # strictly between 0.05 and 0.95.
        seg_alleles = [
            k for k in range(n_loci)
            if np.abs(.5 - allele_freqs[k][0]) < .5 - 0.05
        ]
        if len(seg_alleles) < 2:
            raise ValueError("<2 segregating alleles")
        pair_r2 = [
            sp_vars.R2[first][second]
            for first, second in itertools.combinations(seg_alleles, r=2)
        ]
        r2s.append(sum(pair_r2) / len(pair_r2))
    return r2s
def _create_island(self, pop_sizes, mig, nloci):
    """Build a population and operator lists for an island migration model.

    Args:
        pop_sizes: sequence of subpopulation sizes; its length is the number
            of islands.
        mig: migration rate passed to ``demography.migrIslandRates``.
        nloci: number of loci (passed as ``loci=[1] * nloci``, all autosomal).

    Returns:
        A tuple ``(pop, init_ops, pre_ops, post_ops)``: the new population,
        the initialisation operators (sex initialisation only), an empty list
        of pre-mating operators, and the post-mating operators (a single
        migrator).
    """
    # Register the field BEFORE building the population. The migrator uses
    # the 'migrate_to' information field, so a population created from
    # self._info_fields must already include it. Previously it was added
    # only after the population had been constructed.
    self._info_fields.add('migrate_to')

    init_ops = [sp.InitSex()]
    pop = sp.Population(pop_sizes,
                        ploidy=2,
                        loci=[1] * nloci,
                        chromTypes=[sp.AUTOSOME] * nloci,
                        infoFields=list(self._info_fields))
    post_ops = [
        sp.Migrator(demography.migrIslandRates(mig, len(pop_sizes)))
    ]
    pre_ops = []
    return pop, init_ops, pre_ops, post_ops
def _create_stepping_stone(self, pop_sizes, mig, nloci):
    """Build a population and operator lists for a stepping-stone model.

    Args:
        pop_sizes: a list of rows of subpopulation sizes. A single row gives a
            one-dimensional stepping-stone model; several rows give a
            two-dimensional model with ``len(pop_sizes)`` rows and
            ``len(pop_sizes[0])`` columns.
        mig: migration rate passed to the stepping-stone rate helpers.
        nloci: number of loci (passed as ``loci=[1] * nloci``, all autosomal).

    Returns:
        A tuple ``(pop, init_ops, pre_ops, post_ops)``: the new population,
        the initialisation operators (sex initialisation only), an empty list
        of pre-mating operators, and the post-mating operators (a single
        migrator).
    """
    # Register the field BEFORE building the population. The migrator uses
    # the 'migrate_to' information field, so a population created from
    # self._info_fields must already include it. Previously it was added
    # only after the population had been constructed.
    self._info_fields.add('migrate_to')

    if len(pop_sizes) == 1:
        # One row: linear stepping-stone layout.
        flat_pop_sizes = pop_sizes[0]
        rates = demography.migrSteppingStoneRates(mig, len(flat_pop_sizes))
    else:
        # Several rows: flatten row by row into a single list of sizes.
        flat_pop_sizes = []
        for line in pop_sizes:
            flat_pop_sizes.extend(line)
        rates = demography.migr2DSteppingStoneRates(
            mig, len(pop_sizes), len(pop_sizes[0]))
    post_ops = [sp.Migrator(rates)]

    init_ops = [sp.InitSex()]
    pop = sp.Population(flat_pop_sizes,
                        ploidy=2,
                        loci=[1] * nloci,
                        chromTypes=[sp.AUTOSOME] * nloci,
                        infoFields=list(self._info_fields))
    pre_ops = []
    return pop, init_ops, pre_ops, post_ops
def get_mean_r2(Ne, S, n_loci, gens, repeats, n_subpops, initial_frequencies, m):
    """Return the mean pairwise r2 of a sample from each subpopulation.

    Args:
        Ne: size of each subpopulation.
        S: number of individuals sampled from each subpopulation.
        n_loci: number of loci (passed as ``loci=[1] * n_loci``).
        gens: number of generations to evolve.
        repeats: accepted for interface compatibility; not used here.
        n_subpops: number of subpopulations.
        initial_frequencies: allele frequencies handed to ``InitGenotype``.
        m: migration parameter handed to ``get_migration_matrix``.

    Returns:
        A list of length ``n_subpops``; entry ``i`` is the mean r2 over all
        pairs of segregating loci in the sample from subpopulation ``i``.

    Raises:
        ValueError: if a subpopulation sample has fewer than two segregating
            loci.
    """
    M = get_migration_matrix(m, n_subpops)
    pop = sim.Population(size=[Ne] * n_subpops,
                         ploidy=2,
                         loci=[1] * n_loci,
                         infoFields='migrate_to')

    # Genotypes are initialised once, by the InitGenotype operator in initOps.
    # The earlier function-form sim.initGenotype call was redundant because
    # initOps re-initialises every genotype before the first generation.
    pop.evolve(
        initOps=[sim.InitSex(),
                 sim.InitGenotype(freq=initial_frequencies)],
        preOps=sim.Migrator(rate=M),
        matingScheme=sim.RandomMating(),
        gen=gens)

    sample_pop = drawRandomSample(pop, sizes=[S] * n_subpops)

    # Per-subpopulation allele frequencies and pairwise r2 for every locus pair.
    sim.stat(sample_pop, alleleFreq=range(0, n_loci), vars=['alleleFreq_sp'])
    sim.stat(sample_pop,
             LD=list(combinations(list(range(n_loci)), r=2)),
             vars=['R2_sp'])

    r2s = []
    for sp in range(n_subpops):
        sp_vars = sample_pop.dvars(sp)
        allele_freqs = sp_vars.alleleFreq
        # A locus counts as segregating when the frequency of allele 0 lies
        # strictly between 0.05 and 0.95.
        seg_alleles = [
            k for k in range(n_loci)
            if np.abs(.5 - allele_freqs[k][0]) < .5 - 0.05
        ]
        if len(seg_alleles) < 2:
            raise ValueError("<2 segregating alleles")
        pair_r2 = [
            sp_vars.R2[first][second]
            for first, second in combinations(seg_alleles, r=2)
        ]
        r2s.append(sum(pair_r2) / len(pair_r2))
    return r2s
def get_FCs(Ne, S, n_loci, gens, n_subpops, initial_frequencies, m):
    """Simulate allele-frequency fluctuations and return one FC per subpopulation.

    Two populations are initialised independently from the same allele
    frequencies: ``popS`` acts as the initial sample and is never evolved,
    while ``popNe`` is evolved for ``gens`` generations with migration and
    then sampled. FC is computed per locus from the initial and final sample
    frequencies and averaged over the loci that are polymorphic in the
    initial sample.

    Args:
        Ne: size of each evolving subpopulation.
        S: sample size per subpopulation (both initial and final samples).
        n_loci: number of loci (passed as ``loci=[1] * n_loci``).
        gens: number of generations to evolve.
        n_subpops: number of subpopulations.
        initial_frequencies: allele frequencies handed to ``initGenotype``.
        m: migration parameter handed to ``get_migration_matrix``.

    Returns:
        A list of length ``n_subpops`` with the mean FC of each subpopulation.

    Raises:
        ValueError: if a subpopulation has no polymorphic locus in its initial
            sample, so no FC can be averaged (this used to surface as a bare
            ZeroDivisionError).
    """
    # Population to evolve.
    popNe = sim.Population(size=[Ne] * n_subpops,
                           ploidy=2,
                           loci=[1] * n_loci,
                           infoFields='migrate_to')
    # Initial sample population; initialised separately and never evolved.
    popS = sim.Population(size=[S] * n_subpops, ploidy=2, loci=[1] * n_loci)
    sim.initGenotype(popNe, freq=initial_frequencies)
    sim.initGenotype(popS, freq=initial_frequencies)

    # Allele frequencies of the initial sample.
    sim.stat(popS, alleleFreq=range(n_loci), vars=['alleleFreq_sp'])

    M = get_migration_matrix(m, n_subpops)
    popNe.evolve(initOps=[sim.InitSex()],
                 preOps=sim.Migrator(rate=M),
                 matingScheme=sim.RandomMating(),
                 gen=gens)

    # Allele frequencies of the final sample.
    sample_pop = drawRandomSample(popNe, sizes=[S] * n_subpops)
    sim.stat(sample_pop, alleleFreq=range(n_loci), vars=['alleleFreq_sp'])

    all_FCs = []
    for sp in range(n_subpops):
        initial_allele_frequencies = popS.dvars(sp).alleleFreq
        final_allele_frequencies = sample_pop.dvars(sp).alleleFreq
        sp_count = 0
        sp_FC = 0
        for locus in range(n_loci):
            init_pair = repair(initial_allele_frequencies[locus])
            end_pair = repair(final_allele_frequencies[locus])
            # p**2 + q**2 equals 1 only when one allele is fixed, so this
            # skips loci that are monomorphic in the initial sample.
            if init_pair[0]**2 + init_pair[1]**2 != 1:
                sp_FC += fc_variant([init_pair[0], init_pair[1]],
                                    [end_pair[0], end_pair[1]])
                sp_count += 1
        if sp_count == 0:
            raise ValueError(
                "no polymorphic loci in the initial sample of "
                "subpopulation %d" % sp)
        all_FCs.append(sp_FC / sp_count)
    return all_FCs
... ResizeEvent(at=500, sizes=1000), ... ] ... ) pop = sim.Population(size=model.init_size, loci=[10]*10, ... infoFields='migrate_to') pop.evolve( initOps=[ sim.InitSex(), sim.InitGenotype(freq=[0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1]) ], preOps=[ sim.Migrator(rate=[ [0,0.000005,0.000005], [0.000005,0,0.000005], [0.000005,0.000005,0], ],mode=sim.BY_PROPORTION, end=499), sim.Migrator(rate=[ [0,0.005,0.005], [0.005,0,0.005], [0.005,0.005,0], ],mode=sim.BY_PROPORTION, begin=500) ], matingScheme=sim.RandomMating(subPopSize=model,ops=sim.Recombinator(rates=0.01)), postOps=[ sim.PyOperator(func=calcFst, step=100) ], gen = 1000 )
pop.resize([130, 338], propagate=True) # set sex for idx in range(68, 130): pop.individual(idx, 0).setSex(sim.MALE) # add 110-37=73 male for idx in range(192, 192 + 73): pop.individual(idx, 1).setSex(sim.MALE) for idx in range(192 + 73, 338): pop.individual(idx, 1).setSex(sim.FEMALE) migr = sim.Migrator( rate=[ [0, 0.1], [0, 0.001], [0.05, 0], [0.001, 0], ], mode=sim.BY_PROBABILITY, subPops=[(0, 'Male'), (0, 'Female'), (1, 'Male'), (1, 'Female')], toSubPops=[0, 1], ) # let us test the migration pop.addInfoFields('migrate_to') def demoModel(gen, pop): if gen < 50: sz = 130, 550 elif gen < 120: sz = 80, 550
node_ids = {x:j for x, j in zip(haploid_labels, init_ts.samples())} rc = RecombCollector(ts=init_ts, node_ids=node_ids, locus_position=locus_position) migr_rates = migrRates(args.migr, m=args.gridwidth, n=args.gridheight, barrier=False) barrier_rates = migrRates(args.migr, m=args.gridwidth, n=args.gridheight, barrier=True) pop.evolve( initOps=[ sim.InitSex(), ]+init_geno, preOps=[ sim.PyOperator(lambda pop: rc.increment_time() or True), sim.Migrator( rate=migr_rates, mode=sim.BY_PROBABILITY, begin=0, end=args.pre_generations), sim.Migrator( rate=barrier_rates, mode=sim.BY_PROBABILITY, begin=1 + args.pre_generations, end=args.split_generations + args.pre_generations), sim.Migrator( rate=migr_rates, mode=sim.BY_PROBABILITY, begin=1 + args.split_generations + args.pre_generations, end=args.split_generations + args.pre_generations + args.post_generations), sim.SNPMutator(u=args.sel_mut_rate, v=args.sel_mut_rate), sim.PyMlSelector(fitness_fun, output=">>"+selloci_file),
def simuGWAS(pop, mutaRate=1.8e-8, recIntensity=1e-8, migrRate=0.0001,
             expandGen=500, expandSize=[10000], DPL=[], curFreq=[],
             fitness=[1, 1, 1], scale=1, logger=None):
    """Expand ``pop`` while steering allele frequencies at the loci in ``DPL``.

    The list-valued defaults are only read or rebound, never mutated in
    place, so sharing them between calls is harmless.

    Args:
        pop: population to evolve (modified in place and returned).
        mutaRate: per-generation mutation rate; multiplied by ``scale``.
        recIntensity: recombination intensity; multiplied by ``scale``.
        migrRate: stepping-stone migration rate; multiplied by ``scale``.
        expandGen: number of generations to evolve; divided by ``scale``.
        expandSize: final subpopulation sizes for the linear expansion.
        DPL: names of the loci whose allele frequencies are controlled.
        curFreq: target allele frequencies at the loci in ``DPL``.
        fitness: genotype fitness values; the deviation of each value from 1
            is multiplied by ``scale``.
        scale: scaling factor, also stored as the population variable
            ``scale``.
        logger: optional logger passed to the trajectory simulators.

    Returns:
        The evolved population.

    Raises:
        SystemError: if no frequency trajectory could be simulated.
    """
    # Apply scaling.
    mutaRate *= scale
    recIntensity *= scale
    migrRate *= scale
    expandGen = int(expandGen / scale)
    fitness = [1 + (x - 1) * scale for x in fitness]
    pop.dvars().scale = scale

    # Demographic function.
    demoFunc = linearExpansion(pop.subPopSizes(), expandSize, expandGen)

    # Trajectory function and the operators that introduce mutants.
    trajFunc = None
    introOps = []
    if len(DPL) > 0:
        stat(pop, alleleFreq=DPL, vars='alleleFreq_sp')
        currentFreq = []
        for sp in range(pop.numSubPop()):
            for loc in pop.lociByNames(DPL):
                currentFreq.append(pop.dvars(sp).alleleFreq[loc][1])
        # NOTE(review): a forward simulation is used when there are NO
        # existing mutants and a backward simulation (which introduces
        # mutants) when there ARE. This looks inverted; confirm against the
        # intended design. The condition is left unchanged here.
        if sum(currentFreq) == 0.:
            endFreq = [(x - min(0.01, x / 5.),
                        x + min(0.01, x / 5., (1 - x) / 5.))
                       for x in curFreq]
            traj = simulateForwardTrajectory(N=demoFunc,
                                             beginGen=0,
                                             endGen=expandGen,
                                             beginFreq=currentFreq,
                                             endFreq=endFreq,
                                             nLoci=len(DPL),
                                             fitness=fitness,
                                             maxAttempts=1000,
                                             logger=logger)
            needs_mutators = False
        else:
            traj = simulateBackwardTrajectory(N=demoFunc,
                                              endGen=expandGen,
                                              endFreq=curFreq,
                                              nLoci=len(DPL),
                                              fitness=fitness,
                                              minMutAge=1,
                                              maxMutAge=expandGen,
                                              logger=logger)
            needs_mutators = True
        # Check for failure BEFORE touching the trajectory. Previously
        # traj.mutators() ran first, so a failed backward simulation raised
        # AttributeError instead of this error.
        if traj is None:
            raise SystemError(
                'Failed to generated trajectory after 1000 attempts.')
        if needs_mutators:
            introOps = traj.mutators(loci=DPL)
        trajFunc = traj.func()

    if pop.numSubPop() > 1:
        pop.addInfoFields('migrate_to')

    pop.evolve(
        initOps=sim.InitSex(),
        preOps=[
            sim.SNPMutator(u=mutaRate, v=mutaRate),
            # Migration only applies when there is more than one subpopulation.
            sim.IfElse(
                pop.numSubPop() > 1,
                sim.Migrator(
                    rate=migrSteppingStoneRates(migrRate, pop.numSubPop()))),
        ] + introOps,
        matingScheme=sim.ControlledRandomMating(
            loci=DPL,
            alleles=[1] * len(DPL),
            freqFunc=trajFunc,
            ops=sim.Recombinator(intensity=recIntensity),
            subPopSize=demoFunc),
        postOps=[
            sim.Stat(popSize=True, structure=range(pop.totNumLoci())),
            sim.PyEval(
                r'"After %3d generations, size=%s\n" % ((gen + 1 )* scale, subPopSize)'
            ),
            sim.IfElse(pop.numSubPop() > 1,
                       sim.PyEval(r"'F_st = %.3f\n' % F_st", step=10),
                       step=10),
        ],
        gen=expandGen)
    return pop
def update_accumulator(pop, param):
    """Append a deep copy of a statistic to its accumulator list.

    ``param`` is an ``(accumulator, var)`` pair. When ``var`` ends in
    ``'_sp'`` the statistic (name without the suffix) is read from every
    subpopulation's variables and appended to that subpopulation's
    accumulator entry; otherwise the population-level variable is appended.
    Always returns True.
    """
    accumulator, var = param
    if not var.endswith('_sp'):
        pop.vars()[accumulator].append(deepcopy(pop.vars()[var]))
        return True

    stat_name = var[:-3]
    for subpop_index in range(pop.numSubPop()):
        snapshot = deepcopy(pop.vars(subpop_index)[stat_name])
        pop.vars()[accumulator][subpop_index].append(snapshot)
    return True


# Initialisation: accumulators, sex, and equal frequencies for two alleles.
init_ops['accumulators'] = sp.PyOperator(init_accumulators, param=['fst'])
init_ops['Sex'] = sp.InitSex()
init_ops['Freq'] = sp.InitGenotype(freq=[0.5, 0.5])

# One island-model migrator per replicate, each with its own migration rate.
for i, mig in enumerate(migs):
    island_rates = demography.migrIslandRates(mig, num_pops)
    post_ops['mig-%d' % i] = sp.Migrator(island_rates, reps=[i])

# Population structure statistics, then record F_st after every generation.
post_ops['Stat-fst'] = sp.Stat(structure=sp.ALL_AVAIL)
post_ops['fst_accumulation'] = sp.PyOperator(update_accumulator,
                                             param=('fst', 'F_st'))

mating_scheme = sp.RandomMating()

sim = sp.Simulator(pops, rep=len(migs))
sim.evolve(
    initOps=list(init_ops.values()),
    preOps=list(pre_ops.values()),
    postOps=list(post_ops.values()),
    matingScheme=mating_scheme,
    gen=num_gens,
)

import seaborn as sns
sns.set_style('white')
import matplotlib.pyplot as plt
import simuPOP as sim
from simuPOP.utils import migrIslandRates

# Starting proportion of allele 0 in each of the three subpopulations.
p = [0.2, 0.3, 0.5]
pop = sim.Population(size=[10000] * 3, loci=1, infoFields='migrate_to')
simu = sim.Simulator(pop, rep=2)

# Sex first, then one genotype initialiser per subpopulation.
_init_ops = [sim.InitSex()]
_init_ops += [
    sim.InitGenotype(prop=[p[i], 1 - p[i]], subPops=i) for i in range(3)
]

# Every 50 generations: print Fst and the allele-0 frequency of each
# subpopulation, then end the line after the last replicate.
_post_ops = [
    sim.Stat(alleleFreq=0, structure=0, vars='alleleFreq_sp', step=50),
    sim.PyEval(
        "'Fst=%.3f (%s)\t' % (F_st, ', '.join(['%.2f' % "
        "subPop[x]['alleleFreq'][0][0] for x in range(3)]))",
        step=50),
    sim.PyOutput('\n', reps=-1, step=50),
]

simu.evolve(
    initOps=_init_ops,
    # Island-model migration is applied to replicate 0 only.
    preOps=sim.Migrator(rate=migrIslandRates(0.01, 3), reps=0),
    matingScheme=sim.RandomMating(),
    postOps=_post_ops,
    gen=201)
subPopNames=str( list(networkmodel.get_subpopulation_names())), infoFields='migrate_to', ploidy=1, loci=100) ### now set up the activities init_ops['acumulators'] = sp.PyOperator( init_acumulators, param=['fst', 'alleleFreq', 'haploFreq']) init_ops['Sex'] = sp.InitSex() init_ops['Freq'] = sp.InitGenotype(loci=0, freq=distribution) post_ops['Innovate'] = sp.KAlleleMutator(k=MAXALLELES, rates=innovation_rate, loci=sp.ALL_AVAIL) #post_ops['mig'] = sp.Migrator(demography.migrIslandRates(migration_rate, num_pops)) #, reps=[i]) post_ops['mig'] = sp.Migrator(rate=networkmodel.get_migration_matrix()) #for i, mig in enumerate(migs): # post_ops['mig-%d' % i] = sp.Migrator(demography.migrIslandRates(mig, num_pops), reps=[i]) post_ops['Stat-fst'] = sp.Stat(structure=sp.ALL_AVAIL) #post_ops['haploFreq']=sp.stat(pops, haploFreq=[0], vars=['haploFreq', 'haploNum']) #post_ops['alleleFreq']=sp.stat(pops, alleleFreq=sp.ALL_AVAIL) post_ops['Stat-richness'] = sp.Stat( alleleFreq=[0], haploFreq=[0], vars=['alleleFreq', 'haploFreq', 'alleleNum', 'genoNum']) post_ops['fst_acumulation'] = sp.PyOperator(update_acumulator, param=['fst', 'F_st']) post_ops['richness_acumulation'] = sp.PyOperator( update_richness_acumulator, param=('alleleFreq', 'Freq of Alleles'))
return 1. - 2.*s pop = sim.Population( size=[popsize]*npops, loci=[nloci], lociPos=locus_position, infoFields=['ind_id','fitness','migrate_to']) pop.evolve( initOps=[ sim.InitSex(), sim.IdTagger(), ]+init_geno, preOps=[ sim.Migrator( rate=migr2DSteppingStoneRates( migr, m=width, n=width, diagonal=False, circular=False), mode=sim.BY_PROBABILITY), sim.AcgtMutator(rate=[mut_rate], model='JC69'), sim.PyMlSelector(GammaDistributedFitness(alpha, beta), output='>>'+selloci_file), ], matingScheme=sim.RandomMating( ops=[ sim.IdTagger(), sim.Recombinator(intensity=recomb_rate, output=outfile, infoFields="ind_id"), ] ), postOps=[ sim.Stat(numOfSegSites=sim.ALL_AVAIL, step=50), sim.PyEval(r"'Gen: %2d #seg sites: %d\n' % (gen, numOfSegSites)", step=50)
# description of this example.
#

import simuPOP as sim
from simuPOP.utils import migrSteppingStoneRates

# A population with no genotype at all: only the information fields needed
# for pedigree tracking and migration.
ped = sim.Population(
    size=[1000] * 5,
    ancGen=-1,
    infoFields=['ind_id', 'father_id', 'mother_id', 'migrate_to'])

# Offspring get a fresh ID and a record of their parents' IDs; no genotype
# transmitter is needed.
_offspring_ops = [
    sim.IdTagger(),
    sim.PedigreeTagger(),
]

ped.evolve(
    initOps=[
        sim.InitSex(),
        sim.IdTagger(),
    ],
    preOps=sim.Migrator(rate=migrSteppingStoneRates(0.1, 5)),
    matingScheme=sim.RandomMating(
        numOffspring=(sim.UNIFORM_DISTRIBUTION, 2, 4),
        ops=_offspring_ops),
    gen=100)

# Turn the population itself into a pedigree object.
ped.asPedigree()
# 100 ancestral generations are expected here.
N = ped.ancestralGens()
# There should be 101 * 1000 * 5 individuals in total; find out how many of
# them actually contribute genotype to the last generation.
anc = ped.identifyAncestors()
def _parse_bool(text):
    """Convert a command-line token such as 'true' or 'False' to a bool."""
    lowered = str(text).strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean, got %r' % text)


def _run_colors():
    """Return the ordered colour names used to tell (k, subpops) runs apart."""
    return list(dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS).keys())


def _plot_replicates(ax, results, k_values, subpop_values, reps, attr):
    """Plot ``attr`` for every replicate, labelling only the first of each run."""
    colors = _run_colors()
    iteration = -1
    for k in k_values:
        for subnum in subpop_values:
            iteration += 1
            for n in range(reps):
                series = getattr(results[k][subnum][n], attr)
                if n == 0:
                    ax.plot(series,
                            color=colors[iteration],
                            label='k = %s subpops = %s' % (k, subnum))
                else:
                    ax.plot(series, color=colors[iteration])


def _plot_confidence(ax, results, k_values, subpop_values, reps, attr):
    """Plot the across-replicate mean of ``attr`` with its confidence bounds."""
    colors = _run_colors()
    iteration = -1
    for k in k_values:
        for subnum in subpop_values:
            iteration += 1
            per_rep = [
                list(getattr(results[k][subnum][n], attr))
                for n in range(reps)
            ]
            ci_average = []
            ci_min = []
            ci_max = []
            for t in range(len(getattr(results[k][subnum][0], attr))):
                point_in_time = [series[t] for series in per_rep]
                (ave, low, high) = utils.mean_confidence_interval(
                    point_in_time, confidence=0.95)
                ci_average.append(ave)
                ci_min.append(low)
                ci_max.append(high)
            ax.plot(ci_average,
                    color=colors[iteration],
                    label='k = %s subpops = %s' % (k, subnum))
            ax.plot(ci_min, "--", color="0.5")
            ax.plot(ci_max, "--", color="0.5")
            # x must be the time index, matching the lines above; the
            # averages themselves were previously passed as x coordinates.
            ax.fill_between(list(range(len(ci_average))),
                            ci_max,
                            ci_min,
                            color="None",
                            linestyle="--")


def main():
    """Run the network-model experiment described on the command line.

    For every combination of ``--k_values`` and ``--sub_pops`` a network
    model is built, a haploid population is evolved under it for
    ``--simlength`` generations in ``--reps`` replicates, and the Fst trace
    of each replicate is plotted. Summary and confidence-interval figures for
    Fst and richness are then shown and saved under the experiment's output
    directory.

    Results are stored in a module-level ``output`` mapping that is not
    defined in this function; presumably a nested mapping created elsewhere
    in the file -- TODO confirm.

    ``--debug``, ``--networkfile``, ``--seed`` and ``--rewiringprob`` are
    parsed but not used in this function.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--experiment",
                        help="provide name for experiment",
                        required=True,
                        type=str,
                        default="test")
    parser.add_argument("--debug", help="turn on debugging output")
    parser.add_argument("--reps",
                        help="Replicated populations per parameter set",
                        type=int,
                        default=1)
    parser.add_argument(
        "--networkfile",
        help="Name of GML file representing the network model for this simulation",
        required=True,
        type=str)
    parser.add_argument("--numloci",
                        help="Number of loci per individual",
                        type=int,
                        required=True)
    parser.add_argument(
        "--maxinittraits",
        help="Max initial number of traits per locus for initialization",
        type=int,
        required=True)
    parser.add_argument(
        "--innovrate",
        help="Rate at which innovations occur in population as a per-locus rate",
        type=float,
        default=0.001)
    # The help text used to claim a default of 3000 while the default was 20.
    parser.add_argument(
        "--simlength",
        help="Time at which simulation and sampling end, defaults to 20 generations",
        type=int,
        default=20)
    parser.add_argument(
        "--popsize",
        help="Initial size of population for each community in the model",
        type=int,
        required=True)
    parser.add_argument(
        "--migrationfraction",
        nargs='+',
        help="Fraction of population that migrates each time step",
        type=float,
        required=True,
        default=[])
    parser.add_argument(
        "--seed",
        type=int,
        help="Seed for random generators to ensure replicability")
    parser.add_argument("--k_values",
                        nargs='+',
                        type=int,
                        help="list of k-values to explore [e.g., 2 4 20 24",
                        default=[])
    # NOTE(review): values arrive as strings (no type=) and are handed to
    # NetworkModel unchanged; left as-is because its expectations are not
    # visible here.
    parser.add_argument("--sub_pops",
                        nargs='+',
                        help="Number of sub populations",
                        required=True,
                        default=[10])
    parser.add_argument("--maxalleles",
                        type=int,
                        help="Maximum number of alleles",
                        default=50)
    # type=bool would turn any non-empty string, including "False", into True.
    parser.add_argument("--save_figs",
                        type=_parse_bool,
                        help="Save figures or not?",
                        default=True)
    parser.add_argument(
        "--burnintime",
        type=int,
        help="How long to wait before making measurements? ",
        default=2000)
    parser.add_argument("--rewiringprob",
                        type=float,
                        help="Probability of random rewiring",
                        default=0)

    config = parser.parse_args()

    # Check the k and migration rate combinations. --migrationfraction is a
    # list (nargs='+'), so every fraction is checked individually; calling
    # float() on the whole list raised TypeError.
    for kvalue in config.k_values:
        for fraction in config.migrationfraction:
            if float(kvalue) * float(fraction) >= 1.0:
                print("k=%s * mig=%4f is greater than 1.0\n" %
                      (kvalue, fraction))
                print(
                    "Please adjust input values for k and/or migration rate and restart.\n "
                )
                sys.exit()

    # Set up output directories for writing.
    output_path = utils.setup_output(config.experiment)

    # Save parameters.
    utils.save_parameters(str(sys.argv), config, output_path)

    k_run_values = config.k_values
    subpop_run_values = config.sub_pops

    # Make sure the k values are less than # of subpops and > 1.
    for k in k_run_values:
        for subnum in subpop_run_values:
            if int(k) > int(subnum) or int(k) < 2:
                print(
                    "k values can not be greater than the number of sub populations. k = %s subpops = %s \n"
                    % (k, subnum))
                sys.exit()

    # Initialize the output dictionary.
    for k in k_run_values:
        for sb in subpop_run_values:
            output[k][sb] = {}

    # Set up the frequencies for the alleles in each locus, assuming a
    # uniform distribution as a starting point.
    distribution = utils.constructUniformAllelicDistribution(
        config.maxinittraits)

    iteration_number = -1
    for k in k_run_values:
        for subnum in subpop_run_values:
            iteration_number += 1
            # Lists of things that simuPOP will do at different stages.
            init_ops = OrderedDict()
            pre_ops = OrderedDict()
            post_ops = OrderedDict()

            # Construct a demographic model from a collection of network
            # slices which represent a temporal network of changing
            # subpopulations and interaction strengths.
            networkmodel = network.NetworkModel(
                networkmodel="smallworld",
                simulation_id=config.experiment,
                sim_length=config.simlength,
                burn_in_time=config.burnintime,
                initial_subpop_size=config.popsize,
                migrationfraction=config.migrationfraction,
                sub_pops=subnum,
                connectedness=k,  # if 0, then distance decay
                save_figs=config.save_figs,
                network_iteration=iteration_number)

            num_pops = networkmodel.get_subpopulation_number()
            sub_pop_size = int(config.popsize / num_pops)

            # Construct the initial population.
            pops = sp.Population(
                size=[sub_pop_size] * num_pops,
                subPopNames=str(list(networkmodel.get_subpopulation_names())),
                infoFields='migrate_to',
                ploidy=1,
                loci=config.numloci)

            # Now set up the activities.
            init_ops['acumulators'] = sp.PyOperator(
                utils.init_acumulators,
                param=['fst', 'alleleFreq', 'haploFreq'])
            init_ops['Sex'] = sp.InitSex()
            init_ops['Freq'] = sp.InitGenotype(
                loci=list(range(config.numloci)), freq=distribution)

            post_ops['Innovate'] = sp.KAlleleMutator(k=config.maxalleles,
                                                     rates=config.innovrate,
                                                     loci=sp.ALL_AVAIL)
            post_ops['mig'] = sp.Migrator(
                rate=networkmodel.get_migration_matrix())
            post_ops['Stat-fst'] = sp.Stat(structure=sp.ALL_AVAIL)
            post_ops['Stat-richness'] = sp.Stat(
                alleleFreq=[0],
                haploFreq=[0],
                vars=['alleleFreq', 'haploFreq', 'alleleNum', 'genoNum'])
            post_ops['fst_acumulation'] = sp.PyOperator(
                utils.update_acumulator, param=['fst', 'F_st'])
            post_ops['richness_acumulation'] = sp.PyOperator(
                utils.update_richness_acumulator,
                param=('alleleFreq', 'Freq of Alleles'))
            post_ops['class_richness'] = sp.PyOperator(
                utils.calculateAlleleAndGenotypeFrequencies,
                param=(config.popsize, config.numloci))

            mating_scheme = sp.RandomSelection()

            sim = sp.Simulator(pops, rep=config.reps)
            print("now evolving... k = %s with sub_pops = %s" % (k, subnum))
            sim.evolve(initOps=list(init_ops.values()),
                       preOps=list(pre_ops.values()),
                       postOps=list(post_ops.values()),
                       matingScheme=mating_scheme,
                       gen=config.simlength)

            # Figure of the Fst results for this run, one line per replicate.
            fig = plt.figure(figsize=(16, 9))
            ax = fig.add_subplot(111)
            count = 0
            for pop in sim.populations():
                ax.plot(pop.dvars().fst, label='Replicate: %s' % count)
                output[k][subnum][count] = deepcopy(pop.dvars())
                count += 1
            ax.legend(loc=2)
            ax.set_ylabel('FST')
            ax.set_xlabel('Generation')
            plt.show()

    # Fst of every replicate of every run.
    sum_fig = plt.figure(figsize=(16, 9))
    ax = sum_fig.add_subplot(111)
    _plot_replicates(ax, output, k_run_values, subpop_run_values,
                     config.reps, 'fst')
    ax.legend(loc=2)
    ax.set_ylabel('Fst')
    ax.set_xlabel('Generations')
    plt.show()
    savefilename = output_path + "/sum_fig.png"
    sum_fig.savefig(savefilename, bbox_inches='tight')

    # Richness of every replicate of every run. The original loop iterated
    # over `sb` but indexed with a stale `subnum`, so every series came from
    # the last subpopulation count.
    rich_fig = plt.figure(figsize=(16, 9))
    ax = rich_fig.add_subplot(111)
    _plot_replicates(ax, output, k_run_values, subpop_run_values,
                     config.reps, 'richness')
    ax.legend(loc=2)
    ax.set_ylabel('Richness')
    ax.set_xlabel('Generations')
    plt.show()
    savefilename = output_path + "/richness.png"
    rich_fig.savefig(savefilename, bbox_inches='tight')

    # Confidence intervals for Fst.
    summary_fig = plt.figure(figsize=(16, 9))
    ax = summary_fig.add_subplot(111)
    _plot_confidence(ax, output, k_run_values, subpop_run_values,
                     config.reps, 'fst')
    ax.legend(loc=2)
    ax.set_ylabel('Fst')
    ax.set_xlabel('Generation')
    plt.show()
    savefilename = output_path + "/summary-ci.png"
    summary_fig.savefig(savefilename, bbox_inches='tight')

    # Confidence intervals for richness.
    richness_sum_fig = plt.figure(figsize=(16, 9))
    ax = richness_sum_fig.add_subplot(111)
    _plot_confidence(ax, output, k_run_values, subpop_run_values,
                     config.reps, 'richness')
    ax.legend(loc=2)
    ax.set_ylabel('Richness')
    ax.set_xlabel('Generation')
    plt.show()
    savefilename = output_path + "/richness-ci.png"
    richness_sum_fig.savefig(savefilename, bbox_inches='tight')
pop = sim.Population(size=[popsize], loci=vecloc, infoFields=['migrate_to', 'fitness', 'env'], lociNames=allele_naming(numchrom, numloc)) #--------------------------Main evolving process------------------------ pop.evolve( #Initializing sex and genotype initOps=[ sim.InitSex(), sim.InitGenotype(freq=[0.5, 0.5]), ], preOps=[ #Fisson of population into 16 at 'atgen' generation sim.SplitSubPops(proportions=[0.0625] * 16, at=atgen), #Migration using a simple stepping stone model sim.Migrator(rate=migrSteppingStoneRates(m, 16, circular=False), begin=atgen), #Selection process #Set environmental value (env infoField) for each individual in the population #Takes place at each generation after the first fission sim.PyOperator(env_set, begin=atgen), #Selection occures at selected loci according to env information field sim.PySelector(fit_env, loci=locisel, begin=atgen), ], #Mating at random (pangamy) matingScheme=sim.RandomMating( #Fixed population size (fixed at 'popsize') subPopSize=demo, #Recombination ops=[sim.Recombinator(rates=0.002)]), postOps=[ #Mutation rate 10e-6
m=args.gridwidth, n=args.gridheight, barrier=False) barrier_rates = migrRates(args.migr, m=args.gridwidth, n=args.gridheight, barrier=True) pop.evolve( initOps=[ sim.InitSex(), ] + init_geno, preOps=[ sim.PyOperator(lambda pop: rc.increment_time() or True), sim.Migrator(rate=migr_init, mode=sim.BY_PROBABILITY, begin=0, end=args.switch_time), sim.Migrator(rate=migr_change, mode=sim.BY_PROBABILITY, begin=args.switch_time), sim.AcgtMutator(rate=[args.sel_mut_rate], model='JC69'), sim.PyMlSelector(GammaDistributedFitness(args.gamma_alpha, args.gamma_beta), output=">>" + selloci_file), ], matingScheme=sim.RandomMating(ops=[ id_tagger, sim.Recombinator(intensity=args.recomb_rate, output=rc.collect_recombs, infoFields="ind_id"), ]),
# This script is an example in the simuPOP user's guide. Please refer to
# the user's guide (http://simupop.sourceforge.net/manual) for a detailed
# description of this example.
#

import simuPOP as sim

pop = sim.Population(size=[1000] * 2, infoFields='migrate_to')
pop.setVirtualSplitter(sim.SexSplitter())

# Expression printed before and after mating: males/size in each subpopulation.
_male_counts = (
    r"'%d/%d\t%d/%d\n' % (subPop[0]['numOfMales'], subPopSize[0], "
    "subPop[1]['numOfMales'], subPopSize[1])")

# Migration out of the two virtual subpopulations (defined by the sex
# splitter) of subpopulation 0, each with its own rate into subpopulation 1.
_migrator = sim.Migrator(
    rate=[
        [0, 0.10],
        [0, 0.05],
    ],
    mode=sim.BY_PROPORTION,
    subPops=[(0, 0), (0, 1)])

pop.evolve(
    # 500 males and 500 females
    initOps=sim.InitSex(sex=[sim.MALE, sim.FEMALE]),
    preOps=[
        _migrator,
        sim.Stat(popSize=True, numOfMales=True, vars='numOfMales_sp'),
        sim.PyEval(_male_counts),
    ],
    matingScheme=sim.RandomMating(),
    postOps=[
        sim.Stat(popSize=True, numOfMales=True, vars='numOfMales_sp'),
        sim.PyEval(_male_counts),
    ],
    gen=2)
if a[0] > crit and a[1] > crit: return True return False simu = False if simu: m = .5 S = 200 nLoci = 100 pop = sim.Population(loci=[1] * nLoci, size=[500] * 2, infoFields='migrate_to') pop.evolve(initOps=[sim.InitSex(), sim.InitGenotype(freq=[.5, .5])], preOps=sim.Migrator(rate=[[0, m], [m, .0]]), matingScheme=sim.RandomMating(), postOps=[ sim.Stat(popSize=True), sim.PyEval('subPopSize'), sim.PyOutput('\n') ], gen=100) sim.Migrator(rate=[[0, m], [m, .0]]) sample_pop = drawRandomSample(pop, sizes=[S] * 2) sim.stat(sample_pop, LD=list(itertools.combinations(list(range(nLoci)), r=2)), vars=['R2_sp']) sim.stat(sample_pop, alleleFreq=range(0, nLoci), vars=['alleleFreq_sp']) r2s = sample_pop.dvars().__dict__['subPop'][0]['R2']
def simuRareVariants(regions, N, G, mu, selDist, selCoef, selModel='exponential',
                     recRate=0, splitTo=[1], splitAt=0, migrRate=0, steps=[100],
                     mutationModel='finite_sites', initPop='', extMutantFile='',
                     addMutantsAt=0, postHook=None, statFile='', popFile='',
                     markerFile='', mutantFile='', genotypeFile='', verbose=1,
                     logger=None):
    '''
    Please refer to simuRareVariants.py -h for a detailed description of all
    parameters. Note that a user-defined function can be passed to parameter
    selDist to specify arbitrary distribution of fitness. A script-only
    feature is that a Python function can be provided through parameter
    postHook to process the population at each generation.

    Regions are strings of the form ``name:start..end``. The list-valued
    defaults (``splitTo``, ``steps``) are only read or rebound, never mutated
    in place. ``logger`` is optional; nothing is logged when it is None.

    Returns the evolved population, with the collected selection coefficients
    stored in the population variable ``selCoef``.

    Raises ValueError for a chromosome Y region, for several regions when one
    of them is on chromosome X, for an unsupported ``selDist``, or when a
    loaded initial population does not match ``regions``.
    '''
    #
    # convert regions to start/end positions
    ranges = []
    chromTypes = []
    for region in regions:
        start, end = [int(x) for x in region.split(':')[1].split('..')]
        ranges.append((start, end + 1))
        chrom = region.split(':')[0]
        if chrom == 'chrX':
            chromTypes.append(sim.CHROMOSOME_X)
            if len(regions) > 1:
                raise ValueError(
                    'The current implementation only allows one region if it is on chromosome X'
                )
            # logger is optional; this call used to be unguarded.
            if logger:
                logger.info('Chromosome {} is on chromosome X'.format(region))
        elif chrom == 'chrY':
            raise ValueError(
                'The current implementation does not support chromosome Y')
        else:
            chromTypes.append(sim.AUTOSOME)
    if logger:
        logger.info('%s regions with a total length of %d basepair.' %
                    (len(ranges), sum([x[1] - x[0] for x in ranges])))
    #
    # set default parameter
    if selCoef is None:
        # set default parameters
        if selDist == 'mixed_gamma':
            selCoef = [0.0186, 0.0001, 0.184, 0.160 * 2, 0.5, 0.0001, 0.1]
        elif selDist == 'mixed_gamma1':
            selCoef = [0, -1, 0.562341, 0.01, 0.5, 0.00001, 0.1]
        elif selDist == 'gamma1':
            selCoef = [0.23, 0.185 * 2, 0.5]
        elif selDist == 'gamma2':
            selCoef = [0.184, 0.160 * 2, 0.5]
        elif selDist == 'gamma3':
            selCoef = [0.206, 0.146 * 2, 0.5]
        elif selDist == 'constant':
            selCoef = [0.01, 0.5]
        # callable() replaces collections.Callable, which no longer exists in
        # Python 3.10+.
        elif not callable(selDist):
            raise ValueError("Unsupported random distribution")
    else:
        # force to list type
        selCoef = list(selCoef)
    if len(steps) == 0:
        # at the end of each stage
        steps = G
    elif len(steps) == 1:
        # save step for each stage
        steps = steps * len(G)
    # use a right selection operator.
    collector = fitnessCollector()
    mode = {
        'multiplicative': sim.MULTIPLICATIVE,
        'additive': sim.ADDITIVE,
        'exponential': sim.EXPONENTIAL
    }[selModel]
    #
    if isinstance(popFile, str):
        popFile = [popFile, -1]
    #
    if callable(selDist):
        mySelector = MutSpaceSelector(selDist=selDist,
                                      mode=mode,
                                      output=collector.getCoef)
    elif selDist == 'mixed_gamma':
        mySelector = MutSpaceSelector(selDist=mixedGamma(selCoef),
                                      mode=mode,
                                      output=collector.getCoef)
    elif selDist == 'mixed_gamma1':
        mySelector = MutSpaceSelector(selDist=mixedGamma1(selCoef),
                                      mode=mode,
                                      output=collector.getCoef)
    elif selDist.startswith('gamma'):
        mySelector = MutSpaceSelector(selDist=[sim.GAMMA_DISTRIBUTION] +
                                      selCoef,
                                      mode=mode,
                                      output=collector.getCoef)
    elif selDist == 'constant':
        # NOTE(review): selCoef is always a list at this point, so this
        # comparison with 0 looks like it can never be true -- confirm the
        # intended neutral-case check.
        if selCoef == 0:
            mySelector = sim.NoneOp()
        else:
            mySelector = MutSpaceSelector(selDist=[sim.CONSTANT] + selCoef,
                                          mode=mode,
                                          output=collector.getCoef)
    else:
        # An unknown name with an explicit selCoef used to fall through and
        # fail later with NameError on mySelector.
        raise ValueError("Unsupported random distribution")
    #
    # Evolve
    if os.path.isfile(initPop):
        if logger:
            logger.info('Loading initial population %s...' % initPop)
        pop = sim.loadPopulation(initPop)
        if pop.numChrom() != len(regions):
            raise ValueError(
                'Initial population %s does not have specified regions.' %
                initPop)
        for ch, reg in enumerate(regions):
            if pop.chromName(ch) != reg:
                raise ValueError(
                    'Initial population %s does not have region %s' %
                    (initPop, reg))
        pop.addInfoFields(['fitness', 'migrate_to'])
    else:
        pop = sim.Population(size=N[0],
                             loci=[10] * len(regions),
                             chromNames=regions,
                             infoFields=['fitness', 'migrate_to'],
                             chromTypes=chromTypes)
    # time.clock() was removed in Python 3.8; prefer perf_counter and fall
    # back to clock only on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock
    if logger:
        startTime = timer()
    #
    progGen = []
    # 0, G[0], G[0]+G[1], ..., sum(G)
    Gens = [sum(G[:i]) for i in range(len(G) + 1)]
    for i in range(len(Gens) - 1):
        progGen += list(range(Gens[i], Gens[i + 1], steps[i]))
    pop.evolve(
        initOps=sim.InitSex(),
        preOps=[
            sim.PyOutput('''Statistics outputted are
1. Generation number,
2. population size (a list),
3. number of segregation sites,
4. average number of segregation sites per individual
5. average allele frequency * 100
6. average fitness value
7. minimal fitness value of the parental population
''', at=0)] +
        [sim.PyOutput('Starting stage %d\n' % i, at=Gens[i])
         for i in range(0, len(Gens))] +
        [
            # add alleles from an existing population
            sim.IfElse(extMutantFile != '',
                ifOps=[
                    sim.PyOutput('Loading and converting population %s' % extMutantFile),
                    sim.PyOperator(func=addMutantsFrom,
                                   param=(extMutantFile, regions, logger)),
                ], at=addMutantsAt),
            # revert alleles at fixed loci to wildtype
            MutSpaceRevertFixedSites(),
            # mutate in a region at rate mu, if verbose >= 2, save mutation
            # events to a file
            MutSpaceMutator(mu, ranges,
                            {'finite_sites': 1, 'infinite_sites': 2}[mutationModel],
                            output='' if verbose < 2 else '>>mutations.lst'),
            # selection on all loci
            mySelector,
            # output statistics in verbose mode
            sim.IfElse(verbose > 0, ifOps=[
                sim.Stat(popSize=True, meanOfInfo='fitness', minOfInfo='fitness'),
                NumSegregationSites(),
                sim.PyEval(r'"%5d %s %5d %.6f %.6f %.6f %.6f\n" '
                    '% (gen, subPopSize, numSites, avgSites, avgFreq*100, meanOfInfo["fitness"], minOfInfo["fitness"])',
                    output='>>' + statFile),
                ], at=progGen),
            # migrate between split subpopulations, starting after the split
            sim.IfElse(len(splitTo) > 1,
                sim.Migrator(rate=migrIslandRates(migrRate, len(splitTo)),
                             begin=splitAt + 1)),
        ],
        matingScheme=sim.RandomMating(
            ops=MutSpaceRecombinator(recRate, ranges),
            subPopSize=multiStageDemoFunc(N, G, splitTo, splitAt)),
        postOps=[
            sim.NoneOp() if postHook is None else sim.PyOperator(func=postHook),
            sim.SavePopulation(popFile[0], at=popFile[1]),
        ],
        finalOps=[
            # revert fixed sites so that the final population does not have fixed sites
            MutSpaceRevertFixedSites(),
            sim.IfElse(verbose > 0, ifOps=[
                # statistics after evolution
                sim.Stat(popSize=True),
                NumSegregationSites(),
                sim.PyEval(r'"%5d %s %5d %.6f %.6f %.6f %.6f\n" '
                    '% (gen+1, subPopSize, numSites, avgSites, avgFreq*100, meanOfInfo["fitness"], minOfInfo["fitness"])',
                    output='>>' + statFile),
                sim.PyEval(r'"Simulated population has %d individuals, %d segregation sites.'
                           r'There are on average %.1f sites per individual. Mean allele frequency is %.4f%%.\n"'
                           r'% (popSize, numSites, avgSites, avgFreq*100)'),
            ]),
        ],
        gen=Gens[-1]
    )
    # record selection coefficients to population
    if len(collector.selCoef) == 0:
        # this must be the neutral case where a NonOp has been used.
        pop.dvars().selCoef = 0
    else:
        pop.dvars().selCoef = collector.selCoef
    # re-save the file with the added selCoef
    if popFile[-1] == -1:
        pop.save(popFile[0])
    #
    if logger:
        logger.info('Population simulation takes %.2f seconds' %
                    (timer() - startTime))
    if markerFile or genotypeFile:
        if logger:
            logger.info('Saving marker information to file %s' % markerFile)
        mutants = saveMarkerInfoToFile(pop, markerFile, logger)
        if genotypeFile:
            if logger:
                logger.info('Saving genotype in .ped format to file %s' %
                            genotypeFile)
            saveGenotypeToFile(pop, genotypeFile, mutants, logger)
    if mutantFile:
        if logger:
            logger.info('Saving mutants to file %s' % mutantFile)
        saveMutantsToFile(pop, mutantFile, logger=logger)
    return pop
def get_mean_r2():
    """Estimate Ne from linkage disequilibrium for several migration rates and plot it.

    For every total migration rate ``m`` in the module-level ``ms``, ``repeats``
    populations of ``n_subpops`` subpopulations (``Ne`` diploid individuals
    each, ``n_loci`` chromosomes carrying one locus each) are evolved for
    ``gens`` generations with a symmetric migration matrix whose off-diagonal
    entries are ``m / (n_subpops - 1)``.  ``S`` individuals are then sampled
    from every subpopulation, the mean r2 over all pairs of segregating loci
    is corrected for sample size and converted to ``1 / (3 * r2)``.

    The mean estimate per migration rate is drawn as a scatter plot; nothing
    is returned.  Relies on the module-level names ``ms``, ``n_subpops``,
    ``repeats``, ``Ne``, ``n_loci``, ``gens``, ``S``, ``sim``, ``np``,
    ``plt``, ``combinations`` and ``drawRandomSample``.
    """
    full_estimates = {}
    for m in ms:
        # Spread the total migration rate evenly over the other subpopulations.
        m_adj = m / (n_subpops - 1)
        M = np.full((n_subpops, n_subpops), m_adj)
        np.fill_diagonal(M, 0)
        M = M.tolist()

        r2s = []
        estimates = []
        for r in range(repeats):
            print(r + 1)

            # set up population
            pop = sim.Population(size=[Ne] * n_subpops, ploidy=2,
                                 loci=[1] * n_loci, infoFields='migrate_to')

            # evolve population
            pop.evolve(
                initOps=[sim.InitSex(), sim.InitGenotype(freq=[0.5, 0.5])],
                preOps=sim.Migrator(rate=M),
                matingScheme=sim.RandomMating(),
                gen=gens
            )

            # take sample of size S from every subpopulation
            sample_pop = drawRandomSample(pop, sizes=[S] * n_subpops)

            # get allele frequency
            sim.stat(sample_pop, alleleFreq=range(0, n_loci),
                     vars=['alleleFreq_sp'])

            # calculate r2 values
            sim.stat(sample_pop,
                     LD=list(combinations(list(range(n_loci)), r=2)),
                     vars=['R2_sp'])

            estimates.append([])
            r2s.append([])
            for sp in range(n_subpops):
                allele_freqs = sample_pop.dvars(sp).alleleFreq

                # find which loci are segregating (both alleles above 4%)
                seg_alleles = []
                for k in allele_freqs.keys():
                    if (allele_freqs[k][0] > 0.04) and (allele_freqs[k][1] > 0.04):
                        seg_alleles.append(k)

                # r2 needs at least one pair of segregating loci
                if len(seg_alleles) < 2:
                    continue

                # calculate mean r2
                r2_total = 0
                count = 0
                for pairs in combinations(seg_alleles, r=2):
                    r2_i = sample_pop.dvars(sp).R2[pairs[0]][pairs[1]]
                    r2_total += r2_i
                    count += 1
                mean_r2 = r2_total / count

                # correct r2 for sample size
                r2_drift = (mean_r2 - 1 / (2 * S)) / (1 - 1 / (2 * S))

                # get Ne estimate
                Ne_est = 1 / (3 * r2_drift)
                estimates[-1].append(Ne_est)
                r2s[-1].append(r2_drift)

        full_estimates[m] = estimates

    # Subpopulations skipped above leave the per-repeat lists with unequal
    # lengths; flatten them so the mean does not depend on NumPy being able
    # to build a rectangular array.
    means = [
        np.mean([est for rep in full_estimates[m] for est in rep])
        for m in ms
    ]

    plt.scatter(ms, means, edgecolors='black', color='white')
    # NOTE(review): the dashed line at 100 presumably marks the true Ne -- confirm.
    plt.plot([min(ms), max(ms)], [100, 100], 'k--')
    plt.xscale('log')
    plt.xticks(ticks=ms, labels=ms)
    plt.ylim(50, 150)
    plt.xlim(min(ms) * 0.95, max(ms) * 1.05)
    plt.show()
import simuOpt
# Options must be set before simuPOP itself is imported.
simuOpt.setOptions(gui=False, alleleType='binary')
import simuPOP as sim

pop.addInfoFields(['ancestry', 'migrate_to'])

# Initialize ancestry: 0 for members of subpopulation 0, 1 for subpopulation 1.
ancestry_values = [0] * pop.subPopSize(0) + [1] * pop.subPopSize(1)
sim.initInfo(pop, ancestry_values, infoFields='ancestry')

# Define two virtual subpopulations by ancestry value (below / at-or-above 0.5).
pop.setVirtualSplitter(sim.InfoSplitter(field='ancestry', cutoff=[0.5]))

# Offspring receive Mendelian genotypes and the mean ancestry of their parents.
transmitters = [
    sim.MendelianGenoTransmitter(),
    sim.InheritTagger(mode=sim.MEAN, infoFields='ancestry')
]

# One unrestricted random-mating scheme plus one scheme per virtual
# subpopulation of subpopulation 0, each with weight -0.80.
mating_schemes = [sim.RandomMating(ops=transmitters)]
mating_schemes.extend(
    sim.RandomMating(subPops=[(0, vsp)], weight=-0.80, ops=transmitters)
    for vsp in (0, 1)
)

pop.evolve(
    initOps=sim.InitSex(),
    # Only subpopulation 1 sends migrants (rate 0.05, into subpopulation 0).
    preOps=sim.Migrator(rate=[[0., 0], [0.05, 0]]),
    matingScheme=sim.HeteroMating(matingSchemes=mating_schemes),
    gen=10,
)

# remove the second subpop
pop.removeSubPops(1)
for p in range(3): for l in [dmi1, dmi2, dmi3, dmi4, ad1, ad2, ad3]: if l == ad3: print '%.2f\n' % pop.dvars(p).alleleFreq[l][1], else: print '%.2f' % pop.dvars(p).alleleFreq[l][1], return True pop.evolve( initOps=[ sim.InitSex(), #sim.Stat(popSize=True), #sim.PyEval(r'"%d %s " % (gen, subPopSize)'), ], preOps=sim.PySelector( loci=[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 ], func=sel_1), # !! change for different types of selection !! # matingScheme=sim.RandomMating(ops=sim.Recombinator(intensity=r), subPopSize=[popsize, popsize, popsize]), postOps=[ sim.Migrator(rate=[[0.0, m, 0.0], [0.0, 0.0, 0.0], [0.0, m, 0.0]]), #sim.Stat(popSize=True), #sim.PyEval(r'"%d %s " % (gen, subPopSize)'), sim.PyOperator(printAlleleFreq, step=10), ], gen=1001)
Fstsample = sample.dvars().F_st sample.addInfoFields('order') order = list(range(100)) fstsim = '' for rep in range(1000): merged = sample merged.mergeSubPops() np.random.shuffle(order) merged.setIndInfo(order, field='order') merged.sortIndividuals('order') merged.splitSubPop(0, [50] * 2) sim.stat(merged, structure=range(10), vars=['F_st']) fstsim += '%s\t' % merged.dvars().F_st sortie += '%3d\t%.6f\t%3d\t%.6f\t%s\n' % (pop.dvars().gen, Fstpop, a, Fstsample, fstsim) reccord(sortie, "dataout") return True pop = sim.Population([100000] * 2, loci=[10] * 10, infoFields='migrate_to') pop.evolve(initOps=[ sim.InitSex(), sim.InitGenotype(freq=[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]) ], preOps=sim.Migrator(rate=[[0, 0.0001], [0.0001, 0]], mode=sim.BY_PROPORTION), matingScheme=sim.RandomMating(ops=sim.Recombinator(rates=0.01)), postOps=[sim.PyOperator(func=calcFst, step=50)], gen=5000)
def runSimulation(scenario_id, sub_population_size, minMatingAge,
                  maxMatingAge, gen):
    '''Simulate an age-structured population, sample it and write result files.

    scenario_id
        Describes the batch of files to load. The mitochondrial haplotype
        frequencies are read from mtdna_<scenario_id>.txt and the SNP
        frequencies from snp_<scenario_id>.txt.
    sub_population_size
        A vector giving the population sizes for each sub-population. The
        subpopulations determine which breeding ground an individual
        belongs to.
    minMatingAge
        Minimal mating age.
    maxMatingAge
        Maximal mating age. Individuals older than this are effectively dead.
    gen
        Number of years (generations) to simulate.

    Writes freq.txt, mixfile.txt and haploiso.txt to the current directory
    and returns the sampled population.  Raises ValueError when the number
    of columns in the haplotype file differs from the number of
    subpopulations.  Relies on the module-level names nb_loci,
    numberOfFeedingGrounds, sample_count, init_native_breeding_grounds,
    configure_new_population_size and postop_processing.
    '''
    # Read the mitochondrial haplotype frequencies. Every line is split into
    # one float per column; the result is then transposed so that
    # haplotype_frequencies[p] is the vector of all frequencies for
    # population p. split() already discards the trailing newline, so a final
    # line without one is not truncated; blank lines are skipped so they
    # cannot empty the transpose.
    mitochondrial_file = "mtdna_" + scenario_id + ".txt"
    with open(mitochondrial_file, "r") as fd:
        haplotype_rows = [
            list(map(float, line.split())) for line in fd if line.strip()
        ]
    haplotype_frequencies = list(map(list, zip(*haplotype_rows)))

    if len(haplotype_frequencies) != len(sub_population_size):
        raise ValueError(
            'The number of populations in the population size vector and the number of populations deduced from the haplotype file are different'
        )

    # Now read the SNP data. This builds a 2D array indexed as
    # snp[locus][population].
    snp_file = "snp_" + scenario_id + ".txt"
    with open(snp_file, "r") as fd:
        snp = [list(map(float, line.split())) for line in fd if line.strip()]

    sub_population_count = len(sub_population_size)
    print()
    print(sub_population_count, "subpopulations detected")

    # Now we can create the population. Each subpopulation gets a
    # single-letter name, starting from A.
    sub_population_names = list(map(chr, range(65, 65 + sub_population_count)))

    # We have two chromosomes. The first is an autosome with nb_loci loci,
    # and the second is the mitochondrial chromosome with 1 locus.
    pop = simuPOP.Population(
        sub_population_size,
        ploidy=2,
        loci=[nb_loci, 1],
        ancGen=2,
        infoFields=[
            'age', 'ind_id', 'father_id', 'mother_id', 'nitrogen', 'carbon',
            'feeding_ground', 'native_breeding_ground', 'migrate_to'
        ],
        subPopNames=sub_population_names,
        chromTypes=[simuPOP.AUTOSOME, simuPOP.MITOCHONDRIAL])
    sub_population_names = tuple(sub_population_names)

    # sub_population_size is a vector; sum it to get the total number of
    # individuals.
    individual_count = sum(sub_population_size)

    # Assign a random age between 0 and maxMatingAge to each individual
    pop.setIndInfo(
        [random.randint(0, maxMatingAge) for x in range(individual_count)],
        'age')

    # Assign a random feeding ground to each individual
    pop.setIndInfo([
        random.randint(0, numberOfFeedingGrounds - 1)
        for x in range(individual_count)
    ], 'feeding_ground')

    # Virtual subpopulations: sex crossed with three age classes
    #   age < minMatingAge                             (juvenile)
    #   minMatingAge <= age < maxMatingAge + 0.1       (mature)
    #   age >= maxMatingAge + 0.1                      (dead)
    # which gives VSPs 0-5, plus two combined ones:
    #   6 'Not dead yet' = juvenile and mature of both sexes
    #   7 'Active'       = mature of both sexes
    #
    # Ideally we would want something like this:
    #   1) Immature
    #   2) Receptive female (every 3 years)
    #   3) Non-receptive female
    #   4) Mature male
    #   5) Dead
    pop.setVirtualSplitter(
        simuPOP.CombinedSplitter([
            simuPOP.ProductSplitter([
                simuPOP.SexSplitter(),
                simuPOP.InfoSplitter('age',
                                     cutoff=[minMatingAge, maxMatingAge + 0.1],
                                     names=['juvenile', 'mature', 'dead'])
            ])
        ],
            vspMap=[[0], [1], [2], [3], [4], [5], [0, 1, 3, 4], [1, 4]],
            names=[
                'Juvenile Male', 'Mature Male', 'Dead Male',
                'Juvenile Female', 'Mature Female', 'Dead Female',
                'Not dead yet', 'Active'
            ]))

    pop.evolve(
        initOps=[
            simuPOP.InitSex(),
            simuPOP.IdTagger(),
            simuPOP.PyOperator(func=init_native_breeding_grounds)
        ] + [
            # Mitochondrial locus (index nb_loci), one operator per subpopulation
            simuPOP.InitGenotype(subPops=sub_population_names[i],
                                 freq=haplotype_frequencies[i],
                                 loci=[nb_loci])
            for i in range(0, sub_population_count)
        ] + [
            # NOTE(review): range(0, nb_loci - 1) leaves autosomal locus
            # nb_loci - 1 uninitialised -- confirm this is intended.
            simuPOP.InitGenotype(subPops=sub_population_names[i],
                                 freq=[snp[n][i], 1 - snp[n][i]],
                                 loci=[n])
            for i in range(0, sub_population_count)
            for n in range(0, nb_loci - 1)
        ],
        # increase age by 1
        preOps=[simuPOP.InfoExec('age += 1')],
        matingScheme=simuPOP.HeteroMating(
            [
                # subPops is a list of (subPopulation, virtualSubPopulation)
                # pairs that take part in a mating scheme.
                # First, clone every individual of VSP 6 ('Not dead yet') in
                # every subpopulation into the next generation (weight=-1).
                simuPOP.CloneMating(
                    ops=[simuPOP.CloneGenoTransmitter(chroms=[0, 1])],
                    subPops=[
                        (sub_population, 6)
                        for sub_population in range(0, sub_population_count)
                    ],
                    weight=-1),
                # Then random mating among VSP 7 ('Active', i.e. mature
                # individuals) within each subpopulation (breeding ground).
                simuPOP.RandomMating(
                    ops=[
                        simuPOP.MitochondrialGenoTransmitter(),
                        simuPOP.MendelianGenoTransmitter(),
                        simuPOP.IdTagger(),
                        simuPOP.InheritTagger(mode=simuPOP.MATERNAL,
                                              infoFields=['feeding_ground']),
                        simuPOP.InheritTagger(
                            mode=simuPOP.MATERNAL,
                            infoFields=['native_breeding_ground']),
                        simuPOP.PedigreeTagger()
                    ],
                    subPops=[
                        (sub_population, 7)
                        for sub_population in range(0, sub_population_count)
                    ],
                    weight=1)
            ],
            subPopSize=configure_new_population_size),
        postOps=[
            # Determine the isotopic ratios in individuals
            simuPOP.PyOperator(func=postop_processing),
            simuPOP.Migrator(mode=simuPOP.BY_IND_INFO),
            # Calculate the Fst every 10 generations.
            # FIXME: How does this actually work? Does it work for > 2
            # populations? I don't really understand it yet
            # ELC: it is a calculation that partitions variance among and
            # between populations, and can be calculated as a global
            # statistic or on a pairwise basis. We use it as an indication
            # of genetic differentiation.
            simuPOP.Stat(structure=range(1),
                         subPops=sub_population_names,
                         suffix='_AB',
                         step=10),
            simuPOP.PyEval(r"'Fst=%.3f \n' % (F_st_AB)", step=10)
        ],
        # Use the caller-supplied number of generations; previously this read
        # an unrelated name `years` and silently ignored the `gen` argument.
        gen=gen)

    # NOTE(review): `ped` is never used afterwards -- kept in case building
    # the pedigree is relied on as a sanity check.
    ped = simuPOP.Pedigree(pop)
    print("This is the pedigree stuff")
    simuPOP.dump(pop)

    # Now sample the individuals
    sample = drawRandomSample(pop,
                              sizes=[sample_count] * sub_population_count)

    # Print out the allele frequency data: one line per polymorphic
    # autosomal locus, prefixed with a running index.
    simuPOP.stat(sample, alleleFreq=simuPOP.ALL_AVAIL)
    frequencies = sample.dvars().alleleFreq
    with open('freq.txt', 'w') as freqfile:
        index = 0
        for locus in frequencies:
            if (locus == nb_loci):
                # the mitochondrial locus is not reported here
                continue
            if (len(frequencies[locus]) < 2):
                continue
            print(index, end=' ', file=freqfile)
            index = index + 1
            for allele in frequencies[locus]:
                print(frequencies[locus][allele], end=' ', file=freqfile)
            print(file=freqfile)

    # We want to remove monoallelic loci: positions for which all individuals
    # carry the same value on both copies. Summing the allele values per
    # locus gives 0 if everyone has type 0, or (number of individuals * 2)
    # if everyone has type 1.
    geno_sum = [0] * (nb_loci + 1) * 2
    for individual in sample.individuals():
        geno_sum = list(map(add, geno_sum, individual.genotype()))
    final_sum = list(
        map(add, geno_sum[:(nb_loci + 1)], geno_sum[(nb_loci + 1):]))

    all_ones = sample_count * sub_population_count * 2
    # A set keeps the per-individual, per-locus membership test below O(1).
    monoallelic_loci = {
        i for i in range(0, nb_loci)
        if final_sum[i] == 0 or final_sum[i] == all_ones
    }
    nb_ignored_loci = len(monoallelic_loci)

    # Generate the two files
    with open('mixfile.txt', 'w') as mixfile:
        with open('haploiso.txt', 'w') as haplofile:
            print(sub_population_count,
                  nb_loci - nb_ignored_loci,
                  2,
                  1,
                  file=mixfile)
            # NOTE(review): the header names five columns but only four are
            # written per row (native_ground is commented out below).
            print("sex, haplotype, iso1, iso2, native_ground", file=haplofile)
            for i in range(0, nb_loci - nb_ignored_loci):
                print('Loc', i + 1, sep='_', file=mixfile)
            for individual in sample.individuals():
                genotype = individual.genotype()
                print(
                    1 if individual.sex() == 1 else 0,
                    genotype[nb_loci],
                    individual.info('carbon'),
                    individual.info('nitrogen'),
                    # int(individual.info('native_breeding_ground')),
                    file=haplofile,
                    sep=' ')
                print(int(individual.info('native_breeding_ground') + 1),
                      end=' ',
                      file=mixfile)
                for i in range(0, nb_loci):
                    if i not in monoallelic_loci:
                        # the second copy starts nb_loci + 1 entries later
                        print(genotype[i] + 1,
                              genotype[i + nb_loci + 1] + 1,
                              ' ',
                              end='',
                              sep='',
                              file=mixfile)
                print(file=mixfile)
    return sample
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This script is an example in the simuPOP user's guide. Please refer to
# the user's guide (http://simupop.sourceforge.net/manual) for a detailed
# description of this example.
#
import simuPOP as sim


def demo(gen):
    """Return the two subpopulation sizes at generation *gen*.

    Both subpopulations grow by 10 individuals per generation, starting
    from 500 and 1000.
    """
    growth = gen * 10
    return [500 + growth, 1000 + growth]


pop = sim.Population(size=[500, 1000], infoFields='migrate_to')

# Migration happens before mating; sizes are reported after mating.
migration = sim.Migrator(rate=[[0.8, 0.2], [0.4, 0.6]])
report_sizes = [sim.Stat(popSize=True), sim.PyEval(r'"%s\n" % subPopSize')]

pop.evolve(
    initOps=sim.InitSex(),
    preOps=migration,
    matingScheme=sim.RandomMating(subPopSize=demo),
    postOps=report_sizes,
    gen=3,
)
# record recombinations rc = RecombCollector(first_gen=pop.indInfo("ind_id"), ancestor_age=args.ancestor_age, length=2 * args.length, locus_position=locus_position + [args.length, 2 * args.length]) migr_mat = [[0, args.m, 0], [args.m, 0, args.M], [0, args.M, 0]] pop.evolve(initOps=[ sim.InitSex(), ] + init_geno, preOps=[ sim.PyOperator(lambda pop: rc.increment_time() or True), sim.Migrator(rate=migr_mat, mode=sim.BY_PROBABILITY), sim.SNPMutator(u=args.sel_mut_rate, v=args.sel_mut_rate), sim.PyMlSelector(fitness.left, subPops=fitness.left_subpops, output=">>" + selloci_file), sim.PyMlSelector(fitness.right, subPops=fitness.right_subpops, output=">>" + selloci_file), ], matingScheme=sim.RandomMating(ops=[ id_tagger, sim.Recombinator(intensity=args.recomb_rate, output=rc.collect_recombs, infoFields="ind_id"), ]), postOps=[
def _str_to_bool(value):
    """Parse a command-line boolean (``bool("False")`` would be True)."""
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("1", "true", "t", "yes", "y", "on"):
        return True
    if text in ("0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % value)


def main():
    """Run the rare-trait simulations for every parameter combination.

    Parses the command line, validates the k / migration-rate combinations
    through ``utils``, then for every combination of sub-population count,
    k-value, migration fraction and innovation rate builds a network model,
    evolves ``--reps`` replicate populations with simuPOP and appends one
    summary row (means and 95% confidence bounds) to
    ``<output_path>/<experiment>-rare-trait-output.csv``.

    Exits through ``sys.exit()`` when the k / migration check fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--experiment", help="provide name for experiment", required=True, type=str, default="test")
    parser.add_argument("--debug", help="turn on debugging output")
    parser.add_argument("--reps", help="Replicated populations per parameter set", type=int, default=3)
    parser.add_argument("--networkfile", help="Name of GML file representing the  network model for this simulation",
                        required=True, type=str, default="smallworld")
    parser.add_argument("--numloci", help="Number of loci per individual (use with care)", type=int, required=True, default=1)
    parser.add_argument("--maxinittraits", help="Max initial number of traits per locus for initialization", type=int,
                        required=True, default=50)
    parser.add_argument("--innovrate", nargs='+', help="Rate(s) at which innovations occur in population as a per-locus rate", type=float, default=[])
    # The help text used to promise 3000 generations while the default is 20;
    # let argparse print the real default instead.
    parser.add_argument("--simlength", help="Time at which simulation and sampling end, defaults to %(default)s generations",
                        type=int, default=20)
    parser.add_argument("--popsize", help="Initial size of population for each community in the model", type=int, required=True)
    parser.add_argument("--migrationfraction", nargs='+', help="Fraction of population that migrates each time step", type=float, required=True, default=[])
    parser.add_argument("--seed", type=int, help="Seed for random generators to ensure replicability")
    parser.add_argument("--k_values", nargs='+', type=int, help="list of k-values to explore [e.g., 2 4 20 24]", default=[])
    parser.add_argument("--sub_pops", nargs="+", help="Number of sub populations", required=True, default=[])
    parser.add_argument("--maxalleles", type=int, help="Maximum number of alleles", default=50)
    # type=bool would turn any non-empty string (including "False") into True.
    parser.add_argument("--save_figs", type=_str_to_bool, help="Save figures or not?", default=False)
    parser.add_argument("--burnintime", type=int, help="How long to wait before making measurements? ", default=2000)
    parser.add_argument("--rewiringprob", type=float, help="Probability of random rewiring", default=0)

    config = parser.parse_args()

    # setup output directories for writing
    output_path = utils.setup_output(config.experiment)

    # check the k and migration rate combinations
    check = utils.check_k_and_migration_rates(config)
    if check is not True:
        print("\nProblem(s):\t %s\n" % check)
        print("Please adjust input values for k and/or migration rate and restart.\n ")
        sys.exit()
    else:
        print("\nChecked on the migration and k values -- all looks good!\n")

    # save parameters
    utils.save_parameters(str(sys.argv), config, output_path)

    # set up the frequencies for the alleles in each loci. Here assuming a
    # uniform distribution as a starting point
    distribution = utils.constructUniformAllelicDistribution(config.maxinittraits)

    # prepare file for output
    output_data_file_name = "%s/%s-rare-trait-output.csv" % (output_path, config.experiment)
    with open(output_data_file_name, mode='w') as output_file:
        output_writer = csv.writer(output_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        output_writer.writerow(["Iteration", "k", "NumSubPops", "Migration", "InnovationRate", "Ones_Mean",
                                "Ones_95%_Lower", "Ones_95%_Upper", "Twos_Mean", "Twos_95%_Lower", "Twos_95%_Upper",
                                "Richness_Mean", "Richness_95%_Lower", "Richness_95%_Upper",
                                "Fst_Mean", "Fst_95%_Lower", "Fst_95%_Upper"])
        output_file.flush()

        subpop_run_values = config.sub_pops
        mig_run_values = config.migrationfraction
        innov_run_values = config.innovrate

        iteration = -1

        for subpop in subpop_run_values:
            # A k-value list of [0] means "derive the k values from the
            # number of sub-populations". Derive them afresh for every
            # subpop: overwriting the configured list (as before) made all
            # later sub-population counts reuse the first one's k values.
            if config.k_values == [0]:
                k_run_values = [2, int(float(subpop) * .1), int(float(subpop) * .2),
                                int(float(subpop) * .5), int(float(subpop) * .8),
                                int(float(subpop) * .9), int(subpop) - 1]
            else:
                k_run_values = config.k_values

            for k in k_run_values:
                for mig in mig_run_values:
                    for innov in innov_run_values:
                        ## let us know whats happening
                        iteration += 1
                        print("Now running with subpops: %s k-value: %s mig rate: %4f innov rate: %4f" % (subpop, k, mig, innov))

                        ## these are lists of things that simuPop will do at different stages
                        init_ops = OrderedDict()
                        pre_ops = OrderedDict()
                        post_ops = OrderedDict()

                        # Construct a demographic model
                        networkmodel = network.NetworkModel(networkmodel=config.networkfile,
                                                            simulation_id=config.experiment,
                                                            sim_length=config.simlength,
                                                            burn_in_time=config.burnintime,
                                                            initial_subpop_size=config.popsize,
                                                            migrationfraction=mig,
                                                            sub_pops=subpop,
                                                            connectedness=k,  # if 0, then distance decay
                                                            save_figs=config.save_figs,
                                                            network_iteration=iteration)

                        num_pops = networkmodel.get_subpopulation_number()
                        sub_pop_size = int(config.popsize / num_pops)

                        # The regional network model defines both of these, in order to configure an initial population for evolution
                        # Construct the initial population
                        # NOTE(review): subPopNames receives the str() of the
                        # name list rather than the list itself -- confirm.
                        pops = sp.Population(size=[sub_pop_size] * num_pops,
                                             subPopNames=str(list(networkmodel.get_subpopulation_names())),
                                             infoFields='migrate_to',
                                             ploidy=1,
                                             loci=config.numloci)

                        ### now set up the activities
                        init_ops['acumulators'] = sp.PyOperator(utils.init_acumulators, param=['fst', 'alleleFreq', 'haploFreq'])
                        init_ops['subpop_counts'] = sp.PyOperator(utils.init_count_traits_in_subpops)
                        init_ops['Sex'] = sp.InitSex()
                        init_ops['Freq'] = sp.InitGenotype(loci=list(range(config.numloci)), freq=distribution)

                        post_ops['Innovate'] = sp.KAlleleMutator(k=config.maxalleles, rates=innov, loci=sp.ALL_AVAIL)
                        post_ops['mig'] = sp.Migrator(rate=networkmodel.get_migration_matrix())  # , reps=[3])
                        post_ops['Stat-fst'] = sp.Stat(structure=sp.ALL_AVAIL)
                        post_ops['Stat-richness'] = sp.Stat(alleleFreq=[0], haploFreq=[0], vars=['alleleFreq', 'haploFreq', 'alleleNum', 'genoNum'])
                        post_ops['fst_acumulation'] = sp.PyOperator(utils.update_acumulator, param=['fst', 'F_st'])
                        post_ops['richness_acumulation'] = sp.PyOperator(utils.update_richness_acumulator, param=('alleleFreq', 'Freq of Alleles'))
                        post_ops['class_richness'] = sp.PyOperator(utils.calculateAlleleAndGenotypeFrequencies, param=(config.popsize, config.numloci))
                        post_ops['count_traits_in_subpops'] = sp.PyOperator(utils.count_traits_in_subpops, param=(config.numloci, num_pops), subPops=sp.ALL_AVAIL)

                        mating_scheme = sp.RandomSelection()

                        ## go simuPop go! evolve your way to the future!
                        sim = sp.Simulator(pops, rep=config.reps)
                        sim.evolve(initOps=list(init_ops.values()), preOps=list(pre_ops.values()), postOps=list(post_ops.values()),
                                   matingScheme=mating_scheme, gen=config.simlength)

                        # NOTE(review): `output` is not defined in this
                        # function; presumably a module-level container.
                        count = 0
                        for pop in sim.populations():
                            output[count] = deepcopy(pop.dvars())
                            count += 1

                        ones_point_in_time = []
                        twos_point_in_time = []
                        richness_point_in_time = []
                        fst_point_in_time = []

                        # NOTE(review): the sampling index 2000 is hard-coded
                        # (it equals the default --burnintime) -- confirm it
                        # should not follow config.burnintime / simlength.
                        for n in range(config.reps):
                            list_of_ones = list(output[n].ones)
                            list_of_twos = list(output[n].twos)
                            list_of_richness = list(output[n].richness)
                            list_of_fst = list(output[n].fst)
                            ones_point_in_time.append(list_of_ones[2000])
                            twos_point_in_time.append(list_of_twos[2000])
                            richness_point_in_time.append(list_of_richness[2000])
                            fst_point_in_time.append(list_of_fst[2000])

                        (ones_ave, ones_min, ones_max) = utils.mean_confidence_interval(ones_point_in_time, confidence=0.95)
                        (twos_ave, twos_min, twos_max) = utils.mean_confidence_interval(twos_point_in_time, confidence=0.95)
                        (richness_ave, richness_min, richness_max) = utils.mean_confidence_interval(richness_point_in_time, confidence=0.95)
                        (fst_ave, fst_min, fst_max) = utils.mean_confidence_interval(fst_point_in_time, confidence=0.95)

                        output_writer.writerow([iteration, k, subpop, mig, innov,
                                                ones_ave, ones_min, ones_max,
                                                twos_ave, twos_min, twos_max,
                                                richness_ave, richness_min, richness_max,
                                                fst_ave, fst_min, fst_max])
                        output_file.flush()
def simulation(self):
    """Build the two-country population and evolve it for self.generations.

    Creates ``self.pop`` (subpopulations "croatia" and "slovenia", 500
    individuals and 20 single-locus chromosomes each), draws initial ages
    from a negative binomial distribution, defines sex-by-age virtual
    subpopulations and runs the evolution with ageing, migration, custom
    parent choosers, cloning of survivors and the ``popmodel`` callbacks.
    """
    info_fields = ["age", 'ind_id', 'father_idx', 'mother_idx', "hc", "ywc",
                   'migrate_to']
    self.pop = sim.Population(size=[500, 500],
                              loci=[1] * 20,
                              infoFields=info_fields,
                              subPopNames=["croatia", "slovenia"])

    initial_ages = list(
        map(int, np.random.negative_binomial(n=1, p=0.25, size=500)))
    sim.initInfo(pop=self.pop, values=initial_ages, infoFields="age")

    # Age groups: from 0 to 1 - cubs, from 1 to 3 - prereproductive,
    # from 3 to 6 - reproductive class, from 6 to 10 - dominant
    sex_by_age = sim.ProductSplitter([
        sim.SexSplitter(),
        sim.InfoSplitter(field="age", cutoff=[1, 3, 6, 10]),
    ])
    self.pop.setVirtualSplitter(
        sim.CombinedSplitter(
            [sex_by_age],
            vspMap=[[0, 1], [2], [3], [4], [5, 6, 7, 8], [9]]))

    # Virtual subpopulations that are monitored and cloned below.
    tracked_vsps = [(0, 1), (0, 2), (0, 4), (1, 1), (1, 2), (1, 4)]

    initial_operators = [
        sim.InitSex(),
        # random genotype
        sim.InitGenotype(freq=[0.01] * 2 + [0.03] * 2 + [0.23] * 4),
        # assign an unique ID to everyone.
        sim.IdTagger(),
    ]

    before_mating = [
        # increase the age of everyone by 1 before mating.
        sim.InfoExec('age += 1'),
        # Mother bear can't have cubs for two years after pregnancy
        sim.InfoExec("hc +=1 if 0 < hc < 3 else 0"),
        # Migration of VSP 0 from Croatia to Slovenia and back.
        # NOTE(review): the original comment calls these "reproductive
        # males"; confirm against the vspMap above.
        sim.Migrator(rate=[[self.cro_to_slo]],
                     mode=sim.BY_PROPORTION,
                     subPops=[(0, 0)],
                     toSubPops=[1]),
        sim.Migrator(rate=[[self.slo_to_cro]],
                     mode=sim.BY_PROPORTION,
                     subPops=[(1, 0)],
                     toSubPops=[0]),
        sim.Stat(effectiveSize=sim.ALL_AVAIL,
                 subPops=tracked_vsps,
                 vars='Ne_demo_base'),
        sim.Stat(effectiveSize=sim.ALL_AVAIL,
                 subPops=tracked_vsps,
                 vars='Ne_demo_base_sp'),
    ]

    # Parents are picked by the custom generators; offspring start at age 0.
    parent_chooser = sim.CombinedParentsChooser(
        fatherChooser=sim.PyParentsChooser(generator=self.bearFather),
        motherChooser=sim.PyParentsChooser(generator=self.bearMother))
    offspring_generator = sim.OffspringGenerator(
        ops=[
            sim.InfoExec("age = 0"),
            sim.IdTagger(),
            sim.ParentsTagger(),
            sim.MendelianGenoTransmitter(),
        ],
        numOffspring=(sim.UNIFORM_DISTRIBUTION, 1, 3))
    breeding = sim.HomoMating(subPops=sim.ALL_AVAIL,
                              chooser=parent_chooser,
                              generator=offspring_generator)
    # CloneMating will keep individual sex and all information fields
    # (by default).
    survivors = sim.CloneMating(
        subPops=[(0, 0), (0, 1), (0, 2), (0, 4),
                 (1, 0), (1, 1), (1, 2), (1, 4)],
        weight=-1)

    # Ne estimators start after the first 20% of the run.
    stats_start = int(0.2 * self.generations)
    after_mating = [
        sim.PyOperator(func=popmodel.CalcNe,
                       param={"me": self.me, "Ne": self.Ne},
                       begin=stats_start),
        sim.PyOperator(func=popmodel.CalcLDNe,
                       param={"me": self.me, "x": self.x},
                       begin=int(0.2 * self.generations)),
        sim.PyOperator(func=popmodel.cullCountry,
                       param={"slo_cull": self.slo_cull,
                              "cro_cull": self.cro_cull}),
    ]

    self.pop.evolve(
        initOps=initial_operators,
        preOps=before_mating,
        matingScheme=sim.HeteroMating([breeding, survivors],
                                      subPopSize=popmodel.demoModel),
        postOps=after_mating,
        gen=self.generations)
def evolvePop(model, N0, N1, G0, G1, initSpec, mu, k, fitness, m=1,
              migrRate=0, logfile='', sp_logfile='', **kwargs):
    '''Evolve a population under demography, mutation and selection.

    The population starts with the allele frequencies given by initSpec and
    evolves for G0 + G1 generations under the demographic model built from
    (model, N0, N1, G0, G1, m), a k-allele mutation model (parameters mu and
    k) and a multiplicative multi-locus selection model whose per-locus
    fitness values are consecutive triples of the vector fitness.

    The total disease allele frequency and the effective number of alleles
    in the population (and in all subpopulations when m > 1) are written to
    logfile / sp_logfile when those names are given. Additional keyword
    arguments are passed to the logging operators to control when and how
    often statistics are output.

    Returns a tuple holding, for every locus, 1 - frequency of allele 0,
    followed by the effective number of alleles of every locus at the end
    of the evolution.
    '''
    num_loci = len(fitness) // 3
    if not hasattr(mu, '__iter__'):
        # a single mutation rate applies to every locus
        mu = [mu] * num_loci

    # Expression that outputs f_e and ne at all loci, i.e.
    #   "%d\t%.4f\t%.4f\n" % (gen, 1-alleleFreq[x][0], ne[x])
    # repeated for every locus x.
    per_locus_values = ', '.join(
        '1-alleleFreq[%d][0], ne[%d]' % (x, x) for x in range(num_loci))
    stat_expr = ('"%d' + r'\t%.4f\t%.4f' * num_loci + r'\n" % (gen,'
                 + per_locus_values + ')')

    demo_func = demoModel(model, N0, N1, G0, G1, m)
    pop = sim.Population(size=demo_func(0),
                         loci=[1] * num_loci,
                         infoFields=['fitness', 'migrate_to'])

    # One single-locus selector per fitness triple, combined multiplicatively.
    locus_selectors = [
        sim.MaSelector(loci=i, fitness=fitness[3 * i:3 * (i + 1)])
        for i in range(num_loci)
    ]
    before_mating = [
        sim.KAlleleMutator(k=k, rates=mu, loci=range(num_loci)),
        sim.MlSelector(locus_selectors, mode=sim.MULTIPLICATIVE),
        # migration only starts after the first G0 generations
        sim.Migrator(rate=migrIslandRates(migrRate, m), begin=G0 + 1),
    ]

    wants_any_log = logfile != '' or sp_logfile != ''
    ne_vars = ['ne'] if m == 1 else ['ne', 'ne_sp']
    after_mating = [
        sim.IfElse(wants_any_log,
                   Ne(loci=sim.ALL_AVAIL, vars=ne_vars),
                   **kwargs),
        sim.IfElse(logfile != '',
                   sim.PyEval(stat_expr, output='>>' + logfile),
                   **kwargs),
        sim.IfElse(
            m > 1 and sp_logfile != '',
            sim.PyEval(
                stat_expr,
                output='>>' + sp_logfile,
                # subPops=sim.ALL_AVAIL will evaluate the expression in
                # each subpopulation's local namespace (vars(sp)).
                subPops=sim.ALL_AVAIL,
                begin=G0),
            **kwargs),
    ]

    pop.evolve(
        initOps=[sim.InitSex(), sim.InitGenotype(freq=initSpec)],
        preOps=before_mating,
        matingScheme=sim.RandomMating(subPopSize=demo_func),
        postOps=after_mating,
        finalOps=Ne(loci=sim.ALL_AVAIL),
        gen=G0 + G1)

    final_vars = pop.dvars()
    disease_freqs = [1 - final_vars.alleleFreq[x][0] for x in range(num_loci)]
    effective_alleles = [final_vars.ne[x] for x in range(num_loci)]
    return tuple(disease_freqs + effective_alleles)