def _make_pop(popsize, nloci, locus_position, id_tagger, init_geno,
              recomb_rate, generations, length, init_ts):
    """Create, tag and evolve a one-subpopulation simuPOP population.

    ``random`` is seeded with 123 before anything else.  The founders are
    tagged with ``id_tagger`` and each founder's two haploid copies,
    ``(ind_id, 0)`` and ``(ind_id, 1)``, are paired in order with
    ``init_ts.samples()`` to build the ``node_ids`` mapping handed to
    ``ftprime.RecombCollector``.  ``length`` is accepted but not used here.

    Returns:
        tuple: ``(pop, rc)`` -- the evolved population and the collector.
    """
    random.seed(123)
    pop = sim.Population(
        size=[popsize],
        loci=[nloci],
        lociPos=locus_position,
        infoFields=['ind_id'],
    )

    # The founders need ids before the collector can be constructed.
    id_tagger.apply(pop)
    haploid_labels = []
    for founder_id in pop.indInfo("ind_id"):
        haploid_labels.append((founder_id, 0))
        haploid_labels.append((founder_id, 1))
    node_ids = dict(zip(haploid_labels, init_ts.samples()))

    rc = ftprime.RecombCollector(
        ts=init_ts, node_ids=node_ids, locus_position=locus_position)
    recombinator = sim.Recombinator(
        intensity=recomb_rate, output=rc.collect_recombs, infoFields="ind_id")

    def advance_clock(pop):
        # The operator must return a truth value; ``or True`` guarantees a
        # truthy result whatever increment_time() returns.
        return rc.increment_time() or True

    initial_ops = [sim.InitSex()] + init_geno
    pop.evolve(
        initOps=initial_ops,
        preOps=[sim.PyOperator(advance_clock)],
        matingScheme=mating_scheme_factory(recombinator, popsize, id_tagger),
        postOps=[sim.PyEval(r"'Gen: %2d\n' % (gen, )", step=1)],
        gen=generations,
    )
    return pop, rc
def simulate(model, N0, N1, G0, G1, spec, s, mu, k):
    """Evolve a one-locus population under a demographic model.

    Every 100 generations the generation number, ``1 - alleleFreq[0][0]``
    and ``ne[0]`` are printed.

    Args:
        model: type of demographic model.
        N0, N1, G0, G1: parameters of the demographic model; the run lasts
            ``G0 + G1`` generations.
        spec: initial allelic spectrum, a list of allele frequencies.
        s: selection pressure (fitness ``[1, 1, 1 - s]``, wildtype 0).
        mu: mutation rate.
        k: ``k`` of the k-allele mutation model.
    """
    demo_func = demo_model(model, N0, N1, G0, G1)
    pop = sim.Population(size=demo_func(0), loci=1, infoFields='fitness')

    init_ops = [sim.InitSex(), sim.InitGenotype(freq=spec, loci=0)]
    mating = sim.RandomMating(subPopSize=demo_func)
    report = r'"%d: %.2f\t%.2f\n" % (gen, 1 - alleleFreq[0][0], ne[0])'
    post_ops = [
        sim.KAlleleMutator(k=k, rates=mu),
        sim.MaSelector(loci=0, fitness=[1, 1, 1 - s], wildtype=0),
        ne(loci=[0], step=100),
        sim.PyEval(report, step=100),
    ]
    total_generations = G0 + G1
    pop.evolve(initOps=init_ops, matingScheme=mating, postOps=post_ops,
               gen=total_generations)
def checkNumOffspring(numOffspring, ops=None):
    """Return the family sizes produced by one generation of random mating.

    A population of 30 individuals is evolved for one generation with
    ``ParentsTagger`` recording ``father_idx`` / ``mother_idx``.  Consecutive
    offspring that share the same (mother, father) pair are counted as one
    family.

    Args:
        numOffspring: passed straight to ``sim.RandomMating``.
        ops: optional extra during-mating operators appended after the
            genotype transmitter and the parents tagger.  Defaults to none.

    Returns:
        list[int]: size of each run of siblings, in population order.
    """
    # Function-scope import keeps this block self-contained.
    from itertools import groupby

    # A ``None`` default avoids sharing one mutable list between calls.
    extra_ops = [] if ops is None else list(ops)

    pop = sim.Population(size=[30], loci=1,
                         infoFields=['father_idx', 'mother_idx'])
    pop.evolve(
        initOps=[
            sim.InitSex(),
            sim.InitGenotype(freq=[0.5, 0.5]),
        ],
        matingScheme=sim.RandomMating(
            ops=[
                sim.MendelianGenoTransmitter(),
                sim.ParentsTagger(),
            ] + extra_ops,
            numOffspring=numOffspring),
        gen=1)

    # One (mother, father) pair per offspring, in population order.
    parents = zip(pop.indInfo('mother_idx'), pop.indInfo('father_idx'))
    # Individuals with identical parents are considered siblings; groupby
    # collapses each consecutive run of equal pairs into one family.
    return [sum(1 for _ in siblings) for _, siblings in groupby(parents)]
def checkSexMode(ms):
    """Run mating scheme ``ms`` for one generation on 40 individuals.

    Returns:
        str: one character per individual, ``'M'`` for males and ``'F'``
        otherwise.
    """
    pop = sim.Population(size=[40])
    pop.evolve(initOps=sim.InitSex(), matingScheme=ms, gen=1)

    letters = []
    for ind in pop.individuals():
        if ind.sex() == sim.MALE:
            letters.append('M')
        else:
            letters.append('F')
    return ''.join(letters)
def LC_evolve(popSize, alleleFreq, diseaseModel):
    """Evolve an age-structured population for 100 years and report LC rates.

    Args:
        popSize: initial population size; ``popSize / 75`` individuals are
            added to the current size every year.
        alleleFreq: one frequency per locus; locus ``i`` starts with allele
            frequencies ``[1 - f, f]``.
        diseaseModel: object providing ``initialize`` and ``updateStatus``
            callables used as ``PyOperator`` functions.
    """
    n_loci = len(alleleFreq)
    pop = sim.Population(
        size=popSize,
        loci=[1] * n_loci,
        infoFields=['age', 'smoking', 'age_death', 'age_LC', 'LC'])

    # Virtual subpopulations: three age classes, two sexes, three smoking
    # states -- in that order inside the combined splitter.
    by_age = sim.InfoSplitter(field='age', cutoff=[20, 40],
                              names=['youngster', 'adult', 'senior'])
    by_sex = sim.SexSplitter()
    by_smoking = sim.InfoSplitter(
        field='smoking', values=[0, 1, 2],
        names=['nonSmoker', 'smoker', 'formerSmoker'])
    pop.setVirtualSplitter(
        sim.CombinedSplitter(splitters=[by_age, by_sex, by_smoking]))

    init_ops = [sim.InitSex(), sim.InitInfo(range(75), infoFields='age')]
    for locus, freq in enumerate(alleleFreq):
        init_ops.append(sim.InitGenotype(freq=[1 - freq, freq], loci=locus))
    init_ops.append(sim.PyOperator(func=diseaseModel.initialize))

    pre_ops = [
        sim.InfoExec('age += 1'),
        # die of lung cancer or natural death
        sim.DiscardIf('age > age_death'),
    ]

    def next_size(pop):
        return pop.popSize() + popSize / 75

    mating = sim.HeteroMating(
        [
            sim.CloneMating(weight=-1),
            sim.RandomMating(
                ops=[
                    sim.MendelianGenoTransmitter(),
                    sim.PyOperator(func=diseaseModel.initialize),
                ],
                subPops=[(0, 'adult')]),
        ],
        subPopSize=next_size)

    report = (r"'Year %d: Overall %.2f%% M: %.2f%% F: %.2f%% "
              r"NS: %.1f%%, S: %.2f%%\n' % (gen, meanOfInfo['LC']*100, "
              r"subPop[(0,3)]['meanOfInfo']['LC']*100,"
              r"subPop[(0,4)]['meanOfInfo']['LC']*100,"
              r"subPop[(0,5)]['meanOfInfo']['LC']*100,"
              r"subPop[(0,6)]['meanOfInfo']['LC']*100)")
    post_ops = [
        # update individual status
        sim.PyOperator(func=diseaseModel.updateStatus),
        # determine if someone has LC at his or her age
        sim.InfoExec('LC = age >= age_LC'),
        # statistics about LC prevalence
        # NOTE(review): ``pop`` is passed positionally to sim.Stat -- confirm
        # this is intended.
        sim.Stat(pop, meanOfInfo='LC', subPops=[(0, sim.ALL_AVAIL)],
                 vars=['meanOfInfo', 'meanOfInfo_sp']),
        sim.PyEval(report),
    ]

    pop.evolve(initOps=init_ops, preOps=pre_ops, matingScheme=mating,
               postOps=post_ops, gen=100)
def setUp(self):
    """Prepare a 10-individual, single-locus population and its simulator."""
    population = simu.Population(size=10, loci=1, infoFields='self_gen')
    self.pop = population
    self.sim = simu.Simulator(pops=population)

    # Sexes alternate male/female; every individual starts with self_gen = 0.
    sex_init = simu.InitSex(sex=[simu.MALE, simu.FEMALE])
    info_init = simu.InitInfo(0, infoFields=['self_gen'])
    self.initOps = [sex_init, info_init]

    self.sexMode = (simu.GLOBAL_SEQUENCE_OF_SEX, simu.MALE, simu.FEMALE)
def _create_single_pop(self, pop_size, nloci):
    """Build one diploid population with ``nloci`` single-locus autosomes.

    Returns:
        tuple: ``(pop, init_ops, pre_ops, post_ops)``; ``init_ops`` holds
        only ``InitSex`` and both remaining lists are empty.
    """
    init_ops = [sp.InitSex()]
    fields = list(self._info_fields)
    pop = sp.Population(
        pop_size,
        ploidy=2,
        loci=[1] * nloci,
        chromTypes=[sp.AUTOSOME] * nloci,
        infoFields=fields,
    )
    return pop, init_ops, [], []
def _create_island(self, pop_sizes, mig, nloci):
    """Build an island-model population with symmetric migration.

    Returns:
        tuple: ``(pop, init_ops, pre_ops, post_ops)``; ``post_ops`` holds a
        single ``Migrator`` using island rates for ``len(pop_sizes)`` demes.
    """
    init_ops = [sp.InitSex()]
    fields = list(self._info_fields)
    pop = sp.Population(
        pop_sizes,
        ploidy=2,
        loci=[1] * nloci,
        chromTypes=[sp.AUTOSOME] * nloci,
        infoFields=fields,
    )
    n_demes = len(pop_sizes)
    island_rates = demography.migrIslandRates(mig, n_demes)
    post_ops = [sp.Migrator(island_rates)]
    pre_ops = []
    # NOTE(review): 'migrate_to' is registered only after the population was
    # built, so it is not among this population's info fields unless it was
    # already in self._info_fields -- confirm this ordering is intended.
    self._info_fields.add('migrate_to')
    return pop, init_ops, pre_ops, post_ops
def simulate():
    """Evolve 1000 individuals with 10 loci for 100 generations.

    Genotypes start at frequencies 0.5/0.5 and ``age`` is drawn with
    ``random.randint(0, 10)``.

    Returns:
        The frequency of allele 0 at locus 0 after the last generation.
    """
    def random_age():
        return random.randint(0, 10)

    pop = sim.Population(1000, loci=10, infoFields='age')
    init_ops = [
        sim.InitSex(),
        sim.InitGenotype(freq=[0.5, 0.5]),
        sim.InitInfo(random_age, infoFields='age'),
    ]
    pop.evolve(
        initOps=init_ops,
        matingScheme=sim.RandomMating(),
        finalOps=sim.Stat(alleleFreq=0),
        gen=100,
    )
    locus0 = pop.dvars().alleleFreq[0]
    return locus0[0]
def simulate(demo, gen):
    """Evolve a population under ``demo`` and print its size around mating.

    Each generation prints ``"<gen>: <size before> --> <size after>"``.

    Args:
        demo: demographic function; ``demo(0)`` gives the initial size and
            the function itself is used as ``subPopSize``.
        gen: number of generations to evolve.
    """
    pop = sim.Population(size=demo(0))
    before_mating = [
        sim.Stat(popSize=True),
        sim.PyEval(r"'%d: %d ' % (gen, popSize)"),
    ]
    mating = sim.RandomMating(subPopSize=demo)
    after_mating = [
        sim.Stat(popSize=True),
        sim.PyEval(r"'--> %d\n' % popSize"),
    ]
    pop.evolve(
        initOps=sim.InitSex(),
        preOps=before_mating,
        matingScheme=mating,
        postOps=after_mating,
        gen=gen,
    )
def setUp(self):
    """Prepare a population of 10 individuals with 5 loci of 2 sites each."""
    # 5 loci x 2 sites = 10 sites in total.
    self.allele_length = 2
    self.loci = 5
    n_sites = self.allele_length * self.loci

    population = simu.Population(size=10, loci=n_sites,
                                 infoFields='self_gen')
    self.pop = population
    self.sim = simu.Simulator(pops=population)

    # Sexes alternate male/female; every individual starts with self_gen = 0.
    sex_init = simu.InitSex(sex=[simu.MALE, simu.FEMALE])
    info_init = simu.InitInfo(0, infoFields=['self_gen'])
    self.initOps = [sex_init, info_init]

    self.sexMode = (simu.GLOBAL_SEQUENCE_OF_SEX, simu.MALE, simu.FEMALE)
def get_mean_r2(Ne, S, n_loci, gens, repeats, n_subpops, initial_frequencies,
                m):
    """Return the mean pairwise r^2 among segregating loci, per subpopulation.

    ``n_subpops`` subpopulations of size ``Ne`` evolve for ``gens``
    generations with migration matrix ``get_migration_matrix(m, n_subpops)``;
    ``S`` individuals are then sampled from each.  A locus counts as
    segregating when ``|0.5 - freq(allele 0)| < 0.5 - 0.05``.  ``repeats`` is
    accepted but not used here.

    Returns:
        list: one mean r^2 per subpopulation.

    Raises:
        Exception: if a subpopulation has fewer than two segregating loci.
    """
    M = get_migration_matrix(m, n_subpops)
    pop = sim.Population(size=[Ne] * n_subpops, ploidy=2,
                         loci=[1] * n_loci, infoFields='migrate_to')
    sim.initGenotype(pop, freq=initial_frequencies)
    pop.evolve(
        initOps=[sim.InitSex(), sim.InitGenotype(freq=initial_frequencies)],
        preOps=sim.Migrator(rate=M),
        matingScheme=sim.RandomMating(),
        gen=gens,
    )

    sample_pop = drawRandomSample(pop, sizes=[S] * n_subpops)
    # allele frequencies, then r2 for every pair of loci
    sim.stat(sample_pop, alleleFreq=range(0, n_loci), vars=['alleleFreq_sp'])
    locus_pairs = list(combinations(list(range(n_loci)), r=2))
    sim.stat(sample_pop, LD=locus_pairs, vars=['R2_sp'])

    r2s = []
    for sp in range(n_subpops):
        freqs = sample_pop.dvars(sp).alleleFreq
        segregating = []
        for locus in range(n_loci):
            if np.abs(.5 - freqs[locus][0]) < .5 - 0.05:
                segregating.append(locus)
        if len(segregating) < 2:
            raise Exception("<2 segregating alleles")

        # Plain accumulation keeps the floating-point result unchanged.
        total = 0
        n_pairs = 0
        for first, second in combinations(segregating, r=2):
            total += sample_pop.dvars(sp).R2[first][second]
            n_pairs += 1
        r2s.append(total / n_pairs)
    return r2s
def expansion_through_random_mating(self, pop, expanded_pop_size,
                                    recombination_rates):
    """Grow ``pop`` to ``expanded_pop_size`` in one generation of mating.

    ``HermaphroditicMating`` is used rather than ``RandomMating`` because
    self-pollination occurs frequently in maize.  Offspring are recombined
    with ``recombination_rates`` and tagged with ids and pedigree.

    Returns:
        Whatever ``pop.evolve`` returns.
    """
    offspring_ops = [
        sim.Recombinator(rates=recombination_rates),
        sim.IdTagger(),
        sim.PedigreeTagger(),
    ]
    mating = sim.HermaphroditicMating(ops=offspring_ops,
                                      subPopSize=expanded_pop_size)
    before_mating = [
        sim.PyEval(r'"Generation: %d\n" % gen'),
        sim.InfoExec('generation=gen'),
    ]
    result = pop.evolve(
        initOps=sim.InitSex(),
        preOps=before_mating,
        matingScheme=mating,
        gen=1,
    )
    return result
def createSinglePop(popSize, nLoci, startLambda=99999, lbd=1.0):
    """Build a single diploid population and its operator lists.

    Args:
        popSize: population size.
        nLoci: number of single-locus autosomes.
        startLambda: first generation at which the population is resized;
            values of 99999 or more disable resizing.
        lbd: resize proportion applied from ``startLambda`` on.

    Returns:
        tuple: ``(pop, initOps, preOps, postOps, oExpr)`` where ``oExpr`` is
        an output-path expression built from ``cfg.dataDir``,
        ``cfg.mutFreq`` and ``popSize``.
    """
    initOps = [sp.InitSex(maleFreq=cfg.maleProb)]

    preOps = []
    if startLambda < 99999:
        preOps.append(
            sp.ResizeSubPops(proportions=(float(lbd), ), begin=startLambda))
    postOps = []

    fields = ["ind_id", "father_id", "mother_id", "age", "breed", "rep_succ",
              "mate", "force_skip"]
    pop = sp.Population(popSize, ploidy=2, loci=[1] * nLoci,
                        chromTypes=[sp.AUTOSOME] * nLoci, infoFields=fields)
    for individual in pop.individuals():
        individual.breed = -1000

    # The doubled % signs survive this first formatting pass as single %.
    template = ('"%s/samp/%f/%%d/%%d/smp-%d-%%d-%%d.txt" %% ' +
                '(numIndivs, numLoci, gen, rep)')
    oExpr = template % (cfg.dataDir, cfg.mutFreq, popSize)
    return pop, initOps, preOps, postOps, oExpr
def get_FCs(Ne, S, n_loci, gens, n_subpops, initial_frequencies, m):
    """Run the allelic-fluctuation simulation and return one FC per subpopulation.

    An initial sample (``S`` per subpopulation) is compared with a sample of
    the same size drawn after ``gens`` generations of random mating with
    migration.  Loci whose initial frequency pair satisfies
    ``p**2 + q**2 == 1`` are left out of the average.

    Returns:
        list: mean ``fc_variant`` value over the retained loci, one entry
        per subpopulation.
    """
    # population to evolve (from infinite gamete pool)
    popNe = sim.Population(size=[Ne] * n_subpops, ploidy=2,
                           loci=[1] * n_loci, infoFields='migrate_to')
    # initial sample population (from infinite gamete pool)
    popS = sim.Population(size=[S] * n_subpops, ploidy=2, loci=[1] * n_loci)
    sim.initGenotype(popNe, freq=initial_frequencies)
    sim.initGenotype(popS, freq=initial_frequencies)

    # initial sample allele frequencies
    sim.stat(popS, alleleFreq=range(n_loci), vars=['alleleFreq_sp'])

    M = get_migration_matrix(m, n_subpops)
    popNe.evolve(
        initOps=[sim.InitSex()],
        preOps=sim.Migrator(rate=M),
        matingScheme=sim.RandomMating(),
        gen=gens,
    )
    sample_pop = drawRandomSample(popNe, sizes=[S] * n_subpops)
    sim.stat(sample_pop, alleleFreq=range(n_loci), vars=['alleleFreq_sp'])

    all_FCs = []
    for sp in range(n_subpops):
        start_freqs = popS.dvars(sp).alleleFreq
        end_freqs = sample_pop.dvars(sp).alleleFreq
        n_used = 0
        fc_total = 0
        for locus in range(n_loci):
            start = repair(start_freqs[locus])
            end = repair(end_freqs[locus])
            if start[0]**2 + start[1]**2 == 1:
                continue
            fc_total += fc_variant([start[0], start[1]], [end[0], end[1]])
            n_used += 1
        all_FCs.append(fc_total / n_used)
    return all_FCs
def _create_stepping_stone(self, pop_sizes, mig, nloci):
    """Build a 1-D or 2-D stepping-stone population.

    ``pop_sizes`` is a list of rows.  A single row gives a 1-D stepping
    stone; several rows are flattened row by row and use 2-D rates of shape
    ``len(pop_sizes)`` x ``len(pop_sizes[0])``.

    Returns:
        tuple: ``(pop, init_ops, pre_ops, post_ops)``.
    """
    n_rows = len(pop_sizes)
    if n_rows == 1:
        flat_pop_sizes = pop_sizes[0]
        rates = demography.migrSteppingStoneRates(mig, len(flat_pop_sizes))
    else:
        flat_pop_sizes = []
        for row in pop_sizes:
            flat_pop_sizes.extend(row)
        rates = demography.migr2DSteppingStoneRates(mig, n_rows,
                                                    len(pop_sizes[0]))
    post_ops = [sp.Migrator(rates)]

    init_ops = [sp.InitSex()]
    fields = list(self._info_fields)
    pop = sp.Population(
        flat_pop_sizes,
        ploidy=2,
        loci=[1] * nloci,
        chromTypes=[sp.AUTOSOME] * nloci,
        infoFields=fields,
    )
    pre_ops = []
    # NOTE(review): 'migrate_to' is registered only after the population was
    # built, so it is not among this population's info fields unless it was
    # already in self._info_fields -- confirm this ordering is intended.
    self._info_fields.add('migrate_to')
    return pop, init_ops, pre_ops, post_ops
def population_structure_guided_expansion(self, pop, recombination_rates):
    """Expand ``pop`` for one generation using a structure-aware parent chooser.

    Parents are chosen by
    ``breed.ForcedPopulationStructureParentChooser``, constructed with
    ``self.population_size``; offspring are id- and pedigree-tagged and
    recombined with ``recombination_rates``.

    Returns:
        Whatever ``pop.evolve`` returns.
    """
    ps_pc = breed.ForcedPopulationStructureParentChooser(self.population_size)
    print(
        "Executing population expansion using estimated population structure.")

    chooser = sim.PyParentsChooser(ps_pc.forced_structure_parent_chooser)
    offspring = sim.OffspringGenerator(ops=[
        sim.IdTagger(),
        sim.PedigreeTagger(),
        sim.Recombinator(rates=recombination_rates),
    ])
    mating = sim.HomoMating(chooser, offspring,
                            subPopSize=self.population_size)
    outcome = pop.evolve(
        initOps=sim.InitSex(),
        preOps=[sim.InfoExec('generation=gen')],
        matingScheme=mating,
        gen=1,
    )
    return outcome
global counter for line in mutants.split('\n'): # a trailing \n will lead to an empty string if not line: continue (gen, loc, ploidy, a1, a2, id) = line.split('\t') counter[int(loc)] += 1 pop = sim.Population( [5000] * 3, loci=[2, 1, 1], infoFields='ind_id', chromTypes=[sim.AUTOSOME, sim.CHROMOSOME_X, sim.CHROMOSOME_Y]) pop.evolve( initOps=[ sim.InitSex(), sim.InitGenotype(freq=[0.5, 0.5]), sim.IdTagger(), ], preOps=[ sim.KAlleleMutator(rates=[0.001] + [0.01] * 3, loci=range(4), k=100, output=countMutants), ], matingScheme=sim.RandomMating( ops=[sim.IdTagger(), sim.MendelianGenoTransmitter()]), gen=10) print(counter.items())
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This script is an example in the simuPOP user's guide. Please refer to
# the user's guide (http://simupop.sourceforge.net/manual) for a detailed
# description of this example.
#
import simuPOP as sim

# One locus under multi-allelic selection; the frequency of allele 0 is
# printed every 100 generations.
pop = sim.Population(size=1000, loci=1, infoFields='fitness')
s1 = .1
s2 = .2

init_ops = [sim.InitSex(), sim.InitGenotype(freq=[.2] * 5)]
selector = sim.MaSelector(loci=0, fitness=[1 - s1, 1, 1 - s2])
mating = sim.RandomMating()
reporting = [
    sim.Stat(alleleFreq=0),
    sim.PyEval(r"'%.4f\n' % alleleFreq[0][0]", step=100),
]
pop.evolve(
    initOps=init_ops,
    preOps=selector,
    matingScheme=mating,
    postOps=reporting,
    gen=301,
)
# BB Bb bb # AA 1 1 1 # Aa 1 1-s1 1-s2 # aa 1 1 1-s2 # # geno is (A1 A2 B1 B2) if geno[0] + geno[1] == 1 and geno[2] + geno[3] == 1: v = 1 - s1 # case of AaBb elif geno[2] + geno[3] == 2: v = 1 - s2 # case of ??bb else: v = 1 # other cases if smoking: return v * 0.9 else: return v pop.evolve( initOps=[sim.InitSex(), sim.InitGenotype(freq=[.5, .5])], preOps=sim.PySelector(loci=[0, 1], func=sel), matingScheme=sim.RandomMating(), postOps=[ # set smoking status randomly sim.InitInfo(lambda: random.randint(0, 1), infoFields='smoking'), sim.Stat(alleleFreq=[0, 1], step=20), sim.PyEval(r"'%.4f\t%.4f\n' % (alleleFreq[0][1], alleleFreq[1][1])", step=20) ], gen=50)
def __call__(self, pop):
    """Draw a family size for every male and return the resulting total.

    Defining ``__call__`` makes a RandomNumOff object callable.  Each male
    produces 1 to 3 offspring; the draws are stored in ``self.numOff`` and
    their sum is the next population size.
    """
    # For a large population, get the number of males instead of checking
    # the sex of each individual.
    draws = []
    for ind in pop.individuals():
        if ind.sex() == sim.MALE:
            draws.append(random.randint(1, 3))
    self.numOff = draws
    print('{} mating events with number of offspring {}'.format(
        len(self.numOff), self.numOff))
    # the total population size
    return sum(self.numOff)

pop = sim.Population(10)
# create a demographic model
numOffModel = RandomNumOff()
# The model is called before mating to determine family and population
# size; its getNumOff generator then supplies the number of offspring for
# each mating event.
mating = sim.RandomMating(
    subPopSize=numOffModel,
    numOffspring=numOffModel.getNumOff)
pop.evolve(preOps=sim.InitSex(), matingScheme=mating, gen=3)
def get_mean_r2():
    """Estimate Ne from linkage disequilibrium for every migration rate and plot it.

    Reads the module-level settings ``ms``, ``n_subpops``, ``repeats``,
    ``Ne``, ``n_loci``, ``gens`` and ``S``.  For each migration rate ``m``
    in ``ms`` and each repeat, a structured population is evolved, ``S``
    individuals are sampled per subpopulation, and the mean pairwise r^2
    over segregating loci is corrected for sample size and converted to
    ``Ne_est = 1 / (3 * r2_drift)``.  The mean estimate per migration rate
    is shown as a scatter plot with a dashed reference line at 100.

    Subpopulations with fewer than two segregating loci contribute no
    estimate.  Nothing is returned; the function prints the repeat counter
    and shows a plot.
    """
    full_estimates = {}
    for m in ms:
        # Spread the migration rate m evenly over the other subpopulations.
        m_adj = m / (n_subpops - 1)
        M = np.full((n_subpops, n_subpops), m_adj)
        np.fill_diagonal(M, 0)
        M = M.tolist()

        estimates = []
        for r in range(repeats):
            print(r + 1)
            # set up population
            pop = sim.Population(size=[Ne] * n_subpops, ploidy=2,
                                 loci=[1] * n_loci, infoFields='migrate_to')
            # evolve population
            pop.evolve(
                initOps=[sim.InitSex(), sim.InitGenotype(freq=[0.5, 0.5])],
                preOps=sim.Migrator(rate=M),
                matingScheme=sim.RandomMating(),
                gen=gens)
            # take sample of size S
            sample_pop = drawRandomSample(pop, sizes=[S] * n_subpops)
            # get allele frequency
            sim.stat(sample_pop, alleleFreq=range(0, n_loci),
                     vars=['alleleFreq_sp'])
            # calculate r2 values
            sim.stat(sample_pop,
                     LD=list(combinations(list(range(n_loci)), r=2)),
                     vars=['R2_sp'])

            estimates.append([])
            for sp in range(n_subpops):
                allele_freqs = sample_pop.dvars(sp).alleleFreq
                # a locus is segregating when both alleles exceed 4%
                seg_alleles = [
                    k for k in allele_freqs.keys()
                    if (allele_freqs[k][0] > 0.04)
                    and (allele_freqs[k][1] > 0.04)
                ]
                # r2 needs at least two segregating loci
                if len(seg_alleles) < 2:
                    continue

                # mean r2 over all pairs of segregating loci
                r2_total = 0
                count = 0
                for pairs in combinations(seg_alleles, r=2):
                    r2_total += sample_pop.dvars(sp).R2[pairs[0]][pairs[1]]
                    count += 1
                mean_r2 = r2_total / count

                # correct r2 for sample size
                r2_drift = (mean_r2 - 1 / (2 * S)) / (1 - 1 / (2 * S))
                # Ne estimate
                estimates[-1].append(1 / (3 * r2_drift))
        full_estimates[m] = estimates

    # Repeats may hold different numbers of estimates (subpopulations can be
    # skipped above).  Averaging the flattened values gives the same result
    # as before for rectangular data and still works for ragged data.
    means = [
        np.mean([est for rep in full_estimates[m] for est in rep])
        for m in ms
    ]

    plt.scatter(ms, means, edgecolors='black', color='white')
    plt.plot([min(ms), max(ms)], [100, 100], 'k--')
    plt.xscale('log')
    plt.xticks(ticks=ms, labels=ms)
    plt.ylim(50, 150)
    plt.xlim(min(ms) * 0.95, max(ms) * 1.05)
    plt.show()
def simulation(self):
    """Build the two-country (Croatia/Slovenia) bear population and evolve it.

    The population is stored on ``self.pop`` and evolved for
    ``self.generations`` generations.  Reads ``self.cro_to_slo``,
    ``self.slo_to_cro``, ``self.bearFather``, ``self.bearMother``,
    ``self.me``, ``self.Ne``, ``self.x``, ``self.slo_cull`` and
    ``self.cro_cull``.
    """
    info_fields = ["age", 'ind_id', 'father_idx', 'mother_idx', "hc", "ywc",
                   'migrate_to']
    self.pop = sim.Population(size=[500, 500], loci=[1]*20,
                              infoFields=info_fields,
                              subPopNames=["croatia", "slovenia"])

    ages = list(map(int, np.random.negative_binomial(n=1, p=0.25, size=500)))
    sim.initInfo(pop=self.pop, values=ages, infoFields="age")

    # Age groups: from 0 to 1 - cubs, from 1 to 3 - prereproductive,
    # from 3 to 6 - reproductive class, from 6 to 10 - dominant
    sex_by_age = sim.ProductSplitter([
        sim.SexSplitter(),
        sim.InfoSplitter(field="age", cutoff=[1, 3, 6, 10]),
    ])
    self.pop.setVirtualSplitter(sim.CombinedSplitter(
        [sex_by_age],
        vspMap=[[0, 1], [2], [3], [4], [5, 6, 7, 8], [9]]))

    init_ops = [
        sim.InitSex(),
        # random genotype
        sim.InitGenotype(freq=[0.01]*2 + [0.03]*2 + [0.23]*4),
        # assign an unique ID to everyone.
        sim.IdTagger(),
    ]

    ne_groups = [(0, 1), (0, 2), (0, 4), (1, 1), (1, 2), (1, 4)]
    pre_ops = [
        # increase the age of everyone by 1 before mating.
        sim.InfoExec('age += 1'),
        # Mother bear can't have cubs for two years after pregnancy
        sim.InfoExec("hc +=1 if 0 < hc < 3 else 0"),
        # reproductive males migrate from Cro to Slo
        sim.Migrator(rate=[[self.cro_to_slo]], mode=sim.BY_PROPORTION,
                     subPops=[(0, 0)], toSubPops=[1]),
        sim.Migrator(rate=[[self.slo_to_cro]], mode=sim.BY_PROPORTION,
                     subPops=[(1, 0)], toSubPops=[0]),
        sim.Stat(effectiveSize=sim.ALL_AVAIL, subPops=list(ne_groups),
                 vars='Ne_demo_base'),
        sim.Stat(effectiveSize=sim.ALL_AVAIL, subPops=list(ne_groups),
                 vars='Ne_demo_base_sp'),
    ]

    parents = sim.CombinedParentsChooser(
        fatherChooser=sim.PyParentsChooser(generator=self.bearFather),
        motherChooser=sim.PyParentsChooser(generator=self.bearMother))
    # The age of offspring will be zero.
    cubs = sim.OffspringGenerator(
        ops=[
            sim.InfoExec("age = 0"),
            sim.IdTagger(),
            sim.ParentsTagger(),
            sim.MendelianGenoTransmitter(),
        ],
        numOffspring=(sim.UNIFORM_DISTRIBUTION, 1, 3))
    breeding = sim.HomoMating(subPops=sim.ALL_AVAIL, chooser=parents,
                              generator=cubs)
    # CloneMating will keep individual sex and all information fields
    # (by default).
    survivors = sim.CloneMating(
        subPops=[(0, 0), (0, 1), (0, 2), (0, 4),
                 (1, 0), (1, 1), (1, 2), (1, 4)],
        weight=-1)
    mating = sim.HeteroMating([breeding, survivors],
                              subPopSize=popmodel.demoModel)

    # Ne bookkeeping only starts after the first 20% of the run.
    ne_start = int(0.2*self.generations)
    post_ops = [
        sim.PyOperator(func=popmodel.CalcNe,
                       param={"me": self.me, "Ne": self.Ne}, begin=ne_start),
        sim.PyOperator(func=popmodel.CalcLDNe,
                       param={"me": self.me, "x": self.x}, begin=ne_start),
        sim.PyOperator(func=popmodel.cullCountry,
                       param={"slo_cull": self.slo_cull,
                              "cro_cull": self.cro_cull}),
    ]

    self.pop.evolve(initOps=init_ops, preOps=pre_ops, matingScheme=mating,
                    postOps=post_ops, gen=self.generations)
def main():
    """Simulate a quantitative trait and write phenotypes and genotypes to disk.

    Python 2 script entry point.  Steps, in order:

    1. Parse command-line options with ``getopt``.
    2. Evolve a simuPOP population and collect each individual's ``qtrait``.
    3. Fit a cubic polynomial to ``fun(sigma, heritability)`` and locate a
       root with Newton's method to obtain the noise parameter.
    4. Draw noisy phenotypes (normal or gamma) and write
       ``<filename>_qtrait.txt``, ``<filename>_kt_ote2.txt`` and PED/MAP
       genome files.

    Sets the module-level globals ``loci`` and ``effects``.
    """
    # Check for arguments passed
    # NOTE(review): getopt short options are single characters, so "p1:" and
    # "p2:" in shortopts do not declare "-p1"/"-p2" -- confirm how these
    # options are expected to be passed.
    try:
        opts, args = getopt.getopt(sys.argv[1:],
                                   shortopts="vhd:p1:p2:s:n:l:e:f:i:m:g:r:",
                                   longopts=[
                                       "verbose", "help", "distribution=",
                                       "parameter1=", "parameter2=", "size=",
                                       "number=", "loci=", "effect=", "mean=",
                                       "filename=", "heritability=", "gen=",
                                       "rrate="
                                   ])
    except getopt.GetoptError as err:
        print(err)
        usage()
        sys.exit()

    # Defaults, used when the matching option is absent.
    verbose = False
    filename = "my"
    size = 1000
    number = 100
    heritability = 0.2
    mean = 2.0
    gen = 5
    rrate = 0.0
    print "\n"

    # First pass: verbosity only, so later passes can report what they parse.
    for o in opts:
        if o[0] in ("-v", "--verbose"):
            verbose = True
            print("Verbose mode")

    # Second pass: distribution (0 = normal, 1 = gamma) and its parameters.
    # ``distribution`` stays undefined when -d/--distribution is not given.
    for o in opts:
        if o[0] in ("-d", "--distribution"):
            distribution = float(o[1])
            if distribution == 0:
                parameter1 = None
                parameter2 = None
                if verbose:
                    print "Simulation will occur with Normal Distribution"
            elif distribution == 1:
                if verbose:
                    print "Simulation will occur with Gamma Distribution"
                for o in opts:
                    if o[0] in ("-p1", "--parameter1"):
                        parameter1 = float(o[1])
                        if verbose:
                            print "Gamma distribution will occur with alpha:", parameter1
                for o in opts:
                    if o[0] in ("-p2", "--parameter2"):
                        parameter2 = float(o[1])
                        if verbose:
                            print "Gamma distribution will occur with beta:", parameter2
            elif distribution != 0 or distribution != 1:
                sys.exit(
                    "Error message: Distribution option must be either 0 or 1")

    # ``bbeta`` is assigned here but not read again in this function.
    for o in opts:
        if o[0] in ("-p2", "--parameter2"):
            bbeta = float(o[1])
            if verbose:
                print "Gamma distribution will occur with beta:", bbeta

    # Population sizes are parsed before the main pass because the --mean
    # branch below reads ``individuals``.
    for o in opts:
        if o[0] in ("-s", "--size"):
            individuals = o[1].split(",")
            individuals = map(int, individuals)
            if verbose:
                print "Population size/s is set at", individuals

    # Main pass over the remaining options.
    for o in opts:
        if o[0] in ("-h", "--help"):
            usage()
            sys.exit()
        elif o[0] in ("-n", "--number"):
            number = o[1]
            if verbose:
                print "Number of loci per individual is set at", number
        elif o[0] in ("-l", "--loci"):
            global loci
            loci = o[1].split(",")
            loci = map(int, loci)
            if verbose:
                print "Loci positions per individual are:", loci
        elif o[0] in ("-e", "--effect"):
            global effects
            effects = o[1].split(",")
            effects = map(float, effects)
            if verbose:
                print "Effects for loci per individual are:", effects
        elif o[0] in ("-f", "--filename"):
            filename = o[1]
            if verbose:
                print "File will be saved as:", filename
        elif o[0] in ("-i", "--heritability"):
            heritability = float(o[1])
            if verbose:
                print "Heritability for simulation specified as:", heritability
        elif o[0] in ("-m", "--mean"):
            mean = o[1].split(",")
            mean = map(float, mean)
            # A single mean is repeated once per sub-population.
            if len(mean) == 1 and len(individuals) > 1:
                mean = numpy.array(mean)
                mean = numpy.repeat(mean, len(individuals), axis=0)
                mean = list(mean)
            if verbose:
                print "Population mean/s specified as:", mean
        elif o[0] in ("-g", "--gen"):
            gen = int(o[1])
            if verbose:
                print "Generations to evolve specified as:", gen
        elif o[0] in ("-r", "--rrate"):
            rrate = float(o[1])
            if verbose:
                print "Recombination will occur with rate:", rrate

    ## Start quantitative trait simulation
    if verbose:
        print "Creating population..."

    # NOTE(review): ``individuals`` is only bound when -s/--size was given.
    pop = sim.Population(size=individuals,
                         loci=int(number),
                         infoFields=["qtrait"])

    if verbose:
        print "Evolving population..."

    pop.evolve(
        initOps=[sim.InitSex(), sim.InitGenotype(prop=[0.7, 0.3])],
        matingScheme=sim.RandomMating(ops=sim.Recombinator(rates=rrate)),
        postOps=[
            sim.PyQuanTrait(loci=loci, func=additive_model, infoFields=["qtrait"])
        ],
        gen=gen)

    if verbose:
        print "Coalescent process complete. Population evolved with", pop.numSubPop(
        ), "sub-populations."

    genotypes = list()
    for i in pop.individuals():
        genotypes.append(i.genotype())

    phenotypes = list()
    for i in pop.individuals():
        phenotypes.append(i.qtrait)

    # fun() obtains the heritability equation set to zero for various settings of sigma (standard deviation)
    #NOTE: May need to tweak gamma distribution parameters to be appropriate for data!
    def fun(sigma, h):
        # Returns corr(exact, noisy) - sqrt(h); a root in sigma is the noise
        # level at which the correlation equals sqrt(h).
        # NOTE(review): ``mean`` is indexed by sub-population, which needs
        # the list built by -m/--mean; the default 2.0 is not indexable.
        x_exact = list()
        count = 0
        for i in phenotypes:
            current_mean = mean[pop.subPopIndPair(count)[0]]
            x_exact.append(current_mean + i)
            count += 1
        x_random = list()
        #bbeta=((sigma**2)/current_mean) #Set up approximate beta variable for gamma distribution
        count = 0
        for each in phenotypes:
            current_mean = mean[pop.subPopIndPair(count)[0]]
            x_random.append(random.normalvariate(current_mean + each, sigma))
            count += 1
        r = pearsonr(x_exact, x_random)[0]
        return r - math.sqrt(h)

    if verbose:
        print "Building polynomial model for variance tuning..."

    # Fits a polynomial model in numpy to the values obtained from the fun() function
    points = list()
    for i in drange(0, max(effects) * 10, 0.001):
        points.append(i)
    y_points = list()
    for i in points:
        y_points.append(fun(i, heritability))
    z = numpy.polyfit(x=points, y=y_points, deg=3)
    p = numpy.poly1d(z)

    # Newton's method finds the polynomial model's roots
    def newton(p):
        # Iterates from xn = 100 until |p(xn)| <= 0.01; exits the program
        # once the iteration counter exceeds 1000.
        xn = 100
        p_d = p.deriv()
        count = 0
        while abs(p(xn)) > 0.01:
            if count > 1000:
                print "Unable to converge after 1000 iterations...\nPlease choose different settings."
                usage()
                sys.exit()
            count += 1
            xn = xn - p(xn) / p_d(xn)
        # NOTE(review): the position of this clamp (after the loop rather
        # than inside it) was reconstructed -- confirm against the original.
        if xn < 0.0:
            xn = 0.0
        if verbose:
            print "Estimated variance of phenotypes for specified heriability: ", xn
        return xn

    if verbose:
        print "Using Newton's method to find polynomial roots..."

    # Files are saved to the specified location
    estimated_variance = newton(p)
    new_phenotypes = list()
    count = 0
    # Noisy phenotypes are only generated when -d/--distribution was given.
    for o in opts:
        if o[0] in ("-d", "--distribution"):
            if distribution == 0:
                for each in phenotypes:
                    current_mean = mean[pop.subPopIndPair(count)[0]]
                    new_phenotypes.append(
                        random.normalvariate(current_mean + each,
                                             estimated_variance))
                    count += 1
            elif distribution == 1:
                for each in phenotypes:
                    current_mean = mean[pop.subPopIndPair(count)[0]]
                    new_phenotypes.append(
                        random.gammavariate(
                            (current_mean + each) / parameter2,
                            ((estimated_variance / parameter1)**0.5)))
                    count += 1

    f = open(filename + "_qtrait.txt", "w")
    f.write("\n".join(map(lambda x: str(x), new_phenotypes)))
    f.close()

    numpy.savetxt(filename + "_kt_ote2.txt",
                  numpy.column_stack((loci, numpy.array(effects))))

    export(pop, format='ped', output=filename + '_genomes.ped')
    export(pop, format='map', output=filename + '_genomes.map')
    print "\n\n"
def main():
    """Simulate a quantitative trait and save phenotypes and genotypes.

    Python 2 script entry point.  Parses options with ``getopt``, evolves a
    simuPOP population for 5 generations, tunes a noise parameter so that
    the correlation between exact and noisy traits matches
    ``sqrt(heritability)``, then writes ``<filename>_qtrait.txt`` and
    ``<filename>_genomes.csv``.

    Sets the module-level globals ``loci`` and ``effects``.
    """
    ## Check for arguments passed
    try:
        opts, args = getopt.getopt(sys.argv[1:],
                                   shortopts="vhs:n:l:e:f:i:",
                                   longopts=[
                                       "verbose", "help", "size=", "number=",
                                       "loci=", "effect=", "filename=",
                                       "herit="
                                   ])
    except getopt.GetoptError as err:
        print(err)
        usage()
        sys.exit()

    verbose = False
    has_filename = False
    print "\n"

    # First pass: verbosity only, so the second pass can report its values.
    for o in opts:
        if o[0] in ("-v", "--verbose"):
            verbose = True
            print("Verbose mode")

    # Second pass: everything else.  ``individuals``, ``number`` and
    # ``heritability`` have no defaults and stay unbound when their option
    # is missing.
    for o in opts:
        if o[0] in ("-h", "--help"):
            usage()
            sys.exit()
        elif o[0] in ("-s", "--size"):
            individuals = o[1]
            if verbose:
                print "Population size is set at", individuals
        elif o[0] in ("-n", "--number"):
            number = o[1]
            if verbose:
                print "Number of loci per individual is set at", number
        elif o[0] in ("-l", "--loci"):
            global loci
            loci = o[1].split(",")
            loci = map(int, loci)
            if verbose:
                print "Loci positions per individual are:", loci
        elif o[0] in ("-e", "--effect"):
            global effects
            effects = o[1].split(",")
            effects = map(float, effects)
            if verbose:
                print "Effects for loci per individual are:", effects
        elif o[0] in ("-f", "--filename"):
            filename = o[1]
            has_filename = True
            if verbose:
                print "File will be saved as:", filename
        elif o[0] in ("-i", "--herit"):
            heritability = float(o[1])
            # ``has_heritability`` is set but not read in this function.
            has_heritability = True
            if verbose:
                print "Heritability for simulation specified as:", heritability

    ## Start quantitative trait simulation
    if verbose:
        print "Creating population..."

    pop = sim.Population(size=int(individuals),
                         loci=int(number),
                         infoFields=["qtrait"])

    if verbose:
        print "Evolving population..."

    pop.evolve(
        initOps=[sim.InitSex(), sim.InitGenotype(prop=[0.7, 0.3])],
        matingScheme=sim.RandomMating(),
        #postOps=[sim.PyQuanTrait(loci=loci, func=trait, infoFields=["qtrait"])],
        gen=5)

    if verbose:
        print "Population evolved."

    genotypes = list()
    for i in pop.individuals():
        genotypes.append(i.genotype())
        #print i.genotype()

    def fun(sigma2, h):
        # Returns corr(exact traits, noisy traits) - sqrt(h) for the given
        # noise parameter.
        exact_traits = list()
        for i in genotypes:
            exact_traits.append(exact_trait(i))
        prob_traits = list()
        for i in genotypes:
            prob_traits.append(prob_trait(i, sigma2))
        r = pearsonr(exact_traits, prob_traits)[0]
        return r - math.sqrt(h)

    def newton(p):
        # Newton iteration from xn = 100 until |p(xn)| <= 0.01; exits the
        # program once the iteration counter exceeds 1000.
        xn = 100
        p_d = p.deriv()
        count = 0
        while abs(p(xn)) > 0.01:
            if count > 1000:
                print "Unable to converge after 1000 iterations..."
                sys.exit()
            count += 1
            xn = xn - p(xn) / p_d(xn)
        if verbose:
            print "Estimated variance using Newton's method for solving roots is: ", xn
        return xn

    ## Make sure to change to 100 points around the average "effects"
    my_points = list()
    for i in range(100):
        my_points.append(fun(i, heritability))

    # NOTE(review): the fit uses fun() values as x and sigma2 (0..99) as y,
    # so ``p`` maps a fun() value to sigma2 -- confirm that searching for a
    # root of ``p`` is what is intended.
    z = numpy.polyfit(x=my_points, y=range(100), deg=2)
    #print z
    p = numpy.poly1d(z)

    if verbose:
        print "Polynomial fit for finding tuning parameter to match heritability: \n", p

    estimated_variance = newton(p)
    #print estimated_variance

    phenotypes = list()
    for i in pop.individuals():
        phenotypes.append(prob_trait(i.genotype(), estimated_variance))
    #print phenotypes

    # Fall back to the default file prefix when -f/--filename was not given.
    if has_filename is False:
        filename = "my"

    f = open(filename + "_qtrait.txt", "w")
    f.write("\n".join(map(lambda x: str(x), phenotypes)))
    f.close()

    saveCSV(pop, filename + "_genomes.csv")
    print "\n\n"
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This script is an example in the simuPOP user's guide. Please refer to
# the user's guide (http://simupop.sourceforge.net/manual) for a detailed
# description of this example.
#
import simuPOP as sim
import random


def demo(pop):
    """Return next-generation sizes: each subpopulation grows by 50 to 100."""
    grown = []
    for current in pop.subPopSizes():
        grown.append(current + random.randint(50, 100))
    return grown


pop = sim.Population(size=[500, 1000], infoFields='migrate_to')
# Print the subpopulation sizes after every generation.
size_report = [
    sim.Stat(popSize=True),
    sim.PyEval(r'"%s\n" % subPopSize'),
]
pop.evolve(
    initOps=sim.InitSex(),
    matingScheme=sim.RandomMating(subPopSize=demo),
    postOps=size_report,
    gen=3,
)
import simuPOP as sim
from simuPOP.utils import migrIslandRates

# Three subpopulations start at different allele proportions; only the first
# replicate (reps=0) receives island-model migration.  F_st and the
# per-subpopulation allele frequencies are printed every 50 generations.
p = [0.2, 0.3, 0.5]
pop = sim.Population(size=[10000] * 3, loci=1, infoFields='migrate_to')
simu = sim.Simulator(pop, rep=2)

init_ops = [sim.InitSex()]
for i in range(3):
    init_ops.append(sim.InitGenotype(prop=[p[i], 1 - p[i]], subPops=i))

migration = sim.Migrator(rate=migrIslandRates(0.01, 3), reps=0)
mating = sim.RandomMating()
fst_report = ("'Fst=%.3f (%s)\t' % (F_st, ', '.join(['%.2f' % "
              "subPop[x]['alleleFreq'][0][0] for x in range(3)]))")
post_ops = [
    sim.Stat(alleleFreq=0, structure=0, vars='alleleFreq_sp', step=50),
    sim.PyEval(fst_report, step=50),
    # the newline is emitted once, after the last replicate
    sim.PyOutput('\n', reps=-1, step=50),
]
simu.evolve(
    initOps=init_ops,
    preOps=migration,
    matingScheme=mating,
    postOps=post_ops,
    gen=201,
)
def _read_float_rows(path):
    """Read a whitespace-separated table of floats, one list per non-blank line."""
    with open(path, "r") as fd:
        # str.split() with no argument already discards the line terminator,
        # so the last value is kept intact even when the file does not end
        # with a newline. Blank lines are skipped instead of producing empty
        # rows.
        return [[float(field) for field in line.split()]
                for line in fd if line.strip()]


def runSimulation(scenario_id, sub_population_size, minMatingAge,
                  maxMatingAge, gen):
    '''Run an age-structured simulation and write a sample of the result.

    scenario_id           Identifies the batch of input files to load. The
                          mitochondrial haplotype frequencies are read from
                          mtdna_<scenario_id>.txt and the SNP frequencies
                          from snp_<scenario_id>.txt.
    sub_population_size   A vector giving the population sizes for each
                          sub-population. The subpopulations determine which
                          breeding ground an individual belongs to.
    minMatingAge          Minimal mating age.
    maxMatingAge          Maximal mating age. Individuals older than this are
                          effectively dead.
    gen                   Number of generations to evolve. Age is increased
                          by one per generation, so this is the number of
                          years to simulate.

    Writes freq.txt, mixfile.txt and haploiso.txt in the current directory
    and returns the sampled population.

    Raises ValueError when the haplotype file does not describe the same
    number of populations as sub_population_size.
    '''
    # Read the mitochondrial haplotype frequencies. The file has one row per
    # haplotype and one column per population; we want the transpose, where
    # haplotype_frequencies[0] holds all the frequencies for population 0,
    # haplotype_frequencies[1] those for population 1, and so on.
    mitochondrial_file = "mtdna_" + scenario_id + ".txt"
    haplotype_frequencies = [
        list(column) for column in zip(*_read_float_rows(mitochondrial_file))
    ]

    if len(haplotype_frequencies) != len(sub_population_size):
        raise ValueError(
            'The number of populations in the population size vector and the number of populations deduced from the haplotype file are different'
        )

    # Now read the SNP data. This builds a 2D array indexed as
    # snp[locus][population].
    snp_file = "snp_" + scenario_id + ".txt"
    snp = _read_float_rows(snp_file)

    sub_population_count = len(sub_population_size)
    print()
    print(sub_population_count, "subpopulations detected")

    # Now we can create the population. We want to give each population a
    # population name, starting from A.
    sub_population_names = list(map(chr, range(65, 65 + sub_population_count)))

    # We have two chromosomes. The first is an autosome with nb_loci loci,
    # and the second is the mitochondrial chromosome with 1 locus.
    pop = simuPOP.Population(
        sub_population_size,
        ploidy=2,
        loci=[nb_loci, 1],
        ancGen=2,
        infoFields=[
            'age', 'ind_id', 'father_id', 'mother_id', 'nitrogen', 'carbon',
            'feeding_ground', 'native_breeding_ground', 'migrate_to'
        ],
        subPopNames=sub_population_names,
        chromTypes=[simuPOP.AUTOSOME, simuPOP.MITOCHONDRIAL])
    sub_population_names = tuple(sub_population_names)

    # sub_population_size is a vector (one size per population); sum it to
    # get the total number of individuals.
    individual_count = sum(sub_population_size)

    # Assign a random age between 0 and maxMatingAge to each individual.
    pop.setIndInfo(
        [random.randint(0, maxMatingAge) for _ in range(individual_count)],
        'age')
    # Assign a random feeding ground to each individual.
    pop.setIndInfo(
        [
            random.randint(0, numberOfFeedingGrounds - 1)
            for _ in range(individual_count)
        ],
        'feeding_ground')

    # Virtual subpopulations. The product of sex and age class gives VSPs
    # 0-5; the vspMap then adds two combined VSPs:
    #   age < minMatingAge                    (juvenile)
    #   minMatingAge <= age <= maxMatingAge   (mature; the cutoff is
    #                                          maxMatingAge + 0.1)
    #   age > maxMatingAge                    (dead)
    #   6: 'Not dead yet' = juvenile + mature of both sexes
    #   7: 'Active'       = mature of both sexes
    #
    # Ideally we would want something like this:
    # 1) Immature
    # 2) Receptive female (every 3 years)
    # 3) Non-receptive female
    # 4) Mature male
    # 5) Dead
    #
    # Note that we use a cutoff InfoSplitter here, it is also possible to
    # provide a list of values, each corresponding to a virtual subpopulation.
    pop.setVirtualSplitter(
        simuPOP.CombinedSplitter(
            [
                simuPOP.ProductSplitter([
                    simuPOP.SexSplitter(),
                    simuPOP.InfoSplitter(
                        'age',
                        cutoff=[minMatingAge, maxMatingAge + 0.1],
                        names=['juvenile', 'mature', 'dead'])
                ])
            ],
            vspMap=[[0], [1], [2], [3], [4], [5], [0, 1, 3, 4], [1, 4]],
            names=[
                'Juvenile Male', 'Mature Male', 'Dead Male',
                'Juvenile Female', 'Mature Female', 'Dead Female',
                'Not dead yet', 'Active'
            ]))

    # (subpopulation, VSP) pairs taking part in each mating scheme.
    not_dead_vsps = [(sp, 6) for sp in range(sub_population_count)]
    active_vsps = [(sp, 7) for sp in range(sub_population_count)]

    pop.evolve(
        initOps=[
            simuPOP.InitSex(),
            simuPOP.IdTagger(),
            simuPOP.PyOperator(func=init_native_breeding_grounds)
        ] + [
            # Mitochondrial locus (index nb_loci), per subpopulation.
            simuPOP.InitGenotype(subPops=sub_population_names[i],
                                 freq=haplotype_frequencies[i],
                                 loci=[nb_loci])
            for i in range(sub_population_count)
        ] + [
            # NOTE(review): this initialises loci 0 .. nb_loci - 2 only; the
            # last autosomal locus (nb_loci - 1) is left untouched. Confirm
            # against the SNP file layout whether that is intended.
            simuPOP.InitGenotype(subPops=sub_population_names[i],
                                 freq=[snp[n][i], 1 - snp[n][i]],
                                 loci=[n])
            for i in range(sub_population_count)
            for n in range(nb_loci - 1)
        ],
        # increase age by 1
        preOps=[simuPOP.InfoExec('age += 1')],
        matingScheme=simuPOP.HeteroMating(
            [
                # age <= maxAge, copy to the next generation (weight=-1).
                # subPops is a list of (subPopulation, virtualSubPopulation)
                # pairs that will participate in mating. First, we propagate
                # (clone) all individuals in all subpopulations, except the
                # ones who are now in the VSP of deceased individuals, to
                # the next generation.
                simuPOP.CloneMating(
                    ops=[simuPOP.CloneGenoTransmitter(chroms=[0, 1])],
                    subPops=not_dead_vsps,
                    weight=-1),
                # Then we simulate random mating only among reproductively
                # mature individuals within each subpopulation
                # (breeding/winter grounds).
                simuPOP.RandomMating(
                    ops=[
                        simuPOP.MitochondrialGenoTransmitter(),
                        simuPOP.MendelianGenoTransmitter(),
                        simuPOP.IdTagger(),
                        simuPOP.InheritTagger(mode=simuPOP.MATERNAL,
                                              infoFields=['feeding_ground']),
                        simuPOP.InheritTagger(
                            mode=simuPOP.MATERNAL,
                            infoFields=['native_breeding_ground']),
                        simuPOP.PedigreeTagger()
                    ],
                    subPops=active_vsps,
                    weight=1)
            ],
            subPopSize=configure_new_population_size),
        postOps=[
            # Determine the isotopic ratios in individuals
            simuPOP.PyOperator(func=postop_processing),
            simuPOP.Migrator(mode=simuPOP.BY_IND_INFO),
            # count the individuals in each virtual subpopulation
            #simuPOP.Stat(popSize=True, subPops=[(0,0), (0,1), (0,2), (1,0), (1, 1), (1, 2)]),
            # print virtual subpopulation sizes (there is no individual with age > maxAge after mating)
            #simuPOP.PyEval(r"'Size of age groups: %s\n' % (','.join(['%d' % x for x in subPopSize]))")

            # Alternatively, calculate the Fst
            # FIXME: How does this actually work? Does it work for > 2 populations? I don't really understand it yet
            # ELC: it is a calculation that partitions variance among and between populations, and can be calculated as a
            # global statistic or on a pairwise basis. We use it as an indication of genetic differentiation.
            simuPOP.Stat(structure=range(1),
                         subPops=sub_population_names,
                         suffix='_AB',
                         step=10),
            simuPOP.PyEval(r"'Fst=%.3f \n' % (F_st_AB)", step=10)
        ],
        # Use the caller-supplied generation count. The original code passed
        # an undefined name `years` here and ignored the `gen` parameter.
        gen=gen)

    #simuPOP.dump(pop, width=3, loci=[], subPops=[(simuPOP.ALL_AVAIL, simuPOP.ALL_AVAIL)], max=1000, structure=False);
    #return

    # NOTE(review): `ped` is never used below; kept in case constructing the
    # Pedigree is relied upon as a check. Remove if confirmed unnecessary.
    ped = simuPOP.Pedigree(pop)
    print("This is the pedigree stuff")
    simuPOP.dump(pop)

    # Now sample the individuals
    sample = drawRandomSample(pop,
                              sizes=[sample_count] * sub_population_count)

    # Print out the allele frequency data: one line per polymorphic autosomal
    # locus, consisting of a running index followed by the allele frequencies.
    simuPOP.stat(sample, alleleFreq=simuPOP.ALL_AVAIL)
    frequencies = sample.dvars().alleleFreq
    with open('freq.txt', 'w') as freqfile:
        index = 0
        for locus in frequencies:
            # Skip the mitochondrial locus and loci with a single allele.
            if locus == nb_loci or len(frequencies[locus]) < 2:
                continue
            print(index, end=' ', file=freqfile)
            index += 1
            for allele in frequencies[locus]:
                print(frequencies[locus][allele], end=' ', file=freqfile)
            print(file=freqfile)

    # We want to remove monoallelic loci. This means a position in the
    # genotype for which all individuals have the same value in both alleles.
    # If we add all the values up, they will sum to 0 (if all individuals
    # have type 0) or to the number of individuals * 2 (if all individuals
    # have type 1).
    copy_length = nb_loci + 1  # loci per homologous copy (autosome + mtDNA)
    geno_sum = [0] * copy_length * 2
    for individual in sample.individuals():
        geno_sum = list(map(add, geno_sum, individual.genotype()))
    final_sum = list(map(add, geno_sum[:copy_length], geno_sum[copy_length:]))

    all_ones_total = sample_count * sub_population_count * 2
    monoallelic_loci = {
        i for i in range(nb_loci)
        if final_sum[i] == 0 or final_sum[i] == all_ones_total
    }
    nb_ignored_loci = len(monoallelic_loci)

    # Generate the two files
    with open('mixfile.txt', 'w') as mixfile, \
            open('haploiso.txt', 'w') as haplofile:
        print(sub_population_count,
              nb_loci - nb_ignored_loci,
              2,
              1,
              file=mixfile)
        # NOTE(review): the header names five columns but only four values
        # are written per individual (native ground is commented out below).
        print("sex, haplotype, iso1, iso2, native_ground", file=haplofile)
        for i in range(nb_loci - nb_ignored_loci):
            print('Loc', i + 1, sep='_', file=mixfile)
        for individual in sample.individuals():
            genotype = individual.genotype()
            print(
                1 if individual.sex() == 1 else 0,
                genotype[nb_loci],
                individual.info('carbon'),
                individual.info('nitrogen'),
                # int(individual.info('native_breeding_ground')),
                file=haplofile,
                sep=' ')
            print(int(individual.info('native_breeding_ground') + 1),
                  end=' ',
                  file=mixfile)
            for i in range(nb_loci):
                if i not in monoallelic_loci:
                    # Both alleles of locus i, shifted to 1-based codes.
                    print(genotype[i] + 1,
                          genotype[i + copy_length] + 1,
                          ' ',
                          end='',
                          sep='',
                          file=mixfile)
            print(file=mixfile)
    return sample
return [22, 44] else: return [30, 60] pop = sim.Population( size=[8, 16], ploidy=2, loci=[0, 1, 2], infoFields=['ind_id', 'father_id', 'mother_id', 'gen_id', 'sp_id'], lociPos=(1, 2, 3), ancGen=gen_evolve) pop.evolve( initOps=[ sim.IdTagger(begin=0, end=-1), sim.InitSex(maleProp=0.5), sim.InitGenotype(freq=[0.2, 0.2, 0.2, 0.2, 0.2], loci=[0, 1, 2]), sim.PedigreeTagger(output='>>simp_Pedigree.ped', outputLoci=[0, 1, 2], outputFields=['gen_id', 'sp_id']) ], #end of initOps preOps=[ PyOperator(lambda pop: [ pop.setIndInfo(x, "sp_id", x) for x in range(pop.numSubPop()) ] is not None), ], matingScheme=sim.MonogamousMating( subPopSize=censuscontrol, numOffspring=8, sexMode=(sim.NUM_OF_MALES, 2), ops=[
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This script is an example in the simuPOP user's guide. Please refer to
# the user's guide (http://simupop.sourceforge.net/manual) for a detailed
# description of this example.
#
import simuPOP as sim

# Simulator holding 5 replicates of a 100-individual, 20-locus population.
simu = sim.Simulator(sim.Population(size=100, loci=[20]), rep=5)

# Every 10 generations: compute allele frequencies at locus 0, print the
# generation number once (replicate 0), print the frequency of allele 0 for
# replicates 0, 2 and the last one, then end the line after the last replicate.
report_ops = [
    sim.Stat(alleleFreq=0, step=10),
    sim.PyEval('gen', step=10, reps=0),
    sim.PyEval(r"'\t%.2f' % alleleFreq[0][0]", step=10, reps=(0, 2, -1)),
    sim.PyOutput('\n', step=10, reps=-1),
]

simu.evolve(
    initOps=[sim.InitSex(), sim.InitGenotype(freq=[0.2, 0.8])],
    matingScheme=sim.RandomMating(),
    postOps=report_ops,
    gen=30,
)