def calculateAlleleAndGenotypeFrequencies(pop, param):
    """Record haplotype-class richness, frequencies and counts for ``pop``.

    Runs ``sp.stat`` with ``haploFreq`` over loci ``0..num_loci-1``, takes the
    first entry of the resulting ``haploFreq`` and ``haploNum`` tables, turns
    each haplotype tuple into a ``'-'``-joined string key, and appends the
    results to the ``richness``, ``class_freq`` and ``class_count`` lists held
    in the population's variable namespace (those lists must already exist).

    Args:
        pop: simuPOP population.
        param: ``(popsize, num_loci)``; only ``num_loci`` is used here.

    Returns:
        Always ``True``.
    """
    (_popsize, locus_total) = param
    sp.stat(pop, haploFreq=range(0, locus_total), vars=['haploFreq', 'haploNum'])

    stats = pop.dvars()
    first_key = list(stats.haploFreq.keys())[0]
    freq_by_haplotype = stats.haploFreq[first_key]
    count_by_haplotype = stats.haploNum[first_key]

    def _label(haplotype):
        # (1, 0, 2) -> "1-0-2"
        return '-'.join(str(allele) for allele in haplotype)

    class_freq = {_label(h): value for h, value in freq_by_haplotype.items()}
    class_count = {_label(h): value for h, value in count_by_haplotype.items()}

    namespace = pop.vars()
    namespace['richness'].append(len(freq_by_haplotype))
    namespace['class_freq'].append(class_freq)
    namespace['class_count'].append(class_count)
    return True
def export(pop, fl, append):
    """Append genotypes of randomly sampled females from "AZ" and "TX" to ``fl``.

    The file is opened in append mode.  When ``append`` is falsy a header
    (``//``, ``segsites`` and ``positions`` lines) is written first.  For each
    of the two subpopulations a fixed number of females is drawn with
    ``random.sample`` and one line per sampled individual is written, built
    from the alleles of the first homologous copy of chromosome 0.

    Args:
        pop: simuPOP population with subpopulations named "AZ" and "TX".
        fl: path of the output file.
        append: selects the sample size (index 0 of the size pair when falsy,
            index 1 when truthy) and suppresses the header when truthy.

    Returns:
        dict mapping "AZ" and "TX" to the number of individuals written.

    Raises:
        ValueError: from ``random.sample`` if a subpopulation holds fewer
            females than the requested sample size.
    """
    subpop_names = ["AZ", "TX"]
    n_inds = {"AZ": (43, 87), "TX": (98, 102)}
    sim.stat(pop, popSize=True, subPops=["AZ", "TX"], numOfMales=True)
    # The comprehension variable must not be called ``pop``: that shadows the
    # parameter (and rebinds it on Python 2).
    count_pop = {name: 0 for name in subpop_names}
    with open(fl, 'a') as f:
        seg_sites = list(range(pop.numLoci(0)))
        if not append:
            f.write('\n//\nsegsites: %d\n' % len(seg_sites))
            f.write('positions: %s\n' % ' '.join([str(pop.locusPos(x)) for x in seg_sites]))
        for vsp in subpop_names:
            female_inds = [ind for ind in pop.individuals(vsp)
                           if ind.sex() == sim.FEMALE]
            for ind in random.sample(female_inds, n_inds[vsp][append]):
                count_pop[vsp] += 1
                # Only ploidy copy 0 is exported.
                geno = ind.genotype(0, 0)
                f.write(''.join([str(geno[x]) for x in seg_sites]) + '\n')
    return count_pop
def __call__(self, pop):
    """Apply this generation's migration and return the subpopulation sizes.

    The current generation is read from the population's ``gen`` variable
    (0 when it is not set yet).  A migration matrix is computed for that
    generation with ``self._calculate_migration_matrix``, cached on
    ``self._cached_migration_matrix`` and applied with ``sim.migrate``.
    ``sim.stat`` is then run with ``popSize=True`` and the subpopulation names
    and sizes are cached on the instance for debugging and logging.

    :param pop: simuPOP population being evolved.
    :return: the subpopulation sizes reported by ``pop.subPopSizes()`` after migration
    """
    gen = pop.dvars().gen if 'gen' in pop.vars() else 0

    log.debug("========= Processing network =============")

    self._cached_migration_matrix = self._calculate_migration_matrix(gen)
    sim.migrate(pop, self._cached_migration_matrix)
    sim.stat(pop, popSize=True)

    # Sort the names themselves; sorting str(list(...)) would sort the
    # individual characters of the list's repr.
    self.subpopulation_names = sorted(str(name) for name in pop.subPopNames())
    self.subpop_sizes = pop.subPopSizes()
    return pop.subPopSizes()
def sampleTraitCounts(pop, param):
    """Sample individuals from ``pop`` and store per-allele counts for each locus.

    Draws a sample with ``drawRandomSample(pop, sizes=ssize)``, runs
    ``sim.stat`` with ``alleleFreq=sim.ALL_AVAIL`` on it, and hands every
    ``(allele, count)`` pair of ``alleleNum[locus]`` for loci
    ``0..numloci-1`` to ``_storeTraitCountSample``.

    Args:
        pop (Population): simuPOP population replicate.
        param: ``(ssize, mutation, popsize, sim_id, numloci)``.

    Returns:
        Always ``True``.
    """
    (ssize, mutation, popsize, sim_id, numloci) = param
    popID = pop.dvars().rep
    gen = pop.dvars().gen

    sample = drawRandomSample(pop, sizes=ssize)
    sim.stat(sample, alleleFreq=sim.ALL_AVAIL)
    allele_counts = sample.dvars().alleleNum

    for locus in range(numloci):
        # items() exists on Python 2 and 3; iteritems() is Python 2 only.
        for allele, count in allele_counts[locus].items():
            _storeTraitCountSample(popID, ssize, locus, gen, mutation,
                                   popsize, sim_id, allele, count)
    return True
def export(pop, fl, append):
    """Append genotypes of randomly sampled females from "AZ" and "TX" to ``fl``.

    The file is opened in append mode.  When ``append`` is falsy a header
    (``//``, ``segsites`` and ``positions`` lines) is written first.  For each
    of the two subpopulations a fixed number of females is drawn with
    ``random.sample`` and one line per sampled individual is written, built
    from the alleles of the first homologous copy of chromosome 0.

    Args:
        pop: simuPOP population with subpopulations named "AZ" and "TX".
        fl: path of the output file.
        append: selects the sample size (index 0 of the size pair when falsy,
            index 1 when truthy) and suppresses the header when truthy.

    Returns:
        dict mapping "AZ" and "TX" to the number of individuals written.

    Raises:
        ValueError: from ``random.sample`` if a subpopulation holds fewer
            females than the requested sample size.
    """
    subpop_names = ["AZ", "TX"]
    n_inds = {"AZ": (43, 87), "TX": (98, 102)}
    sim.stat(pop, popSize=True, subPops=["AZ", "TX"], numOfMales=True)
    # The comprehension variable must not be called ``pop``: that shadows the
    # parameter (and rebinds it on Python 2).
    count_pop = {name: 0 for name in subpop_names}
    with open(fl, 'a') as f:
        seg_sites = list(range(pop.numLoci(0)))
        if not append:
            f.write('\n//\nsegsites: %d\n' % len(seg_sites))
            f.write('positions: %s\n' % ' '.join([str(pop.locusPos(x)) for x in seg_sites]))
        for vsp in subpop_names:
            female_inds = [ind for ind in pop.individuals(vsp)
                           if ind.sex() == sim.FEMALE]
            for ind in random.sample(female_inds, n_inds[vsp][append]):
                count_pop[vsp] += 1
                # Only ploidy copy 0 is exported.
                geno = ind.genotype(0, 0)
                f.write(''.join([str(geno[x]) for x in seg_sites]) + '\n')
    return count_pop
def write_frequency(pop, outfile):
    """Write per-subpopulation allele frequencies of stored generations as CSV.

    The file gets a ``selection,generation,locus,freq,selected`` header and
    then one row per allele frequency value, for every ancestral-generation
    index from ``pop.ancestralGens() - 1`` down to 0, every named
    subpopulation and every locus.  The ``selected`` column is whether the
    locus index is in the module-level ``adv_loci``.

    Args:
        pop: simuPOP population with stored ancestral generations.
        outfile: path of the CSV file to create (overwritten if present).
    """
    # ``with`` guarantees the file is flushed and closed, also on errors.
    with open(outfile, "w") as out:
        out.write("selection,generation,locus,freq,selected\n")
        # Generations are stored from last to first, hence the decreasing range.
        for i in range(pop.ancestralGens()-1, -1, -1):
            pop.useAncestralGen(i)
            # vars=['alleleFreq_sp'] makes stat store frequencies per subpopulation.
            sim.stat(pop, alleleFreq=range(np.sum(pop.numLoci())), vars=['alleleFreq_sp'])
            for selection in pop.subPopNames():
                subpop_idx = pop.subPopByName(selection)
                for locus in range(np.sum(pop.numLoci())):
                    for freq in pop.dvars(subpop_idx).alleleFreq[locus].values():
                        out.write("{a},{b},{c},{d},{e}\n".format(
                            a=selection,
                            b=pop.ancestralGens()-i-1,
                            c=pop.locusName(locus),
                            d=freq,
                            e=locus in adv_loci))
def _Ne(self, pop):
    """Compute ``ne`` per locus from allele frequencies and store it on ``pop``.

    Allele frequencies are obtained with ``sim.stat`` for ``self.loci`` and
    ``self.subPops``.  If ``self.vars`` is empty or contains ``'ne'`` the
    population-level ``ne`` dictionary is filled; if it contains ``'ne_sp'``
    a ``ne`` dictionary is filled for every requested subpopulation.  Each
    value is ``self._calcNe`` applied to that locus' allele frequencies.

    Returns:
        Always ``True``.
    """
    per_subpop = 'ne_sp' in self.vars
    stat_vars = ['alleleFreq_sp', 'alleleFreq'] if per_subpop else []
    sim.stat(pop, alleleFreq=self.loci, subPops=self.subPops, vars=stat_vars)

    # Resolve the "all loci" marker to concrete indexes.
    if self.loci == sim.ALL_AVAIL:
        loci = range(pop.totNumLoci())
    else:
        loci = self.loci

    # Whole-population values.
    if len(self.vars) == 0 or 'ne' in self.vars:
        pop.dvars().ne = {
            loc: self._calcNe(pop.dvars().alleleFreq[loc]) for loc in loci
        }

    # Per-subpopulation values.
    if per_subpop:
        if self.subPops == sim.ALL_AVAIL:
            targets = range(pop.numSubPop())
        else:
            targets = self.subPops
        for sp in targets:
            pop.dvars(sp).ne = {
                loc: self._calcNe(pop.dvars(sp).alleleFreq[loc]) for loc in loci
            }
    return True
def sampleAlleleAndGenotypeFrequencies(pop, param):
    """Sample every subpopulation and store its haplotype-class statistics.

    Each named subpopulation is sampled with ``drawRandomSample`` at one
    common size: the smallest of ``ceil(ssize * size)`` over all
    subpopulations.  For each sample the haplotype frequencies and counts over
    loci ``0..num_loci-1`` are computed, haplotype tuples are turned into
    ``'-'``-joined string keys, and a record with keys ``subpop``,
    ``crichness``, ``cfreq`` and ``ccount`` is collected.  All records are
    passed in one call to ``data.storeClassFrequencySamples``.

    Args:
        pop: simuPOP population replicate.
        param: ``(ssize, mutation, popsize, sim_id, num_loci, fname, fcli, seed)``.

    Returns:
        Always ``True``.
    """
    import simuPOP as sim
    import simuPOP.sampling as sampling

    (ssize, mutation, popsize, sim_id, num_loci, fname, fcli, seed) = param
    rep = pop.dvars().rep
    gen = pop.dvars().gen

    min_sample_size = min(int(math.ceil(ssize * n)) for n in pop.subPopSizes())

    def _label(haplotype):
        # (1, 0, 2) -> "1-0-2"
        return '-'.join(str(allele) for allele in haplotype)

    sample_list = []
    for sp_name in pop.subPopNames():
        sampled_pop = sampling.drawRandomSample(
            pop, subPops=pop.subPopByName(sp_name), sizes=min_sample_size)
        sim.stat(sampled_pop, haploFreq=range(0, num_loci), vars=['haploFreq', 'haploNum'])
        sim.stat(sampled_pop, alleleFreq=sim.ALL_AVAIL)

        stats = sampled_pop.dvars()
        # dict views are not indexable on Python 3, so materialise the keys.
        first_key = list(stats.haploFreq.keys())[0]
        haplotype_map = stats.haploFreq[first_key]
        haplotype_count_map = stats.haploNum[first_key]

        sample_list.append(dict(
            subpop=sp_name,
            crichness=len(haplotype_map),
            cfreq={_label(k): v for k, v in haplotype_map.items()},
            ccount={_label(k): v for k, v in haplotype_count_map.items()},
        ))

    data.storeClassFrequencySamples(sim_id, gen, rep, fname, fcli, seed,
                                    ssize, popsize, mutation, sample_list)
    return True
def env_set(pop):
    """Set the ``env`` information field of each subpopulation.

    After refreshing the population-size statistics, every individual of
    subpopulation ``i`` gets ``vec_env[i]`` (a module-level sequence) as its
    ``env`` value.

    Returns:
        Always ``True``.
    """
    sim.stat(pop, popSize=True)
    subpop_total = len(pop.dvars().subPopSize)

    sp_index = 0
    while sp_index < subpop_total:
        # One environmental value shared by the whole subpopulation.
        pop.setIndInfo(vec_env[sp_index], 'env', subPop=sp_index)
        sp_index += 1
    return True
def get_mean_r2(Ne, S, n_loci, gens, n_subpops, initial_frequencies, m):
    """Simulate a structured population and return the mean pairwise R2.

    A population of ``n_subpops`` subpopulations of size ``Ne`` with
    ``n_loci`` single-locus chromosomes and two alleles is initialised with
    allele frequencies ``[initial_frequencies, 1 - initial_frequencies]``,
    then evolved for ``gens`` generations under random mating with migration
    matrix ``get_migration_matrix(m, n_subpops)``.  ``S`` individuals are
    sampled from subpopulation 0 only (sample size 0 elsewhere).  Loci whose
    allele-0 frequency in the sample satisfies ``|0.5 - f| < 0.45`` are kept
    and the ``R2`` values of all pairs of kept loci are averaged.

    Returns:
        A one-element list holding the mean R2 of subpopulation 0.

    Raises:
        Exception: if fewer than two loci pass the frequency filter.
    """
    migration = get_migration_matrix(m, n_subpops)

    allele_total = 2
    pop = sim.Population(
        size=[Ne] * n_subpops,
        ploidy=2,
        loci=[1] * n_loci,
        alleleNames=[str(i) for i in range(allele_total)],
        infoFields='migrate_to',
    )
    sim.initGenotype(pop, freq=[initial_frequencies, 1 - initial_frequencies])
    sim.initSex(pop)
    print(migration)

    # Burn-in generations.
    pop.evolve(
        initOps=[],
        preOps=sim.Migrator(migration, mode=sim.BY_PROBABILITY),
        matingScheme=sim.RandomMating(),
        gen=gens,
    )

    # Only the first subpopulation contributes individuals to the sample.
    sample_pop = drawRandomSample(pop, sizes=[S] + [0] * (n_subpops - 1))

    sim.stat(sample_pop, alleleFreq=range(0, n_loci), vars=['alleleFreq_sp'])
    locus_pairs = list(itertools.combinations(list(range(n_loci)), r=2))
    sim.stat(sample_pop, LD=locus_pairs, vars=['R2_sp'])

    r2s = []
    for sp in [0]:
        freqs = sample_pop.dvars(sp).alleleFreq
        segregating = []
        for k in range(n_loci):
            if np.abs(.5 - freqs[k][0]) < .5 - 0.05:
                segregating.append(k)
        if len(segregating) < 2:
            raise Exception("<2 segregating alleles")

        r2_values = [
            sample_pop.dvars(sp).R2[first][second]
            for first, second in itertools.combinations(segregating, r=2)
        ]
        r2s.append(sum(r2_values) / len(r2_values))
    return r2s
def mutate(self, pop):
    """Apply a two-allele mutation to every locus, with a frequency-dependent rate.

    Allele frequencies are computed once up front.  A locus whose allele-1
    frequency is below ``self.cutoff`` is mutated with rate ``self.mu1``,
    every other locus with rate ``self.mu2``.

    Returns:
        Always ``True``.
    """
    locus_total = pop.totNumLoci()
    sim.stat(pop, alleleFreq=range(locus_total))
    for locus in range(locus_total):
        # Allele 1 is treated as the disease allele.
        is_rare = pop.dvars().alleleFreq[locus][1] < self.cutoff
        rate = self.mu1 if is_rare else self.mu2
        sim.kAlleleMutate(pop, k=2, rates=rate, loci=[locus])
    return True
def calcFst(self,pop):
    """Compute F_st over loci ``0..self.loci-1`` and append it to the results.

    The value is read from the population's ``F_st`` variable after
    ``sim.stat`` and appended to ``self.results['fst']``.

    Returns:
        Always ``True``.
    """
    locus_range = range(self.loci)
    sim.stat(pop, structure=locus_range, vars=['F_st'])
    fst_value = pop.dvars().F_st
    self.results['fst'].append(fst_value)
    return True
def calcFst(pop):
    """Print G_st and F_st for the whole population and for a random sample.

    The statistics use loci 0-4.  The sample takes 500 individuals from
    every subpopulation.

    Returns:
        Always ``True``.
    """
    stat_args = dict(structure=range(5), vars=['F_st', 'G_st'])
    simuPOP.stat(pop, **stat_args)

    per_subpop = [500]*pop.numSubPop()
    sample = simuPOP.sampling.drawRandomSample(pop, sizes=per_subpop)
    simuPOP.stat(sample, **stat_args)

    whole, sampled = pop.dvars(), sample.dvars()
    values = (whole.gen, whole.G_st, sampled.G_st, whole.F_st, sampled.F_st)
    print('Gen: %3d Gst: %.6f (all), %.6f (sample) Fst: %.6f (all) %.6f (sample)' % values)
    return True
def demo(pop):
    """Return a list holding the module-level ``popsize`` once per subpopulation.

    The number of subpopulations is taken from the ``subPopSize`` variable
    filled in by ``sim.stat(pop, popSize=True)``.

    Returns:
        ``[popsize] * number_of_subpopulations``.
    """
    sim.stat(pop, popSize=True)
    subsize = pop.dvars().subPopSize
    # With one subpopulation the value may be a bare integer, which has no
    # len(); isinstance is the idiomatic check for that.
    numpop = 1 if isinstance(subsize, int) else len(subsize)
    return [popsize] * numpop
def outputstat(pop):
    """Print the proportion of affected individuals and the size of three VSPs.

    Statistics are computed for all virtual subpopulations of subpopulation 0.
    One line is printed for each of the virtual subpopulations ``(0, 0)``,
    ``(0, 1)`` and ``(0, 2)``.

    Returns:
        Always ``True``.
    """
    sim.stat(pop, popSize=True, numOfAffected=True,
             subPops=[(0, sim.ALL_AVAIL)],
             vars=['popSize_sp', 'propOfAffected_sp'])
    for sp in range(3):
        vsp = (0, sp)
        print('%s: %.3f%% (size %d)' % (pop.subPopName(vsp),
                                        pop.dvars(vsp).propOfAffected * 100.,
                                        pop.dvars(vsp).popSize))
    # Presumably used as a PyOperator callback, like the sibling functions
    # that all return True; returning None there would not signal success.
    return True
def CalcdemoNe(pop):
    """Run the ``effectiveSize`` statistic for loci 0-1 on selected VSPs.

    The first call covers virtual subpopulations ``(0, 1)`` and ``(1, 1)``
    with ``vars='Ne_demo_base_sp'``; the second covers ``(0, 1)`` with
    ``vars='Ne_demo_base'``.

    Returns:
        Always ``True``.
    """
    # NOTE(review): this splitter is created and immediately discarded; it is
    # never attached to ``pop`` here.  Confirm whether
    # pop.setVirtualSplitter(...) was intended.
    sim.InfoSplitter(field="age", cutoff=[3, 10])

    requests = (
        ([(0, 1), (1, 1)], 'Ne_demo_base_sp'),
        ([(0, 1)], 'Ne_demo_base'),
    )
    for vsps, var_name in requests:
        sim.stat(pop=pop, effectiveSize=range(2), subPops=vsps, vars=var_name)
    return True
def assoTest(pop):
    """Draw a case-control sample and print association-test p-values.

    500 cases and 500 controls are sampled.  The allele chi-square, genotype
    chi-square and Armitage trend p-values at loci 0 and 2 are printed on one
    line.

    Returns:
        Always ``True``.
    """
    sample = drawCaseControlSample(pop, cases=500, controls=500)
    sim.stat(sample, association=(0, 2),
             vars=['Allele_ChiSq_p', 'Geno_ChiSq_p', 'Armitage_p'])

    results = sample.dvars()
    p_values = []
    for table in (results.Allele_ChiSq_p, results.Geno_ChiSq_p, results.Armitage_p):
        p_values.append(table[0])
        p_values.append(table[2])
    print('Allele test: %.2e, %.2e, Geno test: %.2e, %.2e, Trend test: %.2e, %.2e' % tuple(p_values))
    return True
def insert_minor_genotype_frequencies_into_aggregate_matrix(self, minor_homozygotes, minor_heterozygotes, number_of_reps=1, meta_population=None):
    """Collect minor-allele genotype frequencies of all replicates in two matrices.

    Each replicate contributes six rows, one per ``(subpopulation, generation
    label)`` pair of a fixed list.  Column 0 holds the generation label,
    column 1 the replicate id and the remaining 44445 columns hold one
    frequency per locus: the frequency of ``minor_homozygotes[locus]`` in the
    first matrix, and the sum of the frequencies of the two genotypes in
    ``minor_heterozygotes[locus]`` in the second.

    Args:
        minor_homozygotes: per-locus genotype key of the minor homozygote.
        minor_heterozygotes: per-locus pair of heterozygote genotype keys.
        number_of_reps: number of replicates; fixes the row count (6 each).
        meta_population: object whose ``populations()`` yields the replicates.

    Returns:
        ``(homozygote_frequency_matrix, heterozygote_frequency_matrix)``.
    """
    locus_count = 44445
    row_total = 6 * number_of_reps
    homozygote_frequency_matrix = np.zeros((row_total, locus_count + 2))
    heterozygote_frequency_matrix = np.zeros((row_total, locus_count + 2))
    free_rows = list(range(row_total))
    subpops_and_gens = [(1, 0), (2, 2), (3, 4), (4, 6), (5, 8), (6, 10)]

    print("Calculating minor-allele genotype frequencies for {number_reps} replicates and writing them to an aggregate matrix.".format(number_reps=number_of_reps))

    for replicate in meta_population.populations():
        print("Replicate: {rep_id}".format(rep_id=replicate.dvars().rep))
        sim.stat(replicate, genoFreq=sim.ALL_AVAIL, vars=['genoFreq_sp'])
        for sp, gen in subpops_and_gens:
            row_index = free_rows.pop(0)
            print("Row index: {row_index}".format(row_index=row_index))
            rep = replicate.dvars().rep
            geno_freq = replicate.dvars(sp).genoFreq

            # Homozygote row.
            homo_row = homozygote_frequency_matrix[row_index]
            homo_row[0] = gen
            homo_row[1] = rep
            homo_row[2:] = [
                geno_freq[locus][minor_homozygotes[locus]]
                for locus in range(locus_count)
            ]

            # Heterozygote row: both orderings of the heterozygote are summed.
            hetero_row = heterozygote_frequency_matrix[row_index]
            hetero_row[0] = gen
            hetero_row[1] = rep
            hetero_row[2:] = [
                (geno_freq[locus][minor_heterozygotes[locus][0]]
                 + geno_freq[locus][minor_heterozygotes[locus][1]])
                for locus in range(locus_count)
            ]
    return homozygote_frequency_matrix, heterozygote_frequency_matrix
def find_fixed_sites(self, founder_population, threshold, error):
    """Return the fixed sites and the individual ids of a founder population.

    The work is done on a clone, so ``founder_population`` itself is not
    passed to ``sim.stat``.

    Args:
        founder_population: simuPOP population to inspect.
        threshold: forwarded to ``parameterizer.PopulationStructure``.
        error: forwarded to ``parameterizer.PopulationStructure``.

    Returns:
        ``(fixed_sites, valid_inds)``: the list of fixed sites reported by
        ``sim.stat`` and the list of ``ind_id`` values of the clone.
    """
    cloned_pop = founder_population.clone()
    # NOTE(review): the structure object is not used afterwards; it is still
    # constructed in case the constructor has side effects - confirm.
    clone_ps = parameterizer.PopulationStructure(
        cloned_pop, self.population_structure_filename, threshold, error)

    valid_inds = list(cloned_pop.indInfo('ind_id'))
    sim.stat(cloned_pop, numOfSegSites=True,
             vars=['numOfFixedSites', 'fixedSites'])
    fixed_sites = list(cloned_pop.dvars().fixedSites)
    return fixed_sites, valid_inds
def sampleAlleleAndGenotypeFrequencies(pop, param):
    """Sample every subpopulation and store its haplotype-class statistics.

    Each named subpopulation is sampled with ``drawRandomSample`` at one
    common size: the smallest of ``ceil(ssize * size)`` over all
    subpopulations.  For each sample the haplotype frequencies and counts over
    loci ``0..num_loci-1`` are computed, haplotype tuples are turned into
    ``'-'``-joined string keys, and a record with keys ``subpop``,
    ``crichness``, ``cfreq`` and ``ccount`` is collected.  All records are
    passed in one call to ``data.storeClassFrequencySamples``.

    Args:
        pop: simuPOP population replicate.
        param: ``(ssize, mutation, popsize, sim_id, num_loci, fname, fcli, seed)``.

    Returns:
        Always ``True``.
    """
    import simuPOP as sim
    import simuPOP.sampling as sampling

    (ssize, mutation, popsize, sim_id, num_loci, fname, fcli, seed) = param
    rep = pop.dvars().rep
    gen = pop.dvars().gen

    min_sample_size = min(int(math.ceil(ssize * n)) for n in pop.subPopSizes())

    def _label(haplotype):
        # (1, 0, 2) -> "1-0-2"
        return '-'.join(str(allele) for allele in haplotype)

    sample_list = []
    for sp_name in pop.subPopNames():
        sampled_pop = sampling.drawRandomSample(
            pop, subPops=pop.subPopByName(sp_name), sizes=min_sample_size)
        sim.stat(sampled_pop, haploFreq=range(0, num_loci), vars=['haploFreq', 'haploNum'])
        sim.stat(sampled_pop, alleleFreq=sim.ALL_AVAIL)

        stats = sampled_pop.dvars()
        # dict views are not indexable on Python 3, so materialise the keys.
        first_key = list(stats.haploFreq.keys())[0]
        haplotype_map = stats.haploFreq[first_key]
        haplotype_count_map = stats.haploNum[first_key]

        sample_list.append(dict(
            subpop=sp_name,
            crichness=len(haplotype_map),
            cfreq={_label(k): v for k, v in haplotype_map.items()},
            ccount={_label(k): v for k, v in haplotype_count_map.items()},
        ))

    data.storeClassFrequencySamples(sim_id, gen, rep, fname, fcli, seed,
                                    ssize, popsize, mutation, sample_list)
    return True
def calcNe(self, pop):
    """Store the effective number of non-zero alleles at ``self.loci``.

    For each locus the frequencies of all alleles other than allele 0 are
    rescaled by their total (``1 - freq[0]``) and the result is
    ``1 / sum(rescaled ** 2)``.  A locus carrying only allele 0 gets 0.  The
    resulting ``{locus: value}`` dictionary is saved as the population
    variable ``ne``.

    Returns:
        Always ``True``.
    """
    sim.stat(pop, alleleFreq=self.loci)
    effective = {}
    for locus in self.loci:
        freq = pop.dvars().alleleFreq[locus]
        non_zero_total = 1 - freq[0]
        if non_zero_total == 0:
            effective[locus] = 0
            continue
        squares = sum((value/non_zero_total)**2
                      for allele, value in freq.items() if allele != 0)
        effective[locus] = 1. / squares
    # Save the result to the population's variables.
    pop.dvars().ne = effective
    return True
def dynaMutator(pop, param):
    """Mutate every locus with a rate chosen from its allele-1 frequency.

    Allele frequencies are computed once up front.  A locus whose allele-1
    frequency is below ``cutoff`` is mutated with rate ``mu1``, every other
    locus with rate ``mu2``.

    Args:
        pop: simuPOP population.
        param: ``(cutoff, mu1, mu2)``.

    Returns:
        Always ``True``.
    """
    (threshold, low_freq_rate, high_freq_rate) = param
    locus_total = pop.totNumLoci()
    sim.stat(pop, alleleFreq=range(locus_total))
    for locus in range(locus_total):
        # Allele 1 is treated as the disease allele.
        below_cutoff = pop.dvars().alleleFreq[locus][1] < threshold
        rate = low_freq_rate if below_cutoff else high_freq_rate
        sim.kAlleleMutate(pop, k=2, rates=rate, loci=[locus])
    return True
def fixed_chooser(pop, number_qtl):
    """Choose QTL at random from the fixed sites of ``pop``.

    :param pop: simuPOP population; its ``fixedSites`` variable is refreshed here.
    :param number_qtl: number of fixed sites to draw with ``random.sample``.
    :return: ``(all_qtl, proper_qtl)`` where ``proper_qtl`` is the sorted draw
        and ``all_qtl`` is that list followed by the same indexes shifted by
        ``pop.totNumLoci()``.
    """
    sim.stat(pop, numOfSegSites=sim.ALL_AVAIL, vars=['fixedSites'])

    alpha_qtl = random.sample(pop.dvars().fixedSites, number_qtl)
    alpha_qtl.sort()

    shift = pop.totNumLoci()
    omega_qtl = sorted(shift + index for index in alpha_qtl)

    return alpha_qtl + omega_qtl, alpha_qtl
def env_update(pop):
    """Redraw the module-level ``vec_env`` when there are more than two subpopulations.

    Entry ``i`` of the new vector is drawn uniformly within 0.5 of the old
    entry ``i // 2``.  With two or fewer subpopulations ``vec_env`` is left
    untouched.

    Returns:
        Always ``True``.
    """
    global vec_env
    sim.stat(pop, popSize=True)
    numpop = len(pop.dvars().subPopSize)
    # Nothing to do for numpop <= 2: those values are already fixed.
    if numpop > 2:
        # Floor division is required: ``i / 2`` is a float on Python 3 and
        # cannot index a list.
        vec_env = [
            uniform(vec_env[i // 2] - 0.5, vec_env[i // 2] + 0.5)
            for i in range(numpop)
        ]
    return True
def allele_demise_tracker(self, pop, param):
    """Store a lifetime record for every cached allele that has left the population.

    ``sim.stat`` does not report alleles at zero frequency, so an allele
    listed in ``self.origin_cache[rep][locus]`` that is missing from
    ``alleleFreq[locus]`` is treated as having exited.  For each such allele
    the lifetime is obtained from ``self._getLifetimeForExitedAllele`` and
    stored with ``self._storeTraitLifetimeRecord``.

    NOTE: only appropriate for models in which an exited allele cannot
    reappear (e.g. infinite alleles without back-mutation).

    Args:
        pop (Population): simuPOP population replicate.
        param: ``(ssize, mutation, popsize, sim_id, numloci)``.

    Returns:
        Always ``True``.
    """
    (ssize, mutation, popsize, sim_id, numloci) = param
    rep = pop.dvars().rep
    cur_gen = pop.dvars().gen
    sim.stat(pop, alleleFreq=sim.ALL_AVAIL)

    for locus in range(numloci):
        tracked = self.origin_cache[rep][locus].keys()
        present = pop.dvars().alleleFreq[locus]
        for allele in tracked:
            if allele in present:
                # Still segregating; nothing to record.
                continue
            lifetime = self._getLifetimeForExitedAllele(rep, allele, locus, cur_gen)
            self._storeTraitLifetimeRecord(rep, ssize, mutation, popsize,
                                           sim_id, locus, allele, lifetime)
    return True
def OutputStats(pop):
    """Write one tab-separated statistics line for the current generation.

    The line holds the generation, the allele-1 frequency of every locus, the
    mean ``age`` in virtual subpopulations ``(0, 3)`` and ``(0, 4)`` and the
    mean ``a`` in the same two virtual subpopulations.  It is written to
    ``args.output``.

    Returns:
        Always ``True``.
    """
    sim.stat(pop, alleleFreq=sim.ALL_AVAIL)
    info_requests = (
        ('age', (0, 3), '_males'),
        ('age', (0, 4), '_females'),
        ('a', (0, 3), '_amales'),
        ('a', (0, 4), '_afemales'),
    )
    for field, vsp, suffix in info_requests:
        sim.stat(pop, meanOfInfo=field, subPops=[vsp], suffix=suffix)

    stats = pop.dvars()
    pieces = [str(stats.gen)]
    for locus in range(pop.totNumLoci()):
        pieces.append("\t%.3f" % stats.alleleFreq[locus][1])
    pieces.append("\t%3d" % stats.meanOfInfo_males['age'])
    pieces.append("\t%3d" % stats.meanOfInfo_females['age'])
    pieces.append("\t%4f" % stats.meanOfInfo_amales['a'])
    pieces.append("\t%4f\n" % stats.meanOfInfo_afemales['a'])

    args.output.write("".join(pieces))
    return True
def calcNe(pop, param):
    """Store the effective number of non-zero alleles at the loci in ``param``.

    For each locus the frequencies of all alleles other than allele 0 are
    rescaled by their total (``1 - freq[0]``) and the result is
    ``1 / sum(rescaled ** 2)``.  A locus carrying only allele 0 gets 0.  The
    resulting ``{locus: value}`` dictionary is saved as the population
    variable ``ne``.

    Args:
        pop: simuPOP population.
        param: iterable of locus indexes.

    Returns:
        Always ``True``.
    """
    sim.stat(pop, alleleFreq=param)
    effective = {}
    for locus in param:
        freq = pop.dvars().alleleFreq[locus]
        non_zero_total = 1 - freq[0]
        if non_zero_total == 0:
            effective[locus] = 0
            continue
        squares = sum((value/non_zero_total)**2
                      for allele, value in freq.items() if allele != 0)
        effective[locus] = 1. / squares
    # Save the result to the population's variables.
    pop.dvars().ne = effective
    return True
def count_traits_in_subpops(pop, param):
    """Count in how many subpopulations each haplotype occurs and summarise it.

    The per-haplotype counts are stored as the population variable
    ``pop_count``.  Six summary numbers are then appended to the existing
    lists ``ones``, ``twos``, ``fivepercent``, ``tenpercent``,
    ``twentypercent`` and ``fiftypercent``: the number of haplotypes found in
    exactly one subpopulation, in exactly two, and in fewer than
    ``int(numSubPops * f)`` subpopulations for f = .05, .10, .2 and .5.

    :param pop: the population object, passed by simuPOP in the PyOperator call.
    :param param: ``(num_loci, numSubPops)``.
    :return: True
    """
    (num_loci, numSubPops) = param
    sp.stat(pop, haploFreq=range(0, num_loci),
            vars=['haploFreq_sp', 'haploNum_sp'], subPops=sp.ALL_AVAIL)

    traits_in_subpops = defaultdict(int)
    for subpop_index in range(0, numSubPops):
        first_key = list(pop.vars(subpop_index)['haploNum'].keys())[0]
        haplotypes = list(pop.vars(subpop_index)['haploNum'][first_key].keys())
        for haplotype in haplotypes:
            traits_in_subpops[str(haplotype)] += 1

    pop.vars()['pop_count'] = traits_in_subpops
    occupancy = list(pop.vars()['pop_count'].values())

    subpop_total = int(numSubPops)

    def _below(fraction):
        # Haplotypes present in fewer than int(total * fraction) subpopulations.
        limit = int(subpop_total * fraction)
        return sum(1 for value in occupancy if value < limit)

    summary = [
        ('ones', sum(1 for value in occupancy if value == 1)),
        ('twos', sum(1 for value in occupancy if value == 2)),
        ('fivepercent', _below(.05)),
        ('tenpercent', _below(.10)),
        ('twentypercent', _below(.2)),
        ('fiftypercent', _below(.5)),
    ]
    for var_name, value in summary:
        pop.vars()[var_name].append(value)
    return True
def removeRare(pop,thresh_hi=0.999999,thresh_lo=0.000001,DPL=['rs4491689'],savefile=False):
    """Remove loci whose allele-0 frequency lies outside ``[thresh_lo, thresh_hi]``.

    With the default thresholds only loci that are (almost) monomorphic are
    removed.  The population is modified in place.

    Args:
        pop: simuPOP population.
        thresh_hi: loci with allele-0 frequency above this are removed.
        thresh_lo: loci with allele-0 frequency below this are removed.
        DPL: locus names whose indexes are looked up after the removal.
            The default list is only read, never modified.
        savefile: if truthy, the population is saved to this file name with
            ``pop.save``.

    Returns:
        ``(number_of_loci_removed, [index of each DPL locus after removal])``.
    """
    locus_total = pop.totNumLoci()
    sim.stat(pop, alleleFreq=range(locus_total))
    allele_freq = pop.dvars().alleleFreq
    # range() works on Python 2 and 3; xrange() is Python 2 only.
    lociToRemove = [
        locus for locus in range(locus_total)
        if allele_freq[locus][0] > thresh_hi or allele_freq[locus][0] < thresh_lo
    ]
    pop.removeLoci(lociToRemove)
    if savefile:
        pop.save(savefile)
    return len(lociToRemove), [pop.locusByName(x) for x in DPL]
def allele_demise_tracker(self, pop, param):
    """Store a lifetime record for every cached allele that has left the population.

    ``sim.stat`` does not report alleles at zero frequency, so an allele
    listed in ``self.origin_cache[rep][locus]`` that is missing from
    ``alleleFreq[locus]`` is treated as having exited.  For each such allele
    the lifetime is obtained from ``self._getLifetimeForExitedAllele`` and
    stored with ``self._storeTraitLifetimeRecord``.

    NOTE: only appropriate for models in which an exited allele cannot
    reappear (e.g. infinite alleles without back-mutation).

    Args:
        pop (Population): simuPOP population replicate.
        param: ``(ssize, mutation, popsize, sim_id, numloci)``.

    Returns:
        Always ``True``.
    """
    (ssize, mutation, popsize, sim_id, numloci) = param
    rep = pop.dvars().rep
    cur_gen = pop.dvars().gen
    sim.stat(pop, alleleFreq=sim.ALL_AVAIL)

    for locus in range(numloci):
        tracked = self.origin_cache[rep][locus].keys()
        present = pop.dvars().alleleFreq[locus]
        for allele in tracked:
            if allele in present:
                # Still segregating; nothing to record.
                continue
            lifetime = self._getLifetimeForExitedAllele(rep, allele, locus, cur_gen)
            self._storeTraitLifetimeRecord(rep, ssize, mutation, popsize,
                                           sim_id, locus, allele, lifetime)
    return True
def evolve(pop, r=0):
    """Dump ``pop`` and evolve it for 50 generations under random mating.

    Sex is initialised at the start, recombination uses a rate of 0.01, and
    every 10 generations the allele frequencies of loci 0-23 are recomputed
    and the frequencies at locus 0 are printed via ``PyEval``.

    Args:
        pop: simuPOP population to evolve in place.
        r: currently unused.
            NOTE(review): presumably meant as the recombination rate, which
            is hard-coded to 0.01 - confirm before wiring it in.
    """
    sim.dump(pop)
    pop.evolve(
        initOps=[sim.InitSex()],
        matingScheme=sim.RandomMating(ops=sim.Recombinator(rates=0.01)),
        postOps=[
            # The operator form is required here: the function form
            # sim.stat(pop, ...) runs once immediately and is not an operator.
            sim.Stat(alleleFreq=range(24), step=10),
            sim.PyEval(r"alleleFreq[0]", step=10),
        ],
        gen=50,
    )
def printAlleleFreq(pop):
    """Print the allele-1 frequency of the tracked loci in subpopulations 0-2.

    A header line with the current generation is printed, followed by one
    line per subpopulation holding the space-separated frequencies at loci
    ``dmi1..dmi4`` and ``ad1..ad3`` (module-level locus indexes).

    Returns:
        Always ``True``.
    """
    loci = [dmi1, dmi2, dmi3, dmi4, ad1, ad2, ad3]
    sim.stat(pop, alleleFreq=loci, vars=['alleleFreq_sp'])
    # Single-argument print(...) behaves the same on Python 2 and 3; the old
    # print statements are a SyntaxError on Python 3.
    print('Allele frequencies at generation %s' % (pop.dvars().gen,))
    for p in range(3):
        freqs = pop.dvars(p).alleleFreq
        # One row per subpopulation, ending after the last locus (ad3).
        print(' '.join('%.2f' % freqs[l][1] for l in loci))
    return True
def get_mean_r2(Ne, S, n_loci, gens, repeats, n_subpops, initial_frequencies, m):
    """Simulate a structured population and return the mean R2 per subpopulation.

    A population of ``n_subpops`` subpopulations of size ``Ne`` with
    ``n_loci`` single-locus chromosomes is initialised with
    ``initial_frequencies`` and evolved for ``gens`` generations under random
    mating with migration matrix ``get_migration_matrix(m, n_subpops)``.
    ``S`` individuals are sampled from every subpopulation.  Within each
    subpopulation, loci whose allele-0 frequency satisfies
    ``|0.5 - f| < 0.45`` are kept and the ``R2`` values of all pairs of kept
    loci are averaged.  ``repeats`` is accepted but not used.

    Returns:
        List of mean R2 values, one per subpopulation.

    Raises:
        Exception: if a subpopulation has fewer than two loci passing the filter.
    """
    migration = get_migration_matrix(m, n_subpops)
    pop = sim.Population(size=[Ne] * n_subpops, ploidy=2,
                         loci=[1] * n_loci, infoFields='migrate_to')
    sim.initGenotype(pop, freq=initial_frequencies)
    pop.evolve(
        initOps=[sim.InitSex(), sim.InitGenotype(freq=initial_frequencies)],
        preOps=sim.Migrator(rate=migration),
        matingScheme=sim.RandomMating(),
        gen=gens,
    )

    sample_pop = drawRandomSample(pop, sizes=[S] * n_subpops)
    sim.stat(sample_pop, alleleFreq=range(0, n_loci), vars=['alleleFreq_sp'])
    locus_pairs = list(combinations(list(range(n_loci)), r=2))
    sim.stat(sample_pop, LD=locus_pairs, vars=['R2_sp'])

    r2s = []
    for sp in range(n_subpops):
        freqs = sample_pop.dvars(sp).alleleFreq
        segregating = []
        for k in range(n_loci):
            if np.abs(.5 - freqs[k][0]) < .5 - 0.05:
                segregating.append(k)
        if len(segregating) < 2:
            raise Exception("<2 segregating alleles")

        r2_values = [
            sample_pop.dvars(sp).R2[first][second]
            for first, second in combinations(segregating, r=2)
        ]
        r2s.append(sum(r2_values) / len(r2_values))
    return r2s
def get_FCs(Ne, S, n_loci, gens, n_subpops, initial_frequencies, m):
    """Run the allelic-fluctuation simulation and return one FC value per subpopulation.

    Two populations are initialised from ``initial_frequencies``: one with
    subpopulations of size ``Ne`` that is evolved for ``gens`` generations
    with migration and random mating, and one with subpopulations of size
    ``S`` that serves as the initial sample.  After evolution ``S``
    individuals are sampled from every subpopulation.  For each subpopulation
    ``fc_variant`` is evaluated on the initial and final allele-frequency
    pairs of every locus whose initial pair ``(p, q)`` does not satisfy
    ``p**2 + q**2 == 1``, and the values are averaged.

    Returns:
        List of mean FC values, one per subpopulation.
    """
    # Population to evolve.
    popNe = sim.Population(size=[Ne] * n_subpops, ploidy=2,
                           loci=[1] * n_loci, infoFields='migrate_to')
    # Initial sample population.
    popS = sim.Population(size=[S] * n_subpops, ploidy=2, loci=[1] * n_loci)
    for population in (popNe, popS):
        sim.initGenotype(population, freq=initial_frequencies)

    # Initial sample allele frequencies.
    sim.stat(popS, alleleFreq=range(n_loci), vars=['alleleFreq_sp'])

    migration = get_migration_matrix(m, n_subpops)
    popNe.evolve(
        initOps=[sim.InitSex()],
        preOps=sim.Migrator(rate=migration),
        matingScheme=sim.RandomMating(),
        gen=gens,
    )
    sample_pop = drawRandomSample(popNe, sizes=[S] * n_subpops)
    sim.stat(sample_pop, alleleFreq=range(n_loci), vars=['alleleFreq_sp'])

    all_FCs = []
    for sp in range(n_subpops):
        start_freqs = popS.dvars(sp).alleleFreq
        end_freqs = sample_pop.dvars(sp).alleleFreq
        used_loci = 0
        fc_total = 0
        for locus in range(n_loci):
            start = repair(start_freqs[locus])
            end = repair(end_freqs[locus])
            # Loci whose initial pair squares sum to exactly 1 are skipped.
            if start[0]**2 + start[1]**2 == 1:
                continue
            fc_total += fc_variant([start[0], start[1]], [end[0], end[1]])
            used_loci += 1
        all_FCs.append(fc_total / used_loci)
    return all_FCs
def avgAllele(pop):
    """Store three weighted allele averages as the population variable ``avgAllele``.

    The averages are computed, in this order, from ``alleleNum`` at locus 0
    in virtual subpopulation ``(0, 0)``, at locus 0 in virtual subpopulation
    ``(0, 1)`` and at locus 1 in the whole population.  An entry is 0 when
    its total is 0.

    Returns:
        Always ``True``.
    """
    sim.stat(pop, alleleFreq=(0,1), subPops=[(0,0), (0,1)],
             numOfAffected=True, vars=['alleleNum', 'alleleNum_sp'])

    sources = [
        pop.dvars((0,0)).alleleNum[0],  # first locus, unaffected
        pop.dvars((0,1)).alleleNum[0],  # first locus, affected
        pop.dvars().alleleNum[1],       # second locus, overall
    ]

    averages = []
    for allele_counts in sources:
        weighted = 0
        total = 0
        # NOTE(review): enumerate() pairs positions with the iterated items;
        # if allele_counts is dict-like this iterates its keys, not the
        # counts - confirm against the simuPOP version in use.
        for position, item in enumerate(allele_counts):
            weighted += position * item
            total += item
        averages.append(0 if total == 0 else weighted * 1.0 / total)

    # Order: unaffected, affected, second locus.
    pop.dvars().avgAllele = averages
    return True
def env_update(pop):
    """Split the module-level ``vec_env`` values when there are more than two subpopulations.

    With ``k = 1.6 / numpop``, entry ``i`` of the new vector is the old entry
    ``i // 2`` minus ``k`` for even ``i`` and plus ``k`` for odd ``i``,
    rounded to one decimal.  With two or fewer subpopulations ``vec_env`` is
    left untouched.

    Returns:
        Always ``True``.
    """
    global vec_env
    sim.stat(pop, popSize=True)
    numpop = len(pop.dvars().subPopSize)
    # Nothing to do for numpop <= 2: those values are already fixed.
    if numpop > 2:
        # Offset used to create the two new values around each old one.
        k = 1.6 / float(numpop)
        updated = [0] * numpop
        for i in range(numpop):
            # Floor division is required: ``i / 2`` is a float on Python 3
            # and cannot index a list.
            parent_value = vec_env[i // 2]
            if i % 2 == 0:
                # Left of the old value.
                updated[i] = round(parent_value - k, 1)
            else:
                # Right of the old value.
                updated[i] = round(parent_value + k, 1)
        vec_env = updated
    return True
def censusNumAlleles(pop, param):
    """Store the number of alleles present at each locus of the whole population.

    Allele frequencies are computed for all loci; for loci ``0..numloci-1``
    the number of entries in ``alleleFreq[locus]`` is passed to
    ``_storeRichnessSample``.

    Args:
        pop (Population): simuPOP population replicate.
        param: ``(mutation, popsize, sim_id, numloci)``.

    Returns:
        Always ``True``.
    """
    (mutation, popsize, sim_id, numloci) = param
    replicate_id = pop.dvars().rep
    generation = pop.dvars().gen
    sim.stat(pop, alleleFreq=sim.ALL_AVAIL)
    for locus in range(numloci):
        richness = len(pop.dvars().alleleFreq[locus])
        _storeRichnessSample(replicate_id, richness, locus, generation,
                             mutation, popsize, sim_id)
    return True
def log(self, pop):
    """Write a JSON snapshot of the population every ``self.step`` calls.

    ``self.called`` is decremented on each call and nothing else happens
    until it reaches 0.  Then ``self.block`` is incremented, allele counts
    are computed, and a dictionary with the results of
    ``self.population_summary(pop, 0)`` and ``self.allele_summary(pop, 0)``
    is dumped to ``<self.output>.<block * step>.pop.json``.  Finally
    ``self.called`` is reset to ``self.step``.

    Returns:
        Always ``True``.
    """
    self.called -= 1
    if self.called != 0:
        return True

    self.block += 1
    sim.stat(pop, alleleFreq=sim.ALL_AVAIL, vars=["alleleNum"])

    snapshot = {
        "summary": self.population_summary(pop, 0),
        "loci": self.allele_summary(pop, 0),
    }
    target = self.output + "." + str(int(self.block * self.step)) + ".pop.json"
    with open(target, "wt") as lfile:
        json.dump(snapshot, lfile, sort_keys=True, indent=1)

    self.called = self.step
    return True
def censusTraitCounts(pop, param):
    """Store per-allele counts for each locus of the whole population.

    Runs ``sim.stat`` with ``alleleFreq=sim.ALL_AVAIL`` on ``pop`` and hands
    every ``(allele, count)`` pair of ``alleleNum[locus]`` for loci
    ``0..numloci-1`` to ``_storeTraitCountSample``.

    Args:
        pop (Population): simuPOP population replicate.
        param: ``(mutation, popsize, sim_id, numloci)``.

    Returns:
        Always ``True``.
    """
    (mutation, popsize, sim_id, numloci) = param
    popID = pop.dvars().rep
    gen = pop.dvars().gen
    sim.stat(pop, alleleFreq=sim.ALL_AVAIL)
    allele_counts = pop.dvars().alleleNum
    for locus in range(numloci):
        # items() exists on Python 2 and 3; iteritems() is Python 2 only.
        for allele, count in allele_counts[locus].items():
            _storeTraitCountSample(popID, locus, gen, mutation, popsize,
                                   sim_id, allele, count)
    return True
def simu(w, m1, m2, psize, afr_size, fl):
    """Run the three-subpopulation simulation per gene and export samples.

    For every ``(n_loci, gene_prop_az, gene_prop_tx)`` entry returned by
    ``sample_genes()`` a fresh population with the subpopulations AZ, TX and
    AFR is created, evolved for one generation with initialisation only,
    exported, evolved for ten more generations with migration, selection and
    AFR re-initialisation/resizing, and exported again. Finally two header
    lines are prepended to the output file.

    Args:
        w: fitness of genotype ``(0, 0)``; ``(0, 1)`` gets ``(1 + w) / 2``.
        m1: entry of the backward migration rate matrix linking
            subpopulations 0 and 1.
        m2: entry of the backward migration rate matrix in column 2 of the
            first two rows.
        psize: size of the AZ and TX subpopulations.
        afr_size: AFR size as a multiple of ``psize`` (rounded up).
        fl: path of the output file; an existing file is removed first.

    Returns:
        None.
    """
    print(fl)
    if os.path.exists(fl):
        os.remove(fl)

    def mean_allele1_freq(population, locus_count, subpop):
        # Refresh alleleFreq for one subpopulation, then average the
        # frequency of allele 1 over all loci.
        sim.stat(population, alleleFreq=list(range(locus_count)), subPops=[subpop])
        freqs = population.dvars().alleleFreq
        return np.mean(np.array([freqs[loc][1] for loc in range(locus_count)]))

    mating = sim.HaplodiploidMating(sexMode=sex_func, subPopSize=get_sizes)
    fitness = {
        (0,): 1.0,
        (1,): 1.0,
        (0, 0): w,
        (0, 1): (1 + w) / 2,
        (1, 1): 1.0,
    }
    migrator = sim.BackwardMigrator(
        rate=[[0, m1, m2],
              [m1, 0, m2],
              [0, 0, 0]],
        begin=4, step=1)

    count_pop = {}
    gene_specs = sample_genes()
    for i, (n_loci, gene_prop_az, gene_prop_tx) in enumerate(gene_specs):
        afr_count = math.ceil(psize * afr_size)

        selector = sim.MlSelector(
            [sim.MapSelector(loci=x, fitness=fitness) for x in range(n_loci)],
            mode=sim.ADDITIVE, begin=4, step=1)
        pre_ops = [
            migrator,
            selector,
            sim.InitGenotype(prop=(0.1, 0.9), subPops=[2]),
        ]
        post_ops = [
            sim.ResizeSubPops(subPops=[2], sizes=[afr_count], propagate=True, at=g)
            for g in range(1, 11)
        ]

        # ancGen=-1: store all past generations.
        pop = sim.Population(
            size=[psize, psize, afr_count],
            ploidy=2,
            loci=n_loci,
            subPopNames=['AZ', 'TX', 'AFR'],
            ancGen=-1,
            infoFields=['fitness', 'migrate_to', 'migrate_from'])

        # Phase 1: initialise and advance a single generation.
        pop.evolve(
            initOps=[
                sim.InitSex(maleFreq=0.9),
                sim.InitGenotype(prop=(gene_prop_az, 1 - gene_prop_az), subPops=[0]),
                sim.InitGenotype(prop=(gene_prop_tx, 1 - gene_prop_tx), subPops=[1]),
                sim.InitGenotype(prop=(0.1, 0.9), subPops=[2]),
            ],
            matingScheme=mating,
            preOps=[],
            postOps=[],
            gen=1)
        az1 = mean_allele1_freq(pop, n_loci, 0)
        tx1 = mean_allele1_freq(pop, n_loci, 1)
        early = export(pop, fl, False)
        count_pop["AZ_early"] = early["AZ"]
        count_pop["TX_early"] = early["TX"]

        # Phase 2: ten generations with the pre/post operators active.
        pop.evolve(
            initOps=[],
            matingScheme=mating,
            preOps=pre_ops,
            postOps=post_ops,
            gen=10)
        az2 = mean_allele1_freq(pop, n_loci, 0)
        tx2 = mean_allele1_freq(pop, n_loci, 1)
        late = export(pop, fl, True)
        count_pop["AZ_late"] = late["AZ"]
        count_pop["TX_late"] = late["TX"]

        print("%i) nloci: %i AZ : %.3f->%.3f TX : %.3f->%.3f" % (i, n_loci, az1, az2, tx1, tx2))

    n = sum(count_pop[key] for key in ("AZ_early", "TX_early", "AZ_late", "TX_late"))

    # Prepend the two header lines by rewriting the file from the start.
    with open(fl, 'r+') as f:
        exported = f.readlines()
        header = [
            'simuPOP_export %d %d\n' % (n, len(gene_specs)),
            '30164 48394 29292\n',
        ]
        f.seek(0)
        f.writelines(header + exported)
def checkAlleles(self, pop, param):
    """
    Record allele frequencies and haplotype samples for the current call.

    Three things are stored in ``self.results``:

    * For subpopulation 1, the frequency of every allele at every locus is
      appended to ``self.results['alleleFr<locus><allele>']`` (e.g.
      ``'alleleFr01'`` is locus 0, allele 1) and a copy of that history is
      placed in ``self.results['all_allelesFreq']`` under the same key.
    * For the first four entries of ``param`` the frequency of allele 0 at
      locus 50 in that subpopulation is appended to ``'XSelectedLoci'``,
      ``'YSelectedLoci'``, ``'ZSelectedLoci'`` and ``'WSelectedLoci'``
      respectively.
    * When the frequency of allele 0 at locus 50 in subpopulation 1 lies
      inside one of the fixed windows below, both haplotypes of 100
      randomly chosen individuals from each subpopulation in ``param`` are
      stored in ``self.results['all_haplotypes']['<name><suffix>']``,
      provided that entry is still an empty list.

    Args:
        pop: simuPOP population.
        param: sequence of subpopulation identifiers; the names ``'x'``,
            ``'y'``, ``'z'`` and ``'w'`` are mapped to subpopulation
            indices 0-3 when individuals are sampled.

    Returns:
        Always True.

    Raises:
        ValueError: if a haplotype sample is due for a subpopulation whose
            name is not one of ``'x'``, ``'y'``, ``'z'``, ``'w'``
            (previously this used a stale or unbound index).
    """
    # (lower bound, upper bound, key suffix) for the haplotype snapshots;
    # bounds are exclusive and the windows do not overlap.
    frequency_windows = (
        (0.08, 0.11, '01'),
        (0.18, 0.22, '02'),
        (0.28, 0.32, '03'),
        (0.38, 0.42, '04'),
        (0.48, 0.52, '05'),
        (0.58, 0.62, '06'),
        (0.68, 0.72, '07'),
        (0.78, 0.82, '08'),
        (0.88, 0.92, '09'),
        (0.97, 0.99, '1'),
    )
    selected_keys = ('XSelectedLoci', 'YSelectedLoci', 'ZSelectedLoci', 'WSelectedLoci')
    subpop_index = {'x': 0, 'y': 1, 'z': 2, 'w': 3}

    # store the allele frequencies from subpopulation 1 (Y) from all loci
    sim.stat(pop, alleleFreq=range(self.loci), subPops=[1])
    allele_freq = pop.dvars().alleleFreq
    for loci in range(self.loci):
        for allele in range(self.alleles):
            key = 'alleleFr{loci}{allele}'.format(loci=loci, allele=allele)
            self.results[key].append(allele_freq[loci][allele])
            # keep a copy of the full history under the same key
            self.results['all_allelesFreq'][key] = list(self.results[key])

    # save the frequency of allele 0 (which is selected) at locus 50 for
    # each subpopulation; only the first four entries have a result key
    for position, subpop in enumerate(param):
        sim.stat(pop, alleleFreq=range(self.loci), subPops=subpop)
        if position < len(selected_keys):
            self.results[selected_keys[position]].append(pop.dvars().alleleFreq[50][0])

    # save the haplotypes for each subpopulation; the windows are tested
    # against the frequency in subpopulation 1
    sim.stat(pop, alleleFreq=range(self.loci), subPops=[1])
    selected_freq = pop.dvars().alleleFreq[50][0]
    for name in param:
        for low, high, suffix in frequency_windows:
            if not (selected_freq > low and selected_freq < high):
                continue
            samples = self.results['all_haplotypes']['{pop}{suffix}'.format(pop=name, suffix=suffix)]
            # only the first visit to a window is sampled
            if samples != []:
                continue
            if name not in subpop_index:
                raise ValueError(
                    "checkAlleles: cannot map subpopulation %r to an index" % (name,))
            popInd = subpop_index[name]
            for ind in random.sample(range(0, pop.subPopSize(name)), 100):
                individual = pop.individual(ind, popInd)
                samples.append(list(individual.genotype(0)))
                samples.append(list(individual.genotype(1)))
    return True
'''
Created on Oct 6, 2010

@author: Gaurav Singhal
'''
import simuPOP as sim

# Two-locus population evolved for 100 generations under random mating with
# recombination; LD between loci 0 and 1 is printed every 10 generations.
pop = sim.Population(size=1000, loci=[2])
pop.evolve(
    # Operator objects (capitalised) are required here: the lower-case
    # function forms run immediately on `pop` and do not yield operators.
    initOps=[
        sim.InitSex(),
        sim.InitGenotype(genotype=[1, 2, 2, 1]),
    ],
    matingScheme=sim.RandomMating(ops=sim.Recombinator(rates=0.01)),
    postOps=[
        # LD only needs refreshing in the generations where it is printed.
        sim.Stat(LD=[0, 1], step=10),
        sim.PyEval(r"'%.2f\n' % LD[0][1]", step=10),
    ],
    gen=100,
)