def _geneflow(self, pop1, pop2, start, rate):
    """Build a migration-rate change for one migration-matrix entry.

    Args:
        pop1: First index of the migration-matrix entry.
        pop2: Second index of the migration-matrix entry.
        start: Time at which the rate change takes effect.
        rate: New migration rate for the ``(pop1, pop2)`` entry.

    Returns:
        The ``MigrationRateChange`` event. ``self`` is not used.
    """
    matrix_cell = (pop1, pop2)
    event = msp.MigrationRateChange(
        time=start,
        rate=rate,
        matrix_index=matrix_cell,
    )
    return event
def __init__(self):
    """Configure a two-population model with growth, migration and a merger.

    Population 0 uses the ``AF`` parameters and population 1 the ``EU``
    parameters. Event times are given in years and divided by
    ``self.generation_time`` to obtain generations. Going backwards in time:

    * until ``T_EG`` both populations change size exponentially and exchange
      migrants at rate ``m_AF_EU``;
    * from ``T_EG`` population 0 has constant size ``N_AF1`` and
      population 1 uses the slower growth rate ``r_EU0``;
    * from ``T_EU0`` population 1 has constant size ``N_B`` and migration
      runs at rate ``m_AF_B``;
    * at ``T_OOA`` population 1 merges into population 0 and all migration
      is switched off;
    * at ``T_AF`` population 0 changes to the ancestral size ``N_A``.

    Sets ``generation_time``, ``population_configurations``,
    ``migration_matrix`` and ``demographic_events`` on the instance.
    """
    super().__init__()
    self.generation_time = default_generation_time

    # Event times, converted from years to generations.
    T_AF = 148e3 / self.generation_time
    T_OOA = 51e3 / self.generation_time
    T_EU0 = 23e3 / self.generation_time
    T_EG = 5115 / self.generation_time

    # Growth rates
    r_EU0 = 0.00307
    r_EU = 0.0195
    r_AF = 0.0166

    # Population sizes
    N_A = 7310
    N_AF1 = 14474
    N_B = 1861
    N_EU0 = 1032
    # Size of population 1 at T_EG, after growing at r_EU0 since T_EU0.
    N_EU1 = N_EU0 / math.exp(-r_EU0 * (T_EU0 - T_EG))

    # Migration rates
    m_AF_B = 15e-5
    m_AF_EU = 2.5e-5

    # Present-day sizes, after growing at r_EU / r_AF since T_EG.
    N_EU = N_EU1 / math.exp(-r_EU * T_EG)
    N_AF = N_AF1 / math.exp(-r_AF * T_EG)

    self.population_configurations = [
        msprime.PopulationConfiguration(initial_size=N_AF, growth_rate=r_AF),
        msprime.PopulationConfiguration(initial_size=N_EU, growth_rate=r_EU),
    ]

    self.migration_matrix = [
        [0, m_AF_EU],
        [m_AF_EU, 0],
    ]

    self.demographic_events = [
        # T_EG: both populations switch to their earlier growth regime.
        msprime.MigrationRateChange(
            time=T_EG, rate=m_AF_EU, matrix_index=(0, 1)),
        msprime.MigrationRateChange(
            time=T_EG, rate=m_AF_EU, matrix_index=(1, 0)),
        msprime.PopulationParametersChange(
            time=T_EG, growth_rate=r_EU0, initial_size=N_EU1, population_id=1),
        msprime.PopulationParametersChange(
            time=T_EG, growth_rate=0, initial_size=N_AF1, population_id=0),
        # T_EU0: population 1 becomes the constant-size population B.
        msprime.MigrationRateChange(
            time=T_EU0, rate=m_AF_B, matrix_index=(0, 1)),
        msprime.MigrationRateChange(
            time=T_EU0, rate=m_AF_B, matrix_index=(1, 0)),
        msprime.PopulationParametersChange(
            time=T_EU0, initial_size=N_B, growth_rate=0, population_id=1),
        # T_OOA: population 1 merges into population 0.
        msprime.MassMigration(
            time=T_OOA, source=1, destination=0, proportion=1.0),
        # Migration must be switched off after the merger; otherwise
        # lineages keep moving into the now-defunct population 1 at rate
        # m_AF_B for the rest of the simulation.
        msprime.MigrationRateChange(time=T_OOA, rate=0),
        # T_AF: population 0 changes to the ancestral size.
        msprime.PopulationParametersChange(
            time=T_AF, initial_size=N_A, population_id=0),
    ]
def __init__(self):
    """Configure a five-population model with archaic admixture.

    Population indices: 0=YRI, 1=CEU, 2=CHB, 3=Nean (putative
    Neanderthals) and 4=arch_afr (archaic African). Times are given in
    years and converted to generations with a generation time of 29 years.

    Sets ``generation_time``, ``population_configurations``,
    ``migration_matrix`` and ``demographic_events`` on the instance.
    """
    super().__init__()

    # The published model used 29 years per generation to convert from
    # genetic to physical units.
    self.generation_time = 29

    def to_generations(years):
        """Convert a time in years into generations."""
        return years / self.generation_time

    # Maximum likelihood parameter values from Table 1 (archaic admixture).
    N_0 = 3600
    N_YRI = 13900
    N_B = 880
    N_CEU0 = 2300
    N_CHB0 = 650

    # Event times, in generations.
    T_AF = to_generations(300e3)
    T_B = to_generations(60.7e3)
    T_EU_AS = to_generations(36.0e3)
    T_arch_afr_split = to_generations(499e3)
    T_arch_afr_mig = to_generations(125e3)
    T_nean_split = to_generations(559e3)
    T_arch_adm_end = to_generations(18.7e3)

    # Present-day (diploid) sizes of the two growing populations, derived
    # from their sizes at the CEU/CHB split and their growth rates.
    r_CEU = 0.00125
    r_CHB = 0.00372
    N_CEU = N_CEU0 / math.exp(-r_CEU * T_EU_AS)
    N_CHB = N_CHB0 / math.exp(-r_CHB * T_EU_AS)

    # Migration rates during the various epochs.
    m_AF_B = 52.2e-5
    m_YRI_CEU = 2.48e-5
    m_YRI_CHB = 0e-5
    m_CEU_CHB = 11.3e-5
    m_AF_arch_af = 1.98e-5
    m_OOA_nean = 0.825e-5

    # Population IDs are indexes into the population configuration list.
    # Both archaic populations have the ancestral reference size N_0.
    YRI, CEU, CHB, NEAN, ARCH_AFR = range(5)

    self.population_configurations = [
        msprime.PopulationConfiguration(initial_size=N_YRI),
        msprime.PopulationConfiguration(initial_size=N_CEU, growth_rate=r_CEU),
        msprime.PopulationConfiguration(initial_size=N_CHB, growth_rate=r_CHB),
        msprime.PopulationConfiguration(initial_size=N_0),
        msprime.PopulationConfiguration(initial_size=N_0),
    ]

    self.migration_matrix = [
        [0, m_YRI_CEU, m_YRI_CHB, 0, 0],
        [m_YRI_CEU, 0, m_CEU_CHB, 0, 0],
        [m_YRI_CHB, m_CEU_CHB, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
    ]

    def both_ways(time, rate, first, second):
        """Return rate changes for (first, second) and then (second, first)."""
        return [
            msprime.MigrationRateChange(
                time=time, rate=rate, matrix_index=(first, second)),
            msprime.MigrationRateChange(
                time=time, rate=rate, matrix_index=(second, first)),
        ]

    events = []

    # Migration is turned on between modern and archaic populations.
    events += both_ways(T_arch_adm_end, m_AF_arch_af, YRI, ARCH_AFR)
    events += both_ways(T_arch_adm_end, m_OOA_nean, CEU, NEAN)
    events += both_ways(T_arch_adm_end, m_OOA_nean, CHB, NEAN)

    # CEU and CHB merge into B, with rate changes, at T_EU_AS.
    events.append(msprime.MassMigration(
        time=T_EU_AS, source=2, destination=1, proportion=1.0))
    events.append(msprime.MigrationRateChange(time=T_EU_AS, rate=0))
    events += both_ways(T_EU_AS, m_AF_B, YRI, CEU)
    events += both_ways(T_EU_AS, m_AF_arch_af, YRI, ARCH_AFR)
    events += both_ways(T_EU_AS, m_OOA_nean, CEU, NEAN)
    events.append(msprime.PopulationParametersChange(
        time=T_EU_AS, initial_size=N_B, growth_rate=0, population_id=1))

    # Population B merges into YRI at T_B; only the YRI <-> archaic
    # African migration is restored after the reset.
    events.append(msprime.MassMigration(
        time=T_B, source=1, destination=0, proportion=1.0))
    events.append(msprime.MigrationRateChange(time=T_B, rate=0))
    events += both_ways(T_B, m_AF_arch_af, YRI, ARCH_AFR)

    # All migration rates are set to zero at T_arch_afr_mig.
    events.append(msprime.MigrationRateChange(time=T_arch_afr_mig, rate=0))

    # Size changes to N_0 at T_AF.
    events.append(msprime.PopulationParametersChange(
        time=T_AF, initial_size=N_0, population_id=0))

    # Archaic African, then Neanderthal, merge into population 0.
    events.append(msprime.MassMigration(
        time=T_arch_afr_split, source=4, destination=0, proportion=1.0))
    events.append(msprime.MassMigration(
        time=T_nean_split, source=3, destination=0, proportion=1.0))

    self.demographic_events = events
def _orangutan():
    """Build the two-population orangutan ``DemographicModel``.

    Population 0 is Bornean and population 1 is Sumatran. The two
    populations change size exponentially from their sizes at the split to
    their present sizes, exchange migrants, and merge into a single
    ancestral population of size ``Na`` at ``T_split``.
    """
    model_id = "TwoSpecies_2L11"
    summary = "Two population orangutan model"
    details = """
        The two orang-utan species, Sumatran (Pongo abelii) and Bornean (Pongo
        pygmaeus) inferred from the joint-site frequency spectrum with ten
        individuals from each population. This model is an isolation-with-
        migration model, with exponential growth or decay in each population
        after the split. The Sumatran population grows in size, while the
        Bornean population slightly declines.
        """
    references = [_locke2011.because(stdpopsim.CiteReason.DEM_MODEL)]

    bornean = stdpopsim.Population(
        "Bornean", "Pongo pygmaeus (Bornean) population")
    sumatran = stdpopsim.Population(
        "Sumatran", "Pongo abelii (Sumatran) population")
    populations = [bornean, sumatran]

    # Parameters from the paper.
    Na = 17934  # ancestral size, before the split
    T_split_years = 403149  # time of the split, in years
    s = 0.592  # proportion of the ancestral population going to branch B

    # Split time in units of generations.
    generation_time = _species.generation_time
    T_split = T_split_years / generation_time

    # Sizes at the split and at present.
    Na_B = Na * s
    Na_S = Na * (1 - s)
    N_B = 8805
    N_S = 37661

    # Exponential growth rates implied by the split and present sizes.
    r_B = -1 * math.log(Na_B / N_B) / T_split
    r_S = -1 * math.log(Na_S / N_S) / T_split

    # Migration rates.
    m_S_B = 0.395 / 2 / Na
    m_B_S = 0.239 / 2 / Na

    # Population IDs are indexes into the configuration list: 0=B, 1=S.
    configurations = [
        msprime.PopulationConfiguration(
            initial_size=N_B, growth_rate=r_B, metadata=bornean.asdict()),
        msprime.PopulationConfiguration(
            initial_size=N_S, growth_rate=r_S, metadata=sumatran.asdict()),
    ]
    migration = [
        [0, m_B_S],
        [m_S_B, 0],
    ]
    # Merge the populations, turn off migration and change to size Na.
    events = [
        msprime.MassMigration(
            time=T_split, source=1, destination=0, proportion=1.0),
        msprime.MigrationRateChange(time=T_split, rate=0),
        msprime.PopulationParametersChange(
            time=T_split, initial_size=Na, growth_rate=0, population_id=0),
    ]

    return stdpopsim.DemographicModel(
        id=model_id,
        description=summary,
        long_description=details,
        citations=references,
        populations=populations,
        generation_time=generation_time,
        population_configurations=configurations,
        migration_matrix=migration,
        demographic_events=events,
    )
def hominin_composite_archaic_africa():
    """Extend ``hominin_composite()`` with an archaic African population.

    The base model's populations, configurations and events are reused. An
    ``ArchaicAFR`` population of size ``N_0`` is appended, together with
    the events that switch its migration with ``YRI`` / ``Anc`` on and off
    and finally merge it into ``Anc``. The combined event list is sorted
    by time before the model is built.
    """
    base = hominin_composite()
    model_id = "HomininComposite2_4G20"
    summary = "HomininComposite2_4G20 plus archaic lineage in Africa"
    generation_time = 29

    # Ragsdale & Gravel 2019
    N_0 = 3600
    T_arch_afr_split = 499e3 / generation_time
    T_arch_afr_mig = 125e3 / generation_time
    T_arch_adm_end = 18.7e3 / generation_time
    m_AF_arch_af = 1.98e-5
    T_YRI_CEU_split = 65.7e3 / generation_time

    archaic = stdpopsim.Population(
        "ArchaicAFR", "Putative Archaic Africans", sampling_time=None
    )
    populations = base.populations + [archaic]
    population_configurations = base.population_configurations + [
        msprime.PopulationConfiguration(
            initial_size=N_0, metadata=populations[-1].asdict()
        ),
    ]

    # Map population id -> index in the population list.
    index_of = {}
    for position, population in enumerate(populations):
        index_of[population.id] = position
    yri = index_of["YRI"]
    anc = index_of["Anc"]
    arch = index_of["ArchaicAFR"]

    def both_ways(time, rate, modern):
        """Rate changes for (modern, arch) and then (arch, modern)."""
        return [
            msprime.MigrationRateChange(
                time=time, rate=rate, matrix_index=(modern, arch)),
            msprime.MigrationRateChange(
                time=time, rate=rate, matrix_index=(arch, modern)),
        ]

    archaic_events = []
    # Migration turned on between Yoruban and archaic African populations.
    archaic_events += both_ways(T_arch_adm_end, m_AF_arch_af, yri)
    # YRI merges into Anc: turn off migration between YRI and ArchaicAFR...
    archaic_events += both_ways(T_YRI_CEU_split, 0, yri)
    # ...and turn it on between Anc and ArchaicAFR instead.
    archaic_events += both_ways(T_YRI_CEU_split, m_AF_arch_af, anc)
    # Anc <-> ArchaicAFR migration is set to zero at T_arch_afr_mig.
    archaic_events += both_ways(T_arch_afr_mig, 0, anc)
    # Archaic African merges with moderns.
    archaic_events.append(
        msprime.MassMigration(
            time=T_arch_afr_split,
            source=arch,
            destination=anc,
            proportion=1.0,
        )
    )

    # Stable sort, so same-time events keep their relative order.
    demographic_events = sorted(
        base.demographic_events + archaic_events,
        key=lambda event: event.time,
    )

    return stdpopsim.DemographicModel(
        id=model_id,
        description=summary,
        long_description=base.long_description,
        populations=populations,
        citations=base.citations.copy(),
        generation_time=generation_time,
        population_configurations=population_configurations,
        demographic_events=demographic_events,
    )
def run_model(self):
    """Simulate the five-population model and return the tree sequence.

    Reads ``self.infile`` (HapMap-format recombination map),
    ``self.hg_mig_rate``, ``self.populations``, ``self.sample_times``,
    ``self.nhaps`` and ``self.mutation_rate``. The demographic history is
    printed with ``DemographyDebugger`` before the simulation runs.

    Populations:
        0 = present/bronze/neolithic_farmers/Ana/B, 1 = Yam/CHG,
        2 = WHG/A, 3 = EHG, 4 = BAA
    """
    # Load recomb map
    recomb_map = msprime.RecombinationMap.read_hapmap(self.infile)

    # Initial population sizes
    N_bronze = 50000
    N_Yam = 20000
    N_baa = 10000
    N_whg = 10000
    N_ehg = 10000
    N_neo = 50000
    N_chg = 10000
    N_A = 5000  # Ancestor of WHG and EHG
    N_B = 5000  # Ancestor of CHG and Neolithic farmers

    # Time of events
    T_bronze = 150
    T_Yam = 200
    T_neo = 250
    T_baa = 275
    T_europe = 500
    T_near_east = 800
    T_basal = 1500

    # Growth rate since the bronze age and the resulting present-day size.
    r_EU = 0.067
    N_present = N_bronze / math.exp(-r_EU * T_bronze)

    population_configurations = [
        msprime.PopulationConfiguration(
            initial_size=N_present, growth_rate=r_EU),
        msprime.PopulationConfiguration(initial_size=N_Yam),
        msprime.PopulationConfiguration(initial_size=N_whg),
        msprime.PopulationConfiguration(initial_size=N_ehg),
        msprime.PopulationConfiguration(initial_size=N_baa),
    ]

    hg_rate = self.hg_mig_rate
    demographic_events = [
        # Bronze age formation
        msprime.MassMigration(
            time=T_bronze, source=0, dest=1, proportion=0.5),
        msprime.PopulationParametersChange(
            time=T_bronze, initial_size=N_neo, growth_rate=0, population=0),
        # Yamnaya formation
        msprime.MassMigration(time=T_Yam, source=1, dest=3, proportion=0.5),
        msprime.PopulationParametersChange(
            time=T_Yam, initial_size=N_chg, population=1),
        msprime.MigrationRateChange(
            time=T_Yam, rate=hg_rate, matrix_index=(2, 3)),
        msprime.MigrationRateChange(
            time=T_Yam, rate=hg_rate, matrix_index=(3, 2)),
        # European neolithic
        msprime.MassMigration(
            time=T_neo, source=0, dest=2, proportion=1.0 / 4.0),
        # BAA formation
        msprime.MassMigration(
            time=T_baa, source=4, dest=1, proportion=1.0 / 4.0),
        # Ana split: remaining population 4 lineages move to population 0
        msprime.MassMigration(time=276, source=4, dest=0, proportion=1),
        # Hunter-gatherer split
        msprime.MassMigration(
            time=T_europe, source=3, dest=2, proportion=1),
        msprime.MigrationRateChange(time=T_europe, rate=0),
        msprime.PopulationParametersChange(
            time=T_europe, initial_size=N_A, population=2),
        # Near-east split
        msprime.MassMigration(
            time=T_near_east, source=1, dest=0, proportion=1),
        msprime.PopulationParametersChange(
            time=T_near_east, initial_size=N_B, population=0),
        # Basal split
        msprime.MassMigration(time=T_basal, source=2, dest=0, proportion=1),
    ]

    # Define samples: nhaps[i] copies of one sample per listed population.
    samples = []
    for i, p in enumerate(self.populations):
        one_sample = msprime.Sample(time=self.sample_times[i], population=p)
        samples.extend([one_sample] * self.nhaps[i])

    # Debugging the demography (all-zero 5x5 initial migration matrix).
    migration_matrix = [[0] * 5 for _ in range(5)]
    dd = msprime.DemographyDebugger(
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events)
    dd.print_history()

    # Simulate chromosome 3 only
    tree_sequence = msprime.simulate(
        recombination_map=recomb_map,
        mutation_rate=self.mutation_rate,
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        samples=samples)
    return tree_sequence
def _get_demography(self):
    """
    Build the msprime demographic events for this model and store them,
    sorted, in ``self.ms_demography``.

    Events are derived from the input tree (``self.tree``) and from the
    admixture edge list (``self.admixture_edges``), whose entries have the
    format (source, dest, start, end, rate). Time on the tree is defined
    in units of generations.

    Three kinds of events are produced:

    * a ``MassMigration`` plus a ``PopulationParametersChange`` for every
      internal node (divergence), and a ``PopulationParametersChange`` for
      every tip;
    * if ``self.admixture_type`` is falsy, one ``MassMigration`` pulse per
      admixture edge;
    * otherwise, a pair of ``MigrationRateChange`` events per admixture
      edge that switch the rate on at the first time and off at the
      second.

    Nothing is returned.
    """
    # Define demographic events for msprime
    demog = set()

    # Tag the min index child for each node, since at the time the node is
    # called it may already be renamed by its child index b/c of
    # divergence events.
    for node in self.tree.treenode.traverse():
        if node.children:
            node._schild = min(i.idx for i in node.get_descendants())
        else:
            node._schild = node.idx

    # Traverse the tree from root to tips.
    for node in self.tree.treenode.traverse():
        time = int(node.height)

        # Internal nodes: add a divergence event and set Ne.
        if node.children:
            dest = min(i._schild for i in node.children)
            source = max(i._schild for i in node.children)
            demog.add(ms.MassMigration(time, source, dest))
            demog.add(
                ms.PopulationParametersChange(
                    time,
                    initial_size=node.Ne,
                    population=dest,
                )
            )

            # Debugging helper. It only makes sense for internal nodes:
            # it reports source/dest and the two child indices, none of
            # which exist for a tip (indexing node.children on a tip
            # raises IndexError).
            if self._debug:
                print(
                    'div time: {:>9}, {:>2} {:>2}, {:>2} {:>2}, Ne={}'.format(
                        int(time), source, dest,
                        node.children[0].idx, node.children[1].idx,
                        node.Ne,
                    ),
                    file=sys.stderr,
                )

        # Tips: set population sizes (popconfig seemingly does this too,
        # but it didn't actually work for tips until this was added).
        else:
            demog.add(
                ms.PopulationParametersChange(
                    time,
                    initial_size=node.Ne,
                    population=node.idx,
                )
            )

    # Add migration pulses
    if not self.admixture_type:
        for evt in range(self.aedges):

            # rate is prop. of population, time is prop. of edge
            rate = self.ms_migrate[evt]
            time = self.ms_migtime[evt]
            source, dest = self.admixture_edges[evt][:2]

            # Rename nodes at time of admix in case divergence renamed them.
            snode = self.tree.treenode.search_nodes(idx=source)[0]
            dnode = self.tree.treenode.search_nodes(idx=dest)[0]
            children = (snode._schild, dnode._schild)
            demog.add(
                ms.MassMigration(time, children[0], children[1], rate))

            if self._debug:
                print(
                    'mig pulse: {:>9}, {:>2} {:>2}, {:>2} {:>2}, rate={:.2f}'
                    .format(
                        time, "", "",
                        snode.name, dnode.name,
                        rate),
                    file=sys.stderr,
                )

    # Add migration intervals
    else:
        for evt in range(self.aedges):
            rate = self.ms_migration[evt]['mrates']
            time = (self.ms_migration[evt]['mtimes']).astype(int)
            source, dest = self.admixture_edges[evt][:2]

            # Rename nodes at time of admix in case divergence renamed them.
            snode = self.tree.treenode.search_nodes(idx=source)[0]
            dnode = self.tree.treenode.search_nodes(idx=dest)[0]
            children = (snode._schild, dnode._schild)
            # Switch migration on at time[0] and off again at time[1].
            demog.add(ms.MigrationRateChange(time[0], rate, children))
            demog.add(ms.MigrationRateChange(time[1], 0, children))

            if self._debug:
                print("mig interv: {}, {}, {}, {}, {:.3f}".format(
                    time[0], time[1], children[0], children[1], rate),
                    file=sys.stderr)

    # Sort events by time and, within equal times, by type (so that mass
    # migrations come before pop size changes).
    self.ms_demography = sorted(demog, key=lambda x: (x.time, x.type))
def __init__(self):
    """Configure a four-population model with a recent admixed population.

    Population indices: 0 = Africa, 1 = Europe, 2 = Asia, 3 = admixed.
    All times are in generations. Same-epoch events are separated by small
    time offsets (0.0001 steps).

    Sets ``population_configurations``, ``migration_matrix`` and
    ``demographic_events`` on the instance.
    """
    # Parameters are taken from the Methods - Simulated data section.

    # Population sizes
    N_AF0 = 7310       # Initial african population size
    N_AF1 = 14474      # Second african pop. size
    N_OOA = 1861       # OOA population size
    N_CEU0 = 1032      # European population size at CEU/CHB split
    N_CHB0 = 554       # Asian population size at CEU/CHB split
    N_ADMIX0 = 30000   # Initial size of admixed population

    # Epoch times
    T_AF0_AF1 = 5920   # Initial increase in african pop. size
    T_AF1_OOA = 2040   # Time of OOA event
    T_CEU_CHB = 920    # Time of european/asian split
    T_ADMIX0 = 12      # Time of the admixture event

    # Migration rates
    m_AF1_OOA = 1.5e-4     # Bidirectional rate between african and OOA pops.
    m_AF1_CEU0 = 2.5e-5    # Between AF1 and CEU0
    m_AF1_CHB0 = 7.8e-6    # Between AF1 and CHB0
    m_CEU0_CHB0 = 3.11e-5  # Between CEU0 and CHB0

    # Mass migration proportions that create the admixed population. The
    # three moves are applied one after another, so the later values are
    # fractions of the lineages still remaining in the admixed population.
    mm_AF1 = 1 / 6
    # Adjusted fraction for remaining population after AF migration
    # (5/6 * 2/5 = 1/3)
    mm_CEU0 = 2 / 5
    # Adjusted fraction for remaining population (1/2 * 1 = 1/2)
    mm_CHB0 = 1.0

    # Growth rates
    r_CEU0 = 3.8e-3
    r_CHB0 = 4.8e-3
    r_ADMIX0 = 0.05

    # Population sizes at modern (T=0) time
    N_CEU1 = N_CEU0 * math.exp(r_CEU0 * T_CEU_CHB)
    N_CHB1 = N_CHB0 * math.exp(r_CHB0 * T_CEU_CHB)
    N_ADMIX1 = N_ADMIX0 * math.exp(r_ADMIX0 * T_ADMIX0)

    # (size at T=0, growth rate) for populations 0..3.
    present_day = [
        (N_AF1, 0),
        (N_CEU1, r_CEU0),
        (N_CHB1, r_CHB0),
        (N_ADMIX1, r_ADMIX0),
    ]
    self.population_configurations = [
        msprime.PopulationConfiguration(initial_size=size, growth_rate=growth)
        for size, growth in present_day
    ]

    # Migration matrix; all migrations to the admixed population are 0.
    self.migration_matrix = [
        [0, m_AF1_CEU0, m_AF1_CHB0, 0],
        [m_AF1_CEU0, 0, m_CEU0_CHB0, 0],
        [m_AF1_CHB0, m_CEU0_CHB0, 0, 0],
        [0, 0, 0, 0],
    ]

    # Demographic events, working backwards in time.
    self.demographic_events = [
        # Admixed population recoalesces with origin populations (T_ADMIX0)
        msprime.MassMigration(
            time=T_ADMIX0, source=3, destination=0, proportion=mm_AF1),
        msprime.MassMigration(
            time=T_ADMIX0 + 0.0001, source=3, destination=1,
            proportion=mm_CEU0),
        msprime.MassMigration(
            time=T_ADMIX0 + 0.0002, source=3, destination=2,
            proportion=mm_CHB0),
        # Zero out migration rates (doesn't matter, but added for equality
        # to prod.)
        msprime.MigrationRateChange(time=T_CEU_CHB, rate=0.0),
        # CEU and CHB coalesce and the population is set to the OOA size
        msprime.MassMigration(
            time=T_CEU_CHB + 0.0001, source=2, destination=1,
            proportion=1.0),
        msprime.PopulationParametersChange(
            time=T_CEU_CHB + 0.0002, initial_size=N_OOA, growth_rate=0.0,
            population_id=1),
        # Set OOA <--> AF migration rate
        msprime.MigrationRateChange(
            time=T_CEU_CHB + 0.0003, rate=m_AF1_OOA, matrix_index=(0, 1)),
        msprime.MigrationRateChange(
            time=T_CEU_CHB + 0.0003, rate=m_AF1_OOA, matrix_index=(1, 0)),
        # Zero out migration rates (doesn't matter, but added for equality
        # to prod.)
        msprime.MigrationRateChange(time=T_AF1_OOA, rate=0.0),
        # OOA and AF1 coalesce
        msprime.MassMigration(
            time=T_AF1_OOA + 0.0001, source=1, destination=0,
            proportion=1.0),
        # AF1 -> AF0 population size change
        msprime.PopulationParametersChange(
            time=T_AF0_AF1, initial_size=N_AF0, population_id=0),
    ]
def __init__(self):
    """Configure a five-population model with two archaic populations.

    Population indices: 0 = Africa (YRI), 1 = Europe (CEU), 2 = Asia (CHB),
    3 = Neanderthal, 4 = archaic African. Times are given in years and
    converted to generations with a generation time of 29 years.

    Sets ``population_configurations``, ``migration_matrix`` and
    ``demographic_events`` on the instance. The event labels E1..E7 in the
    comments group events that happen at the same time.
    """
    # All parameters were taken from table 1 of Ragsdale et al. (2019)
    generation_time = 29

    # Population sizes
    N_0 = 3600      # Size of archaic populations
    N_YRI = 13900   # Fixed size of YRI population
    N_B = 880       # Size of OOA population
    N_CEU0 = 2300   # Size of CEU population at CEU-CHB split
    N_CHB0 = 650    # Size of CHB population at CEU-CHB split

    # Population growth parameters
    r_CEU = 0.125e-2
    r_CHB = 0.372e-2

    # Migration parameters
    m_AF_B = 52.2e-5
    m_YRI_CEU = 2.48e-5
    m_YRI_CHB = 0
    m_CEU_CHB = 11.3e-5
    m_AF_ARCHAF = 1.98e-5
    m_OOA_NEAN = 0.825e-5

    # Epoch times, in generations
    T_AF = 300e3 / generation_time
    T_OOA = 60.7e3 / generation_time
    T_CEU_CHB = 36e3 / generation_time
    T_ARCHAF_split = 499e3 / generation_time
    T_ARCHAF_mig = 125e3 / generation_time
    T_NEAN_split = 559e3 / generation_time
    T_ARCH_ADMIX_end = 18.7e3 / generation_time

    # Population sizes at modern (T=0) time
    N_CEU1 = N_CEU0 * math.exp(r_CEU * T_CEU_CHB)
    N_CHB1 = N_CHB0 * math.exp(r_CHB * T_CEU_CHB)

    # Short aliases keep the long event list readable.
    configure = msprime.PopulationConfiguration
    set_rate = msprime.MigrationRateChange
    move_all = msprime.MassMigration
    set_size = msprime.PopulationParametersChange

    self.population_configurations = [
        configure(initial_size=N_YRI, growth_rate=0),
        configure(initial_size=N_CEU1, growth_rate=r_CEU),
        configure(initial_size=N_CHB1, growth_rate=r_CHB),
        configure(initial_size=N_0, growth_rate=0),
        configure(initial_size=N_0, growth_rate=0),
    ]

    # Initial migration matrix: only the three modern populations exchange
    # migrants at T=0.
    self.migration_matrix = [
        [0, m_YRI_CEU, m_YRI_CHB, 0, 0],
        [m_YRI_CEU, 0, m_CEU_CHB, 0, 0],
        [m_YRI_CHB, m_CEU_CHB, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
    ]

    self.demographic_events = [
        # E1: migration between YRI and ARCHAF
        set_rate(time=T_ARCH_ADMIX_end, rate=m_AF_ARCHAF, matrix_index=(0, 4)),
        set_rate(time=T_ARCH_ADMIX_end, rate=m_AF_ARCHAF, matrix_index=(4, 0)),
        # E1: migration between CEU and NEAN
        set_rate(time=T_ARCH_ADMIX_end, rate=m_OOA_NEAN, matrix_index=(1, 3)),
        set_rate(time=T_ARCH_ADMIX_end, rate=m_OOA_NEAN, matrix_index=(3, 1)),
        # E1: migration between CHB and NEAN
        set_rate(time=T_ARCH_ADMIX_end, rate=m_OOA_NEAN, matrix_index=(2, 3)),
        set_rate(time=T_ARCH_ADMIX_end, rate=m_OOA_NEAN, matrix_index=(3, 2)),

        # E2: coalescence of CHB into CEU
        move_all(time=T_CEU_CHB, source=2, dest=1, proportion=1.0),
        # E2: reset all migration rates
        set_rate(time=T_CEU_CHB, rate=0.0),
        # E2: migration between OOA (CEU) and AF (YRI)
        set_rate(time=T_CEU_CHB, rate=m_AF_B, matrix_index=(0, 1)),
        set_rate(time=T_CEU_CHB, rate=m_AF_B, matrix_index=(1, 0)),
        # E2: restore migration between YRI and ARCHAF after the reset
        set_rate(time=T_CEU_CHB, rate=m_AF_ARCHAF, matrix_index=(0, 4)),
        set_rate(time=T_CEU_CHB, rate=m_AF_ARCHAF, matrix_index=(4, 0)),
        # E2: restore migration between CEU and NEAN after the reset
        set_rate(time=T_CEU_CHB, rate=m_OOA_NEAN, matrix_index=(1, 3)),
        set_rate(time=T_CEU_CHB, rate=m_OOA_NEAN, matrix_index=(3, 1)),
        # E2: CEU changes to a fixed size at the CHB/CEU coalescence
        set_size(time=T_CEU_CHB, initial_size=N_B, growth_rate=0,
                 population_id=1),

        # E3: coalescence between the OOA and AF populations
        move_all(time=T_OOA, source=1, destination=0, proportion=1.0),
        # E3: reset all migration rates
        set_rate(time=T_OOA, rate=0.0),
        # E3: restore migration between YRI and ARCHAF after the reset
        set_rate(time=T_OOA, rate=m_AF_ARCHAF, matrix_index=(0, 4)),
        set_rate(time=T_OOA, rate=m_AF_ARCHAF, matrix_index=(4, 0)),

        # E4: migration between archaic african and african pop. "ends"
        set_rate(time=T_ARCHAF_mig, rate=0),
        # E5: AF reverts to the ancestral population size pre OOA
        set_size(time=T_AF, initial_size=N_0, population_id=0),
        # E6: archaic AF population coalesces into AF
        move_all(time=T_ARCHAF_split, source=4, dest=0, proportion=1.0),
        # E7: NEAN population coalesces into AF
        move_all(time=T_NEAN_split, source=3, dest=0, proportion=1.0),
    ]
def _sim_admix_12(nreps, Ns=500000, gen=20):
    """Simulate replicates with two migration episodes.

    The number of populations is ``len(tree.tree)`` (``tree`` is a
    module-level name); the event list references population indices
    0..11. Populations merge in three groups rooted at indices 0, 4 and 8,
    then 8 merges into 4 and finally 4 merges into 0.

    Args:
        nreps: Number of replicates passed to ``ms.simulate``.
        Ns: Initial size of every population.
        gen: Multiplier applied to the base divergence times
            ``[0, 1, 2, 3, 4, 5] * 1e4``.

    Returns:
        The replicate iterator returned by ``ms.simulate``.
    """
    # Divergence times
    split_times = np.array([0, 1, 2, 3, 4, 5]) * 1e4 * gen

    # Migration rates for matrix entries (1, 2) and (4, 8)
    rate_c_b = 2e-6
    rate_ij_ef = 2e-6

    # Population IDs correspond to their indexes in pop_config.
    n_pops = len(tree.tree)
    pop_config = [
        ms.PopulationConfiguration(sample_size=2, initial_size=Ns)
        for _ in range(n_pops)
    ]

    # Migration matrix is all zeros at time 0.
    migmat = np.zeros((n_pops, n_pops)).tolist()

    group_roots = (0, 4, 8)

    def merges(level):
        """Merge population ``root + level`` into each group root."""
        return [
            ms.MassMigration(
                time=split_times[level],
                source=root + level,
                destination=root,
                proportion=1.0,
            )
            for root in group_roots
        ]

    # First migration episode: entry (1, 2) from time 0, reset at time 1.
    demog = [
        ms.MigrationRateChange(time=0, rate=rate_c_b, matrix_index=(1, 2)),
        ms.MigrationRateChange(time=split_times[1], rate=0),
    ]
    # Merges at time 1: 1->0, 5->4, 9->8
    demog += merges(1)
    # Second migration episode: entry (4, 8) from time 1 (backward in time)
    demog.append(
        ms.MigrationRateChange(
            time=split_times[1], rate=rate_ij_ef, matrix_index=(4, 8)))
    # Merges at time 2: 2->0, 6->4, 10->8
    demog += merges(2)
    # End all migration at time 2
    demog.append(ms.MigrationRateChange(time=split_times[2], rate=0))
    # Merges at time 3: 3->0, 7->4, 11->8
    demog += merges(3)
    # The three groups merge: 8->4 at time 4, then 4->0 at time 5
    demog.append(
        ms.MassMigration(
            time=split_times[4], source=8, destination=4, proportion=1.0))
    demog.append(
        ms.MassMigration(
            time=split_times[5], source=4, destination=0, proportion=1.0))

    # Simulate the data
    return ms.simulate(
        population_configurations=pop_config,
        migration_matrix=migmat,
        demographic_events=demog,
        num_replicates=nreps,
        length=100,
        mutation_rate=1e-9,
    )
def out_of_africa(prefix, nhaps, recomb):
    """
    Simulate (or load) a three-population out-of-Africa model.

    Specify the demographic model used in these simulations based on
    Dr. Jouganous optimization of Gravel et. al's 2011 model. Function
    taken and modified from Dr. Alicia Martin.

    If ``<prefix>.simulation.hdf5`` already exists it is loaded and
    returned without simulating. Otherwise the model is simulated, the
    demographic history is printed and written to
    ``<prefix>.demography.txt``, and the result is dumped to
    ``<prefix>.simulation.hdf5``.

    Populations: 0=YRI (AF), 1=CEU (EU), 2=CHB (AS).

    Args:
        prefix: Path prefix for the cache file and the demography report.
        nhaps: Sequence with the sample size of populations 0, 1 and 2.
        recomb: Path of a HapMap-format recombination map.

    Returns:
        The loaded or newly simulated tree sequence.
    """
    if os.path.isfile(prefix+'.simulation.hdf5'):
        simulation = ms.load(prefix+'.simulation.hdf5')
        line = 'Simulation %s has been DONE! Loading %s.simulation.hdf5'
        print(line%(prefix, prefix))
    else:
        ## First we set out the maximum likelihood values of the various
        ## parameters given in Gravel et al, 2017 Table 2 but updated to
        ## Dr. Jouganous work
        N_A = 11273
        N_B = 3104
        N_AF = 23721
        N_EU0 = 2271
        N_AS0 = 924
        ## Times are provided in years, so we convert into generations.
        generation_time = 29  # according to doi:10.1086/302770
        T_AF = 312e3 / generation_time
        T_B = 125e3 / generation_time
        T_EU_AS = 42.3e3 / generation_time
        ## We need to work out the starting (diploid) population sizes
        ## based on the growth rates provided for these two populations
        r_EU = 0.00196
        r_AS = 0.00309
        N_EU = N_EU0 / math.exp(-r_EU * T_EU_AS)
        N_AS = N_AS0 / math.exp(-r_AS * T_EU_AS)
        ## Migration rates during the various epochs.
        m_AF_B = 15.80e-5
        m_AF_EU = 1.10e-5
        m_AF_AS = 0.48e-5
        m_EU_AS = 4.19e-5
        ## Population IDs correspond to their indexes in the population
        ## configuration array. Therefore, we have 0=YRI, 1=CEU and 2=CHB
        ## initially.
        population_configurations = [
            ms.PopulationConfiguration(
                sample_size=nhaps[0], initial_size=N_AF),
            ms.PopulationConfiguration(
                sample_size=nhaps[1], initial_size=N_EU, growth_rate=r_EU),
            ms.PopulationConfiguration(
                sample_size=nhaps[2], initial_size=N_AS, growth_rate=r_AS)
        ]
        ## define the migration matrix
        migration_matrix = [
            [0, m_AF_EU, m_AF_AS],
            [m_AF_EU, 0, m_EU_AS],
            [m_AF_AS, m_EU_AS, 0],
        ]
        ## define the demographic events (mergers and splits)
        demographic_events = [
            ## CEU and CHB merge into B with rate changes at T_EU_AS
            ms.MassMigration(
                time=T_EU_AS, source=2, destination=1, proportion=1.0),
            ms.MigrationRateChange(time=T_EU_AS, rate=0),
            ms.MigrationRateChange(
                time=T_EU_AS, rate=m_AF_B, matrix_index=(0, 1)),
            ms.MigrationRateChange(
                time=T_EU_AS, rate=m_AF_B, matrix_index=(1, 0)),
            ms.PopulationParametersChange(
                time=T_EU_AS, initial_size=N_B, growth_rate=0,
                population_id=1),
            ## Population B merges into YRI at T_B
            ms.MassMigration(
                time=T_B, source=1, destination=0, proportion=1.0),
            ## Migration must be switched off after the merger; otherwise
            ## lineages keep moving between population 0 and the defunct
            ## population B at rate m_AF_B for the rest of the simulation.
            ms.MigrationRateChange(time=T_B, rate=0),
            ## Size changes to N_A at T_AF
            ms.PopulationParametersChange(
                time=T_AF, initial_size=N_A, population_id=0)
        ]
        ## Use the demography debugger to print out the demographic history
        ## that we have just described.
        dp = ms.DemographyDebugger(
            Ne=N_A,
            population_configurations=population_configurations,
            migration_matrix=migration_matrix,
            demographic_events=demographic_events)
        dp.print_history()
        with open('%s.demography.txt'%(prefix),'w') as fn:
            dp.print_history(output=fn)

        settings = {
            'population_configurations': population_configurations,
            'migration_matrix': migration_matrix,
            'demographic_events': demographic_events,
            'mutation_rate': 1.44e-8,  # according to 10.1371/journal.pgen.1004023
            'recombination_map': ms.RecombinationMap.read_hapmap(recomb)
        }
        print('Starting Simulation...\t%s'%(current_time()))
        simulation = ms.simulate(**settings)
        simulation.dump('%s.simulation.hdf5'%(prefix), True)
        print('Simulation %s DONE!\t%s'%(prefix, current_time()))
    return simulation
def sim_ongoing_interval(rec_map=None, L=3e9, Ne=10000, Nadmix=500,
                         Tadmix_start=4, Tadmix_stop=12, frac_ongoing=0.05,
                         seed=None, path=None, tszip=None):
    """
    Simulate an ongoing model of admixture with the discrete-time
    backwards Wright-Fisher model.

    A new population (2) is formed by splitting off from population 0 at
    Tadmix_stop + 1. Between Tadmix_start and Tadmix_stop the migration
    matrix entry (2, 1) is set to frac_ongoing. Only population 2 is
    sampled, and the simulation is stopped at Tadmix_stop + 2.

    rec_map = valid msprime recombination map
    L = length of genome, in base pairs (ignored if rec_map is specified)
    Ne = diploid population size for all three populations
    Nadmix = number of observed admixed individuals (2 samples each)
    Tadmix_start = time at which migration is switched on
    Tadmix_stop = time at which migration is switched off
    frac_ongoing = migration rate used during the admixture interval
    seed = seed to pass to msprime.simulate
    path = file path, if given will write the ts to this path
        (NOT IMPLEMENTED; unused)
    tszip = unused

    Returns the tree sequence produced by msprime.simulate.
    """
    assert Tadmix_stop > Tadmix_start, "Tadmix_stop must be greater than Tadmix_start"

    Tadmix_start, Tadmix_stop = int(Tadmix_start), int(Tadmix_stop)
    Ne, Nadmix = int(Ne), int(Nadmix)

    # Recombination map: use the one supplied, else a uniform map over L
    # base pairs at rate 1e-8.
    if rec_map:
        recomb_map = rec_map
    else:
        genome_length = int(L)
        recomb_map = msprime.RecombinationMap.uniform_map(
            genome_length, 1e-8, genome_length)

    # Three constant-size populations with no migration at time 0.
    pop_configs = [
        msprime.PopulationConfiguration(initial_size=Ne, growth_rate=0)
        for _ in range(3)
    ]
    mig_mat = [[0, 0, 0] for _ in range(3)]

    migration_on = msprime.MigrationRateChange(
        time=Tadmix_start, rate=frac_ongoing, matrix_index=(2, 1))
    migration_off = msprime.MigrationRateChange(
        time=Tadmix_stop, rate=0, matrix_index=(2, 1))
    # Founding of pop 2: all of its lineages move to population 0.
    founding = msprime.MassMigration(
        time=Tadmix_stop + 1, source=2, destination=0, proportion=1.0)
    admixture_events = [migration_on, migration_off, founding]

    # Two samples per admixed individual, all from population 2.
    samps = [msprime.Sample(population=2, time=0)] * (2 * Nadmix)

    ts_admix = msprime.simulate(
        population_configurations=pop_configs,
        migration_matrix=mig_mat,
        demographic_events=admixture_events,
        recombination_map=recomb_map,
        mutation_rate=0,
        model='dtwf',
        samples=samps,
        random_seed=seed,
        start_time=0,
        end_time=Tadmix_stop + 2,
    )
    return ts_admix
def test_migration_rate_change(self):
    """Run ``check_time`` on a global ``MigrationRateChange`` event."""
    generations = 512
    population_size = 8192
    rate_change = msprime.MigrationRateChange(time=generations, rate=1)
    self.check_time(rate_change, generations, population_size)
def create_simulation_runner(parser, arg_list):
    """
    Parses the arguments and returns a SimulationRunner instance.

    The parsed options are translated into msprime population
    configurations, an initial migration matrix and a list of demographic
    events. Invalid combinations are reported through ``parser.error``.
    Event times are multiplied by 4 and growth/migration rates divided by 4
    before the runner is built (the code assumes Ne = 1).

    :param parser: argument parser used both to parse ``arg_list`` and to
        report errors.
    :param arg_list: list of command line arguments to parse.
    :return: the configured ``SimulationRunner``.
    """
    args = parser.parse_args(arg_list)
    if args.mutation_rate == 0 and not args.trees:
        parser.error("Need to specify at least one of --theta or --trees")
    # The number of loci is parsed as a number; reject non-integral values.
    num_loci = int(args.recombination[1])
    if args.recombination[1] != num_loci:
        parser.error("Number of loci must be integer value")
    if args.recombination[0] != 0.0 and num_loci < 2:
        parser.error("Number of loci must > 1")
    r = 0.0
    # We don't scale recombination or mutation rates by the size
    # of the region.
    if num_loci > 1:
        r = args.recombination[0] / (num_loci - 1)
    mu = args.mutation_rate / num_loci

    # Check the structure format.
    symmetric_migration_rate = 0.0
    num_populations = 1
    population_configurations = [
        msprime.PopulationConfiguration(args.sample_size)
    ]
    migration_matrix = [[0.0]]
    if args.structure is not None:
        num_populations = convert_int(args.structure[0], parser)
        # We must have at least num_population sample_configurations
        if len(args.structure) < num_populations + 1:
            parser.error("Must have num_populations sample sizes")
        population_configurations = [None for j in range(num_populations)]
        for j in range(num_populations):
            population_configurations[j] = msprime.PopulationConfiguration(
                convert_int(args.structure[j + 1], parser))
        total = sum(conf.sample_size for conf in population_configurations)
        if total != args.sample_size:
            parser.error("Population sample sizes must sum to sample_size")
        # We optionally have the overall migration_rate here
        if len(args.structure) == num_populations + 2:
            symmetric_migration_rate = convert_float(
                args.structure[num_populations + 1], parser)
            check_migration_rate(parser, symmetric_migration_rate)
        elif len(args.structure) > num_populations + 2:
            parser.error("Too many arguments to --structure/-I")
        if num_populations > 1:
            # Spread the overall rate evenly over the off-diagonal entries;
            # int(j != k) zeroes the diagonal.
            migration_matrix = [[
                symmetric_migration_rate / (num_populations - 1) * int(j != k)
                for j in range(num_populations)
            ] for k in range(num_populations)]
    else:
        if len(args.migration_matrix_entry) > 0:
            parser.error("Cannot specify migration matrix entries without "
                         "first providing a -I option")
        if args.migration_matrix is not None:
            parser.error("Cannot specify a migration matrix without "
                         "first providing a -I option")
    if args.migration_matrix is not None:
        migration_matrix = convert_migration_matrix(parser, args.migration_matrix,
                                                    num_populations)
    for matrix_entry in args.migration_matrix_entry:
        dest = convert_population_id(parser, matrix_entry[0], num_populations)
        source = convert_population_id(parser, matrix_entry[1], num_populations)
        rate = matrix_entry[2]
        if dest == source:
            parser.error("Cannot set diagonal elements in migration matrix")
        check_migration_rate(parser, rate)
        migration_matrix[dest][source] = rate

    # Set the initial demography
    # Entries of demographic_events are (index, event) pairs; the index is
    # used further down to check the order in which events were supplied.
    demographic_events = []
    if args.growth_rate is not None:
        for config in population_configurations:
            config.growth_rate = args.growth_rate
    for population_id, growth_rate in args.population_growth_rate:
        pid = convert_population_id(parser, population_id, num_populations)
        population_configurations[pid].growth_rate = growth_rate
    for population_id, size in args.population_size:
        pid = convert_population_id(parser, population_id, num_populations)
        population_configurations[pid].initial_size = size
    # First we look at population split events. We do this differently
    # to ms, as msprime requires a fixed number of population. Therefore,
    # modify the number of populations to take into account populations
    # splits. This is a messy hack, and will probably need to be changed.
    for index, (t, population_id, proportion) in args.admixture:
        check_event_time(parser, t)
        pid = convert_population_id(parser, population_id, num_populations)
        if proportion < 0 or proportion > 1:
            parser.error("Proportion value must be 0 <= p <= 1.")
        # In ms, the probability of staying in source is p and the probabilty
        # of moving to the new population is 1 - p.
        event = (index, msprime.MassMigration(t, pid, num_populations, 1 - proportion))
        demographic_events.append(event)
        num_populations += 1
        # We add another element to each row in the migration matrix
        # along with an other row. All new entries are zero.
        for row in migration_matrix:
            row.append(0)
        migration_matrix.append([0 for j in range(num_populations)])
        # Add another PopulationConfiguration object with a sample size
        # of zero.
        population_configurations.append(msprime.PopulationConfiguration(0))

    # Add the demographic events
    for index, (t, alpha) in args.growth_rate_change:
        if len(args.admixture) != 0:
            raise_admixture_incompatability_error(parser, "-eG")
        check_event_time(parser, t)
        demographic_events.append(
            (index, msprime.PopulationParametersChange(time=t, growth_rate=alpha)))
    for index, (t, population_id, alpha) in args.population_growth_rate_change:
        pid = convert_population_id(parser, population_id, num_populations)
        check_event_time(parser, t)
        demographic_events.append(
            (index, msprime.PopulationParametersChange(time=t, growth_rate=alpha,
                                                       population_id=pid)))
    for index, (t, x) in args.size_change:
        if len(args.admixture) != 0:
            raise_admixture_incompatability_error(parser, "-eN")
        check_event_time(parser, t)
        demographic_events.append(
            (index, msprime.PopulationParametersChange(time=t, initial_size=x,
                                                       growth_rate=0)))
    for index, (t, population_id, x) in args.population_size_change:
        check_event_time(parser, t)
        pid = convert_population_id(parser, population_id, num_populations)
        demographic_events.append(
            (index, msprime.PopulationParametersChange(time=t, initial_size=x,
                                                       growth_rate=0,
                                                       population_id=pid)))
    for index, (t, source, dest) in args.population_split:
        check_event_time(parser, t)
        source_id = convert_population_id(parser, source, num_populations)
        dest_id = convert_population_id(parser, dest, num_populations)
        demographic_events.append(
            (index, msprime.MassMigration(t, source_id, dest_id, 1.0)))
        # Set the migration rates for source to 0
        for j in range(num_populations):
            if j != source_id:
                event = msprime.MigrationRateChange(t, 0.0, (j, source_id))
                demographic_events.append((index, event))

    # Demographic events that affect the migration matrix
    if num_populations == 1:
        condition = (len(args.migration_rate_change) > 0 or
                     len(args.migration_matrix_entry_change) > 0 or
                     len(args.migration_matrix_change) > 0)
        if condition:
            parser.error("Cannot change migration rates for 1 population")
    for index, (t, x) in args.migration_rate_change:
        if len(args.admixture) != 0:
            raise_admixture_incompatability_error(parser, "-eM")
        check_migration_rate(parser, x)
        check_event_time(parser, t)
        event = msprime.MigrationRateChange(t, x / (num_populations - 1))
        demographic_events.append((index, event))
    for index, event in args.migration_matrix_entry_change:
        t = event[0]
        check_event_time(parser, t)
        dest = convert_population_id(parser, event[1], num_populations)
        source = convert_population_id(parser, event[2], num_populations)
        if dest == source:
            parser.error("Cannot set diagonal elements in migration matrix")
        rate = event[3]
        check_migration_rate(parser, rate)
        msp_event = msprime.MigrationRateChange(t, rate, (dest, source))
        demographic_events.append((index, msp_event))
    for index, event in args.migration_matrix_change:
        if len(event) < 3:
            parser.error("Need at least three arguments to -ma")
        if len(args.admixture) != 0:
            raise_admixture_incompatability_error(parser, "-ema")
        t = convert_float(event[0], parser)
        check_event_time(parser, t)
        if convert_int(event[1], parser) != num_populations:
            parser.error(
                "num_populations must be equal for new migration matrix")
        matrix = convert_migration_matrix(parser, event[2:], num_populations)
        # A whole-matrix change is expressed as one rate change per
        # off-diagonal entry, all sharing the same index.
        for j in range(num_populations):
            for k in range(num_populations):
                if j != k:
                    msp_event = msprime.MigrationRateChange(
                        t, matrix[j][k], (j, k))
                    demographic_events.append((index, msp_event))

    # We've created all the events, now we need to rescale the migration rates
    # We assume Ne = 1 here.
    for _, msp_event in demographic_events:
        msp_event.time *= 4
        if isinstance(msp_event, msprime.PopulationParametersChange):
            msp_event.growth_rate /= 4
        if isinstance(msp_event, msprime.MigrationRateChange):
            # Divide by 4 to get a per-generation rate, assuming Ne=1
            msp_event.rate /= 4
    # We also need to rescale the migration matrix and growth rates.
    migration_matrix = [[m / 4 for m in row] for row in migration_matrix]
    for config in population_configurations:
        config.growth_rate /= 4

    # Order by (index, time), then compare with a pure time ordering: the two
    # agree only if the events were supplied in non-decreasing time order.
    demographic_events.sort(key=lambda x: (x[0], x[1].time))
    time_sorted = sorted(demographic_events, key=lambda x: x[1].time)
    if demographic_events != time_sorted:
        parser.error("Demographic events must be supplied in non-decreasing "
                     "time order")
    runner = SimulationRunner(
        sample_size=args.sample_size,
        num_loci=num_loci,
        migration_matrix=migration_matrix,
        population_configurations=population_configurations,
        demographic_events=[event for _, event in demographic_events],
        num_replicates=args.num_replicates,
        scaled_recombination_rate=r,
        scaled_mutation_rate=mu,
        precision=args.precision,
        print_trees=args.trees,
        random_seeds=args.random_seeds)
    return runner
def out_of_africa():
    """
    Build the three-population Out-of-Africa model and print its history.

    Population IDs are 0=YRI, 1=CEU and 2=CHB. The function returns nothing;
    it only prints the demographic history using
    ``msprime.DemographyDebugger``.
    """
    # First we set out the maximum likelihood values of the various parameters
    # given in Table 1.
    N_A = 7300
    N_B = 2100
    N_AF = 12300
    N_EU0 = 1000
    N_AS0 = 510
    # Times are provided in years, so we convert into generations.
    generation_time = 25
    T_AF = 220e3 / generation_time
    T_B = 140e3 / generation_time
    T_EU_AS = 21.2e3 / generation_time
    # We need to work out the starting population sizes based on the growth
    # rates provided for these two populations
    r_EU = 0.004
    r_AS = 0.0055
    N_EU = N_EU0 / math.exp(-r_EU * T_EU_AS)
    N_AS = N_AS0 / math.exp(-r_AS * T_EU_AS)
    # Migration rates during the various epochs.
    m_AF_B = 25e-5
    m_AF_EU = 3e-5
    m_AF_AS = 1.9e-5
    m_EU_AS = 9.6e-5
    # Population IDs correspond to their indexes in the population
    # configuration array. Therefore, we have 0=YRI, 1=CEU and 2=CHB
    # initially.
    population_configurations = [
        msprime.PopulationConfiguration(
            sample_size=0, initial_size=N_AF),
        msprime.PopulationConfiguration(
            sample_size=1, initial_size=N_EU, growth_rate=r_EU),
        msprime.PopulationConfiguration(
            sample_size=1, initial_size=N_AS, growth_rate=r_AS)
    ]
    migration_matrix = [
        [      0, m_AF_EU, m_AF_AS],
        [m_AF_EU,       0, m_EU_AS],
        [m_AF_AS, m_EU_AS,       0],
    ]
    demographic_events = [
        # CEU and CHB merge into B with rate changes at T_EU_AS
        msprime.MassMigration(
            time=T_EU_AS, source=2, destination=1, proportion=1.0),
        msprime.MigrationRateChange(time=T_EU_AS, rate=0),
        msprime.MigrationRateChange(
            time=T_EU_AS, rate=m_AF_B, matrix_index=(0, 1)),
        msprime.MigrationRateChange(
            time=T_EU_AS, rate=m_AF_B, matrix_index=(1, 0)),
        msprime.PopulationParametersChange(
            time=T_EU_AS, initial_size=N_B, growth_rate=0, population_id=1),
        # Population B merges into YRI at T_B
        msprime.MassMigration(
            time=T_B, source=1, destination=0, proportion=1.0),
        # Switch all migration off once B has merged into YRI. Without this
        # the m_AF_B rates stay active and lineages can migrate back into
        # the emptied population 1 further back in time.
        msprime.MigrationRateChange(time=T_B, rate=0),
        # Size changes to N_A at T_AF
        msprime.PopulationParametersChange(
            time=T_AF, initial_size=N_A, population_id=0)
    ]
    # Use the demography debugger to print out the demographic history
    # that we have just described.
    dp = msprime.DemographyDebugger(
        Ne=N_A,
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events)
    dp.print_history()
def __init__(self):
    """
    Configure a ten-population model with archaic admixture.

    Sets ``self.generation_time``, ``self.population_configurations``,
    ``self.migration_matrix`` and ``self.demographic_events`` (the latter
    sorted by time). Population indices: 0=YRI, 1=CEU, 2=CHB, 3=Papuan,
    4=DenA, 5=NeanA, 6=Den1, 7=Den2, 8=Nean1, 9=Ghost.
    """
    self.generation_time = 29  # Just information
    years_per_generation = self.generation_time

    # ---- population sizes -------------------------------------------------
    N_YRI = 48433
    N_CEU = 6962
    N_CHB = 9025
    N_Papuan = 8834
    N_DenA = 5083
    N_NeanA = 826
    N_Nean1 = 13249
    N_Den1 = N_Nean1
    N_Den2 = N_Nean1
    N_Ghost = 8516
    # sizes after coalescences
    N_CEU_CHB = 12971
    N_Human = 41563
    N_DenAnc = 100
    N_A = 32671
    # bottleneck sizes
    N_CEU_CHB_bot = 2231
    N_GhostA_bot = 1394
    N_Papuan_bot = 243

    # ---- times, in generations --------------------------------------------
    # sampling times of the two archaic samples
    t_DenA = 2058
    t_NeanA = 2612
    # population merges
    t_CEU_CHB = 1293
    t_CEU_Ghost = 1758
    t_Papuan_Ghost = 1784
    t_YRI_GhostA = 2218
    t_Nean1_NeanA = 3375
    t_Den1_DenA = 9750
    t_Den1_Den2 = 12500
    t_Den_Nean = 15090
    t_Human_Den_Nean = 20225
    # bottlenecks
    t_CEU_CHB_bot = 1659
    t_Papuan_bot = 1685
    t_GhostA_bot = 2119

    # ---- migration rates --------------------------------------------------
    m_YRI_Ghost = 0.000179
    m_Ghost_CEU = 0.000442
    m_CEU_CHB = 3.14e-5
    m_CHB_Papuan = 5.72e-5
    m_CEUCHB_Papua = 0.000572
    m_Ghost_CEUCHB = 0.000442

    # ---- admixture pulses: times and proportions --------------------------
    p1 = 0.55
    t_Nean1_to_CHB = 883
    p_Nean1_to_CHB = 0.002
    t_Den2_to_Papuan = 45.7e3 / years_per_generation
    p_Den2_to_Papuan = (1 - p1) * 0.04
    t_Den1_to_Papuan = 29.8e3 / years_per_generation
    p_Den1_to_Papuan = p1 * 0.04
    t_Nean1_to_Papuan = 1412
    p_Nean1_to_Papuan = 0.002
    t_Nean1_to_CEU_CHB = 1566
    p_Nean1_to_CEU_CHB = 0.011
    t_Nean1_to_GhostA = 1853
    p_Nean1_to_GhostA = 0.024

    # ---- populations: (name, size, sampling time); list index = pop id ----
    population_table = [
        ("YRI", N_YRI, 0),           # 0
        ("CEU", N_CEU, 0),           # 1
        ("CHB", N_CHB, 0),           # 2
        ("Papuan", N_Papuan, 0),     # 3
        ("DenA", N_DenA, t_DenA),    # 4
        ("NeanA", N_NeanA, t_NeanA), # 5
        ("Den1", N_Den1, 0),         # 6
        ("Den2", N_Den2, 0),         # 7
        ("Nean1", N_Nean1, 0),       # 8
        ("Ghost", N_Ghost, 0),       # 9
    ]
    self.population_configurations = [
        msprime.PopulationConfiguration(
            initial_size=size,
            growth_rate=0,
            metadata={"name": label, "sampling_time": sampled_at})
        for label, size, sampled_at in population_table
    ]

    # ---- initial (symmetric) migration matrix -----------------------------
    matrix = [[0] * 10 for _ in range(10)]
    symmetric_rates = (
        (0, 9, m_YRI_Ghost),
        (1, 9, m_Ghost_CEU),
        (1, 2, m_CEU_CHB),
        (2, 3, m_CHB_Papuan),
    )
    for row, col, rate in symmetric_rates:
        matrix[row][col] = rate
        matrix[col][row] = rate
    self.migration_matrix = matrix

    # ---- small constructors for the three event types ---------------------
    def move(when, src, dst, fraction=1.):
        return msprime.MassMigration(
            time=when, source=src, destination=dst, proportion=fraction)

    def resize(when, size, pop):
        return msprime.PopulationParametersChange(
            time=when, initial_size=size, population_id=pop)

    def set_rate(when, rate, cell):
        return msprime.MigrationRateChange(
            time=when, rate=rate, matrix_index=cell)

    events = []

    # CEU merges into CHB; the merged population gets a new size and the
    # migration matrix is rewired around it.
    events.append(move(t_CEU_CHB, 1, 2))
    events.append(resize(t_CEU_CHB, N_CEU_CHB, 2))
    for cell in ((2, 1), (1, 2), (3, 2), (2, 3), (9, 1), (1, 9)):
        events.append(set_rate(t_CEU_CHB, 0, cell))
    events.append(set_rate(t_CEU_CHB, m_CEUCHB_Papua, (2, 3)))
    events.append(set_rate(t_CEU_CHB, m_CEUCHB_Papua, (3, 2)))
    events.append(set_rate(t_CEU_CHB, m_Ghost_CEUCHB, (2, 9)))
    events.append(set_rate(t_CEU_CHB, m_Ghost_CEUCHB, (9, 2)))

    # Bottleneck of CEU+CHB; every migration rate is set to zero here.
    events.append(resize(t_CEU_CHB_bot, N_CEU_CHB_bot, 2))
    events.append(msprime.MigrationRateChange(time=t_CEU_CHB_bot, rate=0))
    # Bottleneck of Papuans.
    events.append(resize(t_Papuan_bot, N_Papuan_bot, 3))

    # CEU+CHB, then Papuans, merge into Ghost (GhostA), which then has its
    # own bottleneck.
    events.append(move(t_CEU_Ghost, 2, 9))
    events.append(move(t_Papuan_Ghost, 3, 9))
    events.append(resize(t_GhostA_bot, N_GhostA_bot, 9))

    # Ghost merges into YRI, forming the Human population.
    events.append(move(t_YRI_GhostA, 9, 0))
    events.append(resize(t_YRI_GhostA, N_Human, 0))

    # Nean1 merges into NeanA (NeanAnc).
    events.append(move(t_Nean1_NeanA, 8, 5))
    events.append(resize(t_Nean1_NeanA, N_Nean1, 5))

    # Den1, then Den2, merge into DenA (DenAnc).
    events.append(move(t_Den1_DenA, 6, 4))
    events.append(resize(t_Den1_DenA, N_DenAnc, 4))
    events.append(move(t_Den1_Den2, 7, 4))
    events.append(resize(t_Den1_Den2, N_DenAnc, 4))

    # NeanAnc merges into DenAnc (Den_Nean).
    events.append(move(t_Den_Nean, 5, 4))
    events.append(resize(t_Den_Nean, N_Nean1, 4))

    # Den_Nean merges into Human, giving the ancestral population.
    events.append(move(t_Human_Den_Nean, 4, 0))
    events.append(resize(t_Human_Den_Nean, N_A, 0))

    # Admixture pulses. Backwards in time, a fraction of the recipient's
    # lineages moves into the archaic donor population.
    events.append(move(t_Den1_to_Papuan, 3, 6, p_Den1_to_Papuan))      # Den1 -> Papuans
    events.append(move(t_Den2_to_Papuan, 3, 7, p_Den2_to_Papuan))      # Den2 -> Papuans
    events.append(move(t_Nean1_to_GhostA, 9, 8, p_Nean1_to_GhostA))    # Nean1 -> GhostA
    events.append(move(t_Nean1_to_CEU_CHB, 2, 8, p_Nean1_to_CEU_CHB))  # Nean1 -> CEU+CHB
    events.append(move(t_Nean1_to_Papuan, 3, 8, p_Nean1_to_Papuan))    # Nean1 -> Papuans
    events.append(move(t_Nean1_to_CHB, 2, 8, p_Nean1_to_CHB))          # Nean1 -> East Asia

    # Stable sort: events sharing a time keep the order they were added in.
    events.sort(key=lambda ev: ev.time)
    self.demographic_events = events
def out_of_africa(sample_n_AF, sample_n_EU, sample_n_AS):
    """
    Simulate the three-population Out-of-Africa model.

    Population IDs are 0=YRI, 1=CEU and 2=CHB. The demographic history is
    printed with ``msprime.DemographyDebugger`` before the simulation runs.

    :param sample_n_AF: sample size for population 0 (YRI).
    :param sample_n_EU: sample size for population 1 (CEU).
    :param sample_n_AS: sample size for population 2 (CHB).
    :return: the result of ``msprime.simulate``.
    """
    # Times are given in years and converted to generations.
    years_per_generation = 25
    t_ancestral = 148e3 / years_per_generation  # 220e3
    t_bottleneck = 51e3 / years_per_generation  # 140e3
    t_eu_as_split = 23e3 / years_per_generation  # 21.2e3

    # Maximum likelihood parameter values (Table 1).
    size_ancestral = 7310
    size_bottleneck = 1861
    size_af = 14474  # 12300
    size_eu_at_split = 1032  # 1000
    size_as_at_split = 554  # 510

    # Present-day (diploid) sizes follow from the per-generation growth
    # rates applied since the CEU/CHB split.
    growth_eu = 0.0038  # 0.004
    growth_as = 0.0048  # 0.0055
    size_eu = size_eu_at_split / math.exp(-growth_eu * t_eu_as_split)
    size_as = size_as_at_split / math.exp(-growth_as * t_eu_as_split)

    # Migration rates during the various epochs.
    mig_af_b = 15e-5  # 25e-5
    mig_af_eu = 2.5e-5  # 3e-5
    mig_af_as = 0.78e-5  # 1.9e-5
    mig_eu_as = 3.11e-5  # 9.6e-5

    # Sample sizes are what is output for the current generation.
    configs = [
        msprime.PopulationConfiguration(
            sample_size=sample_n_AF, initial_size=size_af),
        msprime.PopulationConfiguration(
            sample_size=sample_n_EU, initial_size=size_eu,
            growth_rate=growth_eu),
        msprime.PopulationConfiguration(
            sample_size=sample_n_AS, initial_size=size_as,
            growth_rate=growth_as),
    ]
    rates = [
        [0, mig_af_eu, mig_af_as],
        [mig_af_eu, 0, mig_eu_as],
        [mig_af_as, mig_eu_as, 0],
    ]

    # CEU and CHB merge into B with rate changes at the split time.
    events = [
        msprime.MassMigration(
            time=t_eu_as_split, source=2, destination=1, proportion=1.0),
        msprime.MigrationRateChange(time=t_eu_as_split, rate=0),
    ]
    for cell in ((0, 1), (1, 0)):
        events.append(msprime.MigrationRateChange(
            time=t_eu_as_split, rate=mig_af_b, matrix_index=cell))
    events.append(msprime.PopulationParametersChange(
        time=t_eu_as_split, initial_size=size_bottleneck, growth_rate=0,
        population_id=1))
    # Population B merges into YRI, after which migration is switched off
    # (the rate reset was missing in the old tutorial; update 03 06 2020).
    events.append(msprime.MassMigration(
        time=t_bottleneck, source=1, destination=0, proportion=1.0))
    events.append(msprime.MigrationRateChange(time=t_bottleneck, rate=0))
    # Size changes to the ancestral size at t_ancestral.
    events.append(msprime.PopulationParametersChange(
        time=t_ancestral, initial_size=size_ancestral, population_id=0))

    demography = dict(
        population_configurations=configs,
        migration_matrix=rates,
        demographic_events=events,
    )

    # Print the demographic history that has just been described.
    msprime.DemographyDebugger(**demography).print_history()

    # A non-zero mutation rate is needed to obtain genotypes.
    return msprime.simulate(
        length=2.5e8,
        recombination_rate=1e-8,
        mutation_rate=1.25e-8,
        **demography)
def __init__(self):
    """
    Set up the Tennessen two-population (African / European) model.

    Populates ``self.population_configurations``, ``self.migration_matrix``
    and ``self.demographic_events``. Population 0 is the African population
    and population 1 is the European population.
    """
    # Since the Tennessen two population model largely uses parameters from
    # Gravel et al. 2011, we begin by taking the maximum likelihood
    # values from table 2 of Gravel et al. 2011 using the Low-coverage +
    # exons data. We ignore all values related to the asian (AS) population
    # as it is not present in the Tennessen two population model. Initially
    # we copy over the pre-exponential growth population size estimates,
    # migration rates, and epoch times:
    generation_time = 25

    N_A = 7310  # Ancient population size
    N_AF0 = 14474  # Pre-modern african population size (pre and post OOA)
    N_B = 1861  # OOA population size, pre-expansion
    N_EU0 = 1032  # European population size, pre-expansion

    m_AF0_B = 15e-5  # migration from pre-expansion africa to pre-expansion OOA
    m_AF1_EU1 = 2.5e-5  # migration from pre-expansion africa to 2nd-expansion euro

    T_AF = 148000 / generation_time  # Epoch transition from ancient to AF0
    T_B = 51000 / generation_time  # OOA time
    # The european asian split time, begins 1st growth period
    T_EU_AS = 23000 / generation_time

    # Next we include the additional parameters from Tennessen et al 2012
    # which include all exponential growth rates and the time of the second
    # round of growth in the European population/first round in the African
    # population. These parameters are copied from the section titled
    # "Abundance of rare variation explained by human demographic history"
    # in Tennessen et al.
    r_EU0 = 0.307e-2  # The growth rate for the 1st european expansion
    r_EU1 = 1.95e-2  # The growth rate for the 2nd european expansion
    r_AF0 = 1.66e-2  # The growth rate for the 1st african expansion
    T_AG = 5115 / generation_time  # start of 2nd european growth epoch

    # For the post exponential growth population sizes we can calculate the
    # population sizes at the start of the epoch using the formula
    # f(t) = x_0 * exp(r * (t_0 - t))

    # European population size after 1st expansion
    N_EU1 = N_EU0 * math.exp(r_EU0 * (T_EU_AS - T_AG))
    # European population size after 2nd expansion
    N_EU2 = N_EU1 * math.exp(r_EU1 * T_AG)
    # African population size after 1st expansion
    N_AF1 = N_AF0 * math.exp(r_AF0 * T_AG)

    # Now we set up the population configurations. The population IDs are
    # 0=African and 1=European (the order of the list below). This includes
    # both the initial sizes, growth rates, and migration rates.
    self.population_configurations = [
        msprime.PopulationConfiguration(initial_size=N_AF1, growth_rate=r_AF0),
        msprime.PopulationConfiguration(initial_size=N_EU2, growth_rate=r_EU1)
    ]
    self.migration_matrix = [
        [0, m_AF1_EU1],
        [m_AF1_EU1, 0],
    ]

    # Now we add the demographic events working backwards in time. Starting
    # with the growth slowdown in Europeans and the transition to a fixed
    # population size in Africans.
    self.demographic_events = [
        # Set the migration rate for 1st CEU growth period (for now stays same)
        msprime.MigrationRateChange(time=T_AG, rate=m_AF1_EU1,
                                    matrix_index=(0, 1)),
        msprime.MigrationRateChange(time=T_AG, rate=m_AF1_EU1,
                                    matrix_index=(1, 0)),
        # Growth slowdown in Europeans
        msprime.PopulationParametersChange(time=T_AG, initial_size=N_EU1,
                                           growth_rate=r_EU0, population_id=1),
        # Reversion to fixed population size in Africans
        msprime.PopulationParametersChange(time=T_AG, initial_size=N_AF0,
                                           growth_rate=0, population_id=0),
        # Set the migration rate for pre CEU/CHB split
        msprime.MigrationRateChange(time=T_EU_AS, rate=m_AF0_B,
                                    matrix_index=(0, 1)),
        msprime.MigrationRateChange(time=T_EU_AS, rate=m_AF0_B,
                                    matrix_index=(1, 0)),
        # Reversion to fixed population size at the time of the CHB/CEU split
        msprime.PopulationParametersChange(time=T_EU_AS, initial_size=N_B,
                                           growth_rate=0, population_id=1),
        # Coalescence between the OOA and YRI pops
        msprime.MassMigration(time=T_B, source=1, destination=0,
                              proportion=1.0),
        # Switch migration off after the merge. Otherwise the m_AF0_B rates
        # remain active and lineages can migrate back into the emptied
        # population 1 further back in time.
        msprime.MigrationRateChange(time=T_B, rate=0),
        # Change to ancestral population size pre OOA
        msprime.PopulationParametersChange(time=T_AF, initial_size=N_A,
                                           population_id=0)
    ]
# Convert the relative parameters to absolute units using 4 * Ne:
# migration rates are divided by it, the switch time is multiplied by it.
opts['m'] = opts['m_rel'] / (4 * opts['Ne'])
opts['M'] = opts['M_rel'] / (4 * opts['Ne'])
opts['T'] = opts['T_rel'] * (4 * opts['Ne'])

# Three constant-size populations with identical sample and population size.
npops = 3
pops = []
for _ in range(npops):
    pops.append(
        msprime.PopulationConfiguration(
            sample_size=opts['nsamples'],
            initial_size=opts['Ne'],
            growth_rate=0.0))

# Initial migration matrix: every entry is M except (1, 0), which is m.
migr_init = [
    [0, opts['M'], opts['M']],
    [opts['m'], 0, opts['M']],
    [opts['M'], opts['M'], 0],
]

# At time T, entry (1, 2) drops to m and entry (1, 0) rises to M.
migr_change = [
    msprime.MigrationRateChange(opts['T'], opts['m'], matrix_index=(1, 2)),
    msprime.MigrationRateChange(opts['T'], opts['M'], matrix_index=(1, 0)),
]

ts = msprime.simulate(
    Ne=opts['Ne'],
    length=opts['chrom_len'],
    recombination_rate=opts['recomb_rate'],
    population_configurations=pops,
    migration_matrix=migr_init,
    demographic_events=migr_change)

# Log progress with a timestamp before the mutation step.
logfile.write(" done simulating! Generating mutations.\n")
logfile.write(time.strftime(' %X %x %Z\n'))
logfile.flush()

rng = msprime.RandomGenerator(mut_seed)
nodes = msprime.NodeTable()
def __init__(self):
    """
    Set up a three-population Out-of-Africa model.

    Populates ``self.population_configurations``, ``self.migration_matrix``
    and ``self.demographic_events``. Populations are indexed 0=AF, 1=EU and
    2=AS, following the order of the configuration list.
    """
    years_per_generation = 25

    # Epoch times, converted from years to generations.
    t_ancestral = 220e3 / years_per_generation
    t_bottleneck = 140e3 / years_per_generation
    t_eu_as_split = 21.2e3 / years_per_generation

    # Population sizes.
    size_ancestral = 7300
    size_af = 12300
    size_bottleneck = 2100
    size_eu_at_split = 1000
    size_as_at_split = 510

    # Growth rates per generation.
    growth_eu = 0.4e-2
    growth_as = 0.55e-2

    # Migration rates.
    mig_af_b = 25e-5
    mig_af_eu = 3e-5
    mig_af_as = 1.9e-5
    mig_eu_as = 9.6e-5

    # Population sizes at modern (T=0) time.
    size_eu_now = size_eu_at_split * math.exp(growth_eu * t_eu_as_split)
    size_as_now = size_as_at_split * math.exp(growth_as * t_eu_as_split)

    present_day = (
        (size_af, 0),
        (size_eu_now, growth_eu),
        (size_as_now, growth_as),
    )
    self.population_configurations = [
        msprime.PopulationConfiguration(initial_size=size, growth_rate=rate)
        for size, rate in present_day
    ]

    # Initial migration matrix.
    self.migration_matrix = [
        [0, mig_af_eu, mig_af_as],
        [mig_af_eu, 0, mig_eu_as],
        [mig_af_as, mig_eu_as, 0],
    ]

    # CEU and CHB merge into B, reset migration rates to Af-B, change pop size.
    split_events = [
        msprime.MassMigration(
            time=t_eu_as_split, source=2, dest=1, proportion=1),
        msprime.MigrationRateChange(time=t_eu_as_split, rate=0),
        msprime.MigrationRateChange(
            time=t_eu_as_split, rate=mig_af_b, matrix_index=(0, 1)),
        msprime.MigrationRateChange(
            time=t_eu_as_split, rate=mig_af_b, matrix_index=(1, 0)),
        msprime.PopulationParametersChange(
            time=t_eu_as_split, initial_size=size_bottleneck,
            population_id=1, growth_rate=0),
    ]
    # B and AF merge, turn migration off.
    merge_events = [
        msprime.MassMigration(
            time=t_bottleneck, source=1, dest=0, proportion=1),
        msprime.MigrationRateChange(time=t_bottleneck, rate=0),
    ]
    # Ancestral size change.
    ancestral_events = [
        msprime.PopulationParametersChange(
            time=t_ancestral, initial_size=size_ancestral, population_id=0),
    ]
    self.demographic_events = split_events + merge_events + ancestral_events
def out_of_africa(N_haps, no_migration):
    """
    Build the three-population Out-of-Africa model for a simulation study.

    Population IDs are 0=YRI, 1=CEU and 2=CHB.

    :param N_haps: indexable with three entries; population ``i`` gets a
        sample size of ``2 * N_haps[i]``.
    :param no_migration: if true, every migration rate is set to zero.
    :return: tuple ``(population_configurations, migration_matrix,
        demographic_events, N_A, n_pops)`` where ``N_A`` is the ancestral
        population size and ``n_pops`` is 3.
    """
    N_A = 7300
    N_B = 2100
    N_AF = 12300
    N_EU0 = 1000
    N_AS0 = 510
    # Times are provided in years, so we convert into generations.
    generation_time = 25
    T_AF = 220e3 / generation_time
    T_B = 140e3 / generation_time
    T_EU_AS = 21.2e3 / generation_time
    # We need to work out the starting (diploid) population sizes based on
    # the growth rates provided for these two populations
    r_EU = 0.004
    r_AS = 0.0055
    N_EU = N_EU0 / math.exp(-r_EU * T_EU_AS)
    N_AS = N_AS0 / math.exp(-r_AS * T_EU_AS)
    # Migration rates during the various epochs.
    if no_migration:
        m_AF_B = 0
        m_AF_EU = 0
        m_AF_AS = 0
        m_EU_AS = 0
    else:
        m_AF_B = 25e-5
        m_AF_EU = 3e-5
        m_AF_AS = 1.9e-5
        m_EU_AS = 9.6e-5
    # Population IDs correspond to their indexes in the population
    # configuration array. Therefore, we have 0=YRI, 1=CEU and 2=CHB
    # initially.
    n_pops = 3
    population_configurations = [
        msprime.PopulationConfiguration(
            sample_size=2 * N_haps[0], initial_size=N_AF),
        msprime.PopulationConfiguration(
            sample_size=2 * N_haps[1], initial_size=N_EU, growth_rate=r_EU),
        msprime.PopulationConfiguration(
            sample_size=2 * N_haps[2], initial_size=N_AS, growth_rate=r_AS)
    ]
    migration_matrix = [
        [0, m_AF_EU, m_AF_AS],
        [m_AF_EU, 0, m_EU_AS],
        [m_AF_AS, m_EU_AS, 0],
    ]
    demographic_events = [
        # CEU and CHB merge into B with rate changes at T_EU_AS
        msprime.MassMigration(
            time=T_EU_AS, source=2, destination=1, proportion=1.0),
        msprime.MigrationRateChange(time=T_EU_AS, rate=0),
        msprime.MigrationRateChange(
            time=T_EU_AS, rate=m_AF_B, matrix_index=(0, 1)),
        msprime.MigrationRateChange(
            time=T_EU_AS, rate=m_AF_B, matrix_index=(1, 0)),
        msprime.PopulationParametersChange(
            time=T_EU_AS, initial_size=N_B, growth_rate=0, population_id=1),
        # Population B merges into YRI at T_B
        msprime.MassMigration(
            time=T_B, source=1, destination=0, proportion=1.0),
        # Switch all migration off once B has merged into YRI. Without this
        # the m_AF_B rates stay active and lineages can migrate back into
        # the emptied population 1 further back in time.
        msprime.MigrationRateChange(time=T_B, rate=0),
        # Size changes to N_A at T_AF
        msprime.PopulationParametersChange(
            time=T_AF, initial_size=N_A, population_id=0)
    ]
    # Return the output required for a simulation study.
    return population_configurations, migration_matrix, demographic_events, N_A, n_pops
msprime.PopulationConfiguration(sample_size=1, initial_size=N_AS, growth_rate=r_AS) ] migration_matrix = [ [0, m_AF_EU, m_AF_AS], [m_AF_EU, 0, m_EU_AS], [m_AF_AS, m_EU_AS, 0], ] demographic_events = [ # CEU and CHB merge into B with rate changes at T_EU_AS msprime.MassMigration(time=T_EU_AS, source=2, destination=1, proportion=1.0), msprime.MigrationRateChange(time=T_EU_AS, rate=0), msprime.MigrationRateChange(time=T_EU_AS, rate=m_AF_B, matrix_index=(0, 1)), msprime.MigrationRateChange(time=T_EU_AS, rate=m_AF_B, matrix_index=(1, 0)), msprime.PopulationParametersChange(time=T_EU_AS, initial_size=N_B, growth_rate=0, population_id=1), # Population B merges into YRI at T_B msprime.MassMigration(time=T_B, source=1, destination=0, proportion=1.0), # Size changes to N_A at T_AF msprime.PopulationParametersChange(time=T_AF, initial_size=N_A, population_id=0) ]
def f(time=1, rate=1, matrix_index=None):
    """Build a ``msprime.MigrationRateChange`` from the given keyword values."""
    event_kwargs = {
        "time": time,
        "rate": rate,
        "matrix_index": matrix_index,
    }
    return msprime.MigrationRateChange(**event_kwargs)
def main(nhaps=None, nvars=None, rec_map=None, maf=None, to_bed=False,
         threads=1, labels=None, split_out=False, plot_pca=True,
         focus_pops=None):
    """Simulate a five-population admixture model and post-process the result.

    The tree sequence is simulated with ``msprime.simulate`` unless
    ``Latino.hdf5`` already exists, in which case it is loaded from that file.
    A fresh simulation is dumped to ``Latino.hdf5`` and written as VCF to
    ``OOA_Latino.vcf.gz``. Afterwards ``make_plink`` and (optionally) a PCA
    scatter plot saved to ``simulationPCA.pdf`` are run.

    Args:
        nhaps: Per-population sample sizes, indexed 0..4 in the order of
            ``labels``. Defaults to ``[45000] * 5``.
        nvars: Passed to ``msprime.simulate`` as ``length``. Defaults to
            ``int(1e6)``; ignored (set to ``None``) when ``rec_map`` is given.
        rec_map: Optional path handed to
            ``msprime.RecombinationMap.read_hapmap``. Without it a uniform
            recombination rate of 2e-8 is used.
        maf: If not ``None``, the tree sequence is passed through
            ``strip_singletons(ts, maf)`` before being saved.
        to_bed: Forwarded to ``make_plink``. ``make_plink`` is skipped only
            when this is ``None``.
        threads: Forwarded to ``make_plink`` and ``skpca``.
        labels: Population labels. Defaults to
            ``['AFR', 'EUR', 'ASN', 'MX', 'AD']``.
        split_out: If true, ``zip(labels, nhaps)`` is passed to ``make_plink``
            as the split specification; otherwise ``False`` is passed.
        plot_pca: If true, run ``skpca`` and save the PC1/PC2 scatter plot.
        focus_pops: Forwarded to ``make_plink``. Defaults to
            ``['AFR', 'EUR']``.
    """
    # Build list defaults per call rather than sharing one mutable object
    # across calls via the signature.
    if labels is None:
        labels = ['AFR', 'EUR', 'ASN', 'MX', 'AD']
    if focus_pops is None:
        focus_pops = ['AFR', 'EUR']
    if nhaps is None:
        nhaps = [45000] * 5
    if nvars is None:
        nvars = int(1e6)
    # First we set out the maximum likelihood values of the various parameters
    # given in Table 1.
    N_A = 11273
    N_B = 3104
    N_AF = 23721
    N_EU0 = 2271
    N_AS0 = 924
    N_MX0 = 800  # From Table 2 Gutenkunst 2009
    # Times are provided in years, so we convert into generations.
    # from Jouganous et al. 2017:
    generation_time = 29
    T_AF = 312e3 / generation_time
    T_B = 125e3 / generation_time
    T_EU_AS = 42.3e3 / generation_time
    T_MX = 12.2e3 / generation_time  # from table 1 Gravel 2013
    T_AD = 18  # confidence interval
    # We need to work out the starting (diploid) population sizes based on
    # the growth rates provided for these populations
    r_EU = 0.0019
    r_AS = 0.00309
    r_MX = 0.0050  # From Table 2 Gutenkunst
    r_AD = 0.05
    N_EU = N_EU0 / math.exp(-r_EU * T_EU_AS)
    N_AS = N_AS0 / math.exp(-r_AS * T_EU_AS)
    N_MX = N_MX0 / math.exp(-r_MX * T_MX)
    N_AD0 = N_MX0  # / math.exp(-r_MX * T_AD)) / 3
    N_AD = (N_AD0 / math.exp(-r_AD * T_AD))  # * 0.45
    # Migration rates during the various epochs.
    # NOTE(review): m_AF_B is never referenced by the events below; confirm
    # whether AFR<->B migration between T_EU_AS and T_B was meant to use it.
    m_AF_B = 15.8e-5
    m_AF_EU = 1.1e-5
    m_AF_AS = 0.48e-5
    m_EU_AS = 4.19e-5
    m_MX_AD = 5e-5
    # Population IDs correspond to their indexes in the population
    # configuration array. Therefore, we have 0=YRI, 1=CEU, 2=CHB, 3=MX and
    # 4=AD initially.
    population_configurations = [
        msprime.PopulationConfiguration(
            sample_size=nhaps[0], initial_size=N_AF),
        msprime.PopulationConfiguration(
            sample_size=nhaps[1], initial_size=N_EU, growth_rate=r_EU),
        msprime.PopulationConfiguration(
            sample_size=nhaps[2], initial_size=N_AS, growth_rate=r_AS),
        msprime.PopulationConfiguration(
            sample_size=nhaps[3], initial_size=N_MX, growth_rate=r_MX),
        msprime.PopulationConfiguration(
            sample_size=nhaps[4], initial_size=N_AD, growth_rate=r_AD),
    ]
    # Migrations        AFR      EUR      ASN      MX       AD
    migration_matrix = [
        [0, m_AF_EU, m_AF_AS, 0, 0],        # AFR
        [m_AF_EU, 0, m_EU_AS, 0, 0],        # EUR
        [m_AF_AS, m_EU_AS, 0, 0, 0],        # ASN
        [0, 0, 0, 0, m_MX_AD],              # MX
        [0, 0, 0, m_MX_AD, 0],              # AD
    ]
    demographic_events = [
        # Slaves arrival
        msprime.MassMigration(
            time=13.758620689655173, source=4, destination=0,
            proportion=0.098),
        # Colonials arrival
        msprime.MassMigration(
            time=T_AD - 2, source=4, destination=1, proportion=0.443),
        # Admixed fraction grows from N_AD0 at rate r_AD at time T_AD
        msprime.PopulationParametersChange(
            time=T_AD, initial_size=N_AD0, growth_rate=r_AD, population_id=4),
        # As the admixed merge to MX, turn off their migration rates
        msprime.MigrationRateChange(time=T_AD, rate=0, matrix_index=(3, 4)),
        msprime.MigrationRateChange(time=T_AD, rate=0, matrix_index=(4, 3)),
        # Admixed fraction merges with MX trunk
        msprime.MassMigration(
            time=T_AD, source=4, destination=3, proportion=1.0),  # 0.459),
        # switch to standard coalescent
        # msprime.SimulationModelChange(T_AD, msprime.StandardCoalescent(1)),
        # Natives grow from N_MX0 at rate r_MX at time T_MX
        msprime.PopulationParametersChange(
            time=T_MX, initial_size=N_MX0, growth_rate=r_MX, population_id=3),
        # Natives merge into asian trunk
        msprime.MassMigration(
            time=T_MX, source=3, destination=2, proportion=1.0),
        # As the natives merge to the asians, turn off their growth rates
        msprime.PopulationParametersChange(
            time=T_MX, initial_size=N_MX0, growth_rate=0, population_id=3),
        # Asians grow from N_AS0 at rate r_AS at time T_EU_AS
        msprime.PopulationParametersChange(
            time=T_EU_AS, initial_size=N_AS0, growth_rate=r_AS,
            population_id=2),
        # Merge asian trunk with european
        msprime.MassMigration(
            time=T_EU_AS, source=2, destination=1, proportion=1.0),
        # As the Asians merge to EUR, turn off their migration rates
        msprime.MigrationRateChange(time=T_EU_AS, rate=0, matrix_index=(1, 2)),
        msprime.MigrationRateChange(time=T_EU_AS, rate=0, matrix_index=(2, 1)),
        msprime.MigrationRateChange(time=T_EU_AS, rate=0, matrix_index=(0, 2)),
        msprime.MigrationRateChange(time=T_EU_AS, rate=0, matrix_index=(2, 0)),
        # As the Asians merge to EUR, turn off their growth rates
        msprime.PopulationParametersChange(
            time=T_EU_AS, initial_size=N_AS0, growth_rate=0, population_id=2),
        # Europeans grow from N_EU0 at rate r_EU at time T_EU_AS
        msprime.PopulationParametersChange(
            time=T_EU_AS, initial_size=N_EU0, growth_rate=r_EU,
            population_id=1),
        # Pop 1 (EUR/AS) size change at time T_B
        msprime.PopulationParametersChange(
            time=T_B, initial_size=N_EU0, growth_rate=0, population_id=1),
        # YRI merges with B at T_B
        msprime.MassMigration(
            time=T_B, source=1, destination=0, proportion=1.0),
        # Set migrations to 0
        msprime.MigrationRateChange(time=T_B, rate=0),
        # Pop 0 (AFR) size change at time T_B
        msprime.PopulationParametersChange(
            time=T_B, initial_size=N_B, growth_rate=0, population_id=0),
        # msprime.MigrationRateChange(time=T_B, rate=0),
        # Size changes to N_A at T_AF
        msprime.PopulationParametersChange(
            time=T_AF, initial_size=N_A, population_id=0),
    ]
    # dp = msprime.DemographyDebugger(
    #     Ne=N_A,
    #     population_configurations=population_configurations,
    #     migration_matrix=migration_matrix,
    #     demographic_events=demographic_events)
    # dp.print_history()
    # with open('demography.txt', 'w') as fn:
    #     dp.print_history(output=fn)
    if rec_map is not None:
        # A recombination map fixes both the rate and the sequence length.
        rmap = msprime.RecombinationMap.read_hapmap(rec_map)
        nvars = None
        rr = None
    else:
        rmap = None
        rr = 2e-8
    settings = {
        # 'model': msprime.DiscreteTimeWrightFisher(0.25),
        'population_configurations': population_configurations,
        'migration_matrix': migration_matrix,
        'recombination_rate': rr,
        'demographic_events': demographic_events,
        'mutation_rate': 1.44e-8,  # according to 10.1371/journal.pgen.1004023
        'recombination_map': rmap,
        'length': nvars
    }
    vcf_filename = "OOA_Latino.vcf.gz"
    if not os.path.isfile('Latino.hdf5'):
        ts = msprime.simulate(**settings)
        print("Original file contains ", ts.get_num_mutations(), "mutations")
        if maf is not None:
            ts = strip_singletons(ts, maf)
            print("New file contains ", ts.get_num_mutations(), "mutations")
        ts.dump('Latino.hdf5', True)
        with open(vcf_filename, "w") as vcf_file:
            ts.write_vcf(vcf_file, 2)
    else:
        # Reuse the cached simulation; the VCF is not rewritten in this case.
        ts = msprime.load('Latino.hdf5')
    # NOTE(review): the default to_bed=False passes this check, so make_plink
    # runs unless the caller passes None explicitly - confirm this is intended.
    if to_bed is not None:
        if split_out:
            split = zip(labels, nhaps)
        else:
            split = False
        make_plink(vcf_filename, to_bed, threads, split, focus_pops)
    if plot_pca:
        pca = skpca(vcf_filename[: vcf_filename.rfind('.vcf')], 2, threads,
                    None, None)
        # Label consecutive row ranges of the PCA table with their population;
        # each population contributes nhaps // 2 rows.
        count = 0
        for i, haps in enumerate(nhaps):
            haps = haps // 2
            pca.loc[count:(count + haps - 1), 'continent'] = labels[i]
            count += haps
        colors = iter(['k', 'b', 'y', 'r', 'g', 'c'])
        fig, ax = plt.subplots()
        for c, df in pca.groupby('continent'):
            df.plot.scatter(x='PC1', y='PC2', c=next(colors), ax=ax, label=c)
        plt.savefig('simulationPCA.pdf')
def __init__(self):
    """Set up the three-population out-of-Africa model.

    Populates ``self.generation_time``, ``self.population_configurations``,
    ``self.migration_matrix`` and ``self.demographic_events``. Populations
    are indexed 0=YRI, 1=CEU and 2=CHB.
    """
    super().__init__()
    # First we set out the maximum likelihood values of the various parameters
    # given in Table 1.
    N_A = 7300
    N_B = 2100
    N_AF = 12300
    N_EU0 = 1000
    N_AS0 = 510
    # Times are provided in years, so we convert into generations.
    self.generation_time = default_generation_time
    T_AF = 220e3 / self.generation_time
    T_B = 140e3 / self.generation_time
    T_EU_AS = 21.2e3 / self.generation_time
    # We need to work out the starting (diploid) population sizes based on
    # the growth rates provided for these two populations
    r_EU = 0.004
    r_AS = 0.0055
    N_EU = N_EU0 / math.exp(-r_EU * T_EU_AS)
    N_AS = N_AS0 / math.exp(-r_AS * T_EU_AS)
    # Migration rates during the various epochs.
    m_AF_B = 25e-5
    m_AF_EU = 3e-5
    m_AF_AS = 1.9e-5
    m_EU_AS = 9.6e-5
    # Population IDs correspond to their indexes in the population
    # configuration array. Therefore, we have 0=YRI, 1=CEU and 2=CHB
    # initially.
    self.population_configurations = [
        msprime.PopulationConfiguration(initial_size=N_AF),
        msprime.PopulationConfiguration(initial_size=N_EU, growth_rate=r_EU),
        msprime.PopulationConfiguration(initial_size=N_AS, growth_rate=r_AS),
    ]
    self.migration_matrix = [
        [0, m_AF_EU, m_AF_AS],
        [m_AF_EU, 0, m_EU_AS],
        [m_AF_AS, m_EU_AS, 0],
    ]
    self.demographic_events = [
        # CEU and CHB merge into B with rate changes at T_EU_AS
        msprime.MassMigration(
            time=T_EU_AS, source=2, destination=1, proportion=1.0),
        msprime.MigrationRateChange(time=T_EU_AS, rate=0),
        msprime.MigrationRateChange(
            time=T_EU_AS, rate=m_AF_B, matrix_index=(0, 1)),
        msprime.MigrationRateChange(
            time=T_EU_AS, rate=m_AF_B, matrix_index=(1, 0)),
        msprime.PopulationParametersChange(
            time=T_EU_AS, initial_size=N_B, growth_rate=0, population_id=1),
        # Population B merges into YRI at T_B
        msprime.MassMigration(
            time=T_B, source=1, destination=0, proportion=1.0),
        # Population 1 no longer exists before T_B, so switch all migration
        # off; otherwise lineages keep moving between 0 and 1 at rate m_AF_B
        # in the ancestral epochs.
        msprime.MigrationRateChange(time=T_B, rate=0),
        # Size changes to N_A at T_AF
        msprime.PopulationParametersChange(
            time=T_AF, initial_size=N_A, population_id=0),
    ]
def test_migration_rate_change(self):
    """Round-trip ``repr`` for rate changes with and without endpoints."""
    common = {"time": 1, "rate": 1}
    whole_matrix_change = msprime.MigrationRateChange(**common)
    single_entry_change = msprime.MigrationRateChange(
        source=1, dest=2, **common)
    self.assert_repr_round_trip([whole_matrix_change, single_entry_change])
def __init__(self):
    """Set up the four-population admixture model.

    Populations are indexed 0=Africa, 1=Europe, 2=Asia and 3=admixed. All
    times are in generations before present.
    """
    super().__init__()

    # Epoch boundaries.
    gens_modern_humans = 5920   # advent of modern humans
    gens_out_of_africa = 2040   # Out of Africa
    gens_eu_as_split = 920      # Asia-Europe split
    gens_admixture = 12         # admixture

    # Population sizes.
    size_initial = 7310         # initial population size
    size_africa = 14474
    size_ooa = 1861             # out of Africa population
    size_europe = 1032          # bottleneck sizes
    size_asia = 554
    size_admixed = 30000        # initial size of admixed population

    # Per-generation growth rates.
    growth_europe = 0.0038
    growth_asia = 0.0048
    growth_admixed = .05

    # Migration rates.
    mig_africa_ooa = 1.5e-4     # Africa and Out-of-Africa
    mig_africa_europe = 2.5e-5
    mig_africa_asia = 7.8e-6
    mig_europe_asia = 3.11e-5

    # Present-day sizes of the growing populations.
    europe_now = size_europe * math.exp(growth_europe * gens_eu_as_split)
    asia_now = size_asia * math.exp(growth_asia * gens_eu_as_split)
    admixed_now = size_admixed * math.exp(growth_admixed * gens_admixture)

    self.population_configurations = [
        msprime.PopulationConfiguration(
            initial_size=size_africa, growth_rate=0.0),
        msprime.PopulationConfiguration(
            initial_size=europe_now, growth_rate=growth_europe),
        msprime.PopulationConfiguration(
            initial_size=asia_now, growth_rate=growth_asia),
        msprime.PopulationConfiguration(
            initial_size=admixed_now, growth_rate=growth_admixed),
    ]

    self.migration_matrix = [
        [0, mig_africa_europe, mig_africa_asia, 0],
        [mig_africa_europe, 0, mig_europe_asia, 0],
        [mig_africa_asia, mig_europe_asia, 0, 0],
        [0, 0, 0, 0],
    ]

    self.demographic_events = [
        # Admixture event, 1/6 Africa, 2/6 Europe, 3/6 Asia. The tiny time
        # offsets fix the order in which the events are applied.
        msprime.MassMigration(
            time=gens_admixture, source=3, destination=0,
            proportion=1.0 / 6.0),
        msprime.MassMigration(
            time=gens_admixture + 0.0001, source=3, destination=1,
            proportion=2.0 / 5.0),
        msprime.MassMigration(
            time=gens_admixture + 0.0002, source=3, destination=2,
            proportion=1.0),
        # Asia and Europe split
        msprime.MigrationRateChange(time=gens_eu_as_split, rate=0.0),
        msprime.MassMigration(
            time=gens_eu_as_split + 0.0001, source=2, destination=1,
            proportion=1.0),
        msprime.PopulationParametersChange(
            time=gens_eu_as_split + 0.0002, initial_size=size_ooa,
            growth_rate=0.0, population_id=1),
        msprime.MigrationRateChange(
            time=gens_eu_as_split + 0.0003, rate=mig_africa_ooa,
            matrix_index=(0, 1)),
        msprime.MigrationRateChange(
            time=gens_eu_as_split + 0.0003, rate=mig_africa_ooa,
            matrix_index=(1, 0)),
        # Out of Africa event
        msprime.MigrationRateChange(time=gens_out_of_africa, rate=0.0),
        msprime.MassMigration(
            time=gens_out_of_africa + 0.0001, source=1, destination=0,
            proportion=1.0),
        # initial population size
        msprime.PopulationParametersChange(
            time=gens_modern_humans, initial_size=size_initial,
            population_id=0),
    ]
def __init__(self, ta, n0=1, na=1, Ne=1e4, rec_rate=1e-4, loci=2, reps=100):
    """Initialize the model.

    Builds a two-population demography (0=AF, 1=EU) with hard-coded
    parameters and a sample set drawn from population 1 at two time points.

    Args:
        ta: Sampling time given to the second group of samples.
        n0: Number of samples taken from population 1 at time 0.
        na: Number of samples taken from population 1 at time ``ta``.
        Ne: Stored as ``self.Ne``.
        rec_rate: Stored as ``self.rec_rate``.
        loci: Stored as ``self.loci``.
        reps: Stored as ``self.reps``.
    """
    # Times below are given in years and converted to generations.
    generation_time = 25
    T_AF = 148e3 / generation_time
    T_OOA = 51e3 / generation_time
    T_EU0 = 23e3 / generation_time
    T_EG = 5115 / generation_time

    # Growth rates
    r_EU0 = 0.00307
    r_EU = 0.0195
    r_AF = 0.0166

    # population sizes
    N_A = 7310
    N_AF1 = 14474
    N_B = 1861
    N_EU0 = 1032
    N_EU1 = N_EU0 / np.exp(-r_EU0 * (T_EU0 - T_EG))

    # migration rates
    m_AF_B = 15e-5
    m_AF_EU = 2.5e-5

    # present Ne
    N_EU = N_EU1 / np.exp(-r_EU * T_EG)
    N_AF = N_AF1 / np.exp(-r_AF * T_EG)

    population_configurations = [
        msp.PopulationConfiguration(initial_size=N_AF, growth_rate=r_AF),
        msp.PopulationConfiguration(initial_size=N_EU, growth_rate=r_EU),
    ]
    migration_matrix = [[0, m_AF_EU], [m_AF_EU, 0]]
    demographic_events = [
        msp.MigrationRateChange(time=T_EG, rate=m_AF_EU, matrix_index=(0, 1)),
        msp.MigrationRateChange(time=T_EG, rate=m_AF_EU, matrix_index=(1, 0)),
        msp.PopulationParametersChange(
            time=T_EG, growth_rate=r_EU0, initial_size=N_EU1, population_id=1),
        msp.PopulationParametersChange(
            time=T_EG, growth_rate=0, initial_size=N_AF1, population_id=0),
        msp.MigrationRateChange(time=T_EU0, rate=m_AF_B, matrix_index=(0, 1)),
        msp.MigrationRateChange(time=T_EU0, rate=m_AF_B, matrix_index=(1, 0)),
        msp.PopulationParametersChange(
            time=T_EU0, initial_size=N_B, growth_rate=0, population_id=1),
        msp.MassMigration(time=T_OOA, source=1, destination=0, proportion=1.0),
        # Population 1 no longer exists before T_OOA, so switch all
        # migration off; otherwise lineages keep moving between 0 and 1 at
        # rate m_AF_B in the ancestral epochs.
        msp.MigrationRateChange(time=T_OOA, rate=0),
        msp.PopulationParametersChange(
            time=T_AF, initial_size=N_A, population_id=0),
    ]
    self.pop_config = population_configurations
    self.migration_matrix = migration_matrix
    self.demography = demographic_events
    self.rec_rate = rec_rate
    self.loci = loci
    # Both sample groups come from population 1; they differ only in time.
    self.samples1 = [msp.Sample(population=1, time=0) for _ in range(n0)]
    self.samples2 = [msp.Sample(population=1, time=ta) for _ in range(na)]
    self.samples = self.samples1 + self.samples2
    self.reps = reps
    self.Ne = Ne
    self.treeseq = None
def create_simulation_runner(parser, arg_list):
    """
    Parse ``arg_list`` with ``parser`` and return the SimulationRunner
    configured by those arguments. Invalid combinations are reported through
    ``parser.error``.
    """
    args = parser.parse_args(arg_list)
    if args.mutation_rate == 0 and not args.trees:
        parser.error("Need to specify at least one of --theta or --trees")

    rho, loci_arg = args.recombination[0], args.recombination[1]
    num_loci = int(loci_arg)
    if loci_arg != num_loci:
        parser.error("Number of loci must be integer value")
    if rho != 0.0 and num_loci < 2:
        parser.error("Number of loci must > 1")

    # Recombination and mutation rates are not scaled by the size of the
    # region.
    recomb_rate = rho / (num_loci - 1) if num_loci > 1 else 0.0
    mutation_rate = args.mutation_rate / num_loci

    # ms defines the GC rate as a ratio of the recombination rate; with zero
    # recombination the gc rate is taken directly from the parameter.
    gc_param, gc_tract_length = args.gene_conversion
    if recomb_rate != 0.0:
        gc_rate = recomb_rate * gc_param
    elif num_loci > 1:
        gc_rate = gc_param / (num_loci - 1)
    else:
        gc_rate = 0

    # Defaults for the unstructured (single population) case.
    demography = msprime.Demography.isolated_model([1])
    symmetric_migration_rate = 0.0
    num_populations = 1
    migration_matrix = [[0.0]]
    num_samples = [args.sample_size]

    # Check the structure format.
    if args.structure is None:
        if len(args.migration_matrix_entry) > 0:
            parser.error(
                "Cannot specify migration matrix entries without "
                "first providing a -I option"
            )
        if args.migration_matrix is not None:
            parser.error(
                "Cannot specify a migration matrix without "
                "first providing a -I option"
            )
    else:
        structure = args.structure
        num_populations = convert_int(structure[0], parser)
        # One sample size is required for every population.
        if len(structure) < num_populations + 1:
            parser.error("Must have num_populations sample sizes")
        demography = msprime.Demography.isolated_model([1] * num_populations)
        num_samples = [
            convert_int(structure[j + 1], parser) for j in range(num_populations)
        ]
        if sum(num_samples) != args.sample_size:
            parser.error("Population sample sizes must sum to sample_size")
        # The overall migration rate is an optional trailing value.
        if len(structure) == num_populations + 2:
            symmetric_migration_rate = convert_float(
                structure[num_populations + 1], parser
            )
            check_migration_rate(parser, symmetric_migration_rate)
        elif len(structure) > num_populations + 2:
            parser.error("Too many arguments to --structure/-I")
        if num_populations > 1:
            per_pair_rate = symmetric_migration_rate / (num_populations - 1)
            migration_matrix = [
                [per_pair_rate * int(col != row) for col in range(num_populations)]
                for row in range(num_populations)
            ]

    if args.migration_matrix is not None:
        migration_matrix = convert_migration_matrix(
            parser, args.migration_matrix, num_populations
        )
    for entry in args.migration_matrix_entry:
        src = convert_population_id(parser, entry[0], num_populations)
        dst = convert_population_id(parser, entry[1], num_populations)
        entry_rate = entry[2]
        if src == dst:
            parser.error("Cannot set diagonal elements in migration matrix")
        check_migration_rate(parser, entry_rate)
        migration_matrix[src][dst] = entry_rate

    # Set the initial demography
    if args.growth_rate is not None:
        for population in demography.populations:
            population.growth_rate = args.growth_rate
    for raw_id, growth_rate in args.population_growth_rate:
        pid = convert_population_id(parser, raw_id, num_populations)
        demography.populations[pid].growth_rate = growth_rate
    for raw_id, size in args.population_size:
        pid = convert_population_id(parser, raw_id, num_populations)
        demography.populations[pid].initial_size = size

    # Each entry is an (argument index, event) pair so the original command
    # line ordering can be checked against time ordering at the end.
    demographic_events = []

    # Admixture arguments are handled first. This is done differently to ms,
    # as msprime requires a fixed number of populations: every such argument
    # adds one population. This is a messy hack, and will probably need to
    # be changed.
    for index, (t, raw_id, proportion) in args.admixture:
        check_event_time(parser, t)
        pid = convert_population_id(parser, raw_id, num_populations)
        if proportion < 0 or proportion > 1:
            parser.error("Proportion value must be 0 <= p <= 1.")
        # In ms, p is the probability of staying in the source and 1 - p the
        # probability of moving to the new population.
        demographic_events.append(
            (index, msprime.MassMigration(t, pid, num_populations, 1 - proportion))
        )
        num_populations += 1
        # Grow the migration matrix by one zero column and one zero row.
        for row in migration_matrix:
            row.append(0)
        migration_matrix.append([0] * num_populations)
        demography.populations.append(msprime.Population(initial_size=1))
        num_samples.append(0)

    has_admixture = len(args.admixture) != 0

    # Add the demographic events
    for index, (t, alpha) in args.growth_rate_change:
        if has_admixture:
            raise_admixture_incompatability_error(parser, "-eG")
        check_event_time(parser, t)
        change = msprime.PopulationParametersChange(time=t, growth_rate=alpha)
        demographic_events.append((index, change))
    for index, (t, raw_id, alpha) in args.population_growth_rate_change:
        pid = convert_population_id(parser, raw_id, num_populations)
        check_event_time(parser, t)
        change = msprime.PopulationParametersChange(
            time=t, growth_rate=alpha, population_id=pid
        )
        demographic_events.append((index, change))
    for index, (t, x) in args.size_change:
        if has_admixture:
            raise_admixture_incompatability_error(parser, "-eN")
        check_event_time(parser, t)
        change = msprime.PopulationParametersChange(
            time=t, initial_size=x, growth_rate=0
        )
        demographic_events.append((index, change))
    for index, (t, raw_id, x) in args.population_size_change:
        check_event_time(parser, t)
        pid = convert_population_id(parser, raw_id, num_populations)
        change = msprime.PopulationParametersChange(
            time=t, initial_size=x, growth_rate=0, population_id=pid
        )
        demographic_events.append((index, change))
    for index, (t, raw_i, raw_j) in args.population_split:
        check_event_time(parser, t)
        pop_i = convert_population_id(parser, raw_i, num_populations)
        pop_j = convert_population_id(parser, raw_j, num_populations)
        demographic_events.append((index, msprime.MassMigration(t, pop_i, pop_j, 1.0)))
        # Migration rates from subpopulation i (M[k, i], k != i) are set to zero.
        demographic_events.extend(
            (index, msprime.MigrationRateChange(t, 0.0, matrix_index=(k, pop_i)))
            for k in range(num_populations)
            if k != pop_i
        )

    # Demographic events that affect the migration matrix
    if num_populations == 1:
        migration_changes = (
            args.migration_rate_change,
            args.migration_matrix_entry_change,
            args.migration_matrix_change,
        )
        if any(len(changes) > 0 for changes in migration_changes):
            parser.error("Cannot change migration rates for 1 population")
    for index, (t, x) in args.migration_rate_change:
        if has_admixture:
            raise_admixture_incompatability_error(parser, "-eM")
        check_migration_rate(parser, x)
        check_event_time(parser, t)
        rate_change = msprime.MigrationRateChange(t, x / (num_populations - 1))
        demographic_events.append((index, rate_change))
    for index, fields in args.migration_matrix_entry_change:
        t = fields[0]
        check_event_time(parser, t)
        pop_i = convert_population_id(parser, fields[1], num_populations)
        pop_j = convert_population_id(parser, fields[2], num_populations)
        if pop_i == pop_j:
            parser.error("Cannot set diagonal elements in migration matrix")
        rate = fields[3]
        check_migration_rate(parser, rate)
        rate_change = msprime.MigrationRateChange(t, rate, matrix_index=(pop_i, pop_j))
        demographic_events.append((index, rate_change))
    for index, fields in args.migration_matrix_change:
        if len(fields) < 3:
            parser.error("Need at least three arguments to -ma")
        if has_admixture:
            raise_admixture_incompatability_error(parser, "-ema")
        t = convert_float(fields[0], parser)
        check_event_time(parser, t)
        if convert_int(fields[1], parser) != num_populations:
            parser.error("num_populations must be equal for new migration matrix")
        matrix = convert_migration_matrix(parser, fields[2:], num_populations)
        # One rate change per off-diagonal entry of the new matrix.
        demographic_events.extend(
            (index, msprime.MigrationRateChange(t, matrix[j][k], matrix_index=(j, k)))
            for j in range(num_populations)
            for k in range(num_populations)
            if j != k
        )

    # Order by command line position, then verify that this ordering is also
    # non-decreasing in time.
    demographic_events.sort(key=lambda pair: (pair[0], pair[1].time))
    by_time = sorted(demographic_events, key=lambda pair: pair[1].time)
    if demographic_events != by_time:
        parser.error("Demographic events must be supplied in non-decreasing time order")
    demography.events = [event for _, event in demographic_events]
    demography.migration_matrix = migration_matrix

    # Adjust the population sizes so that the timescales agree. In principle
    # we could correct this with a ploidy value=0.5, but what we have here
    # seems less awful.
    for msp_event in demography.events:
        is_size_change = isinstance(msp_event, msprime.PopulationParametersChange)
        if is_size_change and msp_event.initial_size is not None:
            msp_event.initial_size /= 2
    for j, pop in enumerate(demography.populations):
        pop.initial_size /= 2
        pop.name = f"pop_{j}"

    return SimulationRunner(
        num_samples,
        demography,
        num_loci=num_loci,
        num_replicates=args.num_replicates,
        recombination_rate=recomb_rate,
        mutation_rate=mutation_rate,
        gene_conversion_rate=gc_rate,
        gene_conversion_tract_length=gc_tract_length,
        precision=args.precision,
        print_trees=args.trees,
        ms_random_seeds=args.random_seeds,
        hotspots=args.hotspots,
    )
#set up the initial population ss_each = args.ss * 2 sample_sizes = [ss_each] * (d) population_configurations = [ msprime.PopulationConfiguration(sample_size=k) for k in sample_sizes ] population_configurations.extend( [msprime.PopulationConfiguration(sample_size=0)] * 3) ############ set up the demography demog_list = [ #change migration rate to 0 on the 100th generation [msprime.MigrationRateChange(time=100, rate=0)], #move lineages to the north (deme = 0) or south (deme = 35) to create N-S gradient [ msprime.MassMigration(time=100, source=i, destination=0, proportion=1.0) for i in range(1, 6) ], [ msprime.MassMigration(time=100, source=i, destination=0, proportion=0.8) for i in range(6, 12) ], [ msprime.MassMigration(time=100,