def __init__(self):
    """Create a stub species with an empty genome and initialise the parent
    class with placeholder genetic-map metadata."""
    empty_genome = stdpopsim.Genome(chromosomes=[])
    stub_species = stdpopsim.Species(
        id="tesspe",
        name="Test species",
        genome=empty_genome,
    )
    parent_kwargs = {
        "species": stub_species,
        "name": "test_map",
        "url": "http://example.com/genetic_map.tar.gz",
        "file_pattern": "prefix_{name}.txt",
    }
    super().__init__(**parent_kwargs)
def __init__(self):
    """Create a stub species with an empty genome and initialise the parent
    class with placeholder annotation metadata."""
    empty_genome = stdpopsim.Genome(chromosomes=[])
    stub_species = stdpopsim.Species(
        id="TesSpe",
        name="Test species",
        common_name="Testy McTestface",
        genome=empty_genome,
    )
    parent_kwargs = {
        "species": stub_species,
        "id": "test_annotation",
        "url": "http://example.com/annotation.gff.gz",
        "zarr_url": "http://example.com/annotation.zip",
        "file_name": "annotation.gff.gz",
    }
    super().__init__(**parent_kwargs)
def __init__(self):
    """Create a stub species with an empty genome and initialise the parent
    class with placeholder genetic-map metadata, including a dummy checksum."""
    empty_genome = stdpopsim.Genome(chromosomes=[])
    stub_species = stdpopsim.Species(
        id="TesSpe",
        name="Test species",
        common_name="Testy McTestface",
        genome=empty_genome,
    )
    parent_kwargs = {
        "species": stub_species,
        "id": "test_map",
        "url": "http://example.com/genetic_map.tar.gz",
        # The url does not exist, so this checksum will never be checked.
        "sha256": "1234",
        "file_pattern": "prefix_{name}.txt",
    }
    super().__init__(**parent_kwargs)
def __init__(self):
    """Create a stub species with an empty genome and initialise the parent
    class with placeholder annotation metadata, including a dummy checksum."""
    empty_genome = stdpopsim.Genome(chromosomes=[])
    stub_species = stdpopsim.Species(
        id="TesSpe",
        name="Test species",
        common_name="Testy McTestface",
        genome=empty_genome,
    )
    parent_kwargs = {
        "species": stub_species,
        "id": "test_annotation",
        "url": "http://example.com/annotation.gff.gz",
        "zarr_url": "http://example.com/annotation.zip",
        # This checksum shouldn't be checked anywhere.
        "zarr_sha256": "1234",
        "description": "test annotation",
    }
    super().__init__(**parent_kwargs)
def __init__(self):
    """Create a stub species with an empty genome and initialise the parent
    class with placeholder annotation metadata and dummy checksums."""
    empty_genome = stdpopsim.Genome(chromosomes=[])
    stub_species = stdpopsim.Species(
        id="TesSpe",
        ensembl_id="test_species",
        name="Test species",
        common_name="Testy McTestface",
        genome=empty_genome,
    )
    parent_kwargs = {
        "species": stub_species,
        "id": "test_annotation",
        "url": "http://example.com/annotation.gff.gz",
        "intervals_url": "http://example.com/annotation.zip",
        # This checksum shouldn't be checked anywhere.
        "intervals_sha256": "1234",
        "gff_sha256": "6789",
        "description": "test annotation",
        "file_pattern": "yolo_{id}.txt",
        "annotation_source": "your mom",
        "annotation_type": "test",
    }
    super().__init__(**parent_kwargs)
"LGg": _overall_rate, "LGh": _overall_rate, "MT": _overall_rate, } _genome = stdpopsim.Genome.from_data( genome_data.data, recombination_rate=_recombination_rate, mutation_rate=_mutation_rate, citations=[_BourgeoisEtAl], ) _species = stdpopsim.Species( id="AnoCar", ensembl_id="anolis_carolinensis", name="Anolis carolinensis", common_name="Anole lizard", genome=_genome, generation_time=1.5, # they live between 1-2 years after they are able to mate # they mature 8 to 9 months after they are born # can live up to 8 years in captivity population_size=3.05e6, # poulation size caculated from theta caculations # theta = 4Neu, theta from table 1 # Ne averaged across the 5 populations from BourgeoisEtAl citations=[_LovernEtAl, _BourgeoisEtAl], ) stdpopsim.register_species(_species)
), ], ) stdpopsim.utils.append_common_synonyms(_genome) _species = stdpopsim.Species( id="ChlRei", ensembl_id="chlamydomonas_reinhardtii", name="Chlamydomonas reinhardtii", common_name="Chlamydomonas reinhardtii", genome=_genome, generation_time=1 / 876, population_size=1.4 * 1e-7, citations=[ stdpopsim.Citation( author="Ness et al.", year=2016, doi="https://doi.org/10.1093/molbev/msv272", reasons={stdpopsim.CiteReason.POP_SIZE}, # Quebec population ), stdpopsim.Citation( author="Vítová et al", year=2011, doi="https://doi.org/10.1007/s00425-011-1427-7", reasons={stdpopsim.CiteReason.GEN_TIME}, ), ], ) stdpopsim.register_species(_species)
reasons={stdpopsim.CiteReason.ASSEMBLY},
)

# Build the genome from the packaged genome data plus the recombination and
# mutation rates defined earlier in this module.
_genome = stdpopsim.Genome.from_data(
    genome_data.data,
    recombination_rate=_recombination_rate,
    mutation_rate=_mutation_rate,
    citations=[
        _NeneEtAl,
        _JunejaEtAl,
        _CrawfordEtAl,
        _KeightleyEtAl,
    ],
)

# Species definition for Aedes aegypti; it is registered with stdpopsim
# immediately below.
_species = stdpopsim.Species(
    id="AedAeg",
    ensembl_id="aedes_aegypti_lvpagwg",
    name="Aedes aegypti",
    common_name="Yellow fever mosquito",
    genome=_genome,
    generation_time=1 / 15,
    # the estimated population size today of the modern Senegal forest
    # population
    population_size=1e6,
    citations=[_CrawfordEtAl],
)

stdpopsim.register_species(_species)
stdpopsim.Chromosome(
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            # Harland et al. (2017), sex-averaged estimate per bp per generation.
            mutation_rate=1.2e-8,
            recombination_rate=_recombination_rate_data[name],
        ))

# Genome assembled from the chromosome list built above, with one citation
# per reason (mutation rate, recombination rate, assembly).
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    citations=[
        _HarlandEtAl.because(stdpopsim.CiteReason.MUT_RATE),
        _MaEtAl.because(stdpopsim.CiteReason.REC_RATE),
        _RosenEtAl.because(stdpopsim.CiteReason.ASSEMBLY),
    ],
)

# Species definition for cattle; it is registered with stdpopsim immediately
# below.
# NOTE(review): the species epithet is conventionally lowercase
# ("Bos taurus") - confirm whether callers match on this exact string before
# changing it.
_species = stdpopsim.Species(
    id="BosTau",
    ensembl_id="bos_taurus",
    name="Bos Taurus",
    common_name="Cattle",
    genome=_genome,
    generation_time=5,
    population_size=62000,
    citations=[_MacLeodEtAl],
)

stdpopsim.register_species(_species)
# Species definition for the mallard (Anas platyrhynchos).
_species = stdpopsim.Species(
    genome=_genome,
    id="AnaPla",
    ensembl_id="anas_platyrhynchos",
    name="Anas platyrhynchos",
    common_name="Mallard",
    # description="The 'mallard' species complex consists of 14 hybridizing and "
    # "recently diverged species living around the world, ranging from the holarctic "
    # "mallard with >15M individuals today in North America alone to "
    # "endangered endemics in Hawaii and New Zealand. The assembly, "
    # "recombination rates, and default Ne were estimated with wild Chinese "
    # "mallards.",
    #
    # Ne is chosen from theta = 4 Ne u (Guo et al 2021):
    # theta = 0.003 (Figure 1), u as above (the paper uses a rate from chicken)
    population_size=156000,
    # Generation time estimate, quoted from Lavretsky et al. (2020):
    # Generation time (G) was calculated as G = \alpha + (s/(1 − s)),
    # where \alpha is the age of maturity and s is the expected adult
    # survival rate (Sather et al., 2005). The age of maturity for mallard-
    # like ducks generally is one year (i.e., \alpha = 1), and the average
    # adult survival rate is 0.54 (range: 0.34–0.74) and 0.54 (range: 0.4–0.70)
    # for mallards and black ducks, respectively (Nichols, Obrecht, & Hines, 1987).
    # Using an overall survival rate average of 0.54 for the two species, we
    # estimated the generation time to be 4.0 years.
    generation_time=4,
    citations=[
        _LavretskyEtAl2020,
        _GuoEtAl2020,
    ],
)
assembly_citations=[
        stdpopsim.Citation(
            doi="https://doi.org/10.1093/nar/gkm965",
            year="2007",
            author="Swarbreck et al.",
            reasons={stdpopsim.CiteReason.ASSEMBLY})])

# Species definition for Arabidopsis thaliana, with a separate citation list
# for the generation time and for the population size; it is registered with
# stdpopsim immediately below.
_species = stdpopsim.Species(
    id="AraTha",
    name="Arabidopsis thaliana",
    common_name="A. thaliana",
    genome=_genome,
    generation_time=1.0,
    generation_time_citations=[stdpopsim.Citation(
        doi="https://doi.org/10.1890/0012-9658(2002)083[1006:GTINSO]2.0.CO;2",
        year="2002",
        author="Donohue",
        reasons={stdpopsim.CiteReason.GEN_TIME})],
    population_size=10**4,
    population_size_citations=[stdpopsim.Citation(
        doi="https://doi.org/10.1016/j.cell.2016.05.063",
        year="2016",
        author="1001GenomesConsortium",
        reasons={stdpopsim.CiteReason.POP_SIZE})]
)

stdpopsim.register_species(_species)


###########################################################
#
# Genetic maps
#
))

# Genome assembled from the chromosome list built above; the assembly name
# and accession are read from the packaged genome data.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    assembly_name=genome_data.data["assembly_name"],
    assembly_accession=genome_data.data["assembly_accession"],
    citations=[
        _SchriderEtAl.because(stdpopsim.CiteReason.MUT_RATE),
        _DosSantosEtAl,
        _HoskinsEtAl,
        _ComeronEtAl.because(stdpopsim.CiteReason.REC_RATE),
    ],
)
# Add the common chromosome-name synonyms to the genome before it is used.
stdpopsim.utils.append_common_synonyms(_genome)

# Species definition for Drosophila melanogaster; it is registered with
# stdpopsim immediately below.
_species = stdpopsim.Species(
    id="DroMel",
    ensembl_id="drosophila_melanogaster",
    name="Drosophila melanogaster",
    common_name="D. melanogaster",
    genome=_genome,
    generation_time=0.1,
    # Population size is the older of two population sizes estimated by
    # Li and Stephan in a two-epoch model of African populations.
    # N_A0 is given as 8.603e6, and N_A1 (used here) is 5 times smaller.
    population_size=1720600,
    citations=[_LiAndStephan],
)

stdpopsim.register_species(_species)
mutation_rate=1.5e-8,
            recombination_rate=float(mean_rr)))

# Genome assembled from the chromosome list built above.
_genome = stdpopsim.Genome(chromosomes=_chromosomes,
                           mutation_rate_citations=[
                               _nater2017.because(
                                   stdpopsim.CiteReason.MUT_RATE)
                           ])

# Species definition for the Sumatran orangutan; generation time and
# population size both cite _locke2011. Registered with stdpopsim below.
_species = stdpopsim.Species(
    id="PonAbe",
    name="Pongo abelii",
    common_name="Sumatran orangutan",
    genome=_genome,
    generation_time=20,
    generation_time_citations=[
        _locke2011.because(stdpopsim.CiteReason.GEN_TIME)
    ],
    population_size=1.79e4,
    population_size_citations=[
        _locke2011.because(stdpopsim.CiteReason.POP_SIZE)
    ])

stdpopsim.register_species(_species)


###########################################################
#
# Genetic maps
#
###########################################################
# Citation backing the population size used for this species.
_WallbergEtAl = stdpopsim.Citation(
    author="Wallberg et al.",
    year=2014,
    doi="https://doi.org/10.1038/ng.3077",
    reasons={stdpopsim.CiteReason.POP_SIZE},
)

# Citation backing the generation time used for this species.
_NelsonEtAl = stdpopsim.Citation(
    author="Nelson et al.",
    year=2017,
    doi="https://doi.org/10.1111/mec.14122",
    reasons={stdpopsim.CiteReason.GEN_TIME},
)

# Species definition for Apis mellifera; it is registered with stdpopsim
# immediately below.
_species = stdpopsim.Species(
    genome=_genome,
    id="ApiMel",
    ensembl_id="apis_mellifera",
    name="Apis mellifera",
    common_name="Apis mellifera (DH4)",
    population_size=2e05,
    generation_time=2,
    citations=[
        _WallbergEtAl,
        _NelsonEtAl,
    ],
)

stdpopsim.register_species(_species)
_chromosomes = [] for line in _chromosome_data.splitlines(): name, length, mean_rr = line.split()[:3] _chromosomes.append( stdpopsim.Chromosome( id=name, length=int(length), mutation_rate=1e-8, # WRONG!, recombination_rate=float(mean_rr))) _genome = stdpopsim.Genome(chromosomes=_chromosomes) _species = stdpopsim.Species( id="homsap", name="H**o sapiens", genome=_genome, # TODO reference for these generation_time=25, population_size=10**4) stdpopsim.register_species(_species) ########################################################### # # Genetic maps # ########################################################### _gm = stdpopsim.GeneticMap( species=_species, name="HapmapII_GRCh37",
author="Keightley et al",
            year=2015,
            doi="https://doi.org/10.1093/molbev/msu302",
            reasons={stdpopsim.CiteReason.MUT_RATE},
        ),
    ],
)
# Add the common chromosome-name synonyms to the genome before it is used.
stdpopsim.utils.append_common_synonyms(_genome)

# Species definition for Heliconius melpomene; a single citation covers both
# the population size and the generation time. Registered with stdpopsim
# immediately below.
_species = stdpopsim.Species(
    id="HelMel",
    ensembl_id="heliconius_melpomene",
    name="Heliconius melpomene",
    common_name="Heliconius melpomene",
    genome=_genome,
    generation_time=35 / 365,  # 35 days
    population_size=2111109,
    citations=[
        stdpopsim.Citation(
            author="Pardo-Diaz et al",
            year=2012,
            doi="https://doi.org/10.1371/journal.pgen.1002752",
            reasons={
                stdpopsim.CiteReason.POP_SIZE,
                stdpopsim.CiteReason.GEN_TIME
            },
        ),
    ],
)

stdpopsim.register_species(_species)
mutation_rate=2.0e-8,
            recombination_rate=float(mean_rr)))

# Genome assembled from the chromosome list built above.
_genome = stdpopsim.Genome(chromosomes=_chromosomes,
                           mutation_rate_citations=[
                               _locke2011.because(
                                   stdpopsim.CiteReason.MUT_RATE)
                           ])

# Species definition for the Bornean orangutan; generation time and
# population size both cite _locke2011. Registered with stdpopsim below.
_species = stdpopsim.Species(
    id="PonPyg",
    name="Pongo pygmaeus",
    common_name="Bornean orangutan",
    genome=_genome,
    generation_time=20,
    generation_time_citations=[
        _locke2011.because(stdpopsim.CiteReason.GEN_TIME)
    ],
    population_size=1.79e4,
    population_size_citations=[
        _locke2011.because(stdpopsim.CiteReason.POP_SIZE)
    ])

stdpopsim.register_species(_species)


###########################################################
#
# Genetic maps
#
###########################################################
# So we use the value of 1 generation per day.

# Population size
# ---------------
# Estimated with the Watterson estimator:
#     theta = 2.Ne.mu = S / sum_{i=1}^{k=n-1}(1/k)
# where n is the number of samples and S the number of segregating sites.
# From Da Cunha et al, we have
#     S = 3922 / 1.86Mb = 2.1×10−3 SNP/bp; k = 79;
#     mu = 1.53×10−9 SNP/bp/generation
# So Ne ~ 140000

# Species definition for Streptococcus agalactiae; it is registered with
# stdpopsim immediately below.
_species = stdpopsim.Species(
    genome=_genome,
    id="StrAga",
    ensembl_id="NA",
    name="Streptococcus agalactiae",
    common_name="Group B Streptococcus",
    population_size=140000,
    generation_time=1 / 365,  # year / generations
    citations=[
        _DaCunha_et_al.because(stdpopsim.CiteReason.POP_SIZE),
        stdpopsim.Citation(
            doi="https://doi.org/10.1086/284168",
            year=1983,
            author="Savageau M.A.",
            reasons={stdpopsim.CiteReason.GEN_TIME},
        ),
    ],
)

stdpopsim.register_species(_species)
)

# Genome assembled from the chromosome list built above, with a separate
# citation list for mutation rate, recombination rate and assembly.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    mutation_rate_citations=[
        _HarlandEtAl.because(stdpopsim.CiteReason.MUT_RATE),
    ],
    recombination_rate_citations=[_MaEtAl.because(stdpopsim.CiteReason.REC_RATE)],
    assembly_citations=[_RosenEtAl.because(stdpopsim.CiteReason.ASSEMBLY)],
)

# Species definition for cattle; generation time and population size both
# cite _MacLeodEtAl. Registered with stdpopsim immediately below.
# NOTE(review): the species epithet is conventionally lowercase
# ("Bos taurus") - confirm whether callers match on this exact string before
# changing it.
_species = stdpopsim.Species(
    id="BosTau",
    name="Bos Taurus",
    common_name="Cattle",
    genome=_genome,
    generation_time=5,
    generation_time_citations=[_MacLeodEtAl.because(stdpopsim.CiteReason.GEN_TIME)],
    population_size=62000,
    population_size_citations=[_MacLeodEtAl.because(stdpopsim.CiteReason.POP_SIZE)],
)

stdpopsim.register_species(_species)


###########################################################
#
# Demographic models
#
###########################################################
chromosomes=_chromosomes,
    assembly_name=genome_data.data["assembly_name"],
    assembly_accession=genome_data.data["assembly_accession"],
    mutation_rate_citations=[
        _wielgoss_et_al.because(stdpopsim.CiteReason.MUT_RATE),
    ],
    assembly_citations=[_blattner_et_al.because(stdpopsim.CiteReason.ASSEMBLY)],
)

# Species definition for Escherichia coli; it is registered with stdpopsim
# immediately below.
_species = stdpopsim.Species(
    id="EscCol",
    name="Escherichia coli",
    common_name="E. coli",
    # We use the K-12 strain, because the parameters we're using more
    # closely match this strain than the ensembl default (HUSEC2011).
    ensembl_id="escherichia_coli_str_k_12_substr_mg1655_gca_000005845",
    genome=_genome,
    # E. coli K-12 strain MG1655 "doubling time during steady-state growth in
    # Luria-Bertani broth was 20 min".
    # The value below is in years per generation: 20 min / 525600 min/year.
    generation_time=0.00003805175,  # 1.0 / (525600 min/year / 20 min/gen)
    generation_time_citations=[_sezonov_et_al.because(stdpopsim.CiteReason.GEN_TIME)],
    # Hartl et al. calculated Ne for "natural isolates of E. coli",
    # assuming mu=5e-10 (from Drake 1991).
    population_size=1.8e8,
    population_size_citations=[_hartl_et_al.because(stdpopsim.CiteReason.POP_SIZE)],
)

stdpopsim.register_species(_species)
mutation_rate=1e-5+2e-4,
            recombination_rate=0.0))
    # mean_conversion_rate=8.9e-11 # not implemented yet!
    # mean_conversion_length=542 # not implemented yet!


#: :class:`stdpopsim.Genome` definition for E. Coli.
# Chromosome length data is based on strain K-12.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    mutation_rate_citations=[
        _perfeito_et_al.because(stdpopsim.CiteReason.MUT_RATE),
        _kibota_and_lynch.because(stdpopsim.CiteReason.MUT_RATE),
    ],
    assembly_citations=[
        _blattner_et_al.because(stdpopsim.CiteReason.ASSEMBLY)])

# Species definition for Escherichia coli; it is registered with stdpopsim
# immediately below.
_species = stdpopsim.Species(
    id="EscCol",
    name="Escherichia coli",
    common_name="E. coli",
    genome=_genome,
    # Years per generation: 20 min / 525600 min/year.
    generation_time=0.00003805175,  # 1.0 / (525600 min/year / 20 min/gen)
    generation_time_citations=[
        _sezonov_et_al.because(stdpopsim.CiteReason.GEN_TIME)],
    population_size=1.8e8,
    population_size_citations=[
        _lapierre_et_al.because(stdpopsim.CiteReason.POP_SIZE)])

stdpopsim.register_species(_species)
# The genome data could not be pulled automatically from Ensembl,
# so the most up-to-date assembly currently available from NCBI
# is used instead.
_genome = stdpopsim.Genome.from_data(
    genome_data.data,
    mutation_rate=_mutation_rate,
    recombination_rate=_recombination_rate,
    citations=[
        _ChakrabortyEtAl,
        _ComeronEtAl,
        _LegrandEtAl,
    ],
)

# Species definition for Drosophila sechellia; it is registered with
# stdpopsim immediately below.
# The generation time is the one used by Legrand et al. in an ABC
# selection of demographic scenarios (page 1200); the population size
# was estimated in the same paper (page 1202).
_species = stdpopsim.Species(
    genome=_genome,
    id="DroSec",
    ensembl_id="drosophila_sechellia",
    name="Drosophila sechellia",
    common_name="Drosophila sechellia",
    population_size=100000,
    generation_time=0.05,
    citations=[_LegrandEtAl],
)

stdpopsim.register_species(_species)
mutation_rate=5.49e-9, # _SchriderEtAl de novo mutation rate recombination_rate=_recombination_rate_data[name], ) ) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], citations=[ _SchriderEtAl.because(stdpopsim.CiteReason.MUT_RATE), _DosSantosEtAl, _HoskinsEtAl, _ComeronEtAl.because(stdpopsim.CiteReason.REC_RATE), ], ) _species = stdpopsim.Species( id="DroMel", ensembl_id="drosophila_melanogaster", name="Drosophila melanogaster", common_name="D. melanogaster", genome=_genome, generation_time=0.1, population_size=1720600, citations=[_LiAndStephan], ) stdpopsim.register_species(_species)
reasons={stdpopsim.CiteReason.REC_RATE}, ), stdpopsim.Citation( author="Liu et al.", year=2016, doi="https://10.1111/mec.13827", reasons={stdpopsim.CiteReason.MUT_RATE}, ), ], ) _species = stdpopsim.Species( id="GasAcu", ensembl_id="9307941", name="Gasterosteus aculeatus", common_name="Three-spined stickleback", genome=_genome, generation_time=1, population_size=1e4, citations=[ stdpopsim.Citation( author="Liu et al.", year=2016, doi="https://10.1111/mec.13827", reasons={stdpopsim.CiteReason.POP_SIZE, stdpopsim.CiteReason.GEN_TIME}, ), ], ) stdpopsim.register_species(_species)
chromosomes=_chromosomes,
    assembly_name=genome_data.data["assembly_name"],
    assembly_accession=genome_data.data["assembly_accession"],
    mutation_rate_citations=[
        _nater2017.because(stdpopsim.CiteReason.MUT_RATE)
    ],
)

# Species definition for the Sumatran orangutan; it is registered with
# stdpopsim immediately below.
_species = stdpopsim.Species(
    id="PonAbe",
    name="Pongo abelii",
    common_name="Sumatran orangutan",
    genome=_genome,
    # generation time used by Locke et al. without further citation
    generation_time=20,
    generation_time_citations=[
        _locke2011.because(stdpopsim.CiteReason.GEN_TIME)
    ],
    # Locke et al. inferred ancestral Ne
    population_size=1.79e4,
    population_size_citations=[
        _locke2011.because(stdpopsim.CiteReason.POP_SIZE)
    ],
)

stdpopsim.register_species(_species)


###########################################################
#
# Genetic maps
#
###########################################################
recombination_rate_citations=[
        _CampbellEtAl.because(stdpopsim.CiteReason.REC_RATE)
    ],
    assembly_citations=[
        _LindbladTohEtAl.because(stdpopsim.CiteReason.ASSEMBLY)
    ],
)

# Species definition for the dog; it is registered with stdpopsim immediately
# below. The generation-time citation list is intentionally empty (see the
# comment inside it).
_species = stdpopsim.Species(
    id="CanFam",
    name="Canis familiaris",
    common_name="Dog",
    genome=_genome,
    generation_time=3,
    generation_time_citations=[
        # Everyone uses 3 years because everyone else uses it.
        # It's likely higher, at least in wolves:
        # https://pubs.er.usgs.gov/publication/70187564
    ],
    population_size=13000,  # ancestral dog size
    population_size_citations=[
        _LindbladTohEtAl.because(stdpopsim.CiteReason.POP_SIZE)
    ],
)

stdpopsim.register_species(_species)

# Genetic map attached to the species defined above.
_gm = stdpopsim.GeneticMap(
    species=_species,
    id="Campbell2016_CanFam3_1",
    description="Pedigree-based crossover map from 237 individuals",
    long_description="""
# based on `dm6 <https://www.ncbi.nlm.nih.gov/assembly/GCF_000001215.4/>`_. _genome = stdpopsim.Genome(chromosomes=_chromosomes, mutation_rate_citations=[ _SchriderEtAl.because( stdpopsim.CiteReason.MUT_RATE) ], assembly_citations=[_DosSantosEtAl]) _species = stdpopsim.Species( id="DroMel", name="Drosophila melanogaster", common_name="D. melanogaster", genome=_genome, generation_time=0.1, generation_time_citations=[ _LiAndStephan.because(stdpopsim.CiteReason.GEN_TIME) ], population_size=1720600, population_size_citations=[ _LiAndStephan.because(stdpopsim.CiteReason.POP_SIZE) ]) stdpopsim.register_species(_species) ########################################################### # # Genetic maps # ###########################################################
recombination_rate=_recombination_rate_data[name], )) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], citations=[ _genome2001, _tian2019.because(stdpopsim.CiteReason.MUT_RATE), _hapmap2007.because(stdpopsim.CiteReason.REC_RATE), ], ) stdpopsim.utils.append_common_synonyms(_genome) _species = stdpopsim.Species( id="HomSap", ensembl_id="homo_sapiens", name="H**o sapiens", common_name="Human", genome=_genome, generation_time=30, population_size=10**4, citations=[ _tremblay2000.because(stdpopsim.CiteReason.GEN_TIME), _takahata1993.because(stdpopsim.CiteReason.POP_SIZE), ], ) stdpopsim.register_species(_species)
_CampbellEtAl.because(stdpopsim.CiteReason.REC_RATE),
        _LindbladTohEtAl.because(stdpopsim.CiteReason.ASSEMBLY),
    ],
)

# Species definition for the dog; it is registered with stdpopsim immediately
# below. Only the population size carries a citation; the generation time is
# justified by the comment inside the citations list.
_species = stdpopsim.Species(
    id="CanFam",
    ensembl_id="canis_familiaris",
    name="Canis familiaris",
    common_name="Dog",
    genome=_genome,
    population_size=13000,  # ancestral dog size
    generation_time=3,
    citations=[
        # Everyone uses 3 years for generation time because everyone else uses it.
        # It's likely higher, at least in wolves:
        # https://academic.oup.com/mbe/article/35/6/1366/4990884
        # Reasoning behind a generation time of 3 years:
        # Consider two use cases for CanFam simulations:
        # (1) for domestic dog simulations, and (2) for wolf+dog simulations
        # (or ancestral dogs).
        # In case (1), maybe 3 year generations are more appropriate because of human
        # intervention in breeding. In case (2), you might want to match what other
        # studies have done (thus using 3 year generations), or you might want to
        # consider what is known about modern wolves.
        _LindbladTohEtAl.because(stdpopsim.CiteReason.POP_SIZE)
    ],
)

stdpopsim.register_species(_species)
length=data["length"], synonyms=data["synonyms"], # Nater et al. 2017 used mu=1.5e-8 per generation, based on the # assumption that it's similar to humans and chimps. mutation_rate=1.5e-8, recombination_rate=_recombination_rate_data[name], ) ) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], citations=[_nater2017], ) _species = stdpopsim.Species( id="PonAbe", ensembl_id="pongo_abelii", name="Pongo abelii", common_name="Sumatran orangutan", genome=_genome, # generation time used by Locke et al. without further citation generation_time=20, # Locke et al. inferred ancestral Ne population_size=1.79e4, citations=[_locke2011], ) stdpopsim.register_species(_species)