Пример #1
0
 def __init__(self):
     """Initialise the parent with a dummy species and the test map settings.

     The species wraps a genome that has no chromosomes; the name, URL and
     file pattern are fixed test values.
     """
     empty_genome = stdpopsim.Genome(chromosomes=[])
     test_species = stdpopsim.Species(
         id="tesspe",
         name="Test species",
         genome=empty_genome,
     )
     parent_kwargs = {
         "species": test_species,
         "name": "test_map",
         "url": "http://example.com/genetic_map.tar.gz",
         "file_pattern": "prefix_{name}.txt",
     }
     super().__init__(**parent_kwargs)
Пример #2
0
 def __init__(self):
     """Initialise the parent with a dummy species and test annotation data.

     The species is built around a genome with an empty chromosome list;
     every URL and file name is a fixed test value.
     """
     species_fields = {
         "id": "TesSpe",
         "name": "Test species",
         "common_name": "Testy McTestface",
         "genome": stdpopsim.Genome(chromosomes=[]),
     }
     test_species = stdpopsim.Species(**species_fields)
     super().__init__(
         species=test_species,
         id="test_annotation",
         url="http://example.com/annotation.gff.gz",
         zarr_url="http://example.com/annotation.zip",
         file_name="annotation.gff.gz",
     )
Пример #3
0
 def __init__(self):
     """Initialise the parent with a dummy species and the test map settings.

     The species wraps a genome that has no chromosomes; the id, URL,
     checksum and file pattern are fixed test values.
     """
     test_species = stdpopsim.Species(
         id="TesSpe",
         name="Test species",
         common_name="Testy McTestface",
         genome=stdpopsim.Genome(chromosomes=[]),
     )
     parent_kwargs = dict(
         species=test_species,
         id="test_map",
         url="http://example.com/genetic_map.tar.gz",
         # The URL does not exist, so this checksum is never verified.
         sha256="1234",
         file_pattern="prefix_{name}.txt",
     )
     super().__init__(**parent_kwargs)
Пример #4
0
 def __init__(self):
     """Initialise the parent with a dummy species and test annotation data.

     The species is built around a genome with an empty chromosome list;
     the URLs, checksum and description are fixed test values.
     """
     empty_genome = stdpopsim.Genome(chromosomes=[])
     species_fields = dict(
         id="TesSpe",
         name="Test species",
         common_name="Testy McTestface",
         genome=empty_genome,
     )
     parent_kwargs = dict(
         species=stdpopsim.Species(**species_fields),
         id="test_annotation",
         url="http://example.com/annotation.gff.gz",
         zarr_url="http://example.com/annotation.zip",
         # This checksum is not expected to be verified anywhere.
         zarr_sha256="1234",
         description="test annotation",
     )
     super().__init__(**parent_kwargs)
Пример #5
0
 def __init__(self):
     """Initialise the parent with a dummy species and test annotation data.

     The species wraps a genome that has no chromosomes; every URL,
     checksum and descriptive field below is a fixed test value.
     """
     test_species = stdpopsim.Species(
         id="TesSpe",
         ensembl_id="test_species",
         name="Test species",
         common_name="Testy McTestface",
         genome=stdpopsim.Genome(chromosomes=[]),
     )
     parent_kwargs = {
         "species": test_species,
         "id": "test_annotation",
         "url": "http://example.com/annotation.gff.gz",
         "intervals_url": "http://example.com/annotation.zip",
         # This checksum is not expected to be verified anywhere.
         "intervals_sha256": "1234",
         "gff_sha256": "6789",
         "description": "test annotation",
         "file_pattern": "yolo_{id}.txt",
         "annotation_source": "your mom",
         "annotation_type": "test",
     }
     super().__init__(**parent_kwargs)
        length=4641652,
        # Lapierre et al. (2016) refer to:
        #  Genomic adaptive mutation rate: 1e-5, Perfeito et al. (2007), and
        #  Genomic deleterious mutation rate: 2e−4, Kibota and Lynch (1996).
        mutation_rate=1e-5+2e-4,
        recombination_rate=0.0))
# mean_conversion_rate=8.9e-11 # not implemented yet!
# mean_conversion_length=542 # not implemented yet!

#: :class:`stdpopsim.Genome` definition for E. coli.
# Chromosome length data is based on strain K-12.

# Genome built from the chromosome list defined above, with two citations for
# the mutation rate and one for the assembly.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    mutation_rate_citations=[
        _perfeito_et_al.because(stdpopsim.CiteReason.MUT_RATE),
        _kibota_and_lynch.because(stdpopsim.CiteReason.MUT_RATE),
    ],
    assembly_citations=[
        _blattner_et_al.because(stdpopsim.CiteReason.ASSEMBLY),
    ],
)

# Species record for E. coli; each scalar parameter is followed by the
# citation that justifies it.
_species = stdpopsim.Species(
    id="EscCol",
    name="Escherichia coli",
    common_name="E. coli",
    genome=_genome,
    # One generation every 20 minutes, expressed in years:
    # 1.0 / (525600 min/year / 20 min/gen)
    generation_time=0.00003805175,
    generation_time_citations=[
        _sezonov_et_al.because(stdpopsim.CiteReason.GEN_TIME),
    ],
    population_size=1.8e8,
    population_size_citations=[
        _lapierre_et_al.because(stdpopsim.CiteReason.POP_SIZE),
    ],
)
Пример #7
0
# Append one Chromosome per entry of genome_data.data["chromosomes"]. Length
# and synonyms come from the entry itself; the recombination rate is looked up
# by chromosome id and the mutation rate is the same constant for all.
for name, data in genome_data.data["chromosomes"].items():
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            # Harland et al. (2017), sex-averaged estimate per bp per generation.
            mutation_rate=1.2e-8,
            recombination_rate=_recombination_rate_data[name],
        )
    )

# Genome assembled from the chromosomes above, with one citation each for the
# mutation rate, the recombination rate and the assembly.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    mutation_rate_citations=[
        _HarlandEtAl.because(stdpopsim.CiteReason.MUT_RATE),
    ],
    recombination_rate_citations=[_MaEtAl.because(stdpopsim.CiteReason.REC_RATE)],
    assembly_citations=[_RosenEtAl.because(stdpopsim.CiteReason.ASSEMBLY)],
)

# Species record for cattle. The scientific name follows binomial convention:
# capitalised genus, lower-case specific epithet.
_species = stdpopsim.Species(
    id="BosTau",
    name="Bos taurus",
    common_name="Cattle",
    genome=_genome,
    generation_time=5,
    generation_time_citations=[_MacLeodEtAl.because(stdpopsim.CiteReason.GEN_TIME)],
    population_size=62000,
    population_size_citations=[_MacLeodEtAl.because(stdpopsim.CiteReason.POP_SIZE)],
)
Пример #8
0
# Build one Chromosome per entry of genome_data.data["chromosomes"]. Length
# and synonyms come from the entry itself; the recombination rate is looked up
# by chromosome id and the mutation rate is the same constant for all.
_chromosomes = []
for name, data in genome_data.data["chromosomes"].items():
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            # Harland et al. (2017), sex-averaged estimate per bp per generation.
            mutation_rate=1.2e-8,
            recombination_rate=_recombination_rate_data[name],
        )
    )

# Genome assembled from the chromosomes above; each citation is tagged with
# the reason it is cited.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    citations=[
        _HarlandEtAl.because(stdpopsim.CiteReason.MUT_RATE),
        _MaEtAl.because(stdpopsim.CiteReason.REC_RATE),
        _RosenEtAl.because(stdpopsim.CiteReason.ASSEMBLY),
    ],
)

# Species record for cattle. The scientific name follows binomial convention:
# capitalised genus, lower-case specific epithet.
_species = stdpopsim.Species(
    id="BosTau",
    ensembl_id="bos_taurus",
    name="Bos taurus",
    common_name="Cattle",
    genome=_genome,
    generation_time=5,
    population_size=62000,
    citations=[_MacLeodEtAl],
)
Пример #9
0
# Append one Chromosome per entry of genome_data.data["chromosomes"]. Length
# and synonyms come from the entry itself; the recombination rate is looked up
# by chromosome id and the mutation rate is the same constant for all.
for name, data in genome_data.data["chromosomes"].items():
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            # Nater et al. 2017 used mu=1.5e-8 per generation, based on the
            # assumption that it's similar to humans and chimps.
            mutation_rate=1.5e-8,
            recombination_rate=_recombination_rate_data[name],
        )
    )

# Genome assembled from the chromosomes above; the assembly name and accession
# are read from genome_data.data.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    assembly_name=genome_data.data["assembly_name"],
    assembly_accession=genome_data.data["assembly_accession"],
    citations=[_nater2017],
)

# Species record for the Sumatran orangutan, wrapping the genome above.
_species = stdpopsim.Species(
    id="PonAbe",
    ensembl_id="pongo_abelii",
    name="Pongo abelii",
    common_name="Sumatran orangutan",
    genome=_genome,
    # generation time used by Locke et al. without further citation
    generation_time=20,
    # Locke et al. inferred ancestral Ne
    population_size=1.79e4,
    citations=[_locke2011],
)
Пример #10
0
    name, length = line.split()[:2]
    _chromosomes.append(stdpopsim.Chromosome(
        id=name, length=int(length),
        mutation_rate=7e-9,
        recombination_rate=200 / 124000 / 2 / 1e6))

# Genome built from the chromosome list above. Each parameter group carries a
# single inline citation tagged with the reason it is cited.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    mutation_rate_citations=[
        stdpopsim.Citation(
            author="Ossowski et al.",
            year="2010",
            doi="https://doi.org/10.1126/science.1180677",
            reasons={stdpopsim.CiteReason.MUT_RATE},
        ),
    ],
    recombination_rate_citations=[
        stdpopsim.Citation(
            author="Huber et al.",
            year="2014",
            doi="https://doi.org/10.1093/molbev/msu247",
            reasons={stdpopsim.CiteReason.REC_RATE},
        ),
    ],
    assembly_citations=[
        stdpopsim.Citation(
            author="Swarbreck et al.",
            year="2007",
            doi="https://doi.org/10.1093/nar/gkm965",
            reasons={stdpopsim.CiteReason.ASSEMBLY},
        ),
    ],
)

_species = stdpopsim.Species(
    id="AraTha",
    name="Arabidopsis thaliana",
    common_name="A. thaliana",
    genome=_genome,
Пример #11
0
# Append one Chromosome per entry of genome_data.data["chromosomes"]; the
# recombination rate is looked up by chromosome id.
for name, data in genome_data.data["chromosomes"].items():
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            # Nater et al. 2017 used mu=1.5e-8 per generation, on the
            # assumption that the rate is similar to humans and chimps.
            mutation_rate=1.5e-8,
            recombination_rate=_recombination_rate_data[name],
        )
    )

# Genome assembled from the chromosomes above; the assembly name and accession
# are read from genome_data.data.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    assembly_name=genome_data.data["assembly_name"],
    assembly_accession=genome_data.data["assembly_accession"],
    mutation_rate_citations=[_nater2017.because(stdpopsim.CiteReason.MUT_RATE)],
)

_species = stdpopsim.Species(
    id="PonAbe",
    name="Pongo abelii",
    common_name="Sumatran orangutan",
    genome=_genome,
    # generation time used by Locke et al. without further citation
    generation_time=20,
    generation_time_citations=[
        _locke2011.because(stdpopsim.CiteReason.GEN_TIME)
    ],
    # Locke et al. inferred ancestral Ne
Пример #12
0
            synonyms=data["synonyms"],
            # Wielgoss et al. (2011) calculated for strain REL606,
            # from synonymous substitutions over 40,000 generations.
            mutation_rate=8.9e-11,
            recombination_rate=0.0,
        )
    )

# mean_conversion_rate=8.9e-11 # not implemented yet!
# mean_conversion_length=542 # not implemented yet!

# Genome assembled from the chromosome list above; the assembly name and
# accession are read from genome_data.data, and the mutation rate and
# assembly each carry one citation.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    assembly_name=genome_data.data["assembly_name"],
    assembly_accession=genome_data.data["assembly_accession"],
    mutation_rate_citations=[
        _wielgoss_et_al.because(stdpopsim.CiteReason.MUT_RATE),
    ],
    assembly_citations=[_blattner_et_al.because(stdpopsim.CiteReason.ASSEMBLY)],
)


_species = stdpopsim.Species(
    id="EscCol",
    name="Escherichia coli",
    common_name="E. coli",
    # We use the K-12 strain, because the parameters we're using more
    # closely match this strain than the ensembl default (HUSEC2011).
    ensembl_id="escherichia_coli_str_k_12_substr_mg1655_gca_000005845",
    genome=_genome,
    # E. coli K-12 strain MG1655 "doubling time during steady-state growth in
Пример #13
0
        stdpopsim.Chromosome(
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            mutation_rate=5.49e-9,  # _SchriderEtAl de novo mutation rate
            recombination_rate=_recombination_rate_data[name],
        )
    )


# Genome assembled from the chromosome list above; the assembly name and
# accession are read from genome_data.data. Two citations are tagged with a
# reason (mutation rate, recombination rate); the other two are passed as-is.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    assembly_name=genome_data.data["assembly_name"],
    assembly_accession=genome_data.data["assembly_accession"],
    citations=[
        _SchriderEtAl.because(stdpopsim.CiteReason.MUT_RATE),
        _DosSantosEtAl,
        _HoskinsEtAl,
        _ComeronEtAl.because(stdpopsim.CiteReason.REC_RATE),
    ],
)

_species = stdpopsim.Species(
    id="DroMel",
    ensembl_id="drosophila_melanogaster",
    name="Drosophila melanogaster",
    common_name="D. melanogaster",
    genome=_genome,
    generation_time=0.1,
    population_size=1720600,
    citations=[_LiAndStephan],
Пример #14
0
chr22 	 51304566 	 1.4445022767788226e-08
chrX 	 155270560 	 1.164662223273842e-08
chrY 	 59373566 	 0.0
"""

# Parse the whitespace-separated chromosome table: the first three fields of
# each line are the chromosome name, its length and its mean recombination
# rate.
_chromosomes = []
for line in _chromosome_data.splitlines():
    name, length, mean_rr = line.split()[:3]
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=int(length),
            mutation_rate=1e-8,  # WRONG!
            recombination_rate=float(mean_rr),
        )
    )

_genome = stdpopsim.Genome(chromosomes=_chromosomes)

# Species record for humans. The scientific name is spelled out in full; the
# previous literal had been corrupted to "H**o sapiens".
_species = stdpopsim.Species(
    id="homsap",
    name="Homo sapiens",
    genome=_genome,
    # TODO reference for these
    generation_time=25,
    population_size=10**4,
)

# Make the species available through the stdpopsim registry.
stdpopsim.register_species(_species)

###########################################################
#
# Genetic maps
#
Пример #15
0
        stdpopsim.Chromosome(
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            mutation_rate=1.84e-9,  # _Konrad et al. de-nove mutation rate,
            # it's not uniform and it's much better to use a mutation map.
            # mutation_rate=_mutation_rate_data[name],
            recombination_rate=_recombination_rate_data[name],
        ))

# Genome assembled from the chromosome list above; the assembly name and
# accession are read from genome_data.data. Two citations are tagged for the
# mutation rate, one for the recombination rate, and one is passed as-is.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    assembly_name=genome_data.data["assembly_name"],
    assembly_accession=genome_data.data["assembly_accession"],
    citations=[
        _genome1998,
        _KonradEtAl2019.because(stdpopsim.CiteReason.MUT_RATE),
        _KonradEtAl2017.because(stdpopsim.CiteReason.MUT_RATE),
        _Rockman2009.because(stdpopsim.CiteReason.REC_RATE),
    ],
)

_species = stdpopsim.Species(
    id="CaeEle",
    ensembl_id="",
    name="Caenorhabditis elegans",
    common_name="C. elegans",
    genome=_genome,
    generation_time=0.01,  # the generation time in the lab ~150
    # generation per year (0.00666), it should be less in the wild
    population_size=10000,
Пример #16
0
# Each line of the chromosome table holds exactly three whitespace-separated
# fields: chromosome name, length and mean recombination rate.
for line in _chromosome_data.splitlines():
    name, length, mean_rr = line.split()
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=int(length),
            mutation_rate=4e-9,  # based on non-CpG sites only
            recombination_rate=float(mean_rr),
        )
    )

# Genome built from the chromosomes above, with citations grouped by the
# parameter they support.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    mutation_rate_citations=[
        _SkoglundEtAl.because(stdpopsim.CiteReason.MUT_RATE),
        _FranzEtAl.because(stdpopsim.CiteReason.MUT_RATE),
    ],
    recombination_rate_citations=[_CampbellEtAl.because(stdpopsim.CiteReason.REC_RATE)],
    assembly_citations=[_LindbladTohEtAl.because(stdpopsim.CiteReason.ASSEMBLY)],
)

_species = stdpopsim.Species(
    id="CanFam",
    name="Canis familiaris",
    common_name="Dog",
    genome=_genome,
    generation_time=3,
    generation_time_citations=[
        # Everyone uses 3 years because everyone else uses it.
Пример #17
0
# Build one Chromosome per entry of genome_data.data["chromosomes"]; the
# mutation rate is a shared constant and the recombination rate is looked up
# by chromosome id.
_chromosomes = []
for name, data in genome_data.data["chromosomes"].items():
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            mutation_rate=1.29e-8,
            recombination_rate=_recombination_rate_data[name],
        )
    )

# Genome assembled from the chromosomes above; the assembly name and accession
# are read from genome_data.data.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    mutation_rate_citations=[
        _tian2019.because(stdpopsim.CiteReason.MUT_RATE),
    ],
    recombination_rate_citations=[
        _hapmap2007.because(stdpopsim.CiteReason.REC_RATE),
    ],
    assembly_name=genome_data.data["assembly_name"],
    assembly_accession=genome_data.data["assembly_accession"],
    assembly_citations=[
        _genome2001,
    ],
)

_species = stdpopsim.Species(
    id="HomSap",
    name="H**o sapiens",
    common_name="Human",
    genome=_genome,
    generation_time=30,
    generation_time_citations=[
        _tremblay2000.because(stdpopsim.CiteReason.GEN_TIME)
    ],
    population_size=10**4,
Пример #18
0
    author="Nater et al.",
    year=2017,
    doi="https://doi.org/10.1016/j.cub.2017.09.047")

# Parse the whitespace-separated chromosome table: the first three fields of
# each line are the chromosome name, its length and its mean recombination
# rate.
_chromosomes = []
for line in _chromosome_data.splitlines():
    name, length, mean_rr = line.split()[:3]
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=int(length),
            mutation_rate=1.5e-8,
            recombination_rate=float(mean_rr),
        )
    )

# Genome built from the chromosomes above, citing Nater et al. for the
# mutation rate.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    mutation_rate_citations=[
        _nater2017.because(stdpopsim.CiteReason.MUT_RATE),
    ],
)

_species = stdpopsim.Species(
    id="PonAbe",
    name="Pongo abelii",
    common_name="Sumatran orangutan",
    genome=_genome,
    generation_time=20,
    generation_time_citations=[
        _locke2011.because(stdpopsim.CiteReason.GEN_TIME)
    ],
    population_size=1.79e4,
    population_size_citations=[
        _locke2011.because(stdpopsim.CiteReason.POP_SIZE)
Пример #19
0
            recombination_rate=_recombination_rate_data[name],
        ))

# Genome assembled from the chromosome list above; the assembly name and
# accession are read from genome_data.data. The three inline citations are
# tagged for the mutation rate, the recombination rate and the assembly.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    assembly_name=genome_data.data["assembly_name"],
    assembly_accession=genome_data.data["assembly_accession"],
    citations=[
        stdpopsim.Citation(
            author="Ossowski et al.",
            year=2010,
            doi="https://doi.org/10.1126/science.1180677",
            reasons={stdpopsim.CiteReason.MUT_RATE},
        ),
        stdpopsim.Citation(
            author="Huber et al.",
            year=2014,
            doi="https://doi.org/10.1093/molbev/msu247",
            reasons={stdpopsim.CiteReason.REC_RATE},
        ),
        stdpopsim.Citation(
            doi="https://doi.org/10.1093/nar/gkm965",
            year=2007,
            author="Swarbreck et al.",
            reasons={stdpopsim.CiteReason.ASSEMBLY},
        ),
    ],
)
# NOTE(review): presumably adds conventional alternative chromosome names to
# the genome in place; confirm against stdpopsim.utils.
stdpopsim.utils.append_common_synonyms(_genome)

_species = stdpopsim.Species(
Пример #20
0
# Store a recombination rate of zero under the "mitochondrion_genome" key.
_recombination_rate_data["mitochondrion_genome"] = 0

# Build one Chromosome per entry of genome_data.data["chromosomes"]; the
# recombination rate is looked up by chromosome id.
_chromosomes = []
for name, data in genome_data.data["chromosomes"].items():
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            mutation_rate=5.49e-9,  # citation: _SchriderEtAl
            recombination_rate=_recombination_rate_data[name],
        )
    )

# Genome assembled from the chromosomes above; the assembly name and accession
# are read from genome_data.data.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    assembly_name=genome_data.data["assembly_name"],
    assembly_accession=genome_data.data["assembly_accession"],
    mutation_rate_citations=[_SchriderEtAl.because(stdpopsim.CiteReason.MUT_RATE)],
    assembly_citations=[_DosSantosEtAl],
)

_species = stdpopsim.Species(
    id="DroMel",
    name="Drosophila melanogaster",
    common_name="D. melanogaster",
    genome=_genome,
    generation_time=0.1,
    generation_time_citations=[
        _LiAndStephan.because(stdpopsim.CiteReason.GEN_TIME)
    ],
    population_size=1720600,
    population_size_citations=[
Пример #21
0
# The first two whitespace-separated fields of each table line are the
# chromosome name and its length; both rates are constants shared by all
# chromosomes.
for line in _chromosome_data.splitlines():
    name, length = line.split()[:2]
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=int(length),
            mutation_rate=7e-9,
            recombination_rate=8.1e-9,
        )
    )

# Citation for the genome assembly.
_SwarbreckEtAl = stdpopsim.Citation(
    author="Swarbreck et al.",
    year="2007",
    doi="https://doi.org/10.1093/nar/gkm965",
    reasons={stdpopsim.CiteReason.ASSEMBLY},
)

# Genome built from the chromosomes above.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    assembly_citations=[_SwarbreckEtAl],
)

_species = stdpopsim.Species(
    id="AraTha",
    name="Arabidopsis thaliana",
    common_name="A. thaliana",
    genome=_genome,
    generation_time=1.0,
    generation_time_citations=[stdpopsim.Citation(
        doi="https://doi.org/10.1890/0012-9658(2002)083[1006:GTINSO]2.0.CO;2",
        year="2002",
        author="Donohue",
        reasons={stdpopsim.CiteReason.GEN_TIME})],
    population_size=10**4,
    population_size_citations=[stdpopsim.Citation(
Пример #22
0
# The first two whitespace-separated fields of each table line are the
# chromosome name and its length; both rates are constants shared by all
# chromosomes.
for line in _chromosome_data.splitlines():
    name, length = line.split()[:2]
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=int(length),
            mutation_rate=5.49e-9,  # citation: _SchriderEtAl
            # WRONG, underestimate used in S&S!
            recombination_rate=8.4e-9,
        )
    )

# TODO need to port this documentation somewhere.
# class:`stdpopsim.Genome` definition for D. melanogaster. Chromosome length data is
# based on `dm6 <https://www.ncbi.nlm.nih.gov/assembly/GCF_000001215.4/>`_.

# Genome built from the chromosomes above, with one citation for the mutation
# rate and one for the assembly.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    mutation_rate_citations=[
        _SchriderEtAl.because(stdpopsim.CiteReason.MUT_RATE),
    ],
    assembly_citations=[_DosSantosEtAl],
)

_species = stdpopsim.Species(
    id="DroMel",
    name="Drosophila melanogaster",
    common_name="D. melanogaster",
    genome=_genome,
    generation_time=0.1,
    generation_time_citations=[
        _LiAndStephan.because(stdpopsim.CiteReason.GEN_TIME)
    ],
    population_size=1720600,
    population_size_citations=[
        _LiAndStephan.because(stdpopsim.CiteReason.POP_SIZE)
Пример #23
0
# Append one Chromosome per entry of genome_data.data["chromosomes"]. Length
# and synonyms come from the entry itself; the recombination rate is looked up
# by chromosome id and the mutation rate is the same constant for all.
for name, data in genome_data.data["chromosomes"].items():
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            # Harland et al. (2017), sex-averaged estimate per bp per generation.
            mutation_rate=1.2e-8,
            recombination_rate=_recombination_rate_data[name],
        )
    )

# Genome assembled from the chromosomes above; the three citations are passed
# without an explicit reason tag here.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    citations=[
        _RosenEtAl,
        _HarlandEtAl,
        _MaEtAl,
    ],
)

# Species record for cattle, wrapping the genome above.
_species = stdpopsim.Species(
    id="BosTau",
    ensembl_id="bos_taurus",
    name="Bos taurus",
    common_name="Cattle",
    genome=_genome,
    generation_time=5,
    population_size=90,  # most recent Ne in _MacLeodEtAl
    citations=[_MacLeodEtAl],
)
Пример #24
0
# Append one Chromosome per entry of genome_data.data["chromosomes"]; the
# recombination rate is looked up by chromosome id.
for name, data in genome_data.data["chromosomes"].items():
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            # based on non-CpG sites only
            mutation_rate=4e-9,
            recombination_rate=_recombination_rate_data[name],
        )
    )

# Genome assembled from the chromosomes above; the assembly name and accession
# are read from genome_data.data, and each citation is tagged with its reason.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    assembly_name=genome_data.data["assembly_name"],
    assembly_accession=genome_data.data["assembly_accession"],
    citations=[
        # Mutation rate.
        _SkoglundEtAl.because(stdpopsim.CiteReason.MUT_RATE),
        _FranzEtAl.because(stdpopsim.CiteReason.MUT_RATE),
        # Recombination rate.
        _CampbellEtAl.because(stdpopsim.CiteReason.REC_RATE),
        # Assembly.
        _LindbladTohEtAl.because(stdpopsim.CiteReason.ASSEMBLY),
    ],
)

_species = stdpopsim.Species(
    id="CanFam",
    ensembl_id="canis_familiaris",
    name="Canis familiaris",
    common_name="Dog",
    genome=_genome,
    population_size=13000,  # ancestral dog size
    generation_time=3,
    citations=[
Пример #25
0
# Build one Chromosome per entry of genome_data.data["chromosomes"]; the
# mutation rate is a shared constant and the recombination rate is looked up
# by chromosome id.
_chromosomes = []
for name, data in genome_data.data["chromosomes"].items():
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            mutation_rate=1.29e-8,
            recombination_rate=_recombination_rate_data[name],
        )
    )

# Genome assembled from the chromosomes above; the assembly name and accession
# are read from genome_data.data.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    assembly_name=genome_data.data["assembly_name"],
    assembly_accession=genome_data.data["assembly_accession"],
    citations=[
        _genome2001,
        # Mutation rate.
        _tian2019.because(stdpopsim.CiteReason.MUT_RATE),
        # Recombination rate.
        _hapmap2007.because(stdpopsim.CiteReason.REC_RATE),
    ],
)
# NOTE(review): presumably adds conventional alternative chromosome names to
# the genome in place; confirm against stdpopsim.utils.
stdpopsim.utils.append_common_synonyms(_genome)

_species = stdpopsim.Species(
    id="HomSap",
    ensembl_id="homo_sapiens",
    name="H**o sapiens",
    common_name="Human",
    genome=_genome,
    generation_time=30,
    population_size=10**4,
    citations=[