Пример #1
0
    "download_gencode_v35_gtf",
    "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_35/gencode.v35.annotation.gtf.gz",
    "/external_sources/gencode.v35.gtf.gz",
)

# Register a download of the HGNC custom export. The query string selects the
# HGNC ID, symbol, name, previous symbols, aliases, Ensembl and MIM ID columns,
# restricts to status=Approved and requests format=text.
# NOTE(review): argument roles here and below (task name, source/function,
# output path, inputs, extra parameters) are inferred from the values passed;
# confirm against the Pipeline class.
pipeline.add_download_task(
    "download_hgnc_names",
    "https://www.genenames.org/cgi-bin/download/custom?col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_prev_sym&col=gd_aliases&col=gd_pub_ensembl_id&col=md_ensembl_id&col=md_mim_id&status=Approved&hgnc_dbtag=on&order_by=gd_app_sym_sort&format=text&submit=submit",
    "/external_sources/hgnc.tsv",
)

# Register prepare_genes for GRCh37, fed by the "download_gencode_v19_gtf" and
# "download_hgnc_names" tasks.
# NOTE(review): "download_gencode_v19_gtf" is not registered in the visible
# part of this file; presumably it is added earlier, since get_task is called
# at import time. Confirm.
pipeline.add_task(
    "prepare_grch37_genes",
    prepare_genes,
    "/genes/genes_grch37_base.ht",
    {
        "gencode_path": pipeline.get_task("download_gencode_v19_gtf"),
        "hgnc_path": pipeline.get_task("download_hgnc_names"),
    },
    {"reference_genome": "GRCh37"},
)

# Same registration for GRCh38, using the GENCODE v35 download task and the
# same HGNC names task.
pipeline.add_task(
    "prepare_grch38_genes",
    prepare_genes,
    "/genes/genes_grch38_base.ht",
    {
        "gencode_path": pipeline.get_task("download_gencode_v35_gtf"),
        "hgnc_path": pipeline.get_task("download_hgnc_names"),
    },
    {"reference_genome": "GRCh38"},
)
Пример #2
0
# Register a download of the gnomAD 2.1 coding MNV table into /gnomad_v2.
# NOTE(review): argument roles here and below (task name, source/function,
# output path, inputs) are inferred from the values passed; confirm against
# the Pipeline class.
pipeline.add_download_task(
    "download_mnvs",
    "https://storage.googleapis.com/gnomad-public/release/2.1/mnv/gnomad_mnv_coding_v0.tsv",
    "/gnomad_v2/gnomad_mnv_coding_v0.tsv",
)

# Register a download of the companion 3bp MNV table ("3bp_fullannotation").
pipeline.add_download_task(
    "download_3bp_mnvs",
    "https://storage.googleapis.com/gnomad-public/release/2.1/mnv/gnomad_mnv_coding_3bp_fullannotation.tsv",
    "/gnomad_v2/gnomad_mnv_coding_3bp_fullannotation.tsv",
)

# Run replace_quote_char on the downloaded MNV table, writing a "-quoted" copy.
# NOTE(review): what replace_quote_char changes is not visible here; see its
# definition.
pipeline.add_task(
    "replace_mnv_quote_char",
    replace_quote_char,
    "/gnomad_v2/gnomad_mnv_coding_v0-quoted.tsv",
    {"path": pipeline.get_task("download_mnvs")},
)

# Same quote-character step for the 3bp MNV table.
pipeline.add_task(
    "replace_3bp_mnv_quote_char",
    replace_quote_char,
    "/gnomad_v2/gnomad_mnv_coding_3bp_fullannotation-quoted.tsv",
    {"path": pipeline.get_task("download_3bp_mnvs")},
)

pipeline.add_task(
    "prepare_gnomad_v2_mnvs",
    prepare_gnomad_v2_mnvs,
    "/gnomad_v2/gnomad_v2_mnvs.ht",
    {
Пример #3
0
from data_pipeline.pipelines.genes import pipeline as genes_pipeline


# Pipeline object that the tasks below are registered on.
pipeline = Pipeline()

# Register a download of the ClinVar GRCh38 VCF from the NCBI FTP server.
# NOTE(review): argument roles here and below (task name, source/function,
# output path, inputs, extra parameters) are inferred from the values passed;
# confirm against the Pipeline class.
pipeline.add_download_task(
    "download_clinvar_grch38_vcf",
    "ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar.vcf.gz",
    "/external_sources/clinvar_grch38.vcf.gz",
)

# Register prepare_clinvar_variants on the downloaded VCF with
# reference_genome set to GRCh38.
pipeline.add_task(
    "prepare_clinvar_grch38_variants",
    prepare_clinvar_variants,
    "/clinvar/clinvar_grch38_base.ht",
    {"vcf_path": pipeline.get_task("download_clinvar_grch38_vcf")},
    {"reference_genome": "GRCh38"},
)

# Register annotate_transcript_consequences on the prepared variants. The
# transcript inputs come from tasks registered on the separate genes pipeline,
# so that module must define "extract_grch38_transcripts" and
# "import_mane_select_transcripts".
pipeline.add_task(
    "annotate_clinvar_grch38_transcript_consequences",
    annotate_transcript_consequences,
    "/clinvar/clinvar_grch38_annotated.ht",
    {
        "variants_path": pipeline.get_task("prepare_clinvar_grch38_variants"),
        "transcripts_path": genes_pipeline.get_task("extract_grch38_transcripts"),
        "mane_transcripts_path": genes_pipeline.get_task("import_mane_select_transcripts"),
    },
)
Пример #4
0
from data_pipeline.datasets.exac.exac_coverage import import_exac_coverage
from data_pipeline.datasets.exac.exac_variants import import_exac_vcf

from data_pipeline.pipelines.genes import pipeline as genes_pipeline

pipeline = Pipeline()

# ---------------------------------------------
# Variants
# ---------------------------------------------

# Register import_exac_vcf with the ExAC r1 sites VCF in the gnomad-public
# bucket as its "path" input.
pipeline.add_task(
    "import_exac_vcf",
    import_exac_vcf,
    "/exac/exac_variants.ht",
    {"path": "gs://gnomad-public/legacy/exac_browser/ExAC.r1.sites.vep.vcf.gz"},
)

# Register annotate_transcript_consequences on the imported variants, with
# GRCh37 transcripts supplied by a task from the genes pipeline.
pipeline.add_task(
    "annotate_exac_transcript_consequences",
    annotate_transcript_consequences,
    "/exac/exac_variants_annotated_1.ht",
    {
        "variants_path": pipeline.get_task("import_exac_vcf"),
        "transcripts_path": genes_pipeline.get_task("extract_grch37_transcripts"),
    },
)
Пример #5
0
from data_pipeline.data_types.variant import annotate_transcript_consequences

from data_pipeline.datasets.gnomad_v3.gnomad_v3_variants import prepare_gnomad_v3_variants

from data_pipeline.pipelines.genes import pipeline as genes_pipeline


# Pipeline object that the tasks below are registered on.
pipeline = Pipeline()

###############################################
# Variants
###############################################

# Register prepare_gnomad_v3_variants with the gnomAD v3.1 genomes sites Hail
# table as its "path" input.
# NOTE(review): argument roles here and below (task name, function, output
# path, inputs) are inferred from the values passed; confirm against the
# Pipeline class.
pipeline.add_task(
    "prepare_gnomad_v3_variants",
    prepare_gnomad_v3_variants,
    "/gnomad_v3/gnomad_v3_variants_base.ht",
    {"path": "gs://gnomad/release/3.1/ht/genomes/gnomad.genomes.v3.1.sites.ht"},
)

# Register annotate_transcript_consequences on the prepared variants. The
# transcript and MANE Select transcript inputs come from tasks registered on
# the genes pipeline.
pipeline.add_task(
    "annotate_gnomad_v3_transcript_consequences",
    annotate_transcript_consequences,
    "/gnomad_v3/gnomad_v3_variants_annotated_1.ht",
    {
        "variants_path": pipeline.get_task("prepare_gnomad_v3_variants"),
        "transcripts_path": genes_pipeline.get_task("extract_grch38_transcripts"),
        "mane_transcripts_path": genes_pipeline.get_task("import_mane_select_transcripts"),
    },
)

###############################################
Пример #6
0
)

from data_pipeline.pipelines.genes import pipeline as genes_pipeline


# Pipeline object that the tasks below are registered on.
pipeline = Pipeline()

# Register a download of the ClinVar variation release XML. The URL points at
# the "00-latest" file, so the content fetched depends on when the task runs.
# NOTE(review): argument roles here and below (task name, source/function,
# output path, inputs, extra parameters) are inferred from the values passed;
# confirm against the Pipeline class.
pipeline.add_download_task(
    "download_clinvar_xml",
    "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz",
    "/external_sources/clinvar.xml.gz",
)

# Register import_clinvar_xml on the downloaded XML, writing /clinvar/clinvar.ht.
pipeline.add_task(
    "import_clinvar_xml",
    import_clinvar_xml,
    "/clinvar/clinvar.ht",
    {"clinvar_xml_path": pipeline.get_task("download_clinvar_xml")},
)

# Register prepare_clinvar_variants on the imported table with
# reference_genome set to GRCh37.
pipeline.add_task(
    "prepare_clinvar_grch37_variants",
    prepare_clinvar_variants,
    "/clinvar/clinvar_grch37_base.ht",
    {"clinvar_path": pipeline.get_task("import_clinvar_xml")},
    {"reference_genome": "GRCh37"},
)

pipeline.add_task(
    "vep_clinvar_grch37_variants",
    # tolerate_parse_error to ignore not a number error from "NaN" gene symbol
    lambda path: hl.vep(hl.read_table(path), tolerate_parse_error=True).drop("vep_proc_id"),
Пример #7
0
from data_pipeline.datasets.mitochondria import prepare_mitochondrial_coverage, prepare_mitochondrial_variants

from data_pipeline.pipelines.genes import pipeline as genes_pipeline

pipeline = Pipeline()

# ---------------------------------------------
# Variants
# ---------------------------------------------

# Register prepare_mitochondrial_variants with two inputs: the gnomAD v3.1
# chrM sites Hail table and the mitochondrial MNV TSV.
pipeline.add_task(
    "prepare_mitochondrial_variants",
    prepare_mitochondrial_variants,
    "/mitochondria/mitochondrial_variants_base.ht",
    {
        "path": "gs://gnomad-public-requester-pays/release/3.1/ht/genomes/gnomad.genomes.v3.1.sites.chrM.ht",
        "mnvs_path": "gs://gnomad-browser/mt_mnvs.tsv",
    },
)

pipeline.add_task(
    "annotate_mitochondrial_variant_transcript_consequences",
    annotate_transcript_consequences,
    "/mitochondria/mitochondrial_variants_annotated_1.ht",
    {
        "variants_path":
        pipeline.get_task("prepare_mitochondrial_variants"),
        "transcripts_path":
        genes_pipeline.get_task("extract_grch38_transcripts"),
        "mane_transcripts_path":
Пример #8
0
)

from data_pipeline.pipelines.genes import pipeline as genes_pipeline


# Pipeline object that the tasks below are registered on.
pipeline = Pipeline()

# Register a download of the ClinVar variation release XML. The URL points at
# the "00-latest" file, so the content fetched depends on when the task runs.
# NOTE(review): argument roles here and below (task name, source/function,
# output path, inputs, extra parameters) are inferred from the values passed;
# confirm against the Pipeline class.
pipeline.add_download_task(
    "download_clinvar_xml",
    "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz",
    "/external_sources/clinvar.xml.gz",
)

# Register import_clinvar_xml on the downloaded XML, writing /clinvar/clinvar.ht.
pipeline.add_task(
    "import_clinvar_xml",
    import_clinvar_xml,
    "/clinvar/clinvar.ht",
    {"clinvar_xml_path": pipeline.get_task("download_clinvar_xml")},
)

# Register prepare_clinvar_variants on the imported table with
# reference_genome set to GRCh38.
pipeline.add_task(
    "prepare_clinvar_grch38_variants",
    prepare_clinvar_variants,
    "/clinvar/clinvar_grch38_base.ht",
    {"clinvar_path": pipeline.get_task("import_clinvar_xml")},
    {"reference_genome": "GRCh38"},
)

pipeline.add_task(
    "vep_clinvar_grch38_variants",
    lambda path: hl.vep(hl.read_table(path)).drop("vep_proc_id"),
    "/clinvar/clinvar_grch38_vepped.ht",
Пример #9
0
from data_pipeline.datasets.gnomad_sv_v2 import prepare_gnomad_structural_variants

pipeline = Pipeline()

# ---------------------------------------------
# Variants
# ---------------------------------------------

# Register prepare_gnomad_structural_variants with four inputs from the
# 2019-sv release folder: the full sites VCF, the controls-only and non-neuro
# sites VCFs, and the histograms Hail table.
pipeline.add_task(
    "prepare_structural_variants",
    prepare_gnomad_structural_variants,
    "/gnomad_sv_v2/structural_variants.ht",
    {
        "vcf_path": "gs://gnomad-public/papers/2019-sv/gnomad_v2.1_sv.sites.vcf.gz",
        "controls_vcf_path": "gs://gnomad-public/papers/2019-sv/gnomad_v2.1_sv.controls_only.sites.vcf.gz",
        "non_neuro_vcf_path": "gs://gnomad-public/papers/2019-sv/gnomad_v2.1_sv.nonneuro.sites.vcf.gz",
        "histograms_path": "gs://gnomad-public/papers/2019-sv/gnomad_sv_hists.ht",
    },
)

# ---------------------------------------------
# Run
# ---------------------------------------------

# Only run the pipeline when this module is executed as a script; importing it
# just builds the `pipeline` object.
if __name__ == "__main__":
    run_pipeline(pipeline)