Пример #1
0
    replace_quote_char,
)
from data_pipeline.datasets.gnomad_v2.gnomad_v2_variants import prepare_gnomad_v2_variants

from data_pipeline.pipelines.genes import pipeline as genes_pipeline


# Pipeline object on which every task in this module is registered.
pipeline = Pipeline()

###############################################
# MNVs
###############################################

# NOTE(review): the Pipeline API is not visible here; add_download_task's
# positional arguments appear to be (task name, source URL, output path) --
# confirm against data_pipeline.pipeline.

# Coding MNV table from the gnomad-public release/2.1/mnv directory.
pipeline.add_download_task(
    "download_mnvs",
    "https://storage.googleapis.com/gnomad-public/release/2.1/mnv/gnomad_mnv_coding_v0.tsv",
    "/gnomad_v2/gnomad_mnv_coding_v0.tsv",
)

# 3bp MNV table ("fullannotation" file) from the same release directory.
pipeline.add_download_task(
    "download_3bp_mnvs",
    "https://storage.googleapis.com/gnomad-public/release/2.1/mnv/gnomad_mnv_coding_3bp_fullannotation.tsv",
    "/gnomad_v2/gnomad_mnv_coding_3bp_fullannotation.tsv",
)

# Run replace_quote_char with the "download_mnvs" task supplied as its "path"
# input; the result is registered under the "-quoted" file name.
# NOTE(review): replace_quote_char is imported from elsewhere; what it rewrites
# is not shown here. The lookup via get_task requires "download_mnvs" to have
# been registered above, so keep this call after the download tasks.
pipeline.add_task(
    "replace_mnv_quote_char",
    replace_quote_char,
    "/gnomad_v2/gnomad_mnv_coding_v0-quoted.tsv",
    {"path": pipeline.get_task("download_mnvs")},
)
Пример #2
0
from data_pipeline.data_types.gtex_tissue_expression import prepare_gtex_expression_data
from data_pipeline.data_types.pext import prepare_pext_data

from data_pipeline.datasets.exac.exac_constraint import prepare_exac_constraint
from data_pipeline.datasets.exac.exac_regional_missense_constraint import prepare_exac_regional_missense_constraint
from data_pipeline.datasets.gnomad_v2.gnomad_v2_constraint import prepare_gnomad_v2_constraint

# Pipeline object on which every task in this module is registered.
pipeline = Pipeline()

###############################################
# Import GENCODE and HGNC files
###############################################

# NOTE(review): add_download_task's positional arguments appear to be
# (task name, source URL, output path) -- confirm against the Pipeline class.

# GENCODE release 19 annotation GTF from the EBI FTP server, stored under a
# shortened file name (gencode.v19.gtf.gz).
pipeline.add_download_task(
    "download_gencode_v19_gtf",
    "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz",
    "/external_sources/gencode.v19.gtf.gz",
)

# GENCODE release 35 annotation GTF, stored as gencode.v35.gtf.gz.
pipeline.add_download_task(
    "download_gencode_v35_gtf",
    "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_35/gencode.v35.annotation.gtf.gz",
    "/external_sources/gencode.v35.gtf.gz",
)

# HGNC custom download. The query string selects the columns gd_hgnc_id,
# gd_app_sym, gd_app_name, gd_prev_sym, gd_aliases, gd_pub_ensembl_id,
# md_ensembl_id and md_mim_id, restricts to status=Approved, and asks for
# format=text ordered by gd_app_sym_sort. The response is saved as hgnc.tsv.
pipeline.add_download_task(
    "download_hgnc_names",
    "https://www.genenames.org/cgi-bin/download/custom?col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_prev_sym&col=gd_aliases&col=gd_pub_ensembl_id&col=md_ensembl_id&col=md_mim_id&status=Approved&hgnc_dbtag=on&order_by=gd_app_sym_sort&format=text&submit=submit",
    "/external_sources/hgnc.tsv",
)
Пример #3
0
from data_pipeline.datasets.exac.exac_constraint import prepare_exac_constraint
from data_pipeline.datasets.exac.exac_regional_missense_constraint import prepare_exac_regional_missense_constraint
from data_pipeline.datasets.gnomad_v2.gnomad_v2_constraint import prepare_gnomad_v2_constraint


pipeline = Pipeline()

###############################################
# Import GENCODE and HGNC files
###############################################

# Source URLs for the GENCODE annotation GTFs. Each download is stored under
# /external_sources/ using the last path component of its URL as the file name.
GENCODE_V19_URL = "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz"
GENCODE_V35_URL = "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_35/gencode.v35.annotation.gtf.gz"

pipeline.add_download_task(
    "download_gencode_v19_gtf",
    GENCODE_V19_URL,
    f"/external_sources/{GENCODE_V19_URL.rpartition('/')[2]}",
)

pipeline.add_download_task(
    "download_gencode_v35_gtf",
    GENCODE_V35_URL,
    f"/external_sources/{GENCODE_V35_URL.rpartition('/')[2]}",
)

# HGNC custom download: the query string picks the exported columns, limits the
# result to approved entries and requests plain-text output.
pipeline.add_download_task(
    "download_hgnc_names",
    "https://www.genenames.org/cgi-bin/download/custom?col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_prev_sym&col=gd_aliases&col=gd_pub_ensembl_id&col=md_ensembl_id&col=md_mim_id&status=Approved&hgnc_dbtag=on&order_by=gd_app_sym_sort&format=text&submit=submit",
    "/external_sources/hgnc.tsv",
)

pipeline.add_task(
    "prepare_grch37_genes",
    prepare_genes,
Пример #4
0
from data_pipeline.pipeline import Pipeline, run_pipeline

from data_pipeline.data_types.variant import annotate_transcript_consequences

from data_pipeline.datasets.clinvar import prepare_clinvar_variants

from data_pipeline.pipelines.genes import pipeline as genes_pipeline


# Pipeline object on which every task in this module is registered.
pipeline = Pipeline()

# ClinVar VCF from the vcf_GRCh38 directory of the NCBI FTP server.
# NOTE(review): add_download_task's positional arguments appear to be
# (task name, source URL, output path) -- confirm against the Pipeline class.
pipeline.add_download_task(
    "download_clinvar_grch38_vcf",
    "ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar.vcf.gz",
    "/external_sources/clinvar_grch38.vcf.gz",
)

# Run prepare_clinvar_variants with the downloaded VCF task supplied as
# "vcf_path" and "reference_genome" set to "GRCh38".
# NOTE(review): the first dict looks like task-valued inputs and the second
# like plain keyword parameters for the function -- confirm against
# Pipeline.add_task. The get_task lookup requires the download task above to
# be registered first.
pipeline.add_task(
    "prepare_clinvar_grch38_variants",
    prepare_clinvar_variants,
    "/clinvar/clinvar_grch38_base.ht",
    {"vcf_path": pipeline.get_task("download_clinvar_grch38_vcf")},
    {"reference_genome": "GRCh38"},
)

pipeline.add_task(
    "annotate_clinvar_grch38_transcript_consequences",
    annotate_transcript_consequences,
    "/clinvar/clinvar_grch38_annotated.ht",
    {
        "variants_path": pipeline.get_task("prepare_clinvar_grch38_variants"),
Пример #5
0
from data_pipeline.data_types.variant import annotate_transcript_consequences

from data_pipeline.datasets.clinvar import (
    import_clinvar_xml,
    prepare_clinvar_variants,
    annotate_clinvar_variants_in_gnomad,
)

from data_pipeline.pipelines.genes import pipeline as genes_pipeline


# Pipeline object on which every task in this module is registered.
pipeline = Pipeline()

# ClinVar variation release XML ("00-latest" file) from the NCBI FTP site over
# HTTPS, stored as clinvar.xml.gz.
# NOTE(review): add_download_task's positional arguments appear to be
# (task name, source URL, output path) -- confirm against the Pipeline class.
pipeline.add_download_task(
    "download_clinvar_xml",
    "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz",
    "/external_sources/clinvar.xml.gz",
)

# Run import_clinvar_xml with the downloaded XML task supplied as
# "clinvar_xml_path"; the output is registered as /clinvar/clinvar.ht.
# The get_task lookup requires "download_clinvar_xml" to be registered above.
pipeline.add_task(
    "import_clinvar_xml",
    import_clinvar_xml,
    "/clinvar/clinvar.ht",
    {"clinvar_xml_path": pipeline.get_task("download_clinvar_xml")},
)

pipeline.add_task(
    "prepare_clinvar_grch37_variants",
    prepare_clinvar_variants,
    "/clinvar/clinvar_grch37_base.ht",
    {"clinvar_path": pipeline.get_task("import_clinvar_xml")},