"ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_35/gencode.v35.annotation.gtf.gz", "/external_sources/gencode.v35.gtf.gz", ) pipeline.add_download_task( "download_hgnc_names", "https://www.genenames.org/cgi-bin/download/custom?col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_prev_sym&col=gd_aliases&col=gd_pub_ensembl_id&col=md_ensembl_id&col=md_mim_id&status=Approved&hgnc_dbtag=on&order_by=gd_app_sym_sort&format=text&submit=submit", "/external_sources/hgnc.tsv", ) pipeline.add_task( "prepare_grch37_genes", prepare_genes, "/genes/genes_grch37_base.ht", { "gencode_path": pipeline.get_task("download_gencode_v19_gtf"), "hgnc_path": pipeline.get_task("download_hgnc_names"), }, {"reference_genome": "GRCh37"}, ) pipeline.add_task( "prepare_grch38_genes", prepare_genes, "/genes/genes_grch38_base.ht", { "gencode_path": pipeline.get_task("download_gencode_v35_gtf"), "hgnc_path": pipeline.get_task("download_hgnc_names"), }, {"reference_genome": "GRCh38"}, )
"download_mnvs", "https://storage.googleapis.com/gnomad-public/release/2.1/mnv/gnomad_mnv_coding_v0.tsv", "/gnomad_v2/gnomad_mnv_coding_v0.tsv", ) pipeline.add_download_task( "download_3bp_mnvs", "https://storage.googleapis.com/gnomad-public/release/2.1/mnv/gnomad_mnv_coding_3bp_fullannotation.tsv", "/gnomad_v2/gnomad_mnv_coding_3bp_fullannotation.tsv", ) pipeline.add_task( "replace_mnv_quote_char", replace_quote_char, "/gnomad_v2/gnomad_mnv_coding_v0-quoted.tsv", {"path": pipeline.get_task("download_mnvs")}, ) pipeline.add_task( "replace_3bp_mnv_quote_char", replace_quote_char, "/gnomad_v2/gnomad_mnv_coding_3bp_fullannotation-quoted.tsv", {"path": pipeline.get_task("download_3bp_mnvs")}, ) pipeline.add_task( "prepare_gnomad_v2_mnvs", prepare_gnomad_v2_mnvs, "/gnomad_v2/gnomad_v2_mnvs.ht", { "mnvs_path": pipeline.get_task("replace_mnv_quote_char"),
# Register the ExAC variant tasks. Each call passes a task name, the function
# to run, a path string (presumably the task's output location -- confirm
# against Pipeline.add_task), and a dict of named inputs.
pipeline.add_task(
    "import_exac_vcf",
    import_exac_vcf,
    "/exac/exac_variants.ht",
    {"path": "gs://gnomad-public/legacy/exac_browser/ExAC.r1.sites.vep.vcf.gz"},
)

# Inputs are wired by task reference: the variants come from the
# "import_exac_vcf" task registered above, the transcripts from the genes
# pipeline's "extract_grch37_transcripts" task.
pipeline.add_task(
    "annotate_exac_transcript_consequences",
    annotate_transcript_consequences,
    "/exac/exac_variants_annotated_1.ht",
    {
        "variants_path": pipeline.get_task("import_exac_vcf"),
        "transcripts_path": genes_pipeline.get_task("extract_grch37_transcripts"),
    },
)

###############################################
# Coverage
###############################################

# No inputs dict is passed for this task.
pipeline.add_task(
    "import_exac_coverage",
    import_exac_coverage,
    "/exac/exac_coverage.ht",
)
from data_pipeline.pipelines.genes import pipeline as genes_pipeline

pipeline = Pipeline()

# Download task: task name, source URL, then a path string (presumably the
# local destination -- confirm against Pipeline.add_download_task).
pipeline.add_download_task(
    "download_clinvar_grch38_vcf",
    "ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar.vcf.gz",
    "/external_sources/clinvar_grch38.vcf.gz",
)

# The downloaded VCF is passed in by task reference; the second dict carries
# the extra "reference_genome" argument for prepare_clinvar_variants.
pipeline.add_task(
    "prepare_clinvar_grch38_variants",
    prepare_clinvar_variants,
    "/clinvar/clinvar_grch38_base.ht",
    {"vcf_path": pipeline.get_task("download_clinvar_grch38_vcf")},
    {"reference_genome": "GRCh38"},
)

# The transcript and MANE Select transcript inputs are taken from tasks
# registered on the genes pipeline rather than on this one.
pipeline.add_task(
    "annotate_clinvar_grch38_transcript_consequences",
    annotate_transcript_consequences,
    "/clinvar/clinvar_grch38_annotated.ht",
    {
        "variants_path": pipeline.get_task("prepare_clinvar_grch38_variants"),
        "transcripts_path": genes_pipeline.get_task("extract_grch38_transcripts"),
        "mane_transcripts_path": genes_pipeline.get_task("import_mane_select_transcripts"),
    },
)

###############################################
# Variants ############################################### pipeline.add_task( "prepare_gnomad_v3_variants", prepare_gnomad_v3_variants, "/gnomad_v3/gnomad_v3_variants_base.ht", {"path": "gs://gnomad/release/3.1/ht/genomes/gnomad.genomes.v3.1.sites.ht"}, ) pipeline.add_task( "annotate_gnomad_v3_transcript_consequences", annotate_transcript_consequences, "/gnomad_v3/gnomad_v3_variants_annotated_1.ht", { "variants_path": pipeline.get_task("prepare_gnomad_v3_variants"), "transcripts_path": genes_pipeline.get_task("extract_grch38_transcripts"), "mane_transcripts_path": genes_pipeline.get_task("import_mane_select_transcripts"), }, ) ############################################### # Coverage ############################################### pipeline.add_task( "prepare_gnomad_v3_coverage", prepare_coverage, "/gnomad_v3/gnomad_v3_genome_coverage.ht", { "coverage_path": "gs://gnomad-public-requester-pays/release/3.0.1/coverage/genomes/gnomad.genomes.r3.0.1.coverage.ht"
from data_pipeline.pipelines.genes import pipeline as genes_pipeline pipeline = Pipeline() pipeline.add_download_task( "download_clinvar_xml", "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz", "/external_sources/clinvar.xml.gz", ) pipeline.add_task( "import_clinvar_xml", import_clinvar_xml, "/clinvar/clinvar.ht", {"clinvar_xml_path": pipeline.get_task("download_clinvar_xml")}, ) pipeline.add_task( "prepare_clinvar_grch37_variants", prepare_clinvar_variants, "/clinvar/clinvar_grch37_base.ht", {"clinvar_path": pipeline.get_task("import_clinvar_xml")}, {"reference_genome": "GRCh37"}, ) pipeline.add_task( "vep_clinvar_grch37_variants", # tolerate_parse_error to ignore not a number error from "NaN" gene symbol lambda path: hl.vep(hl.read_table(path), tolerate_parse_error=True).drop("vep_proc_id"), "/clinvar/clinvar_grch37_vepped.ht",
prepare_mitochondrial_variants, "/mitochondria/mitochondrial_variants_base.ht", { "path": "gs://gnomad-public-requester-pays/release/3.1/ht/genomes/gnomad.genomes.v3.1.sites.chrM.ht", "mnvs_path": "gs://gnomad-browser/mt_mnvs.tsv", }, ) pipeline.add_task( "annotate_mitochondrial_variant_transcript_consequences", annotate_transcript_consequences, "/mitochondria/mitochondrial_variants_annotated_1.ht", { "variants_path": pipeline.get_task("prepare_mitochondrial_variants"), "transcripts_path": genes_pipeline.get_task("extract_grch38_transcripts"), "mane_transcripts_path": genes_pipeline.get_task("import_mane_select_transcripts"), }, ) ############################################### # Coverage ############################################### pipeline.add_task( "prepare_mitochondrial_coverage", prepare_mitochondrial_coverage, "/mitochondria/mitochondria_genome_coverage.ht",