def __init__(self, sampledata, refdata, job_params, outdir, libdir, umi,
             maxcores=1, scratch="/scratch/tmp/tmp", **kwargs):
    """Initialise the pipeline and register every analysis step.

    All arguments are forwarded unchanged to ``ClinseqPipeline.__init__``.
    ``umi`` is additionally used here: when truthy, the UMI processing
    route is configured instead of plain alignment and merging, and its
    value is also handed to ``configure_panel_analyses_liqbio``.
    """
    ClinseqPipeline.__init__(
        self, sampledata, refdata, job_params, outdir, libdir, umi,
        maxcores, scratch, **kwargs)

    # Pipeline-specific overrides of the default job parameters
    # (includes the minimum alt fraction used for vardict).
    overrides = (
        ("vardict-min-alt-frac", 0.01),
        ("vardict-min-num-reads", None),
        ("vep-additional-options", " --pick --filter_common "),
    )
    for param_name, param_value in overrides:
        self.default_job_params[param_name] = param_value

    # Drop clinseq barcodes that have no data available.
    self.check_sampledata()

    # With UMIs the fastq -> bam route goes through the UMI processing;
    # otherwise fastq data is aligned and merged for all clinseq barcodes.
    configure_inputs = (self.configure_umi_processing if umi
                        else self.configure_align_and_merge)
    configure_inputs()

    # Panel analyses: generic, liqbio-specific, then additional msings.
    self.configure_panel_analyses()
    self.configure_panel_analyses_liqbio(umi)
    self.configure_panel_msings_analyses()

    # QC of all panel data, then fastq QC.
    self.configure_all_panel_qcs()
    self.configure_fastq_qcs()

    # Low-pass whole genome analysis and the QC of that data.
    self.configure_lowpass_analyses()
    self.configure_all_lowpass_qcs()

    # MultiQC is configured last.
    self.configure_multi_qc()
def __init__(self, sampledata, refdata, job_params, outdir, libdir,
             maxcores=1, scratch="/scratch/tmp/tmp/",
             referral_db_conf="tests/referrals/referral-db-config.json",
             addresses="tests/referrals/addresses.csv", **kwargs):
    """Initialise the pipeline and register the ALASCCA analysis steps.

    ``referral_db_conf`` and ``addresses`` are stored on the instance;
    every other argument is forwarded unchanged to
    ``ClinseqPipeline.__init__``.
    """
    ClinseqPipeline.__init__(
        self, sampledata, refdata, job_params, outdir, libdir,
        maxcores, scratch, **kwargs)

    self.referral_db_conf, self.addresses = referral_db_conf, addresses

    # Pipeline-specific overrides of the default job parameters.
    overrides = (
        ("vardict-min-num-reads", 6),
        ("create_alascca_report", True),
    )
    for param_name, param_value in overrides:
        self.default_job_params[param_name] = param_value

    # The sample data must be valid for an ALASCCA analysis.
    self.validate_sample_data_for_alascca()

    # Drop sample capture items that have no data available.
    self.check_sampledata()

    # Alignment and merging of fastq data for all clinseq barcodes.
    self.configure_align_and_merge()

    # Panel analyses followed by QC of all panel data.
    self.configure_panel_analyses()
    self.configure_all_panel_qcs()

    # ALASCCA report generation.
    self.configure_alascca_specific_analysis()

    # Fastq QC, and finally MultiQC.
    self.configure_fastq_qcs()
    self.configure_multi_qc()
def setUp(self):
    """Create ``self.test_clinseq_pipeline`` from in-line fixture data.

    The pipeline is built from a sample dictionary (one ``sdid`` with
    T, N and CFDNA barcode lists), a reference-data dictionary with a
    single ``test-regions`` target, and an empty job-parameter dict.
    """
    barcodes = {
        "sdid": "P-NA12877",
        "T": [
            "AL-P-NA12877-T-03098849-TD1-TT1",
            "AL-P-NA12877-T-03098849-TD1-WGS",
        ],
        "N": [
            "AL-P-NA12877-N-03098121-TD1-TT1",
            "AL-P-NA12877-N-03098121-TD1-WGS",
        ],
        "CFDNA": [
            "LB-P-NA12877-CFDNA-03098850-TD1-TT1",
            "LB-P-NA12877-CFDNA-03098850-TD1-TT2",
            "LB-P-NA12877-CFDNA-03098850-TD1-WGS",
        ],
    }

    # Files belonging to the only capture target used by this fixture.
    test_regions = {
        "cnvkit-ref": None,
        "msisites": "intervals/targets/test-regions.msisites.tsv",
        "targets-bed-slopped20": "intervals/targets/test-regions-GRCh37.slopped20.bed",
        "targets-interval_list": "intervals/targets/test-regions-GRCh37.slopped20.interval_list",
        "targets-interval_list-slopped20": "intervals/targets/test-regions-GRCh37.slopped20.interval_list",
    }

    references = {
        "bwaIndex": "bwa/test-genome-masked.fasta",
        "chrsizes": "genome/test-genome-masked.chrsizes.txt",
        "clinvar": "variants/clinvar_20160203.vcf.gz",
        "cosmic": "variants/CosmicCodingMuts_v71.vcf.gz",
        "dbSNP": "variants/dbsnp142-germline-only.vcf.gz",
        "exac": "variants/ExAC.r0.3.1.sites.vep.vcf.gz",
        "icgc": "variants/icgc_release_20_simple_somatic_mutation.aggregated.vcf.gz",
        "reference_dict": "genome/test-genome-masked.dict",
        "reference_genome": "genome/test-genome-masked.fasta",
        "swegene_common": "variants/swegen_common.vcf.gz",
        "targets": {"test-regions": test_regions},
        "contest_vcfs": {"test-regions": "test_contest.vcf"},
        "vep_dir": None,
    }

    self.test_clinseq_pipeline = ClinseqPipeline(
        barcodes, references, {}, "/tmp", "/nfs/LIQBIO/INBOX/exomes")
def __init__(self, sampledata, refdata, job_params, outdir, libdir, umi,
             maxcores=1, scratch="/scratch/tmp/tmp", **kwargs):
    """Initialise the pipeline state and the table of runnable steps.

    The arguments are forwarded unchanged to ``ClinseqPipeline.__init__``
    and afterwards also stored on the instance under their own names.
    No analysis is configured here; ``self.step_to_run`` only maps each
    step name to the bound method registered for it.
    """
    ClinseqPipeline.__init__(
        self, sampledata, refdata, job_params, outdir, libdir, umi,
        maxcores, scratch, **kwargs)

    # Pipeline-specific overrides of the default job parameters
    # (includes the minimum alt fraction used for vardict).
    overrides = (
        ("vardict-min-alt-frac", 0.01),
        ("vardict-min-num-reads", None),
        ("vep-additional-options", " --pick --filter_common "),
    )
    for param_name, param_value in overrides:
        self.default_job_params[param_name] = param_value

    # Keep the initial data on the instance.
    self.sampledata = sampledata
    self.refdata = refdata
    self.job_params = job_params
    self.outdir = outdir
    self.libdir = libdir
    self.umi = umi
    self.maxcores = maxcores
    self.scratch = scratch
    self.kwargs = kwargs
    self.somatic_merge_vcf = defaultdict(dict)

    # Steps that can be run as an AWS batch job with a docker image.
    # Key: step name; value: the function that adds the job. The trailing
    # comment on each entry names the docker image noted for that step.
    self.step_to_run = {
        "qc": self.qc_step,                                     # docker: base
        "alignment": self.alignment_step,                       # docker: aligner
        "cnvkit": self.cnvkit_step,                             # docker: variants
        "germline_variant": self.germline_variant_step,         # docker: variants
        "somatic_vardict": self.somatic_variant_vardict_step,   # docker: variants (tested working)
        "somatic_strelka": self.somatic_variant_strelka_step,   # docker: variants (tested working)
        "somatic_mutect2": self.somatic_variant_mutect2_step,   # docker: variants (tested working)
        "somatic_varscan": self.somatic_variant_varscan_step,   # docker: variants (tested working)
        "somatic_variant_merge": self.somatic_variant_merge_step,  # docker: somaticseq (tested working)
        "vep": self.vep_step,                                   # docker: vep (tested)
        "msi": self.msi_sensor_step,
    }
def setUp(self):
    """Create the sample data, reference data, captures and pipeline fixtures.

    Sets ``self.sample_data``, ``self.ref_data``, ``self.test_tumor_capture``,
    ``self.test_normal_capture`` and ``self.test_clinseq_pipeline``. The
    reference data describes three capture targets: ``test-regions``,
    ``progression`` and ``monitor``.
    """
    self.sample_data = {
        "sdid": "P-00202345",
        "N": ["LB-P-00202345-N-03277090-TP20190201-CP20190204"],
        "T": [],
        "CFDNA": [
            "LB-P-00202345-CFDNA-03277089-TP20190201-CP20190204",
            "LB-P-00202345-CFDNA-03277089-TP20190201-CM20190204",
        ],
    }

    # Target definition using relative fixture paths.
    test_regions = {
        "cnvkit-ref": {
            "THRUPLEX_PLASMASEQ": {
                "CFDNA": "intervals/targets/progression.THRUPLEX_PLASMASEQ.CFDNA.cnn",
                "N": "intervals/targets/progression.THRUPLEX_PLASMASEQ.N.cnn",
            },
        },
        "cnvkit-fix": {
            "THRUPLEX_PLASMASEQ": {
                "CFDNA": "intervals/targets/progression.THRUPLEX_PLASMASEQ.CFDNA.cnvkit-fix.tsv",
            },
        },
        "msisites": "intervals/targets/test-regions.msisites.tsv",
        "targets-bed-slopped20": "intervals/targets/test-regions-GRCh37.slopped20.bed",
        "targets-interval_list": "intervals/targets/test-regions-GRCh37.slopped20.interval_list",
        "targets-interval_list-slopped20": "intervals/targets/test-regions-GRCh37.slopped20.interval_list",
        "blacklist-bed": None,
        "purecn_targets": "intervals/targets/purecn.bed",
    }

    # Target definition using absolute /nfs/ALASCCA/autoseq-genome paths.
    progression = {
        "blacklist-bed": "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.blacklist.bed",
        "cnvkit-fix": {
            "THRUPLEX_PLASMASEQ": {
                "CFDNA": "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.THRUPLEX_PLASMASEQ.CFDNA.cnvkit-fix.tsv",
            },
        },
        "cnvkit-ref": {
            "KAPA_HYPERPREP": {
                "N": "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.KAPA_HYPERPREP.N.cnn",
                "T": "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.KAPA_HYPERPREP.T.cnn",
            },
            "THRUPLEX_PLASMASEQ": {
                "CFDNA": "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.THRUPLEX_PLASMASEQ.CFDNA.cnn",
                "N": "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.THRUPLEX_PLASMASEQ.N.cnn",
            },
        },
        "msings-baseline": "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.msings.baseline",
        "msings-bed": "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.msings.bed",
        "msings-msi_intervals": "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.msings.msi_intervals",
        "msisites": "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.slopped20.msisites.tsv",
        "purecn_targets": "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.purecn.txt",
        "targets-bed-slopped20": "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.slopped20.bed.gz",
        "targets-interval_list": "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.interval_list",
        "targets-interval_list-slopped20": "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.slopped20.interval_list",
    }

    # Target definition without msings files or purecn targets (all None).
    monitor = {
        "blacklist-bed": "/nfs/ALASCCA/autoseq-genome/intervals/targets/monitor.blacklist.bed",
        "msings-baseline": None,
        "msings-bed": None,
        "msings-msi_intervals": None,
        "msisites": "/nfs/ALASCCA/autoseq-genome/intervals/targets/monitor.slopped20.msisites.tsv",
        "purecn_targets": None,
        "targets-bed-slopped20": "/nfs/ALASCCA/autoseq-genome/intervals/targets/monitor.slopped20.bed.gz",
        "targets-interval_list": "/nfs/ALASCCA/autoseq-genome/intervals/targets/monitor.interval_list",
        "targets-interval_list-slopped20": "/nfs/ALASCCA/autoseq-genome/intervals/targets/monitor.slopped20.interval_list",
    }

    self.ref_data = {
        "bwaIndex": "bwa/test-genome-masked.fasta",
        "chrsizes": "genome/test-genome-masked.chrsizes.txt",
        "clinvar": "variants/clinvar_20160203.vcf.gz",
        "cosmic": "variants/CosmicCodingMuts_v71.vcf.gz",
        "dbSNP": "variants/dbsnp142-germline-only.vcf.gz",
        "exac": "variants/ExAC.r0.3.1.sites.vep.vcf.gz",
        "icgc": "variants/icgc_release_20_simple_somatic_mutation.aggregated.vcf.gz",
        "reference_dict": "genome/test-genome-masked.dict",
        "reference_genome": "genome/test-genome-masked.fasta",
        "swegene_common": "variants/swegen_common.vcf.gz",
        "ar_regions": "intervals/ar_regions.bed",
        "ts_regions": "intervals/ts_regions.bed",
        "fusion_regions": "intervals/fusion_regions.bed",
        "1KG": "/nfs/ALASCCA/autoseq-genome/variants/1000G_phase1.indels.b37.vcf.gz",
        "Mills_and_1KG_gold_standard": "/nfs/ALASCCA/autoseq-genome/variants/Mills_and_1000G_gold_standard.indels.b37.vcf.gz",
        "brca_exchange": "/nfs/ALASCCA/autoseq-genome/variants/BrcaExchangeClinvar_15Jan2019_v26_hg19.vcf.gz",
        "oncokb": "/nfs/ALASCCA/autoseq-genome/variants/OncoKB_6Mar19_v1.9.txt",
        "targets": {
            "test-regions": test_regions,
            "progression": progression,
            "monitor": monitor,
        },
        "contest_vcfs": {"test-regions": "test_contest.vcf"},
        "vep_dir": "dummy_vep_dir",
    }

    self.test_tumor_capture = UniqueCapture(
        "LB", "P-00202345", "CFDNA", "03277089", "TP", "CP")
    self.test_normal_capture = UniqueCapture(
        "LB", "P-00202345", "N", "03277090", "TP", "CP")

    # NOTE(review): the sixth argument is the string 'FALSE', which is
    # truthy in Python, not the boolean False - confirm this is what
    # ClinseqPipeline expects for that parameter.
    self.test_clinseq_pipeline = ClinseqPipeline(
        self.sample_data, self.ref_data, {},
        "/tmp/liqbio-test/", "/media/clinseq/disk4/PROBIO/", 'FALSE')
def setUp(self):
    """Create the reference data, captures and pipeline fixtures.

    Sets ``self.ref_data``, ``self.test_cancer_capture``,
    ``self.test_normal_capture`` and ``self.test_clinseq_pipeline``. The
    sample dictionary is only passed to the pipeline and is not kept on
    the instance.
    """
    barcodes = {
        "sdid": "P-NA12877",
        "T": [
            "AL-P-NA12877-T-03098849-TD1-TT1",
            "AL-P-NA12877-T-03098849-TD1-WGS",
        ],
        "N": [
            "AL-P-NA12877-N-03098121-TD1-TT1",
            "AL-P-NA12877-N-03098121-TD1-WGS",
        ],
        "CFDNA": [
            "LB-P-NA12877-CFDNA-03098850-TD1-TT1",
            "LB-P-NA12877-CFDNA-03098850-TD1-TT2",
            "LB-P-NA12877-CFDNA-03098850-TD1-WGS",
        ],
    }

    # Files belonging to the only capture target used by this fixture.
    test_regions = {
        "cnvkit-ref": {
            "THRUPLEX_PLASMASEQ": {
                "CFDNA": "intervals/targets/progression.THRUPLEX_PLASMASEQ.CFDNA.cnn",
                "N": "intervals/targets/progression.THRUPLEX_PLASMASEQ.N.cnn",
            },
        },
        "cnvkit-fix": {
            "THRUPLEX_PLASMASEQ": {
                "CFDNA": "intervals/targets/progression.THRUPLEX_PLASMASEQ.CFDNA.cnvkit-fix.tsv",
            },
        },
        "msisites": "intervals/targets/test-regions.msisites.tsv",
        "targets-bed-slopped20": "intervals/targets/test-regions-GRCh37.slopped20.bed",
        "targets-interval_list": "intervals/targets/test-regions-GRCh37.slopped20.interval_list",
        "targets-interval_list-slopped20": "intervals/targets/test-regions-GRCh37.slopped20.interval_list",
        "blacklist-bed": None,
        "purecn_targets": "intervals/targets/purecn.bed",
    }

    self.ref_data = {
        "bwaIndex": "bwa/test-genome-masked.fasta",
        "chrsizes": "genome/test-genome-masked.chrsizes.txt",
        "clinvar": "variants/clinvar_20160203.vcf.gz",
        "cosmic": "variants/CosmicCodingMuts_v71.vcf.gz",
        "dbSNP": "variants/dbsnp142-germline-only.vcf.gz",
        "exac": "variants/ExAC.r0.3.1.sites.vep.vcf.gz",
        "icgc": "variants/icgc_release_20_simple_somatic_mutation.aggregated.vcf.gz",
        "reference_dict": "genome/test-genome-masked.dict",
        "reference_genome": "genome/test-genome-masked.fasta",
        "swegene_common": "variants/swegen_common.vcf.gz",
        "ar_regions": "intervals/ar_regions.bed",
        "ts_regions": "intervals/ts_regions.bed",
        "fusion_regions": "intervals/fusion_regions.bed",
        "targets": {"test-regions": test_regions},
        "contest_vcfs": {"test-regions": "test_contest.vcf"},
        "vep_dir": "dummy_vep_dir",
    }

    self.test_cancer_capture = UniqueCapture(
        "AL", "P-NA12877", "CFDNA", "03098850", "TD", "TT")
    self.test_normal_capture = UniqueCapture(
        "AL", "P-NA12877", "N", "03098121", "TD", "TT")

    self.test_clinseq_pipeline = ClinseqPipeline(
        barcodes, self.ref_data, {}, "/tmp", "/nfs/LIQBIO/INBOX/exomes")