Exemplo n.º 1
0
    def __init__(self, sampledata, refdata, job_params, outdir, libdir, umi,
                 maxcores=1, scratch="/scratch/tmp/tmp", **kwargs):
        """Initialise the base pipeline and queue every analysis step.

        All arguments are forwarded unchanged to ``ClinseqPipeline.__init__``.
        ``umi`` is additionally used here: when truthy the UMI processing
        step is configured, otherwise the plain align-and-merge step is.
        It is also handed to ``configure_panel_analyses_liqbio``.
        """
        ClinseqPipeline.__init__(
            self, sampledata, refdata, job_params, outdir, libdir, umi,
            maxcores, scratch, **kwargs)

        # Pipeline-specific overrides of the default job parameters
        # (includes the min alt frac value).
        overrides = (
            ("vardict-min-alt-frac", 0.01),
            ("vardict-min-num-reads", None),
            ("vep-additional-options", " --pick --filter_common "),
        )
        for param_name, param_value in overrides:
            self.default_job_params[param_name] = param_value

        # Drop clinseq barcodes for which no data is available.
        self.check_sampledata()

        # From fastq to bam: either plain alignment + merging for all
        # clinseq barcodes, or the UMI-aware processing.
        if not umi:
            self.configure_align_and_merge()
        else:
            self.configure_umi_processing()

        # Panel analyses: the generic ones, then the liqbio-specific ones,
        # then the additional msings analysis.
        self.configure_panel_analyses()
        self.configure_panel_analyses_liqbio(umi)
        self.configure_panel_msings_analyses()

        # QC of all panel data, followed by the fastq QCs.
        self.configure_all_panel_qcs()
        self.configure_fastq_qcs()

        # Low-pass whole genome analysis and the QC of its data.
        self.configure_lowpass_analyses()
        self.configure_all_lowpass_qcs()

        # MultiQC is configured last.
        self.configure_multi_qc()
Exemplo n.º 2
0
    def __init__(self, sampledata, refdata, job_params, outdir, libdir,
                 maxcores=1, scratch="/scratch/tmp/tmp/",
                 referral_db_conf="tests/referrals/referral-db-config.json",
                 addresses="tests/referrals/addresses.csv", **kwargs):
        """Initialise the base pipeline and queue the ALASCCA analysis steps.

        ``referral_db_conf`` and ``addresses`` are stored on the instance;
        every other argument is forwarded unchanged to
        ``ClinseqPipeline.__init__``.
        """
        ClinseqPipeline.__init__(
            self, sampledata, refdata, job_params, outdir, libdir,
            maxcores, scratch, **kwargs)

        self.referral_db_conf = referral_db_conf
        self.addresses = addresses

        # ALASCCA-specific overrides of the default job parameters.
        overrides = (
            ("vardict-min-num-reads", 6),
            ("create_alascca_report", True),
        )
        for param_name, param_value in overrides:
            self.default_job_params[param_name] = param_value

        # The sample data has to be valid for an ALASCCA analysis; after
        # that check, sample capture items without data are removed.
        self.validate_sample_data_for_alascca()
        self.check_sampledata()

        # Alignment and merging of fastq data for all clinseq barcodes.
        self.configure_align_and_merge()

        # Panel analyses and the QC of all panel data.
        self.configure_panel_analyses()
        self.configure_all_panel_qcs()

        # ALASCCA report generation.
        self.configure_alascca_specific_analysis()

        # fastq QCs, and finally MultiQC.
        self.configure_fastq_qcs()
        self.configure_multi_qc()
Exemplo n.º 3
0
 def setUp(self):
     """Create the ``ClinseqPipeline`` fixture used by the tests.

     The sample and reference data are in-line dictionaries; only the
     resulting pipeline is stored on the test case.
     """
     # Settings of the single capture target ("test-regions").
     test_regions = {
         "cnvkit-ref": None,
         "msisites": "intervals/targets/test-regions.msisites.tsv",
         "targets-bed-slopped20":
             "intervals/targets/test-regions-GRCh37.slopped20.bed",
         "targets-interval_list":
             "intervals/targets/test-regions-GRCh37.slopped20.interval_list",
         "targets-interval_list-slopped20":
             "intervals/targets/test-regions-GRCh37.slopped20.interval_list",
     }

     ref_data = {
         "bwaIndex": "bwa/test-genome-masked.fasta",
         "chrsizes": "genome/test-genome-masked.chrsizes.txt",
         "clinvar": "variants/clinvar_20160203.vcf.gz",
         "cosmic": "variants/CosmicCodingMuts_v71.vcf.gz",
         "dbSNP": "variants/dbsnp142-germline-only.vcf.gz",
         "exac": "variants/ExAC.r0.3.1.sites.vep.vcf.gz",
         "icgc":
             "variants/icgc_release_20_simple_somatic_mutation.aggregated.vcf.gz",
         "reference_dict": "genome/test-genome-masked.dict",
         "reference_genome": "genome/test-genome-masked.fasta",
         "swegene_common": "variants/swegen_common.vcf.gz",
         "targets": {"test-regions": test_regions},
         "contest_vcfs": {"test-regions": "test_contest.vcf"},
         "vep_dir": None,
     }

     # Barcodes grouped by sample type, plus the "sdid" entry.
     sample_data = {
         "sdid": "P-NA12877",
         "T": [
             "AL-P-NA12877-T-03098849-TD1-TT1",
             "AL-P-NA12877-T-03098849-TD1-WGS",
         ],
         "N": [
             "AL-P-NA12877-N-03098121-TD1-TT1",
             "AL-P-NA12877-N-03098121-TD1-WGS",
         ],
         "CFDNA": [
             "LB-P-NA12877-CFDNA-03098850-TD1-TT1",
             "LB-P-NA12877-CFDNA-03098850-TD1-TT2",
             "LB-P-NA12877-CFDNA-03098850-TD1-WGS",
         ],
     }

     no_job_params = {}
     self.test_clinseq_pipeline = ClinseqPipeline(
         sample_data, ref_data, no_job_params, "/tmp",
         "/nfs/LIQBIO/INBOX/exomes")
Exemplo n.º 4
0
    def __init__(self,
                 sampledata,
                 refdata,
                 job_params,
                 outdir,
                 libdir,
                 umi,
                 maxcores=1,
                 scratch="/scratch/tmp/tmp",
                 **kwargs):
        """Initialise the base pipeline and register the runnable steps.

        Every argument is forwarded unchanged to ``ClinseqPipeline.__init__``
        and afterwards also stored on the instance under its own name.
        ``step_to_run`` maps a step name to the bound method for that step.
        """
        ClinseqPipeline.__init__(
            self, sampledata, refdata, job_params, outdir, libdir, umi,
            maxcores, scratch, **kwargs)

        # Overrides of the default job parameters (includes min alt frac).
        overrides = (
            ("vardict-min-alt-frac", 0.01),
            ("vardict-min-num-reads", None),
            ("vep-additional-options", " --pick --filter_common "),
        )
        for param_name, param_value in overrides:
            self.default_job_params[param_name] = param_value

        # Initial data: keep every constructor argument on the instance.
        self.sampledata = sampledata
        self.refdata = refdata
        self.job_params = job_params
        self.outdir = outdir
        self.libdir = libdir
        self.umi = umi
        self.maxcores = maxcores
        self.scratch = scratch
        self.kwargs = kwargs
        self.somatic_merge_vcf = defaultdict(dict)

        # Steps that can be run as an aws batch job with a docker image.
        # Key: step name; value: function that adds the job. The docker
        # image noted for each step is given in the comment above it.
        steps = {
            # docker: base
            "qc": self.qc_step,
            # docker: aligner
            "alignment": self.alignment_step,
            # docker: variants
            "cnvkit": self.cnvkit_step,
            "germline_variant": self.germline_variant_step,
            # docker: variants (marked as tested and working)
            "somatic_vardict": self.somatic_variant_vardict_step,
            "somatic_strelka": self.somatic_variant_strelka_step,
            "somatic_mutect2": self.somatic_variant_mutect2_step,
            "somatic_varscan": self.somatic_variant_varscan_step,
            # docker: somaticseq (marked as tested and working)
            "somatic_variant_merge": self.somatic_variant_merge_step,
            # docker: vep (marked as tested)
            "vep": self.vep_step,
            # no docker image noted for this step
            "msi": self.msi_sensor_step,
        }
        self.step_to_run = steps
Exemplo n.º 5
0
    def setUp(self):
        """Create the sample data, reference data, captures and pipeline.

        Stores ``sample_data``, ``ref_data``, one tumor and one normal
        ``UniqueCapture`` and a ``ClinseqPipeline`` on the test case.
        """
        # Settings of the "test-regions" capture target (relative paths).
        test_regions = {
            "cnvkit-ref": {
                "THRUPLEX_PLASMASEQ": {
                    "CFDNA":
                        "intervals/targets/progression.THRUPLEX_PLASMASEQ.CFDNA.cnn",
                    "N":
                        "intervals/targets/progression.THRUPLEX_PLASMASEQ.N.cnn",
                },
            },
            "cnvkit-fix": {
                "THRUPLEX_PLASMASEQ": {
                    "CFDNA":
                        "intervals/targets/progression.THRUPLEX_PLASMASEQ.CFDNA.cnvkit-fix.tsv",
                },
            },
            "msisites": "intervals/targets/test-regions.msisites.tsv",
            "targets-bed-slopped20":
                "intervals/targets/test-regions-GRCh37.slopped20.bed",
            "targets-interval_list":
                "intervals/targets/test-regions-GRCh37.slopped20.interval_list",
            "targets-interval_list-slopped20":
                "intervals/targets/test-regions-GRCh37.slopped20.interval_list",
            "blacklist-bed": None,
            "purecn_targets": "intervals/targets/purecn.bed",
        }

        # Settings of the "progression" capture target (absolute paths).
        progression = {
            "blacklist-bed":
                "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.blacklist.bed",
            "cnvkit-fix": {
                "THRUPLEX_PLASMASEQ": {
                    "CFDNA":
                        "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.THRUPLEX_PLASMASEQ.CFDNA.cnvkit-fix.tsv",
                },
            },
            "cnvkit-ref": {
                "KAPA_HYPERPREP": {
                    "N":
                        "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.KAPA_HYPERPREP.N.cnn",
                    "T":
                        "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.KAPA_HYPERPREP.T.cnn",
                },
                "THRUPLEX_PLASMASEQ": {
                    "CFDNA":
                        "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.THRUPLEX_PLASMASEQ.CFDNA.cnn",
                    "N":
                        "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.THRUPLEX_PLASMASEQ.N.cnn",
                },
            },
            "msings-baseline":
                "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.msings.baseline",
            "msings-bed":
                "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.msings.bed",
            "msings-msi_intervals":
                "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.msings.msi_intervals",
            "msisites":
                "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.slopped20.msisites.tsv",
            "purecn_targets":
                "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.purecn.txt",
            "targets-bed-slopped20":
                "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.slopped20.bed.gz",
            "targets-interval_list":
                "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.interval_list",
            "targets-interval_list-slopped20":
                "/nfs/ALASCCA/autoseq-genome/intervals/targets/progression.slopped20.interval_list",
        }

        # Settings of the "monitor" capture target; several entries are None.
        monitor = {
            "blacklist-bed":
                "/nfs/ALASCCA/autoseq-genome/intervals/targets/monitor.blacklist.bed",
            "msings-baseline": None,
            "msings-bed": None,
            "msings-msi_intervals": None,
            "msisites":
                "/nfs/ALASCCA/autoseq-genome/intervals/targets/monitor.slopped20.msisites.tsv",
            "purecn_targets": None,
            "targets-bed-slopped20":
                "/nfs/ALASCCA/autoseq-genome/intervals/targets/monitor.slopped20.bed.gz",
            "targets-interval_list":
                "/nfs/ALASCCA/autoseq-genome/intervals/targets/monitor.interval_list",
            "targets-interval_list-slopped20":
                "/nfs/ALASCCA/autoseq-genome/intervals/targets/monitor.slopped20.interval_list",
        }

        # Barcodes grouped by sample type; there are no "T" barcodes here.
        self.sample_data = {
            "sdid": "P-00202345",
            "N": ["LB-P-00202345-N-03277090-TP20190201-CP20190204"],
            "T": [],
            "CFDNA": [
                "LB-P-00202345-CFDNA-03277089-TP20190201-CP20190204",
                "LB-P-00202345-CFDNA-03277089-TP20190201-CM20190204",
            ],
        }

        self.ref_data = {
            "bwaIndex": "bwa/test-genome-masked.fasta",
            "chrsizes": "genome/test-genome-masked.chrsizes.txt",
            "clinvar": "variants/clinvar_20160203.vcf.gz",
            "cosmic": "variants/CosmicCodingMuts_v71.vcf.gz",
            "dbSNP": "variants/dbsnp142-germline-only.vcf.gz",
            "exac": "variants/ExAC.r0.3.1.sites.vep.vcf.gz",
            "icgc":
                "variants/icgc_release_20_simple_somatic_mutation.aggregated.vcf.gz",
            "reference_dict": "genome/test-genome-masked.dict",
            "reference_genome": "genome/test-genome-masked.fasta",
            "swegene_common": "variants/swegen_common.vcf.gz",
            "ar_regions": "intervals/ar_regions.bed",
            "ts_regions": "intervals/ts_regions.bed",
            "fusion_regions": "intervals/fusion_regions.bed",
            "1KG":
                "/nfs/ALASCCA/autoseq-genome/variants/1000G_phase1.indels.b37.vcf.gz",
            "Mills_and_1KG_gold_standard":
                "/nfs/ALASCCA/autoseq-genome/variants/Mills_and_1000G_gold_standard.indels.b37.vcf.gz",
            "brca_exchange":
                "/nfs/ALASCCA/autoseq-genome/variants/BrcaExchangeClinvar_15Jan2019_v26_hg19.vcf.gz",
            "oncokb":
                "/nfs/ALASCCA/autoseq-genome/variants/OncoKB_6Mar19_v1.9.txt",
            "targets": {
                "test-regions": test_regions,
                "progression": progression,
                "monitor": monitor,
            },
            "contest_vcfs": {"test-regions": "test_contest.vcf"},
            "vep_dir": "dummy_vep_dir",
        }

        # One CFDNA capture and one N capture for the same "P-00202345".
        self.test_tumor_capture = UniqueCapture(
            "LB", "P-00202345", "CFDNA", "03277089", "TP", "CP")
        self.test_normal_capture = UniqueCapture(
            "LB", "P-00202345", "N", "03277090", "TP", "CP")

        # NOTE(review): the last positional argument is the non-empty string
        # 'FALSE', which is truthy in Python - confirm whether the boolean
        # False was intended (presumably this is the umi flag; verify
        # against ClinseqPipeline.__init__).
        no_job_params = {}
        self.test_clinseq_pipeline = ClinseqPipeline(
            self.sample_data, self.ref_data, no_job_params,
            "/tmp/liqbio-test/", "/media/clinseq/disk4/PROBIO/", 'FALSE')
 def setUp(self):
     """Create the reference data, two captures and the pipeline fixture.

     Stores ``ref_data``, one cancer and one normal ``UniqueCapture`` and
     a ``ClinseqPipeline`` on the test case; the sample data stays local.
     """
     # Settings of the single capture target ("test-regions").
     test_regions = {
         "cnvkit-ref": {
             "THRUPLEX_PLASMASEQ": {
                 "CFDNA":
                     "intervals/targets/progression.THRUPLEX_PLASMASEQ.CFDNA.cnn",
                 "N":
                     "intervals/targets/progression.THRUPLEX_PLASMASEQ.N.cnn",
             },
         },
         "cnvkit-fix": {
             "THRUPLEX_PLASMASEQ": {
                 "CFDNA":
                     "intervals/targets/progression.THRUPLEX_PLASMASEQ.CFDNA.cnvkit-fix.tsv",
             },
         },
         "msisites": "intervals/targets/test-regions.msisites.tsv",
         "targets-bed-slopped20":
             "intervals/targets/test-regions-GRCh37.slopped20.bed",
         "targets-interval_list":
             "intervals/targets/test-regions-GRCh37.slopped20.interval_list",
         "targets-interval_list-slopped20":
             "intervals/targets/test-regions-GRCh37.slopped20.interval_list",
         "blacklist-bed": None,
         "purecn_targets": "intervals/targets/purecn.bed",
     }

     # Barcodes grouped by sample type, plus the "sdid" entry.
     sample_data = {
         "sdid": "P-NA12877",
         "T": [
             "AL-P-NA12877-T-03098849-TD1-TT1",
             "AL-P-NA12877-T-03098849-TD1-WGS",
         ],
         "N": [
             "AL-P-NA12877-N-03098121-TD1-TT1",
             "AL-P-NA12877-N-03098121-TD1-WGS",
         ],
         "CFDNA": [
             "LB-P-NA12877-CFDNA-03098850-TD1-TT1",
             "LB-P-NA12877-CFDNA-03098850-TD1-TT2",
             "LB-P-NA12877-CFDNA-03098850-TD1-WGS",
         ],
     }

     self.ref_data = {
         "bwaIndex": "bwa/test-genome-masked.fasta",
         "chrsizes": "genome/test-genome-masked.chrsizes.txt",
         "clinvar": "variants/clinvar_20160203.vcf.gz",
         "cosmic": "variants/CosmicCodingMuts_v71.vcf.gz",
         "dbSNP": "variants/dbsnp142-germline-only.vcf.gz",
         "exac": "variants/ExAC.r0.3.1.sites.vep.vcf.gz",
         "icgc":
             "variants/icgc_release_20_simple_somatic_mutation.aggregated.vcf.gz",
         "reference_dict": "genome/test-genome-masked.dict",
         "reference_genome": "genome/test-genome-masked.fasta",
         "swegene_common": "variants/swegen_common.vcf.gz",
         "ar_regions": "intervals/ar_regions.bed",
         "ts_regions": "intervals/ts_regions.bed",
         "fusion_regions": "intervals/fusion_regions.bed",
         "targets": {"test-regions": test_regions},
         "contest_vcfs": {"test-regions": "test_contest.vcf"},
         "vep_dir": "dummy_vep_dir",
     }

     # One CFDNA capture and one N capture for the same "P-NA12877".
     self.test_cancer_capture = UniqueCapture(
         "AL", "P-NA12877", "CFDNA", "03098850", "TD", "TT")
     self.test_normal_capture = UniqueCapture(
         "AL", "P-NA12877", "N", "03098121", "TD", "TT")

     no_job_params = {}
     self.test_clinseq_pipeline = ClinseqPipeline(
         sample_data, self.ref_data, no_job_params, "/tmp",
         "/nfs/LIQBIO/INBOX/exomes")