def constructor(self):
        """Declare the inputs, steps and outputs of this workflow.

        Steps are wired in this order: ``mergeSamFiles`` ->
        ``filterSecondary`` -> ``indexFilterBam`` -> ``fixMateInfo`` ->
        ``markDuplicatesUMI``. The ``out``, ``umimetrics`` and ``metrics``
        outputs of the last step become the workflow outputs.
        """
        # --- Inputs ---
        self.input("bams", Array(BamBai()))
        self.input("createIndex", Boolean, default=True)
        self.input("maxRecordsInRam", Int, default=5000000)
        self.input("sample_name", str)

        # --- Steps ---
        # Combine every BAM in `bams` through the GATK4 MergeSamFiles tool.
        merge_tool = Gatk4MergeSamFiles_4_1_2(
            bams=self.bams,
            useThreading=True,
            createIndex=self.createIndex,
            maxRecordsInRam=self.maxRecordsInRam,
            validationStringency="SILENT",
            sampleName=self.sample_name,
        )
        self.step("mergeSamFiles", merge_tool)

        # Filter out alignments with bit 0x100 set (samtools -F 0x100):
        # secondary reads don't get mate cigar info, so they are dropped
        # before the mate information is fixed.
        secondary_filter = SamToolsView_1_9(
            sam=self.mergeSamFiles.out,
            doNotOutputAlignmentsWithBitsSet="0x100",
        )
        self.step("filterSecondary", secondary_filter)

        # Index the filtered BAM; the indexed result feeds fixMateInfo.
        index_tool = SamToolsIndex_1_9(bam=self.filterSecondary.out)
        self.step("indexFilterBam", index_tool)

        fix_mate_tool = Gatk4FixMateInformation_4_1_2_0(
            inputBam=self.indexFilterBam.out,
            ignoreMissingMates=False,
            addMateCigar=True,
            sortOrder="coordinate",
            outputPrefix=self.sample_name,
        )
        self.step("fixMateInfo", fix_mate_tool)

        # UMI-aware duplicate marking; UMIs are read from the "RX" tag.
        umi_dedup_tool = Gatk4UmiAwareMarkDuplicatesWithMateCigar_4_1_2_0(
            inputBam=self.fixMateInfo.out,
            umiTagName="RX",
            maxEditDistanceToJoin=1,
            outputPrefix=self.sample_name,
        )
        self.step("markDuplicatesUMI", umi_dedup_tool)

        # --- Outputs ---
        self.output("out", source=self.markDuplicatesUMI.out)
        self.output("umimetrics", source=self.markDuplicatesUMI.umimetrics)
        self.output("metrics", source=self.markDuplicatesUMI.metrics)
# Example #2
# 0
    def constructor(self):
        """Declare the inputs, steps and outputs of this workflow.

        Two branches share the filtered BAM produced by the first steps:
        one feeds flagstat, insert-size and region-coverage results into the
        ``performancesummary`` step, the other computes coverage over
        ``genecoverage_bed`` for the ``genecoverage`` step.
        """
        # --- Inputs ---
        self.input("bam", BamBai)
        self.input("genecoverage_bed", Bed)
        self.input("region_bed", Bed)
        self.input("sample_name", String)
        self.input("genome_file", TextFile)

        # --- Steps ---
        # Remove secondary alignments (flag bit 0x100) from the input BAM.
        drop_secondary = SamToolsView_1_9(
            sam=self.bam,
            doNotOutputAlignmentsWithBitsSet="0x100",
        )
        self.step("rmsecondaryalignments", drop_secondary)

        index_tool = SamToolsIndex_1_9(bam=self.rmsecondaryalignments.out)
        self.step("indexbam", index_tool)

        insert_size_tool = Gatk4CollectInsertSizeMetrics_4_1_2(
            bam=self.indexbam.out
        )
        self.step("gatk4collectinsertsizemetrics", insert_size_tool)

        # Flagstat on the secondary-filtered BAM.
        all_reads_flagstat = SamToolsFlagstat_1_9(
            bam=self.rmsecondaryalignments.out
        )
        self.step("bamflagstat", all_reads_flagstat)

        # Second filter pass: additionally drop alignments with bit 0x400.
        drop_flag_0x400 = SamToolsView_1_9(
            sam=self.rmsecondaryalignments.out,
            doNotOutputAlignmentsWithBitsSet="0x400",
        )
        self.step("samtoolsview", drop_flag_0x400)

        rmdup_flagstat = SamToolsFlagstat_1_9(bam=self.samtoolsview.out)
        self.step("rmdupbamflagstat", rmdup_flagstat)

        # Intersect the twice-filtered BAM with region_bed.
        intersect_tool = BedToolsIntersectBed_2_29_2(
            inputABam=self.samtoolsview.out,
            inputBBed=self.region_bed,
            genome=self.genome_file,
            sorted=True,
        )
        self.step("bedtoolsintersectbed", intersect_tool)

        target_flagstat = SamToolsFlagstat_1_9(
            bam=self.bedtoolsintersectbed.out
        )
        self.step("targetbamflagstat", target_flagstat)

        region_coverage_tool = BedToolsCoverageBed_2_29_2(
            inputABed=self.region_bed,
            inputBBam=self.bedtoolsintersectbed.out,
            genome=self.genome_file,
            sorted=True,
            histogram=True,
        )
        self.step("bedtoolscoveragebed", region_coverage_tool)

        # Give all the output files to the performance summary script.
        summary_tool = PerformanceSummaryLatest(
            flagstat=self.bamflagstat.out,
            collectInsertSizeMetrics=self.gatk4collectinsertsizemetrics.out,
            targetFlagstat=self.targetbamflagstat.out,
            coverage=self.bedtoolscoveragebed.out,
            rmdupFlagstat=self.rmdupbamflagstat.out,
            outputPrefix=self.sample_name,
        )
        self.step("performancesummary", summary_tool)

        # --- Steps: gene coverage ---
        # Coverage over genecoverage_bed; note this reads samtoolsview.out,
        # not the region-intersected BAM.
        gene_coverage_tool = BedToolsCoverageBed_2_29_2(
            inputABed=self.genecoverage_bed,
            inputBBam=self.samtoolsview.out,
            genome=self.genome_file,
            sorted=True,
            histogram=True,
        )
        self.step("bedtoolscoverage", gene_coverage_tool)

        per_sample_tool = GeneCoveragePerSampleLatest(
            sampleName=self.sample_name,
            bedtoolsOutputPath=self.bedtoolscoverage.out,
        )
        self.step("genecoverage", per_sample_tool)

        # --- Outputs ---
        self.output("out", source=self.performancesummary.out)
        self.output("geneFileOut", source=self.genecoverage.geneFileOut)
        self.output("regionFileOut", source=self.genecoverage.regionFileOut)
    def constructor(self):
        """Declare the inputs, steps and outputs of this workflow.

        Step order: ``primary_only`` -> ``index_primary_only_bam`` ->
        ``gemini_read_preprocessing`` -> ``pisces`` -> ``vqr`` ->
        ``fixSource`` -> ``sort`` -> ``normalise`` -> ``uncompress`` ->
        ``filterpass``.

        Outputs:
            variants: output of the ``sort`` step.
            out: output of the ``filterpass`` step.
            out_bam: BAM produced by ``gemini_read_preprocessing``.
        """
        ##INPUTS
        self.input("bam", BamBai())
        # NOTE(review): sample_name is declared but not referenced by any
        # step in this method - confirm whether that is intentional.
        self.input("sample_name", String())

        self.input("reference_folder", Directory())
        self.input("intervals", Bed())

        self.input("gemini_chromosomes", String(optional=True))

        self.input("ploidy", String(optional=True), default="somatic")
        self.input("min_bq", Int(optional=True))
        self.input("min_mq", Int(optional=True))
        # NOTE(review): min_dp is declared but never passed to a step in
        # this method - confirm whether the variant caller should receive it.
        self.input("min_dp", Int(optional=True))
        self.input("min_vaf", Float(optional=True))
        self.input("vc_min_vq", Int(optional=True))
        self.input("noise_level", Int(optional=True))
        self.input("vqr_min_vq", Int(optional=True))
        self.input("pisces_awk_script", File())

        ## STEPS
        # Drop alignments with flag bit 0x100 set, then index the result.
        self.step(
            "primary_only",
            SamToolsView_1_9(sam=self.bam,
                             doNotOutputAlignmentsWithBitsSet="0x100"),
        )

        self.step(
            "index_primary_only_bam",
            SamToolsIndex_1_9(bam=self.primary_only.out),
        )

        self.step(
            "gemini_read_preprocessing",
            PiscesGemini_5_3_0_0(
                # Select the index step's `out` explicitly instead of
                # passing the bare step node, matching how every other
                # step connection in this method is written.
                inputBam=self.index_primary_only_bam.out,
                referenceFolder=self.reference_folder,
                samtoolsExecutable="samtools",
                chromosomeFilter=self.gemini_chromosomes,
                outputDir=".",
                piscesVersion="5.3.0.0",
            ),
        )

        self.step(
            "pisces",
            PiscesVariantCaller_5_3_0_0(
                inputBam=self.gemini_read_preprocessing.bam,
                referenceFolder=self.reference_folder,
                outputDir=".",
                intervalBedFile=self.intervals,
                ploidy=self.ploidy,
                minimumBaseQuality=self.min_bq,
                minimumMappingQuality=self.min_mq,
                # min_vaf drives both the calling threshold and the filter.
                minimumVariantFrequency=self.min_vaf,
                noiseLevelForQModel=self.noise_level,
                minimumVariantFrequencyFilter=self.min_vaf,
                enableSingleStrandFilter="True",
                outputSBFiles="True",
                callMNVs="False",
                maxMNVLength=1,
                RMxNFilter="5,9,0.35",
                variantQualityFilter=self.vc_min_vq,
                crushVCF="False",
                gVCF="False",
                piscesVersion="5.3.0.0",
            ),
        )

        self.step(
            "vqr",
            PiscesVariantQualityRecalibration_5_3_0_0(
                inputVcf=self.pisces.vcf,
                outputDir=".",
                baselineNoise=self.noise_level,
                minVariantQuality=self.vqr_min_vq,
                piscesVersion="5.3.0.0",
            ),
        )

        # Prefer the recalibrated VCF; presumably FirstOperator falls back to
        # the raw Pisces VCF when vqr produced none - confirm against the
        # Janis operator documentation.
        piscesVcf = FirstOperator([self.vqr.vcf, self.pisces.vcf])

        # Run the user-supplied awk script over the selected VCF.
        self.step(
            "fixSource",
            Awk(script=self.pisces_awk_script, input_files=piscesVcf),
        )

        self.step("sort", BcfToolsSort_1_9(vcf=self.fixSource.out))

        self.step("normalise", BcfToolsNorm_1_9(vcf=self.sort.out))

        self.step("uncompress", UncompressArchive(file=self.normalise.out))

        self.step(
            "filterpass",
            VcfToolsvcftools_0_1_16(
                # The uncompressed file is re-typed as Vcf for vcftools.
                vcf=self.uncompress.out.as_type(Vcf),
                # Spelling follows the keyword as used by the tool wrapper.
                removeFileteredAll=True,
                recode=True,
                recodeINFOAll=True,
            ),
        )

        ## OUTPUTS
        self.output("variants", source=self.sort.out)

        self.output("out", source=self.filterpass.out)

        self.output("out_bam", source=self.gemini_read_preprocessing.bam)
# Example #4
# 0
    def constructor(self):
        """Declare the inputs, steps and outputs of this workflow.

        Step order: ``primary_only`` -> ``index_primary_only_bam`` ->
        ``hygea_realignment`` -> ``stitcher_read_joining`` ->
        ``stitcher_sort`` -> ``stitcher_index`` -> ``pisces`` -> ``vqr`` ->
        ``fixSource`` -> ``sort`` -> ``normalise`` -> ``uncompress`` ->
        ``filterpass``.

        Outputs:
            variants: output of the ``sort`` step.
            out: output of the ``filterpass`` step.
            out_bam: output of the ``stitcher_index`` step.
            hygea_options, stitcher_options, pisces_options, vqr_options:
                the ``used_options`` output of the corresponding step.
        """
        ## INPUTS
        self.input("bam", BamBai())
        self.input("sample_name", String())
        self.input("reference_folder", Directory())
        self.input("intervals", Bed())
        self.input("ploidy", String(optional=True), default="somatic")
        self.input("min_bq", Int(optional=True))
        self.input("min_mq", Int(optional=True))
        self.input("min_dp", Int(optional=True), default=100)
        self.input("min_vaf", Float(optional=True))
        self.input("vc_min_vq", Int(optional=True))
        self.input("noise_level", Int(optional=True))
        self.input("vqr_min_vq", Int(optional=True))
        self.input("pisces_awk_script", File())

        ## STEPS
        # Drop alignments with flag bit 0x100 set, then index the result.
        self.step(
            "primary_only",
            SamToolsView_1_9(sam=self.bam,
                             doNotOutputAlignmentsWithBitsSet="0x100"),
        )

        self.step(
            "index_primary_only_bam",
            SamToolsIndex_1_9(bam=self.primary_only.out),
        )

        self.step(
            "hygea_realignment",
            PiscesHygeaRealigner_5_2_10_49(
                # Select the index step's `out` explicitly instead of
                # passing the bare step node, matching how every other
                # step connection in this method is written.
                inputBam=self.index_primary_only_bam.out,
                outputDir=".",
                referenceFolder=self.reference_folder,
                skipAndRemoveDuplicates="true",
                piscesVersion="5.2.10.49",
            ),
        )

        self.step(
            "stitcher_read_joining",
            PiscesStitcher_5_2_10_49(
                inputBam=self.hygea_realignment.out,
                outputDir=".",
                sampleName=self.sample_name,
                piscesVersion="5.2.10.49",
            ),
        )

        # Sort the stitched BAM into "<sample_name>.bam" and index it before
        # it is handed to the variant caller.
        self.step(
            "stitcher_sort",
            SamToolsSort_1_9(
                bam=self.stitcher_read_joining.out,
                outputFilename=self.sample_name + ".bam",
            ),
        )

        self.step("stitcher_index",
                  SamToolsIndex_1_9(bam=self.stitcher_sort.out))

        self.step(
            "pisces",
            PiscesVariantCaller_5_2_10_49(
                inputBam=self.stitcher_index.out,
                referenceFolder=self.reference_folder,
                outputDir=".",
                intervalBedFile=self.intervals,
                ploidy=self.ploidy,
                minimumBaseQuality=self.min_bq,
                minimumMappingQuality=self.min_mq,
                # min_vaf drives both the calling threshold and the filter.
                minimumVariantFrequency=self.min_vaf,
                minimumCoverage=self.min_dp,
                noiseLevelForQModel=self.noise_level,
                minimumVariantFrequencyFilter=self.min_vaf,
                enableSingleStrandFilter="true",
                callMNVs="false",
                maxMNVLength=1,
                RMxNFilter="5,9,0.35",
                variantQualityFilter=self.vc_min_vq,
                crushVCF="false",
                gVCF="false",
                piscesVersion="5.2.10.49",
            ),
        )

        self.step(
            "vqr",
            PiscesVariantQualityRecalibration_5_2_10_49(
                inputVcf=self.pisces.vcf,
                outputDir=".",
                baselineNoise=self.noise_level,
                minVariantQuality=self.vqr_min_vq,
                piscesVersion="5.2.10.49",
            ),
        )

        # Prefer the recalibrated VCF; presumably FirstOperator falls back to
        # the raw Pisces VCF when vqr produced none - confirm against the
        # Janis operator documentation.
        piscesVcf = FirstOperator([self.vqr.vcf, self.pisces.vcf])

        # Run the user-supplied awk script over the selected VCF.
        self.step(
            "fixSource",
            Awk(script=self.pisces_awk_script, input_files=piscesVcf),
        )

        self.step("sort", BcfToolsSort_1_9(vcf=self.fixSource.out))

        self.step("normalise", BcfToolsNorm_1_9(vcf=self.sort.out))

        self.step("uncompress", UncompressArchive(file=self.normalise.out))

        self.step(
            "filterpass",
            VcfToolsvcftools_0_1_16(
                # The uncompressed file is re-typed as Vcf for vcftools.
                vcf=self.uncompress.out.as_type(Vcf),
                # Spelling follows the keyword as used by the tool wrapper.
                removeFileteredAll=True,
                recode=True,
                recodeINFOAll=True,
            ),
        )

        ## OUTPUTs
        self.output("variants", source=self.sort.out)

        self.output("out", source=self.filterpass.out)

        self.output("out_bam", source=self.stitcher_index.out)
        ## OPTIONAL OUTPUTs
        self.output("hygea_options",
                    source=self.hygea_realignment.used_options)
        self.output("stitcher_options",
                    source=self.stitcher_read_joining.used_options)
        self.output("pisces_options", source=self.pisces.used_options)
        self.output("vqr_options", source=self.vqr.used_options)
# Example #5
# 0
    Bam,
    BamBai,
    VcfIdx,
    Fasta,
    FastaWithIndexes,
    BedGz,
    Bed,
    BedTabix,
)

from janis_bioinformatics.tools.samtools import SamToolsIndex_1_9
from janis_bioinformatics.tools.htslib import Tabix_1_9, BGZip_1_9
from janis_bioinformatics.tools.igvtools import IgvIndexFeature_2_5_3

transformations = [
    JanisTransformation(Bam, BamBai, SamToolsIndex_1_9(), relevant_tool_input="bam"),
    JanisTransformation(Vcf, VcfIdx, IgvIndexFeature_2_5_3()),
    JanisTransformation(
        Vcf,
        CompressedVcf,
        BGZip_1_9(),
        relevant_tool_input="file",
        relevant_tool_output="out",
    ),
    JanisTransformation(
        CompressedVcf,
        VcfTabix,
        Tabix_1_9(),
        relevant_tool_input="inp",
        relevant_tool_output="out",
    ),