def constructor(self):
    """Declare the inputs, steps and outputs of this workflow.

    Step chain, in declaration order:
      1. mergeSamFiles     - merge the input BAMs (Gatk4MergeSamFiles).
      2. filterSecondary   - samtools view excluding reads with bit 0x100 set.
      3. indexFilterBam    - index the filtered BAM.
      4. fixMateInfo       - GATK FixMateInformation with mate CIGAR enabled.
      5. markDuplicatesUMI - UMI-aware duplicate marking using the "RX" tag.

    Outputs ("out", "umimetrics", "metrics") all come from markDuplicatesUMI.
    """
    # ---- Inputs ----
    self.input("bams", Array(BamBai()))
    self.input("createIndex", Boolean, default=True)
    self.input("maxRecordsInRam", Int, default=5000000)
    self.input("sample_name", str)

    # ---- Steps ----
    merge_tool = Gatk4MergeSamFiles_4_1_2(
        bams=self.bams,
        useThreading=True,
        createIndex=self.createIndex,
        maxRecordsInRam=self.maxRecordsInRam,
        validationStringency="SILENT",
        sampleName=self.sample_name,
    )
    self.step("mergeSamFiles", merge_tool)

    # Secondary reads don't get mate CIGAR info, so filter them out
    # (samtools -F 0x100) before mate information is fixed.
    secondary_filter = SamToolsView_1_9(
        sam=self.mergeSamFiles.out,
        doNotOutputAlignmentsWithBitsSet="0x100",
    )
    self.step("filterSecondary", secondary_filter)

    index_tool = SamToolsIndex_1_9(bam=self.filterSecondary.out)
    self.step("indexFilterBam", index_tool)

    fix_mate_tool = Gatk4FixMateInformation_4_1_2_0(
        inputBam=self.indexFilterBam.out,
        ignoreMissingMates=False,
        addMateCigar=True,
        sortOrder="coordinate",
        outputPrefix=self.sample_name,
    )
    self.step("fixMateInfo", fix_mate_tool)

    mark_dup_tool = Gatk4UmiAwareMarkDuplicatesWithMateCigar_4_1_2_0(
        inputBam=self.fixMateInfo.out,
        umiTagName="RX",
        maxEditDistanceToJoin=1,
        outputPrefix=self.sample_name,
    )
    self.step("markDuplicatesUMI", mark_dup_tool)

    # ---- Outputs ----
    self.output("out", source=self.markDuplicatesUMI.out)
    self.output("umimetrics", source=self.markDuplicatesUMI.umimetrics)
    self.output("metrics", source=self.markDuplicatesUMI.metrics)
def constructor(self):
    """Declare the inputs, steps and outputs of this workflow.

    Two branches share the secondary-alignment-filtered BAM:

    * Performance summary: insert-size metrics, flagstat of the filtered BAM,
      flagstat after additionally excluding reads with bit 0x400 set, flagstat
      and coverage of the reads intersecting ``region_bed``; all five reports
      are handed to the performance summary tool.
    * Gene coverage: bedtools coverage over ``genecoverage_bed`` followed by
      the per-sample gene coverage tool.

    Outputs: "out" (performance summary), "geneFileOut" and "regionFileOut"
    (gene coverage).
    """
    # ---- Inputs ----
    self.input("bam", BamBai)
    self.input("genecoverage_bed", Bed)
    self.input("region_bed", Bed)
    self.input("sample_name", String)
    self.input("genome_file", TextFile)

    # ---- Steps: performance summary ----
    # Remove secondary alignments (bit 0x100) before anything else is measured.
    drop_secondary = SamToolsView_1_9(
        sam=self.bam,
        doNotOutputAlignmentsWithBitsSet="0x100",
    )
    self.step("rmsecondaryalignments", drop_secondary)

    index_tool = SamToolsIndex_1_9(bam=self.rmsecondaryalignments.out)
    self.step("indexbam", index_tool)

    insert_size_tool = Gatk4CollectInsertSizeMetrics_4_1_2(bam=self.indexbam.out)
    self.step("gatk4collectinsertsizemetrics", insert_size_tool)

    overall_flagstat = SamToolsFlagstat_1_9(bam=self.rmsecondaryalignments.out)
    self.step("bamflagstat", overall_flagstat)

    # Second view pass: exclude reads with bit 0x400 set; its flagstat is
    # reported as the "rmdup" statistics below.
    drop_flag_0x400 = SamToolsView_1_9(
        sam=self.rmsecondaryalignments.out,
        doNotOutputAlignmentsWithBitsSet="0x400",
    )
    self.step("samtoolsview", drop_flag_0x400)

    rmdup_flagstat = SamToolsFlagstat_1_9(bam=self.samtoolsview.out)
    self.step("rmdupbamflagstat", rmdup_flagstat)

    intersect_tool = BedToolsIntersectBed_2_29_2(
        inputABam=self.samtoolsview.out,
        inputBBed=self.region_bed,
        genome=self.genome_file,
        sorted=True,
    )
    self.step("bedtoolsintersectbed", intersect_tool)

    target_flagstat = SamToolsFlagstat_1_9(bam=self.bedtoolsintersectbed.out)
    self.step("targetbamflagstat", target_flagstat)

    region_coverage_tool = BedToolsCoverageBed_2_29_2(
        inputABed=self.region_bed,
        inputBBam=self.bedtoolsintersectbed.out,
        genome=self.genome_file,
        sorted=True,
        histogram=True,
    )
    self.step("bedtoolscoveragebed", region_coverage_tool)

    # Give all the report files to the performance summary script.
    summary_tool = PerformanceSummaryLatest(
        flagstat=self.bamflagstat.out,
        collectInsertSizeMetrics=self.gatk4collectinsertsizemetrics.out,
        targetFlagstat=self.targetbamflagstat.out,
        coverage=self.bedtoolscoveragebed.out,
        rmdupFlagstat=self.rmdupbamflagstat.out,
        outputPrefix=self.sample_name,
    )
    self.step("performancesummary", summary_tool)

    # ---- Steps: gene coverage ----
    gene_coverage_bedtools = BedToolsCoverageBed_2_29_2(
        inputABed=self.genecoverage_bed,
        inputBBam=self.samtoolsview.out,
        genome=self.genome_file,
        sorted=True,
        histogram=True,
    )
    self.step("bedtoolscoverage", gene_coverage_bedtools)

    gene_coverage_tool = GeneCoveragePerSampleLatest(
        sampleName=self.sample_name,
        bedtoolsOutputPath=self.bedtoolscoverage.out,
    )
    self.step("genecoverage", gene_coverage_tool)

    # ---- Outputs ----
    self.output("out", source=self.performancesummary.out)
    self.output("geneFileOut", source=self.genecoverage.geneFileOut)
    self.output("regionFileOut", source=self.genecoverage.regionFileOut)
def constructor(self):
    """Declare the inputs, steps and outputs of the Pisces 5.3.0.0 workflow.

    Step chain, in declaration order:
      primary_only -> index_primary_only_bam -> gemini_read_preprocessing
      -> pisces -> vqr -> fixSource (awk) -> sort -> normalise -> uncompress
      -> filterpass

    Outputs:
      variants - sorted VCF (``sort.out``)
      out      - VCF after the vcftools ``filterpass`` step
      out_bam  - BAM produced by the Gemini preprocessing step
    """
    ## INPUTS
    self.input("bam", BamBai())
    self.input("sample_name", String())
    self.input("reference_folder", Directory())
    self.input("intervals", Bed())
    self.input("gemini_chromosomes", String(optional=True))
    self.input("ploidy", String(optional=True), default="somatic")
    self.input("min_bq", Int(optional=True))
    self.input("min_mq", Int(optional=True))
    # NOTE(review): min_dp is declared but not passed to any step in this
    # constructor - confirm whether it should be wired into the "pisces" step.
    self.input("min_dp", Int(optional=True))
    self.input("min_vaf", Float(optional=True))
    self.input("vc_min_vq", Int(optional=True))
    self.input("noise_level", Int(optional=True))
    self.input("vqr_min_vq", Int(optional=True))
    self.input("pisces_awk_script", File())

    ## STEPS
    # Keep primary alignments only: exclude reads with bit 0x100 set.
    self.step(
        "primary_only",
        SamToolsView_1_9(sam=self.bam, doNotOutputAlignmentsWithBitsSet="0x100"),
    )
    self.step(
        "index_primary_only_bam",
        SamToolsIndex_1_9(bam=self.primary_only.out),
    )
    self.step(
        "gemini_read_preprocessing",
        PiscesGemini_5_3_0_0(
            # Reference the index step's "out" explicitly (rather than the
            # step node itself), matching how every other step is connected.
            inputBam=self.index_primary_only_bam.out,
            referenceFolder=self.reference_folder,
            samtoolsExecutable="samtools",
            chromosomeFilter=self.gemini_chromosomes,
            outputDir=".",
            piscesVersion="5.3.0.0",
        ),
    )
    self.step(
        "pisces",
        PiscesVariantCaller_5_3_0_0(
            inputBam=self.gemini_read_preprocessing.bam,
            referenceFolder=self.reference_folder,
            outputDir=".",
            intervalBedFile=self.intervals,
            ploidy=self.ploidy,
            minimumBaseQuality=self.min_bq,
            minimumMappingQuality=self.min_mq,
            # min_vaf drives both the calling and the filtering threshold.
            minimumVariantFrequency=self.min_vaf,
            noiseLevelForQModel=self.noise_level,
            minimumVariantFrequencyFilter=self.min_vaf,
            enableSingleStrandFilter="True",
            outputSBFiles="True",
            callMNVs="False",
            maxMNVLength=1,
            RMxNFilter="5,9,0.35",
            variantQualityFilter=self.vc_min_vq,
            crushVCF="False",
            gVCF="False",
            piscesVersion="5.3.0.0",
        ),
    )
    self.step(
        "vqr",
        PiscesVariantQualityRecalibration_5_3_0_0(
            inputVcf=self.pisces.vcf,
            outputDir=".",
            baselineNoise=self.noise_level,
            minVariantQuality=self.vqr_min_vq,
            piscesVersion="5.3.0.0",
        ),
    )

    # VQR output is listed first, the raw Pisces VCF second.
    # NOTE(review): presumably FirstOperator falls back to the Pisces VCF when
    # VQR yields nothing - confirm against the Janis operator docs.
    pisces_vcf = FirstOperator([self.vqr.vcf, self.pisces.vcf])

    self.step(
        "fixSource",
        Awk(script=self.pisces_awk_script, input_files=pisces_vcf),
    )
    self.step("sort", BcfToolsSort_1_9(vcf=self.fixSource.out))
    self.step("normalise", BcfToolsNorm_1_9(vcf=self.sort.out))
    self.step("uncompress", UncompressArchive(file=self.normalise.out))
    self.step(
        "filterpass",
        VcfToolsvcftools_0_1_16(
            vcf=self.uncompress.out.as_type(Vcf),
            removeFileteredAll=True,
            recode=True,
            recodeINFOAll=True,
        ),
    )

    ## OUTPUTS
    self.output("variants", source=self.sort.out)
    self.output("out", source=self.filterpass.out)
    self.output("out_bam", source=self.gemini_read_preprocessing.bam)
def constructor(self):
    """Declare the inputs, steps and outputs of the Pisces 5.2.10.49 workflow.

    Step chain, in declaration order:
      primary_only -> index_primary_only_bam -> hygea_realignment
      -> stitcher_read_joining -> stitcher_sort -> stitcher_index -> pisces
      -> vqr -> fixSource (awk) -> sort -> normalise -> uncompress -> filterpass

    Outputs:
      variants - sorted VCF (``sort.out``)
      out      - VCF after the vcftools ``filterpass`` step
      out_bam  - sorted and indexed Stitcher BAM
      hygea_options / stitcher_options / pisces_options / vqr_options -
          the ``used_options`` output of the respective Pisces-suite step
    """
    ## INPUTS
    self.input("bam", BamBai())
    self.input("sample_name", String())
    self.input("reference_folder", Directory())
    self.input("intervals", Bed())
    self.input("ploidy", String(optional=True), default="somatic")
    self.input("min_bq", Int(optional=True))
    self.input("min_mq", Int(optional=True))
    self.input("min_dp", Int(optional=True), default=100)
    self.input("min_vaf", Float(optional=True))
    self.input("vc_min_vq", Int(optional=True))
    self.input("noise_level", Int(optional=True))
    self.input("vqr_min_vq", Int(optional=True))
    self.input("pisces_awk_script", File())

    ## STEPS
    # Keep primary alignments only: exclude reads with bit 0x100 set.
    self.step(
        "primary_only",
        SamToolsView_1_9(sam=self.bam, doNotOutputAlignmentsWithBitsSet="0x100"),
    )
    self.step(
        "index_primary_only_bam",
        SamToolsIndex_1_9(bam=self.primary_only.out),
    )
    self.step(
        "hygea_realignment",
        PiscesHygeaRealigner_5_2_10_49(
            # Reference the index step's "out" explicitly (rather than the
            # step node itself), matching how stitcher_index is consumed below.
            inputBam=self.index_primary_only_bam.out,
            outputDir=".",
            referenceFolder=self.reference_folder,
            skipAndRemoveDuplicates="true",
            piscesVersion="5.2.10.49",
        ),
    )
    self.step(
        "stitcher_read_joining",
        PiscesStitcher_5_2_10_49(
            inputBam=self.hygea_realignment.out,
            outputDir=".",
            sampleName=self.sample_name,
            piscesVersion="5.2.10.49",
        ),
    )
    # Sort and index the Stitcher output before it is given to the caller.
    self.step(
        "stitcher_sort",
        SamToolsSort_1_9(
            bam=self.stitcher_read_joining.out,
            outputFilename=self.sample_name + ".bam",
        ),
    )
    self.step("stitcher_index", SamToolsIndex_1_9(bam=self.stitcher_sort.out))
    self.step(
        "pisces",
        PiscesVariantCaller_5_2_10_49(
            inputBam=self.stitcher_index.out,
            referenceFolder=self.reference_folder,
            outputDir=".",
            intervalBedFile=self.intervals,
            ploidy=self.ploidy,
            minimumBaseQuality=self.min_bq,
            minimumMappingQuality=self.min_mq,
            # min_vaf drives both the calling and the filtering threshold.
            minimumVariantFrequency=self.min_vaf,
            minimumCoverage=self.min_dp,
            noiseLevelForQModel=self.noise_level,
            minimumVariantFrequencyFilter=self.min_vaf,
            enableSingleStrandFilter="true",
            callMNVs="false",
            maxMNVLength=1,
            RMxNFilter="5,9,0.35",
            variantQualityFilter=self.vc_min_vq,
            crushVCF="false",
            gVCF="false",
            piscesVersion="5.2.10.49",
        ),
    )
    self.step(
        "vqr",
        PiscesVariantQualityRecalibration_5_2_10_49(
            inputVcf=self.pisces.vcf,
            outputDir=".",
            baselineNoise=self.noise_level,
            minVariantQuality=self.vqr_min_vq,
            piscesVersion="5.2.10.49",
        ),
    )

    # VQR output is listed first, the raw Pisces VCF second.
    # NOTE(review): presumably FirstOperator falls back to the Pisces VCF when
    # VQR yields nothing - confirm against the Janis operator docs.
    pisces_vcf = FirstOperator([self.vqr.vcf, self.pisces.vcf])

    self.step(
        "fixSource",
        Awk(script=self.pisces_awk_script, input_files=pisces_vcf),
    )
    self.step("sort", BcfToolsSort_1_9(vcf=self.fixSource.out))
    self.step("normalise", BcfToolsNorm_1_9(vcf=self.sort.out))
    self.step("uncompress", UncompressArchive(file=self.normalise.out))
    self.step(
        "filterpass",
        VcfToolsvcftools_0_1_16(
            vcf=self.uncompress.out.as_type(Vcf),
            removeFileteredAll=True,
            recode=True,
            recodeINFOAll=True,
        ),
    )

    ## OUTPUTS
    self.output("variants", source=self.sort.out)
    self.output("out", source=self.filterpass.out)
    self.output("out_bam", source=self.stitcher_index.out)

    ## OPTIONAL OUTPUTS
    self.output("hygea_options", source=self.hygea_realignment.used_options)
    self.output("stitcher_options", source=self.stitcher_read_joining.used_options)
    self.output("pisces_options", source=self.pisces.used_options)
    self.output("vqr_options", source=self.vqr.used_options)
Bam, BamBai, VcfIdx, Fasta, FastaWithIndexes, BedGz, Bed, BedTabix, ) from janis_bioinformatics.tools.samtools import SamToolsIndex_1_9 from janis_bioinformatics.tools.htslib import Tabix_1_9, BGZip_1_9 from janis_bioinformatics.tools.igvtools import IgvIndexFeature_2_5_3 transformations = [ JanisTransformation(Bam, BamBai, SamToolsIndex_1_9(), relevant_tool_input="bam"), JanisTransformation(Vcf, VcfIdx, IgvIndexFeature_2_5_3()), JanisTransformation( Vcf, CompressedVcf, BGZip_1_9(), relevant_tool_input="file", relevant_tool_output="out", ), JanisTransformation( CompressedVcf, VcfTabix, Tabix_1_9(), relevant_tool_input="inp", relevant_tool_output="out", ),