def validateAndSanitizeOptions(self,options) :
        """
        Validate the tumor/normal alignment inputs and the noise vcf list.

        Runs the shared base-class validation, then requires at least one tumor
        alignment file, allows at most one file per sample (normal and tumor),
        and passes the accepted lists to a BamSetChecker together with the
        htsfile binary and reference fasta options.

        Raises OptParseException if no tumor file is specified, or if more than
        one file is specified for either sample.
        """
        StrelkaSharedWorkflowOptionsBase.validateAndSanitizeOptions(self,options)

        checkFixTabixListOption(options.noiseVcfList,"noise vcf")

        for (bamAttr, groomLabel) in (("normalBamList","normal sample"),
                                      ("tumorBamList","tumor sample")) :
            groomBamList(getattr(options,bamAttr), groomLabel)

        # the tumor sample is mandatory; the normal sample may be left out
        tumorBams = options.tumorBamList
        if (tumorBams is None) or (len(tumorBams) == 0) :
            raise OptParseException("No %s sample BAM/CRAM files specified" % ("tumor"))

        checker = BamSetChecker()
        for sampleLabel in ("normal","tumor") :
            sampleBams = getattr(options, sampleLabel + "BamList")
            if sampleBams is None : continue
            if len(sampleBams) > 1 :
                raise OptParseException("More than one %s sample BAM/CRAM files specified" % (sampleLabel))
            checker.appendBams(sampleBams,sampleLabel)

        checker.check(options.htsfileBin, options.referenceFasta)
    def addWorkflowGroupOptions(self, group):
        """
        Add the proband/parent/sibling alignment options and the
        --outputCallableRegions flag to `group`.

        Each alignment option is registered with action="append". The shared
        base-class workflow options are appended afterwards.
        """
        # (option string, dest, help text) for the three alignment-file options,
        # which otherwise share the same settings
        alignmentOptions = (
            ("--probandAlignment", "probandBamList",
             "Proband BAM or CRAM file. [required] (no default)"),
            ("--parentAlignment", "parentBamList",
             "BAM or CRAM file for a parent sample. (no default, submit argument one time for each parent)"),
            ("--siblingAlignment", "siblingBamList",
             "BAM or CRAM file for a sibling sample. (no default, submit argument one time for each sibling)"),
        )
        for (optionString, destName, helpText) in alignmentOptions:
            group.add_option(optionString, type="string", dest=destName,
                             metavar="FILE", action="append", help=helpText)

        group.add_option("--outputCallableRegions",
                         dest="isOutputCallableRegions",
                         action="store_true",
                         help="Output a bed file describing de-novo callable regions of the genome")

        StrelkaSharedWorkflowOptionsBase.addWorkflowGroupOptions(self, group)
    def addExtendedGroupOptions(self, group):
        """
        Add the somatic scoring-model file options and the repeatable
        --noiseVcf option to `group`.

        The shared base-class extended options are appended afterwards.
        """
        # for both scoring-model options the option string is "--" + dest
        scoringModelOptions = (
            ("somaticSnvScoringModelFile",
             "Provide a custom EVS model file for somatic SNVs (default: %default)"),
            ("somaticIndelScoringModelFile",
             "Provide a custom EVS model file for somatic Indels (default: %default)"),
        )
        for (destName, helpText) in scoringModelOptions:
            group.add_option("--" + destName, type="string", dest=destName,
                             metavar="FILE", help=helpText)

        group.add_option("--noiseVcf", type="string", dest="noiseVcfList",
                         metavar="FILE", action="append",
                         help="Noise vcf file (submit argument multiple times for more than one file)")

        StrelkaSharedWorkflowOptionsBase.addExtendedGroupOptions(self, group)
    def validateAndSanitizeExistingOptions(self, options):
        """
        Run the shared base-class checks, then pass the proband, parent and
        sibling alignment lists (in that order) through groomBamList.
        """
        StrelkaSharedWorkflowOptionsBase.validateAndSanitizeExistingOptions(
            self, options)

        # (options attribute, label handed to groomBamList)
        bamListAttrs = (
            ("probandBamList", "proband sample"),
            ("parentBamList", "parent sample"),
            ("siblingBamList", "sibling sample"),
        )
        for (attrName, sampleLabel) in bamListAttrs:
            groomBamList(getattr(options, attrName), sampleLabel)
    def validateAndSanitizeOptions(self, options):
        """
        Validate the excluded-region, known-variant and input alignment options.

        Runs the shared base-class validation, replaces each supplied
        excluded-regions file and the known-variants file with its absolute
        path after the tabix-index check, and requires exactly one input
        alignment file, which is handed to a BamSetChecker.

        Raises OptParseException when no input alignment file, or more than
        one, is specified.
        """
        StrelkaSharedWorkflowOptionsBase.validateAndSanitizeOptions(
            self, options)

        def checkFixTabixIndexedFileOption(tabixFile, label):
            checkOptionalTabixIndexedFile(tabixFile, label)
            if tabixFile is None: return None
            return os.path.abspath(tabixFile)

        if options.excludedRegions is not None:
            # entries are rewritten in place so the same list object is kept
            for (excludeIndex, excludeFile) in enumerate(options.excludedRegions):
                options.excludedRegions[excludeIndex] = \
                    checkFixTabixIndexedFileOption(excludeFile, "excluded-regions bed")

        if options.knownVariants is not None:
            options.knownVariants = \
                checkFixTabixIndexedFileOption(options.knownVariants, "known-variants vcf")

        groomBamList(options.bamList, "input")

        bamSetChecker = BamSetChecker()

        def singleAppender(bamList, label):
            # A list that was never supplied may be None; without this guard
            # len() below fails with a TypeError instead of a usage error.
            if (bamList is None) or (len(bamList) == 0):
                raise OptParseException(
                    "No %s sample BAM/CRAM files specified" % (label))
            if len(bamList) > 1:
                raise OptParseException(
                    "More than one %s sample BAM/CRAM files specified" %
                    (label))
            bamSetChecker.appendBams(bamList, label)

        singleAppender(options.bamList, "Input")
        bamSetChecker.check(options.htsfileBin, options.referenceFasta)
    def addWorkflowGroupOptions(self,group) :
        """
        Add the normal/tumor alignment file options and the
        --outputCallableRegions flag to `group`.

        The tumor option is registered under both the "--tumorBam" and
        "--tumourBam" spellings. The shared base-class workflow options are
        appended afterwards.
        """
        group.add_option(
            "--normalBam",
            type="string",
            dest="normalBamList",
            metavar="FILE",
            action="append",
            help="Normal sample BAM or CRAM file. (no default)")
        group.add_option(
            "--tumorBam", "--tumourBam",
            type="string",
            dest="tumorBamList",
            metavar="FILE",
            action="append",
            help="Tumor sample BAM or CRAM file. [required] (no default)")
        group.add_option(
            "--outputCallableRegions",
            dest="isOutputCallableRegions",
            action="store_true",
            help="Output a bed file describing somatic callable regions of the genome")

        StrelkaSharedWorkflowOptionsBase.addWorkflowGroupOptions(self,group)
Пример #7
0
    def addExtendedGroupOptions(self,group) :
        """
        Add the two sequence-error-estimation switches to `group`, followed by
        the shared base-class extended options.
        """
        # "dest" is technically optional, but (undocumented library behavior)
        # leaving it out here makes the hidden option always print
        disableEstimationHelp = "Disable estimation of sequence error rates from data."
        useAllDataHelp = ("Instead of sampling a subset of data for error estimation, use all data from sufficiently large chromosomes."
                          " This could greatly increase the workflow's runtime.")

        group.add_option(
            "--disableSequenceErrorEstimation",
            dest="isEstimateSequenceError",
            action="store_false",
            help=disableEstimationHelp)
        group.add_option(
            "--useAllDataForSequenceErrorEstimation",
            dest="isErrorEstimationFromAllData",
            action="store_true",
            help=useAllDataHelp)

        StrelkaSharedWorkflowOptionsBase.addExtendedGroupOptions(self,group)
Пример #8
0
    def validateAndSanitizeOptions(self, options):
        """
        Run the shared base-class validation, pass the input alignment list
        through groomBamList, then hand it to a BamSetChecker that is checked
        with the samtools binary and reference fasta options.
        """
        StrelkaSharedWorkflowOptionsBase.validateAndSanitizeOptions(
            self, options)

        inputBams = options.bamList
        groomBamList(inputBams, "input")

        checker = BamSetChecker()
        checker.appendBams(inputBams, "Input")
        checker.check(options.samtoolsBin, options.referenceFasta)
Пример #9
0
    def addWorkflowGroupOptions(self, group):
        """
        Add the repeatable "--bam" input option to `group`, followed by the
        shared base-class workflow options.
        """
        bamOptionSettings = dict(
            type="string",
            dest="bamList",
            metavar="FILE",
            action="append",
            help="Sample BAM or CRAM file. [required] (no default)",
        )
        group.add_option("--bam", **bamOptionSettings)

        StrelkaSharedWorkflowOptionsBase.addWorkflowGroupOptions(self, group)
    def addExtendedGroupOptions(self, group):
        """
        Add the repeatable "--noiseVcf" option to `group`, followed by the
        shared base-class extended options.
        """
        noiseVcfHelp = "Noise vcf file (submit argument multiple times for more than one file)"
        group.add_option("--noiseVcf", type="string", dest="noiseVcfList",
                         metavar="FILE", action="append", help=noiseVcfHelp)

        StrelkaSharedWorkflowOptionsBase.addExtendedGroupOptions(self, group)
Пример #11
0
    def addWorkflowGroupOptions(self,group) :
        """
        Add the input alignment, ploidy, no-compress, continuous-frequency and
        RNA options to `group`, followed by the shared base-class workflow
        options.
        """
        # help texts are bound first so the option registrations stay readable
        bamHelp = "Sample BAM or CRAM file. May be specified more than once, multiple inputs will be treated as each BAM file representing a different sample. [required] (no default)"
        ploidyHelp = ("Provide ploidy file in VCF. The VCF should include one sample column per input sample labeled with the same sample names found in the input BAM/CRAM RG header sections."
                      " Ploidy should be provided in records using the FORMAT/CN field, which are interpreted to span the range [POS+1, INFO/END]. Any CN value besides 1 or 0 will be treated as 2."
                      " File must be tabix indexed. (no default)")
        noCompressHelp = "Provide BED file of regions where gVCF block compression is not allowed. File must be bgzip-compressed/tabix-indexed. (no default)"
        continuousVfHelp = "Call variants on CHROM without a ploidy prior assumption, issuing calls with continuous variant frequencies (no default)"
        rnaHelp = "Set options for RNA-Seq input."

        group.add_option("--bam", type="string", dest="bamList",
                         metavar="FILE", action="append", help=bamHelp)
        group.add_option("--ploidy", type="string", dest="ploidyFilename",
                         metavar="FILE", help=ploidyHelp)
        group.add_option("--noCompress", type="string", dest="noCompressBed",
                         metavar="FILE", help=noCompressHelp)
        group.add_option("--callContinuousVf", type="string", dest="callContinuousVf",
                         metavar="CHROM", action="append", help=continuousVfHelp)
        group.add_option("--rna", dest="isRNA", action="store_true", help=rnaHelp)

        StrelkaSharedWorkflowOptionsBase.addWorkflowGroupOptions(self,group)
    def validateAndSanitizeExistingOptions(self, options):
        """
        Run the shared base-class checks, then process the normal/tumor
        alignment lists, the noise vcf list and the two somatic scoring-model
        file options.

        Each scoring-model option is replaced by the value returned from
        validateFixExistingFileArg.
        """
        StrelkaSharedWorkflowOptionsBase.validateAndSanitizeExistingOptions(
            self, options)

        for (bamAttr, sampleLabel) in (("normalBamList", "normal sample"),
                                       ("tumorBamList", "tumor sample")):
            groomBamList(getattr(options, bamAttr), sampleLabel)

        checkFixTabixListOption(options.noiseVcfList, "noise vcf")

        # (options attribute, label handed to validateFixExistingFileArg)
        scoringFileAttrs = (
            ("somaticSnvScoringModelFile", "Somatic SNV empirical scoring file"),
            ("somaticIndelScoringModelFile", "Somatic indel empirical scoring file"),
        )
        for (attrName, fileLabel) in scoringFileAttrs:
            fixedPath = validateFixExistingFileArg(getattr(options, attrName), fileLabel)
            setattr(options, attrName, fixedPath)
    def validateOptionExistence(self, options):
        """
        Require exactly one input alignment file and check it with a
        BamSetChecker.

        Runs the shared base-class existence checks first.

        Raises OptParseException when no input alignment file, or more than
        one, is specified.
        """
        StrelkaSharedWorkflowOptionsBase.validateOptionExistence(self, options)

        bcheck = BamSetChecker()

        def singleAppender(bamList, label):
            # A list that was never supplied may be None; without this guard
            # len() below fails with a TypeError instead of a usage error.
            if (bamList is None) or (len(bamList) == 0):
                raise OptParseException(
                    "No %s sample BAM/CRAM files specified" % (label))
            if len(bamList) > 1:
                raise OptParseException(
                    "More than one %s sample BAM/CRAM files specified" %
                    (label))
            bcheck.appendBams(bamList, label)

        singleAppender(options.bamList, "Input")
        bcheck.check(options.htsfileBin, options.referenceFasta)
    def getOptionDefaults(self):
        """
        Return the shared option defaults with the somatic workflow overrides
        applied.

        Also records the script directory on the instance and asserts that the
        configuration directory resolved relative to it exists.
        """
        self.configScriptDir = scriptDir
        defaults = StrelkaSharedWorkflowOptionsBase.getOptionDefaults(self)

        libexecDir = defaults["libexecDir"]

        # NOTE(review): "@THIS_RELATIVE_CONFIGDIR@" looks like a placeholder
        # substituted before this script runs - confirm
        relativeConfigDir = os.path.join(scriptDir, "@THIS_RELATIVE_CONFIGDIR@")
        configDir = os.path.abspath(relativeConfigDir)
        assert os.path.isdir(configDir)

        somaticOverrides = {
            'runDir': 'StrelkaSomaticWorkflow',
            'strelkaSomaticBin': joinFile(libexecDir, exeFile("strelka2")),
            'minTier2Mapq': 0,
            'snvScoringModelFile': joinFile(configDir, 'somaticSNVScoringModels.json'),
            'indelScoringModelFile': joinFile(configDir, 'somaticIndelScoringModels.json'),
            'isOutputCallableRegions': False,
            'noiseVcfList': None,
        }
        defaults.update(somaticOverrides)
        return defaults
Пример #15
0
    def getOptionDefaults(self) :
        """
        Return the shared option defaults with the germline workflow overrides
        applied.

        Also records the script directory on the instance and asserts that the
        configuration directory resolved relative to it exists.
        """
        self.configScriptDir=scriptDir
        defaults=StrelkaSharedWorkflowOptionsBase.getOptionDefaults(self)

        libexecDir=defaults["libexecDir"]

        configDir=os.path.abspath(os.path.join(scriptDir,"@THIS_RELATIVE_CONFIGDIR@"))
        assert os.path.isdir(configDir)

        def libexecExe(exeName) :
            # path of an executable inside libexecDir
            return joinFile(libexecDir,exeFile(exeName))

        def configFile(fileName) :
            # path of a file inside configDir
            return joinFile(configDir,fileName)

        germlineOverrides = {
            'runDir' : 'StrelkaGermlineWorkflow',
            'strelkaGermlineBin' : libexecExe("starling2"),
            'bgzip9Bin' : libexecExe("bgzip9"),
            'configDir' : configDir,
            'germlineSnvScoringModelFile' : configFile('germlineSNVScoringModels.json'),
            'germlineIndelScoringModelFile' : configFile('germlineIndelScoringModels.json'),
            'rnaSnvScoringModelFile' : configFile('RNASNVScoringModels.json'),
            'rnaIndelScoringModelFile' : configFile('RNAIndelScoringModels.json'),
            'callContinuousVf' : [],
            'getCountsBin' : libexecExe("GetSequenceErrorCounts"),
            'mergeCountsBin' : libexecExe("MergeSequenceErrorCounts"),
            'estimateVariantErrorRatesBin' : libexecExe("EstimateVariantErrorRates"),
            'thetaParamFile' : configFile('theta.json'),
            'indelErrorRateDefault' : configFile('indelErrorModel.json'),
            'isEstimateSequenceError' : True,
            'isErrorEstimationFromAllData' : False
            }
        defaults.update(germlineOverrides)
        return defaults
    def validateOptionExistence(self, options):
        """
        Require exactly one proband and exactly two parent alignment files,
        then check all supplied alignment files with a BamSetChecker.

        Runs the shared base-class existence checks first. Sibling files are
        appended with isAllowEmpty=True.

        Raises OptParseException when the proband or parent file count is
        wrong, including when the option was not given at all.
        """
        StrelkaSharedWorkflowOptionsBase.validateOptionExistence(self, options)

        def bamCount(bamList):
            # A list that was never supplied may be None; count it as zero so
            # the intended OptParseException is raised rather than a TypeError
            # from len(None).
            if bamList is None: return 0
            return len(bamList)

        if bamCount(options.probandBamList) != 1:
            raise OptParseException(
                "Must specify one proband sample BAM/CRAM file")

        if bamCount(options.parentBamList) != 2:
            raise OptParseException(
                "Must specify two parent sample BAM/CRAM files")

        bcheck = BamSetChecker()
        bcheck.appendBams(options.probandBamList, "proband")
        bcheck.appendBams(options.parentBamList, "parent")
        bcheck.appendBams(options.siblingBamList, "sibling", isAllowEmpty=True)
        bcheck.check(options.htsfileBin, options.referenceFasta)
    def addWorkflowGroupOptions(self,group) :
        """
        Add the input alignment, excluded-regions, known-variants and
        observed-indel reporting options to `group`, followed by the shared
        base-class workflow options.
        """
        # Adjacent string literals are joined with no separator, so every
        # fragment except the last ends with an explicit space; otherwise the
        # rendered help runs words together (e.g. "indexed.argument").
        group.add_option("--bam", type="string",dest="bamList",metavar="FILE", action="append",
                         help="Sample BAM or CRAM file. [required] (no default)")
        group.add_option("--excludedRegions", type="string", metavar="FILE", action="append",
                         help="Provide BED file of regions to be excluded from allele count analysis. BED file must be tabix indexed. "
                         "Argument may be specified multiple times to provide multiple exclusion regions (no default)")
        group.add_option("--knownVariants", type="string", metavar="FILE",
                         help="Provide VCF file of indels and SNVs with known genotype assignments. VCF file must be tabix indexed. "
                         "Matching alt alleles in the allele counts file will be marked with a known copy number. There is no "
                         "handling of hom ref assertions, whether remaining unlabeled loci are treated as known hom ref is left to the "
                         "downstream estimation model. Note this option does not promote the known variants to candidate or forced GT "
                         "status, to do so the same VCF file can be resubmitted to the appropriate additional argument. "
                         "Input VCF must be tabixed and normalized.")
        group.add_option("--reportObservedIndels", dest="isReportObservedIndels", action="store_true", default = False,
                         help="Report all observed indels by location in a separate BED file in addition to the "
                         "summary counts")

        StrelkaSharedWorkflowOptionsBase.addWorkflowGroupOptions(self,group)
    def validateAndSanitizeExistingOptions(self, options):
        """
        Run the shared base-class checks, pass the input alignment list through
        groomBamList, and replace each supplied excluded-regions file and the
        known-variants file with its absolute path after the tabix-index check.
        """
        StrelkaSharedWorkflowOptionsBase.validateAndSanitizeExistingOptions(
            self, options)
        groomBamList(options.bamList, "input")

        def absoluteTabixFile(tabixFile, label):
            # the index check runs even for None; only real paths are absolutized
            checkOptionalTabixIndexedFile(tabixFile, label)
            return None if tabixFile is None else os.path.abspath(tabixFile)

        excludedFiles = options.excludedRegions
        if excludedFiles is not None:
            # entries are rewritten in place so the same list object is kept
            for (fileIndex, excludedFile) in enumerate(excludedFiles):
                excludedFiles[fileIndex] = absoluteTabixFile(excludedFile, "excluded-regions bed")

        if options.knownVariants is not None:
            options.knownVariants = absoluteTabixFile(options.knownVariants, "known-variants vcf")
Пример #19
0
    def validateAndSanitizeOptions(self, options):
        """
        Validate the germline workflow options after the shared base-class
        validation.

        Processes the ploidy and no-compress files, fills in unset SNV/indel
        scoring-model files from the RNA or germline defaults, switches off
        sequence error estimation for exome and RNA input, and requires at
        least one input alignment file, which is handed to a BamSetChecker.

        Raises OptParseException when no input alignment file is specified.
        """
        StrelkaSharedWorkflowOptionsBase.validateAndSanitizeOptions(
            self, options)

        options.ploidyFilename = checkFixTabixIndexedFileOption(
            options.ploidyFilename, "ploidy file")
        options.noCompressBed = checkFixTabixIndexedFileOption(
            options.noCompressBed, "no-compress bed")

        # an unset scoring model falls back to the RNA or the germline default
        if options.snvScoringModelFile is None:
            options.snvScoringModelFile = (
                options.rnaSnvScoringModelFile if options.isRNA
                else options.germlineSnvScoringModelFile)

        if options.indelScoringModelFile is None:
            options.indelScoringModelFile = (
                options.rnaIndelScoringModelFile if options.isRNA
                else options.germlineIndelScoringModelFile)

        # Disable dynamic error estimation for both Exome and RNA
        if options.isExome or options.isRNA:
            options.isEstimateSequenceError = False

        groomBamList(options.bamList, "input")

        inputBams = options.bamList
        if (inputBams is None) or (len(inputBams) == 0):
            raise OptParseException(
                "No input sample alignment files specified")

        checker = BamSetChecker()
        checker.appendBams(inputBams, "Input")
        checker.check(options.htsfileBin, options.referenceFasta)
    def getOptionDefaults(self):
        """
        Return the shared option defaults with the 'runDir' and
        'isOutputCallableRegions' overrides applied.

        Also records the script directory on the instance and asserts that the
        configuration directory resolved relative to it exists.
        """
        self.configScriptDir = scriptDir
        defaults = StrelkaSharedWorkflowOptionsBase.getOptionDefaults(self)

        # configDir is only needed for the existence check below
        relativeConfigDir = os.path.join(scriptDir, "@THIS_RELATIVE_CONFIGDIR@")
        configDir = os.path.abspath(relativeConfigDir)
        assert os.path.isdir(configDir)

        workflowOverrides = {
            'runDir': 'PedicureWorkflow',
            'isOutputCallableRegions': False,
        }
        defaults.update(workflowOverrides)
        return defaults
    def getOptionDefaults(self) :
        """
        Return the shared option defaults with the noise workflow overrides
        applied: run directory, workflow script name and three binaries
        located in libexecDir.

        Also records the script directory on the instance.
        """
        self.configScriptDir=scriptDir
        defaults=StrelkaSharedWorkflowOptionsBase.getOptionDefaults(self)

        libexecDir=defaults["libexecDir"]

        noiseOverrides = {
            'runDir' : 'StrelkaNoiseWorkflow',
            'workflowScriptName' : 'runWorkflow.py',
            }
        # (defaults key, file name inside libexecDir)
        for (binKey, binName) in (('bgcatBin',"bgzf_cat"),
                                  ('bgzip9Bin',"bgzip9"),
                                  ('snoiseBin',"strelkaNoiseExtractor")) :
            noiseOverrides[binKey] = joinFile(libexecDir,binName)

        defaults.update(noiseOverrides)
        return defaults
    def validateOptionExistence(self, options):
        """
        Require a tumor alignment file, allow at most one file per sample
        (normal and tumor), and check the supplied files with a BamSetChecker.

        Runs the shared base-class existence checks first.

        Raises OptParseException if no tumor file is specified, or if more than
        one file is specified for either sample.
        """
        StrelkaSharedWorkflowOptionsBase.validateOptionExistence(self, options)

        # the tumor sample is mandatory; the normal sample may be left out
        tumorBams = options.tumorBamList
        if (tumorBams is None) or (len(tumorBams) == 0):
            raise OptParseException(
                "No %s sample BAM/CRAM files specified" % ("tumor"))

        bcheck = BamSetChecker()

        for sampleLabel in ("normal", "tumor"):
            sampleBams = getattr(options, sampleLabel + "BamList")
            if sampleBams is None:
                continue
            if len(sampleBams) > 1:
                raise OptParseException(
                    "More than one %s sample BAM/CRAM files specified" %
                    (sampleLabel))
            bcheck.appendBams(sampleBams, sampleLabel)

        bcheck.check(options.htsfileBin, options.referenceFasta)
    def getOptionDefaults(self) :
        """
        Return the shared option defaults with the sequence allele counts
        workflow overrides applied.

        Also records the script directory on the instance and asserts that the
        configuration directory resolved relative to it exists.
        """
        self.configScriptDir=scriptDir
        defaults=StrelkaSharedWorkflowOptionsBase.getOptionDefaults(self)

        libexecDir=defaults["libexecDir"]

        # configDir is only needed for the existence check below
        relativeConfigDir=os.path.join(scriptDir,"@THIS_RELATIVE_CONFIGDIR@")
        configDir=os.path.abspath(relativeConfigDir)
        assert os.path.isdir(configDir)

        def libexecExe(exeName) :
            # path of an executable inside libexecDir
            return joinFile(libexecDir,exeFile(exeName))

        countsOverrides = {
            'runDir' : 'SequenceAlleleCountsWorkflow',
            'workflowScriptName' : 'runWorkflow.py',
            'getCountsBin' : libexecExe("GetSequenceAlleleCounts"),
            'mergeCountsBin' : libexecExe("MergeSequenceAlleleCounts"),
            'extraCountsArguments' : None
            }
        defaults.update(countsOverrides)
        return defaults
    def getOptionDefaults(self):
        """
        Return the shared option defaults with the somatic workflow overrides
        applied.

        Also records the script directory on the instance and asserts that the
        configuration directory resolved relative to it exists.
        """
        self.configScriptDir = scriptDir
        defaults = StrelkaSharedWorkflowOptionsBase.getOptionDefaults(self)

        relativeConfigDir = os.path.join(scriptDir, "@THIS_RELATIVE_CONFIGDIR@")
        configDir = os.path.abspath(relativeConfigDir)
        assert os.path.isdir(configDir)

        somaticOverrides = {
            'runDir': 'StrelkaSomaticWorkflow',
            "minTier2Mapq": 0,
            'somaticSnvScoringModelFile': joinFile(configDir, 'somaticVariantScoringModels.json'),
            # the indel model has no default here; the original source carried
            # a commented-out joinFile(configDir, ...) value for it
            'somaticIndelScoringModelFile': None,
            'isOutputCallableRegions': False,
            'noiseVcfList': None,
        }
        defaults.update(somaticOverrides)
        return defaults
    def validateAndSanitizeExistingOptions(self, options):
        """
        Run the shared base-class checks, then pass the input alignment list
        through groomBamList.
        """
        baseClass = StrelkaSharedWorkflowOptionsBase
        baseClass.validateAndSanitizeExistingOptions(self, options)

        inputBams = options.bamList
        groomBamList(inputBams, "input")
    def validateOptionExistence(self, options):
        """
        Run the shared base-class existence checks, then hand the input
        alignment list to a BamSetChecker that is checked with the samtools
        binary and reference fasta options.
        """
        StrelkaSharedWorkflowOptionsBase.validateOptionExistence(self, options)

        inputChecker = BamSetChecker()
        inputChecker.appendBams(options.bamList, "Input")

        samtoolsBin = options.samtoolsBin
        inputChecker.check(samtoolsBin, options.referenceFasta)