def picard_command(config, command):
    """Build an AtomicJavaCmdBuilder for running a Picard tools command.

    The builder targets `_PICARD_JAR` under `config.jar_root`, has `command`
    set as an option, and carries a version requirement (checked with
    `versions.GE(1, 124)`) as its CHECK_JAR keyword argument. Requirements
    are created once per JAR path and stored in `_PICARD_VERSION_CACHE`.
    """
    jar_path = os.path.join(config.jar_root, _PICARD_JAR)

    if jar_path not in _PICARD_VERSION_CACHE:
        version_cmd = AtomicJavaCmdBuilder(jar_path,
                                           temp_root=config.temp_root,
                                           jre_options=config.jre_options)
        # Arbitrary command, since just '--version' does not work
        version_cmd.set_option("MarkDuplicates")
        version_cmd.set_option("--version")

        _PICARD_VERSION_CACHE[jar_path] = versions.Requirement(
            call=version_cmd.finalized_call,
            name="Picard tools",
            search=r"^(\d+)\.(\d+)",
            checks=versions.GE(1, 124))

    builder = AtomicJavaCmdBuilder(jar_path,
                                   temp_root=config.temp_root,
                                   jre_options=config.jre_options,
                                   CHECK_JAR=_PICARD_VERSION_CACHE[jar_path])
    builder.set_option(command)

    return builder
def customize(cls, config, input_bam, output_log=None, dependencies=()):
    """Prepare a ValidateSamFile.jar command for `input_bam`.

    The command's standard output goes to `output_log`; when that is falsy,
    the target is `swap_ext(input_bam, ".validated")` instead. The JAR is
    looked up under `config.jar_root`.

    Returns a dict with the command builder under "command" and the
    unmodified `dependencies` under "dependencies".
    """
    jar_file = os.path.join(config.jar_root, "ValidateSamFile.jar")

    builder = AtomicJavaCmdBuilder(config, jar_file)
    builder.set_option("I", "%(IN_BAM)s", sep="=")

    log_file = output_log or swap_ext(input_bam, ".validated")
    builder.set_kwargs(IN_BAM=input_bam,
                       OUT_STDOUT=log_file,
                       CHECK_JAR=_picard_version(jar_file))

    return {"command": builder, "dependencies": dependencies}
def _picard_version(config, jar_file):
    """Return the version requirement for the Picard JAR at `jar_file`.

    The requirement runs the JAR with "--version" and is checked with
    `versions.GE(1, 82)`. It is built on first use and then served from
    `_PICARD_VERSION_CACHE`, keyed by `jar_file`.
    """
    if jar_file in _PICARD_VERSION_CACHE:
        return _PICARD_VERSION_CACHE[jar_file]

    version_cmd = AtomicJavaCmdBuilder(jar_file,
                                       temp_root=config.temp_root,
                                       jre_options=config.jre_options)
    version_cmd.add_value("--version")

    # The JAR's file name is part of the requirement's display name
    display_name = "Picard " + os.path.basename(jar_file)
    _PICARD_VERSION_CACHE[jar_file] = versions.Requirement(
        call=version_cmd.finalized_call,
        name=display_name,
        search=r"^(\d+)\.(\d+)",
        checks=versions.GE(1, 82))

    return _PICARD_VERSION_CACHE[jar_file]
def _picard_version(config, jar_file):
    """Look up (or create and cache) the version check for a Picard JAR.

    On a cache miss, a "--version" invocation of `jar_file` is wrapped in a
    `versions.Requirement` checked with `versions.GE(1, 82)` and stored in
    `_PICARD_VERSION_CACHE` under `jar_file`.
    """
    if jar_file in _PICARD_VERSION_CACHE:
        return _PICARD_VERSION_CACHE[jar_file]

    probe = AtomicJavaCmdBuilder(jar_file,
                                 temp_root=config.temp_root,
                                 jre_options=config.jre_options)
    probe.add_value("--version")

    label = "Picard " + os.path.basename(jar_file)
    _PICARD_VERSION_CACHE[jar_file] = versions.Requirement(
        call=probe.finalized_call,
        name=label,
        search=r"^(\d+)\.(\d+)",
        checks=versions.GE(1, 82))

    return _PICARD_VERSION_CACHE[jar_file]
def picard_command(config, command):
    """Return a basic AtomicJavaCmdBuilder for a Picard tools command.

    A version requirement for the `_PICARD_JAR` found under
    `config.jar_root` is created on first use, cached in
    `_PICARD_VERSION_CACHE`, and attached to the returned builder as
    CHECK_JAR. `command` is set as an option on the returned builder.
    """
    jar_path = os.path.join(config.jar_root, _PICARD_JAR)

    if jar_path not in _PICARD_VERSION_CACHE:
        probe = AtomicJavaCmdBuilder(jar_path,
                                     temp_root=config.temp_root,
                                     jre_options=config.jre_options)
        # Arbitrary command, since just '--version' does not work
        probe.set_option("MarkDuplicates")
        probe.set_option("--version")

        _PICARD_VERSION_CACHE[jar_path] = versions.Requirement(
            call=probe.finalized_call,
            name="Picard tools",
            search=r"^(\d+)\.(\d+)",
            checks=versions.GE(1, 124))

    jar_check = _PICARD_VERSION_CACHE[jar_path]
    picard = AtomicJavaCmdBuilder(jar_path,
                                  temp_root=config.temp_root,
                                  jre_options=config.jre_options,
                                  CHECK_JAR=jar_check)
    picard.set_option(command)

    return picard
def _get_gatk_version_check(config):
    """Return a version-check object for "GenomeAnalysisTK.jar".

    The JAR is looked up in `config.jar_root`. For now, the check only
    serves to verify that the JAR can be executed, which may not be the
    case if the JRE is outdated. Checks are cached per JAR path in
    `_GATK_VERSION`.
    """
    jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
    if jar_file in _GATK_VERSION:
        return _GATK_VERSION[jar_file]

    probe = AtomicJavaCmdBuilder(jar_file,
                                 temp_root=config.temp_root,
                                 jre_options=config.jre_options)
    probe.add_value("--version")

    # Any version is fine; for now just catch old JREs
    _GATK_VERSION[jar_file] = versions.Requirement(
        call=probe.finalized_call,
        name="GenomeAnalysisTK",
        search=r"^(\d+)\.(\d+)",
        checks=versions.Any())

    return _GATK_VERSION[jar_file]
def test_java_builder__defaults__call():
    # With only a temp_root given, the call should consist of the default
    # JVM options followed by the JAR placeholder.
    builder = AtomicJavaCmdBuilder("/path/Foo.jar", temp_root="/disk/tmp")
    expected_call = [
        "java",
        "-server",
        "-Djava.io.tmpdir=/disk/tmp",
        "-Djava.awt.headless=true",
        "-Xmx4g",
        "-XX:+UseSerialGC",
        "-jar",
        "%(AUX_JAR)s",
    ]
    assert_equal(builder.call, expected_call)
def test_java_builder__multithreaded_gc():
    # Requesting gc_threads=3 should yield a ParallelGCThreads option
    builder = AtomicJavaCmdBuilder(JAVA_CFG, "/path/Foo.jar", gc_threads=3)
    expected_call = [
        "java",
        "-server",
        "-Xmx4g",
        "-Djava.io.tmpdir=/disk/tmp",
        "-Djava.awt.headless=true",
        "-XX:ParallelGCThreads=3",
        "-jar",
        "%(AUX_JAR)s",
    ]
    assert_equal(builder.call, expected_call)
def test_java_builder__default__no_config():
    # Without a temp_root, the tmpdir option should use the %(TEMP_DIR)s
    # placeholder.
    builder = AtomicJavaCmdBuilder("/path/Foo.jar")
    expected_call = [
        "java",
        "-server",
        "-Djava.io.tmpdir=%(TEMP_DIR)s",
        "-Djava.awt.headless=true",
        "-Xmx4g",
        "-XX:+UseSerialGC",
        "-jar",
        "%(AUX_JAR)s",
    ]
    assert_equal(builder.call, expected_call)
def test_java_builder__kwargs():
    # Extra keyword arguments should appear in the builder's kwargs next to
    # the JAR path and the JRE version check.
    builder = AtomicJavaCmdBuilder("/path/Foo.jar", set_cwd=True)
    expected_kwargs = {
        "AUX_JAR": "/path/Foo.jar",
        "set_cwd": True,
        "CHECK_JRE": JAVA_VERSIONS[(1, 6)],
    }
    assert_equal(builder.kwargs, expected_kwargs)
def __init__(self, config, reference, infiles, outfile, dependencies=()):
    """Set up a GATK RealignerTargetCreator run.

    `infiles` is passed through `safe_coerce_to_tuple` and registered via
    `_set_input_files`; `reference` and its ".dict" sibling (obtained with
    `fileutils.swap_ext`) are inputs, and `outfile` receives the intervals.
    The GATK version check from `_get_gatk_version_check` is attached.
    """
    infiles = safe_coerce_to_tuple(infiles)
    jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")

    gatk = AtomicJavaCmdBuilder(jar_file, jre_options=config.jre_options)
    gatk.set_option("-T", "RealignerTargetCreator")
    gatk.set_option("-R", "%(IN_REFERENCE)s")
    gatk.set_option("-o", "%(OUT_INTERVALS)s")
    _set_input_files(gatk, infiles)

    gatk.set_kwargs(IN_REFERENCE=reference,
                    IN_REF_DICT=fileutils.swap_ext(reference, ".dict"),
                    OUT_INTERVALS=outfile,
                    CHECK_GATK=_get_gatk_version_check(config))

    description = "<Indel Realigner (training): %s -> %r>" \
        % (describe_files(infiles), outfile)

    CommandNode.__init__(self,
                         description=description,
                         command=gatk.finalize(),
                         dependencies=dependencies)
def customize(cls, config, reference, dependencies=()):
    """Prepare a CreateSequenceDictionary.jar command for `reference`.

    The output dictionary path is `swap_ext(reference, ".dict")`. Returns a
    dict with the command builder under "command" and the unmodified
    `dependencies` under "dependencies".
    """
    jar_file = os.path.join(config.jar_root, "CreateSequenceDictionary.jar")

    builder = AtomicJavaCmdBuilder(config, jar_file)
    for (option, placeholder) in (("R", "%(IN_REF)s"), ("O", "%(OUT_DICT)s")):
        builder.set_option(option, placeholder, sep="=")

    builder.set_kwargs(IN_REF=reference,
                       OUT_DICT=swap_ext(reference, ".dict"),
                       CHECK_JAR=_picard_version(jar_file))

    return {"command": builder, "dependencies": dependencies}
def __init__(self, config, reference, infiles, outfile, dependencies=()):
    """Set up a GATK RealignerTargetCreator run writing to `outfile`.

    `infiles` is passed through `safe_coerce_to_tuple` and registered via
    `_set_input_files`; `reference` and the path returned by
    `fileutils.swap_ext(reference, ".dict")` are declared as inputs.
    """
    infiles = safe_coerce_to_tuple(infiles)
    jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")

    gatk = AtomicJavaCmdBuilder(config, jar_file)
    gatk.set_option("-T", "RealignerTargetCreator")
    gatk.set_option("-R", "%(IN_REFERENCE)s")
    gatk.set_option("-o", "%(OUT_INTERVALS)s")
    _set_input_files(gatk, infiles)

    gatk.set_kwargs(IN_REFERENCE=reference,
                    IN_REF_DICT=fileutils.swap_ext(reference, ".dict"),
                    OUT_INTERVALS=outfile)

    description = "<Train Indel Realigner: %i file(s) -> '%s'>" \
        % (len(infiles), outfile)

    CommandNode.__init__(self,
                         description=description,
                         command=gatk.finalize(),
                         dependencies=dependencies)
def _get_gatk_version_check(config):
    """Return a version-check object for the "GenomeAnalysisTK.jar" located
    at config.jar_root.

    For now, this check only serves to verify that the JAR can be executed,
    which may not be the case if the JRE is outdated. The check is created
    once per JAR path and kept in `_GATK_VERSION`.
    """
    jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
    if jar_file in _GATK_VERSION:
        return _GATK_VERSION[jar_file]

    version_cmd = AtomicJavaCmdBuilder(jar_file,
                                       temp_root=config.temp_root,
                                       jre_options=config.jre_options)
    version_cmd.add_value("--version")

    # Any version is fine; for now just catch old JREs
    _GATK_VERSION[jar_file] = versions.Requirement(
        call=version_cmd.finalized_call,
        name="GenomeAnalysisTK",
        search=r"^(\d+)\.(\d+)",
        checks=versions.Any())

    return _GATK_VERSION[jar_file]
def customize(cls, config, reference, dependencies=()):
    """Build the CreateSequenceDictionary.jar command for `reference`.

    "R" is bound to `reference` and "O" to `swap_ext(reference, ".dict")`.
    Returns a dict holding the command builder ("command") and the
    `dependencies` passed in ("dependencies").
    """
    jar_file = os.path.join(config.jar_root, "CreateSequenceDictionary.jar")

    picard = AtomicJavaCmdBuilder(config, jar_file)
    picard.set_option("R", "%(IN_REF)s", sep="=")
    picard.set_option("O", "%(OUT_DICT)s", sep="=")

    dict_file = swap_ext(reference, ".dict")
    picard.set_kwargs(IN_REF=reference,
                      OUT_DICT=dict_file,
                      CHECK_JAR=_picard_version(jar_file))

    return {"command": picard, "dependencies": dependencies}
def customize(cls, config, input_bam, output_log=None, dependencies=()):
    """Build the ValidateSamFile.jar command for `input_bam`.

    Standard output is directed to `output_log` if given (truthy), and
    otherwise to `swap_ext(input_bam, ".validated")`. Returns a dict holding
    the command builder ("command") and `dependencies` ("dependencies").
    """
    jar_file = os.path.join(config.jar_root, "ValidateSamFile.jar")

    picard = AtomicJavaCmdBuilder(config, jar_file)
    picard.set_option("I", "%(IN_BAM)s", sep="=")

    stdout_target = output_log or swap_ext(input_bam, ".validated")
    picard.set_kwargs(IN_BAM=input_bam,
                      OUT_STDOUT=stdout_target,
                      CHECK_JAR=_picard_version(jar_file))

    return {"command": picard, "dependencies": dependencies}
def __init__(self, config, reference, input_bam, output_bam, tags,
             min_mapq=0, dependencies=()):
    """Set up a three-stage BAM cleanup pipeline.

    The stages, run in parallel and connected by pipes, are:
      1. `samtools view` on `input_bam` with "-F0x4" and "-q<min_mapq>";
      2. AddOrReplaceReadGroups.jar, reading stdin and writing stdout,
         with one option per entry in `tags`;
      3. `samtools calmd` against `reference`, writing `output_bam`.

    Entries in `tags` named "PG", "Target" and "PU_cur" are not passed to
    Picard, and "PU_src" is passed as the "PU" option.
    """
    flt = AtomicCmd(["samtools", "view", "-bu", "-F0x4",
                     "-q%i" % min_mapq, "%(IN_BAM)s"],
                    IN_BAM=input_bam,
                    OUT_STDOUT=AtomicCmd.PIPE)

    jar_file = os.path.join(config.jar_root, "AddOrReplaceReadGroups.jar")
    params = AtomicJavaCmdBuilder(config, jar_file)
    params.set_option("INPUT", "/dev/stdin", sep="=")
    params.set_option("OUTPUT", "/dev/stdout", sep="=")
    params.set_option("QUIET", "true", sep="=")
    params.set_option("COMPRESSION_LEVEL", "0", sep="=")

    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only);
    # sorting keeps the order in which options are set deterministic.
    for (tag, value) in sorted(tags.items()):
        if tag not in ("PG", "Target", "PU_src", "PU_cur"):
            params.set_option(tag, value, sep="=")
        elif tag == "PU_src":
            params.set_option("PU", value, sep="=")

    params.set_kwargs(IN_STDIN=flt,
                      OUT_STDOUT=AtomicCmd.PIPE)
    annotate = params.finalize()

    calmd = AtomicCmd(["samtools", "calmd", "-b", "-", "%(IN_REF)s"],
                      IN_REF=reference,
                      IN_STDIN=annotate,
                      OUT_STDOUT=output_bam)

    description = "<Cleanup BAM: %s -> '%s'>" \
        % (input_bam, output_bam)
    CommandNode.__init__(self,
                         command=ParallelCmds([flt, annotate, calmd]),
                         description=description,
                         dependencies=dependencies)
def test_java_builder__defaults__kwargs():
    # By default the only keyword argument should be the JAR path
    builder = AtomicJavaCmdBuilder(JAVA_CFG, "/path/Foo.jar")
    expected_kwargs = {"AUX_JAR": "/path/Foo.jar"}
    assert_equal(builder.kwargs, expected_kwargs)
def customize(cls, config, input_bams, output_bam, dependencies=()):
    """Prepare a MergeSamFiles.jar command merging `input_bams`.

    The merged output is `output_bam`, with an index declared at
    `swap_ext(output_bam, ".bai")`. The "SO" option is set with
    `fixed=False`. Returns a dict with the command builder under "command"
    and `dependencies` under "dependencies".
    """
    jar_file = os.path.join(config.jar_root, "MergeSamFiles.jar")

    merge = AtomicJavaCmdBuilder(jar_file, jre_options=config.jre_options)
    merge.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
    merge.set_option("CREATE_INDEX", "True", sep="=")
    merge.set_option("SO", "coordinate", sep="=", fixed=False)
    merge.add_multiple_options("I", input_bams, sep="=")

    merge.set_kwargs(OUT_BAM=output_bam,
                     OUT_BAI=swap_ext(output_bam, ".bai"),
                     CHECK_JAR=_picard_version(config, jar_file))

    return {"command": merge, "dependencies": dependencies}
def __init__(self, config, reference, input_bam, output_bam, tags,
             min_mapq=0, dependencies=()):
    """Set up a filter -> read-group annotation -> calmd pipeline.

    `input_bam` is filtered with `samtools view` ("-F0x4", "-q<min_mapq>"),
    piped through AddOrReplaceReadGroups.jar, and then through
    `samtools calmd` against `reference`, which writes `output_bam`. The
    three commands are run as a single `ParallelCmds`.

    Every entry in `tags` becomes a Picard option, except that "PG",
    "Target" and "PU_cur" are skipped and "PU_src" is passed as "PU".
    """
    flt = AtomicCmd(["samtools", "view", "-bu", "-F0x4",
                     "-q%i" % min_mapq, "%(IN_BAM)s"],
                    IN_BAM=input_bam,
                    OUT_STDOUT=AtomicCmd.PIPE)

    jar_file = os.path.join(config.jar_root, "AddOrReplaceReadGroups.jar")
    params = AtomicJavaCmdBuilder(config, jar_file)
    params.set_option("INPUT", "/dev/stdin", sep="=")
    params.set_option("OUTPUT", "/dev/stdout", sep="=")
    params.set_option("QUIET", "true", sep="=")
    params.set_option("COMPRESSION_LEVEL", "0", sep="=")

    # Use items() rather than the Python 2-only iteritems(); the sorted
    # result is the same, and the code also runs on Python 3.
    for (tag, value) in sorted(tags.items()):
        if tag not in ("PG", "Target", "PU_src", "PU_cur"):
            params.set_option(tag, value, sep="=")
        elif tag == "PU_src":
            params.set_option("PU", value, sep="=")

    params.set_kwargs(IN_STDIN=flt,
                      OUT_STDOUT=AtomicCmd.PIPE)
    annotate = params.finalize()

    calmd = AtomicCmd(["samtools", "calmd", "-b", "-", "%(IN_REF)s"],
                      IN_REF=reference,
                      IN_STDIN=annotate,
                      OUT_STDOUT=output_bam)

    description = "<Cleanup BAM: %s -> '%s'>" \
        % (input_bam, output_bam)
    CommandNode.__init__(self,
                         command=ParallelCmds([flt, annotate, calmd]),
                         description=description,
                         dependencies=dependencies)
def __init__(self, config, reference, intervals, infiles, outfile,
             dependencies=()):
    """Set up a GATK IndelRealigner run followed by `samtools calmd`.

    GATK is given `reference`, `intervals` and the input files (registered
    via `_set_input_files`), and writes `outfile` with BAM compression 0
    and BAM indexing disabled. The calmd command takes the basename of
    `outfile` as a temporary input and declares "<basename>.calmd" as its
    temporary stdout. Both commands are wrapped in one `ParallelCmds`.
    """
    self._basename = os.path.basename(outfile)

    infiles = safe_coerce_to_tuple(infiles)
    jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")

    gatk = AtomicJavaCmdBuilder(jar_file, jre_options=config.jre_options)
    gatk.set_option("-T", "IndelRealigner")
    gatk.set_option("-R", "%(IN_REFERENCE)s")
    gatk.set_option("-targetIntervals", "%(IN_INTERVALS)s")
    gatk.set_option("-o", "%(OUT_BAMFILE)s")
    gatk.set_option("--bam_compression", 0)
    gatk.set_option("--disable_bam_indexing")
    _set_input_files(gatk, infiles)

    gatk.set_kwargs(IN_REFERENCE=reference,
                    IN_REF_DICT=fileutils.swap_ext(reference, ".dict"),
                    IN_INTERVALS=intervals,
                    OUT_BAMFILE=outfile,
                    CHECK_GATK=_get_gatk_version_check(config))

    calmd = AtomicCmd(
        ["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
        TEMP_IN_BAM=self._basename,
        IN_REF=reference,
        TEMP_OUT_STDOUT=self._basename + ".calmd",
        CHECK_VERSION=SAMTOOLS_VERSION)

    description = "<Indel Realigner (aligning): %s -> %r>" \
        % (describe_files(infiles), outfile)

    CommandNode.__init__(self,
                         description=description,
                         command=ParallelCmds([gatk.finalize(), calmd]),
                         dependencies=dependencies)
def customize(cls, config, input_bams, output_bam, dependencies=()):
    """Build the MergeSamFiles.jar command that merges `input_bams`.

    Output goes to `output_bam`; `swap_ext(output_bam, ".bai")` is declared
    as the index output, and the Picard version check is attached as
    CHECK_JAR. Returns a dict holding the command builder ("command") and
    `dependencies` ("dependencies").
    """
    jar_file = os.path.join(config.jar_root, "MergeSamFiles.jar")

    picard = AtomicJavaCmdBuilder(jar_file, jre_options=config.jre_options)
    picard.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
    picard.set_option("CREATE_INDEX", "True", sep="=")
    picard.set_option("SO", "coordinate", sep="=", fixed=False)
    picard.add_multiple_options("I", input_bams, sep="=")

    index_file = swap_ext(output_bam, ".bai")
    picard.set_kwargs(OUT_BAM=output_bam,
                      OUT_BAI=index_file,
                      CHECK_JAR=_picard_version(config, jar_file))

    return {"command": picard, "dependencies": dependencies}
def concatenate_input_bams(config, input_bams, out=AtomicCmd.PIPE):
    """Transparent concatenation of input BAMs.

    Returns a tuple containing a list of commands (0 or 1), and an object
    which may be passed to the IN_STDIN of an AtomicCmd (either an
    AtomicCmd, or a filename). This allows transparent concatenation when
    multiple files are specified, while avoiding needless overhead when
    there is only 1 input file.
    """
    input_bams = safe_coerce_to_tuple(input_bams)
    if len(input_bams) == 1:
        # Nothing to merge; hand back the file itself
        return [], input_bams[0]

    jar_file = os.path.join(config.jar_root, "MergeSamFiles.jar")
    merge = AtomicJavaCmdBuilder(config, jar_file)
    merge.set_kwargs(CHECK_JAR=_picard_version(jar_file))

    if out == AtomicCmd.PIPE:
        merge.set_kwargs(OUT_STDOUT=out)
        merge.set_option("OUTPUT", "/dev/stdout", sep="=")
    else:
        merge.set_option("OUTPUT", out, sep="=")

    merge.set_option("CREATE_INDEX", "False", sep="=")
    merge.set_option("COMPRESSION_LEVEL", 0, sep="=")

    numbered = enumerate(safe_coerce_to_tuple(input_bams), start=1)
    for (index, filename) in numbered:
        key = "IN_BAM_%02i" % index
        merge.add_option("I", "%(" + key + ")s", sep="=")
        merge.set_kwargs(**{key: filename})

    merge.set_option("SO", "coordinate", sep="=", fixed=False)

    cmd = merge.finalize()
    return [cmd], cmd
def __init__(self, config, reference, infiles, outfile, dependencies=()):
    """Configure a GATK RealignerTargetCreator command node.

    Inputs are `reference`, the path `fileutils.swap_ext(reference, ".dict")`
    and the files in `infiles` (registered via `_set_input_files`); the
    intervals are written to `outfile`.
    """
    infiles = safe_coerce_to_tuple(infiles)
    jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")

    builder = AtomicJavaCmdBuilder(config, jar_file)
    for (option, value) in (("-T", "RealignerTargetCreator"),
                            ("-R", "%(IN_REFERENCE)s"),
                            ("-o", "%(OUT_INTERVALS)s")):
        builder.set_option(option, value)
    _set_input_files(builder, infiles)

    builder.set_kwargs(IN_REFERENCE=reference,
                       IN_REF_DICT=fileutils.swap_ext(reference, ".dict"),
                       OUT_INTERVALS=outfile)

    description = "<Train Indel Realigner: %i file(s) -> '%s'>" \
        % (len(infiles), outfile)

    CommandNode.__init__(self,
                         description=description,
                         command=builder.finalize(),
                         dependencies=dependencies)
def customize(cls, config, input_bams, output_bam, output_metrics=None,
              dependencies=()):
    """Prepare a MarkDuplicates.jar command for `input_bams`.

    Each input is registered under a zero-based "IN_BAM_NN" key. Metrics go
    to `output_metrics`, or to `swap_ext(output_bam, ".metrics")` when that
    is falsy. Returns a dict with the command builder under "command" and
    `dependencies` under "dependencies".
    """
    jar_file = os.path.join(config.jar_root, "MarkDuplicates.jar")
    builder = AtomicJavaCmdBuilder(config, jar_file)

    # Create .bai index, since it is required by a lot of other programs
    builder.set_option("CREATE_INDEX", "True", sep="=")

    builder.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
    builder.set_option("METRICS_FILE", "%(OUT_METRICS)s", sep="=")

    input_bams = safe_coerce_to_tuple(input_bams)
    for (index, filename) in enumerate(input_bams):
        key = "IN_BAM_%02i" % index
        builder.add_option("I", "%(" + key + ")s", sep="=")
        builder.set_kwargs(**{key: filename})

    # Remove duplicates from output by default to save disk-space
    builder.set_option("REMOVE_DUPLICATES", "True", sep="=", fixed=False)

    index_file = swap_ext(output_bam, ".bai")
    metrics_file = output_metrics or swap_ext(output_bam, ".metrics")
    builder.set_kwargs(OUT_BAM=output_bam,
                       OUT_BAI=index_file,
                       OUT_METRICS=metrics_file,
                       CHECK_JAR=_picard_version(jar_file))

    return {"command": builder, "dependencies": dependencies}
def __init__(self, config, input_bams, pipename="input.bam"):
    """Describe how one or more input BAMs are made available at `pipename`.

    With more than one file, a MergeSamFiles.jar command writing to
    `pipename` (as TEMP_OUT_BAM) is stored in `self.commands`. Otherwise
    `self.commands` stays empty and the first file plus its ".bai" sibling
    (via `swap_ext`) are recorded in `self.kwargs`.
    """
    self.pipe = pipename
    self.files = safe_coerce_to_tuple(input_bams)
    self.commands = []
    self.kwargs = {"TEMP_IN_BAM": self.pipe}

    if len(self.files) <= 1:
        # Ensure that the actual command depends on the input
        self.kwargs["IN_FILE_00"] = self.files[0]
        self.kwargs["IN_FILE_01"] = swap_ext(self.files[0], ".bai")
        return

    jar_file = os.path.join(config.jar_root, "MergeSamFiles.jar")
    merge = AtomicJavaCmdBuilder(jar=jar_file,
                                 temp_root=config.temp_root,
                                 jre_options=config.jre_options)

    merge.set_option("SO", "coordinate", sep="=", fixed=False)
    merge.set_option("CREATE_INDEX", "False", sep="=")
    merge.set_option("COMPRESSION_LEVEL", 0, sep="=")
    merge.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
    merge.add_multiple_options("I", input_bams, sep="=")

    merge.set_kwargs(CHECK_JAR=_picard_version(config, jar_file),
                     TEMP_OUT_BAM=self.pipe)

    self.commands = [merge.finalize()]
def test_java_builder__kwargs():
    # Extra keyword arguments should be kept alongside the JAR path
    builder = AtomicJavaCmdBuilder(JAVA_CFG, "/path/Foo.jar", set_cwd=True)
    expected_kwargs = {
        "AUX_JAR": "/path/Foo.jar",
        "set_cwd": True,
    }
    assert_equal(builder.kwargs, expected_kwargs)
def customize(cls, config, input_bams, output_bam, output_metrics=None,
              dependencies=()):
    """Build the MarkDuplicates.jar command for `input_bams`.

    Inputs are bound to zero-based "IN_BAM_NN" keys; outputs are
    `output_bam`, `swap_ext(output_bam, ".bai")`, and the metrics file
    (`output_metrics`, or `swap_ext(output_bam, ".metrics")` if falsy).
    Returns a dict holding the command builder ("command") and
    `dependencies` ("dependencies").
    """
    jar_file = os.path.join(config.jar_root, "MarkDuplicates.jar")
    picard = AtomicJavaCmdBuilder(config, jar_file)

    # Create .bai index, since it is required by a lot of other programs
    picard.set_option("CREATE_INDEX", "True", sep="=")

    picard.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
    picard.set_option("METRICS_FILE", "%(OUT_METRICS)s", sep="=")

    input_bams = safe_coerce_to_tuple(input_bams)
    for (index, filename) in enumerate(input_bams):
        bam_key = "IN_BAM_%02i" % index
        picard.add_option("I", "%(" + bam_key + ")s", sep="=")
        picard.set_kwargs(**{bam_key: filename})

    # Remove duplicates from output by default to save disk-space
    picard.set_option("REMOVE_DUPLICATES", "True", sep="=", fixed=False)

    bai_file = swap_ext(output_bam, ".bai")
    metrics = output_metrics or swap_ext(output_bam, ".metrics")
    picard.set_kwargs(OUT_BAM=output_bam,
                      OUT_BAI=bai_file,
                      OUT_METRICS=metrics,
                      CHECK_JAR=_picard_version(jar_file))

    return {"command": picard, "dependencies": dependencies}
def customize(cls, reference, infiles, outfile, options, dependencies=()):
    """Prepare a GATK UnifiedGenotyper command.

    Every file in `infiles` must exist (checked with an assertion) and is
    added as a "-I" option. The VCF is written to `outfile`, and
    `outfile + ".idx"` is declared as a further output. Returns a dict
    whose "commands" entry maps "unifiedgenotyper" to the command builder.

    NOTE(review): `dependencies` is accepted but not used here, and the
    "-L" interval is hard-coded; confirm that both are intentional.
    """
    infiles = safe_coerce_to_tuple(infiles)
    jar_file = os.path.join(options.jar_root, "GenomeAnalysisTK.jar")

    genotyper = AtomicJavaCmdBuilder(options, jar_file)
    genotyper.set_option("-R", "%(IN_REFERENCE)s")
    genotyper.set_option("-T", "UnifiedGenotyper")

    for bam in infiles:
        assert os.path.exists(bam), "Couldn't find input BAM: {}".format(bam)
        genotyper.add_option("-I", bam)

    genotyper.set_option("-o", "%(OUT_VCFFILES)s")
    genotyper.set_option("-stand_call_conf", "30.0")
    genotyper.set_option("-stand_emit_conf", "10.0")
    genotyper.set_option("-dcov", "200")
    # Disabled: genotyper.set_option("-nct", "3")
    genotyper.set_option("-L", "chrUn2:1-19213991")

    genotyper.set_kwargs(IN_REFERENCE=reference,
                         OUT_VCFFILES=outfile,
                         OUT_VCF_IDX=outfile + ".idx")

    return {"commands": {"unifiedgenotyper": genotyper}}
def customize(cls, config, input_bams, output_bam, dependencies=()):
    """Prepare a MergeSamFiles.jar command merging `input_bams`.

    Inputs are bound to one-based "IN_BAM_NN" keys; outputs are
    `output_bam` and `swap_ext(output_bam, ".bai")`. Returns a dict with the
    command builder under "command" and `dependencies` under
    "dependencies".
    """
    jar_file = os.path.join(config.jar_root, "MergeSamFiles.jar")

    merge = AtomicJavaCmdBuilder(config, jar_file)
    merge.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
    merge.set_option("CREATE_INDEX", "True", sep="=")

    merge.set_kwargs(OUT_BAM=output_bam,
                     OUT_BAI=swap_ext(output_bam, ".bai"),
                     CHECK_JAR=_picard_version(jar_file))

    for (index, filename) in enumerate(input_bams, start=1):
        key = "IN_BAM_%02i" % index
        merge.add_option("I", "%(" + key + ")s", sep="=")
        merge.set_kwargs(**{key: filename})

    merge.set_option("SO", "coordinate", sep="=", fixed=False)

    return {"command": merge, "dependencies": dependencies}
def __init__(self, config, reference, infiles, outfile, dependencies=()):
    """Configure a GATK RealignerTargetCreator command node.

    Inputs are `reference`, the path `fileutils.swap_ext(reference, ".dict")`
    and the files in `infiles` (registered via `_set_input_files`); the
    intervals are written to `outfile`. The check returned by
    `_get_gatk_version_check(config)` is attached as CHECK_GATK.
    """
    infiles = safe_coerce_to_tuple(infiles)
    jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")

    builder = AtomicJavaCmdBuilder(jar_file, jre_options=config.jre_options)
    for (option, value) in (("-T", "RealignerTargetCreator"),
                            ("-R", "%(IN_REFERENCE)s"),
                            ("-o", "%(OUT_INTERVALS)s")):
        builder.set_option(option, value)
    _set_input_files(builder, infiles)

    builder.set_kwargs(IN_REFERENCE=reference,
                       IN_REF_DICT=fileutils.swap_ext(reference, ".dict"),
                       OUT_INTERVALS=outfile,
                       CHECK_GATK=_get_gatk_version_check(config))

    description = "<Indel Realigner (training): %s -> %r>" \
        % (describe_files(infiles), outfile)

    CommandNode.__init__(self,
                         description=description,
                         command=builder.finalize(),
                         dependencies=dependencies)
def concatenate_input_bams(config, input_bams, out=AtomicCmd.PIPE):
    """Transparently concatenate input BAMs.

    Returns a tuple of a list of commands (0 or 1), and an object which may
    be passed to the IN_STDIN of an AtomicCmd (either an AtomicCmd, or a
    filename). With a single input file, no command is created and the
    filename itself is returned, avoiding needless overhead; otherwise a
    MergeSamFiles.jar command is built and returned.
    """
    input_bams = safe_coerce_to_tuple(input_bams)
    if len(input_bams) == 1:
        return [], input_bams[0]

    jar_file = os.path.join(config.jar_root, "MergeSamFiles.jar")
    picard = AtomicJavaCmdBuilder(config, jar_file)
    picard.set_kwargs(CHECK_JAR=_picard_version(jar_file))

    if out == AtomicCmd.PIPE:
        # Piped output: declare stdout and have Picard write to it
        picard.set_kwargs(OUT_STDOUT=out)
        picard.set_option("OUTPUT", "/dev/stdout", sep="=")
    else:
        picard.set_option("OUTPUT", out, sep="=")

    picard.set_option("CREATE_INDEX", "False", sep="=")
    picard.set_option("COMPRESSION_LEVEL", 0, sep="=")

    bam_files = safe_coerce_to_tuple(input_bams)
    for (index, filename) in enumerate(bam_files, start=1):
        bam_key = "IN_BAM_%02i" % index
        picard.add_option("I", "%(" + bam_key + ")s", sep="=")
        picard.set_kwargs(**{bam_key: filename})

    picard.set_option("SO", "coordinate", sep="=", fixed=False)

    cmd = picard.finalize()
    return [cmd], cmd
def __init__(self, config, input_bams, pipename="input.bam"):
    """Record the commands and kwargs needed to expose BAMs at `pipename`.

    `self.kwargs` always carries TEMP_IN_BAM set to `pipename`. If more
    than one file is given, `self.commands` holds a single finalized
    MergeSamFiles.jar command writing to `pipename`; otherwise the first
    file and `swap_ext(<file>, ".bai")` are added to `self.kwargs`.
    """
    self.pipe = pipename
    self.files = safe_coerce_to_tuple(input_bams)
    self.commands = []
    self.kwargs = {"TEMP_IN_BAM": self.pipe}

    if len(self.files) <= 1:
        # Ensure that the actual command depends on the input
        first_file = self.files[0]
        self.kwargs["IN_FILE_00"] = first_file
        self.kwargs["IN_FILE_01"] = swap_ext(first_file, ".bai")
        return

    jar_file = os.path.join(config.jar_root, "MergeSamFiles.jar")
    picard = AtomicJavaCmdBuilder(jar=jar_file,
                                  temp_root=config.temp_root,
                                  jre_options=config.jre_options)

    picard.set_option("SO", "coordinate", sep="=", fixed=False)
    picard.set_option("CREATE_INDEX", "False", sep="=")
    picard.set_option("COMPRESSION_LEVEL", 0, sep="=")
    picard.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
    picard.add_multiple_options("I", input_bams, sep="=")

    picard.set_kwargs(CHECK_JAR=_picard_version(config, jar_file),
                      TEMP_OUT_BAM=self.pipe)

    self.commands = [picard.finalize()]
def customize(cls, config, input_bams, output_bam, dependencies=()):
    """Build the MergeSamFiles.jar command that merges `input_bams`.

    Each input is registered under a one-based "IN_BAM_NN" key. The merged
    file is `output_bam`, and `swap_ext(output_bam, ".bai")` is declared as
    the index output. Returns a dict holding the command builder
    ("command") and `dependencies` ("dependencies").
    """
    jar_file = os.path.join(config.jar_root, "MergeSamFiles.jar")

    picard = AtomicJavaCmdBuilder(config, jar_file)
    picard.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
    picard.set_option("CREATE_INDEX", "True", sep="=")

    bai_file = swap_ext(output_bam, ".bai")
    picard.set_kwargs(OUT_BAM=output_bam,
                      OUT_BAI=bai_file,
                      CHECK_JAR=_picard_version(jar_file))

    for (index, filename) in enumerate(input_bams, start=1):
        bam_key = "IN_BAM_%02i" % index
        picard.add_option("I", "%(" + bam_key + ")s", sep="=")
        picard.set_kwargs(**{bam_key: filename})

    picard.set_option("SO", "coordinate", sep="=", fixed=False)

    return {"command": picard, "dependencies": dependencies}
def __init__(self, config, reference, intervals, infiles, outfile,
             dependencies=()):
    """Set up a GATK IndelRealigner run followed by `samtools calmd`.

    GATK is given `reference`, `intervals` and the input files (registered
    via `_set_input_files`), and writes `outfile` with BAM compression 0
    and BAM indexing disabled. The calmd command takes the basename of
    `outfile` as a temporary input and declares "<basename>.calmd" as its
    temporary stdout. Both commands are wrapped in one `ParallelCmds`.
    """
    self._basename = os.path.basename(outfile)

    infiles = safe_coerce_to_tuple(infiles)
    jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")

    gatk = AtomicJavaCmdBuilder(config, jar_file)
    gatk.set_option("-T", "IndelRealigner")
    gatk.set_option("-R", "%(IN_REFERENCE)s")
    gatk.set_option("-targetIntervals", "%(IN_INTERVALS)s")
    gatk.set_option("-o", "%(OUT_BAMFILE)s")
    gatk.set_option("--bam_compression", 0)
    gatk.set_option("--disable_bam_indexing")
    _set_input_files(gatk, infiles)

    gatk.set_kwargs(IN_REFERENCE=reference,
                    IN_REF_DICT=fileutils.swap_ext(reference, ".dict"),
                    IN_INTERVALS=intervals,
                    OUT_BAMFILE=outfile)

    calmd = AtomicCmd(
        ["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
        TEMP_IN_BAM=self._basename,
        IN_REF=reference,
        TEMP_OUT_STDOUT=self._basename + ".calmd")

    description = "<Indel Realign: %i file(s) -> '%s'>" \
        % (len(infiles), outfile)

    CommandNode.__init__(self,
                         description=description,
                         command=ParallelCmds([gatk.finalize(), calmd]),
                         dependencies=dependencies)
def customize(cls, config, input_bams, output_bam, output_metrics=None,
              keep_dupes=False, dependencies=()):
    """Prepare a MarkDuplicates.jar command for `input_bams`.

    Unless `keep_dupes` is true, REMOVE_DUPLICATES is set to "True".
    Metrics go to `output_metrics`, or to
    `swap_ext(output_bam, ".metrics")` when that is falsy. Returns a dict
    with the command builder under "command" and `dependencies` under
    "dependencies".
    """
    jar_file = os.path.join(config.jar_root, "MarkDuplicates.jar")
    builder = AtomicJavaCmdBuilder(jar_file, jre_options=config.jre_options)

    # Create .bai index, since it is required by a lot of other programs
    builder.set_option("CREATE_INDEX", "True", sep="=")

    builder.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
    builder.set_option("METRICS_FILE", "%(OUT_METRICS)s", sep="=")
    builder.add_multiple_options("I", input_bams, sep="=")

    if not keep_dupes:
        # Remove duplicates from output by default to save disk-space
        builder.set_option("REMOVE_DUPLICATES", "True",
                           sep="=", fixed=False)

    output_metrics = output_metrics or swap_ext(output_bam, ".metrics")
    builder.set_kwargs(OUT_BAM=output_bam,
                       OUT_BAI=swap_ext(output_bam, ".bai"),
                       OUT_METRICS=output_metrics,
                       CHECK_JAR=_picard_version(config, jar_file))

    return {"command": builder, "dependencies": dependencies}
def test_java_builder__defaults__kwargs():
    # By default, kwargs should hold the JAR path and the JRE version check
    builder = AtomicJavaCmdBuilder("/path/Foo.jar")
    expected_kwargs = {
        "AUX_JAR": "/path/Foo.jar",
        "CHECK_JRE": JAVA_VERSIONS[(1, 6)],
    }
    assert_equal(builder.kwargs, expected_kwargs)
def __init__(self, config, reference, input_bam, output_bam, tags,
             min_mapq=0, filter_unmapped=False, dependencies=()):
    """Set up a three-stage BAM cleanup pipeline.

    The stages, wrapped in a single `ParallelCmds`, are:
      1. `samtools view -bu` on `input_bam`, with "-q<min_mapq>" added if
         `min_mapq` > 0 and "-F0x4" added if `filter_unmapped` is true;
      2. AddOrReplaceReadGroups.jar, reading stdin and writing the
         temporary file "bam.pipe", with the "SM", "LB", "PU" and "PL"
         values taken from `tags`;
      3. `samtools calmd` on "bam.pipe" against `reference`, writing
         `output_bam`.
    """
    view_call = ["samtools", "view", "-bu"]
    if min_mapq > 0:
        view_call.append("-q%i" % min_mapq)
    if filter_unmapped:
        view_call.append("-F0x4")
    view_call.append("%(IN_BAM)s")

    flt = AtomicCmd(view_call,
                    IN_BAM=input_bam,
                    OUT_STDOUT=AtomicCmd.PIPE)

    jar_file = os.path.join(config.jar_root, "AddOrReplaceReadGroups.jar")
    picard = AtomicJavaCmdBuilder(jar=jar_file,
                                  jre_options=config.jre_options)
    picard.set_option("INPUT", "/dev/stdin", sep="=")
    picard.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
    picard.set_option("COMPRESSION_LEVEL", "0", sep="=")
    picard.set_option("SORT_ORDER", "coordinate", sep="=")

    for tag in ("SM", "LB", "PU", "PL"):
        picard.set_option(tag, tags[tag], sep="=")

    picard.set_kwargs(IN_STDIN=flt,
                      TEMP_OUT_BAM="bam.pipe")
    annotate = picard.finalize()

    calmd = AtomicCmd(
        ["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
        IN_REF=reference,
        TEMP_IN_BAM="bam.pipe",
        OUT_STDOUT=output_bam)

    description = "<Cleanup BAM: %s -> '%s'>" \
        % (input_bam, output_bam)

    PicardNode.__init__(self,
                        command=ParallelCmds([flt, annotate, calmd]),
                        description=description,
                        dependencies=dependencies)
def __init__(self, config, reference, input_bam, output_bam, tags,
             min_mapq=0, filter_unmapped=False, dependencies=()):
    """Set up a filter -> read-group annotation -> calmd pipeline.

    `input_bam` is read with `samtools view -bu` ("-q<min_mapq>" is added
    when `min_mapq` > 0, "-F0x4" when `filter_unmapped` is true), piped
    into AddOrReplaceReadGroups.jar, which writes the temporary file
    "bam.pipe", and then processed by `samtools calmd` against
    `reference`, which writes `output_bam`. `tags` must provide the keys
    "SM", "LB", "PU" and "PL".
    """
    samtools_view = ["samtools", "view", "-bu"]
    if min_mapq > 0:
        samtools_view.append("-q%i" % min_mapq)
    if filter_unmapped:
        samtools_view.append("-F0x4")
    samtools_view.append("%(IN_BAM)s")

    flt = AtomicCmd(samtools_view,
                    IN_BAM=input_bam,
                    OUT_STDOUT=AtomicCmd.PIPE)

    jar_file = os.path.join(config.jar_root, "AddOrReplaceReadGroups.jar")
    builder = AtomicJavaCmdBuilder(jar=jar_file,
                                   jre_options=config.jre_options)
    fixed_options = (("INPUT", "/dev/stdin"),
                     ("OUTPUT", "%(TEMP_OUT_BAM)s"),
                     ("COMPRESSION_LEVEL", "0"),
                     ("SORT_ORDER", "coordinate"))
    for (option, value) in fixed_options:
        builder.set_option(option, value, sep="=")

    for tag in ("SM", "LB", "PU", "PL"):
        builder.set_option(tag, tags[tag], sep="=")

    builder.set_kwargs(IN_STDIN=flt,
                       TEMP_OUT_BAM="bam.pipe")
    annotate = builder.finalize()

    calmd = AtomicCmd(
        ["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
        IN_REF=reference,
        TEMP_IN_BAM="bam.pipe",
        OUT_STDOUT=output_bam)

    description = "<Cleanup BAM: %s -> '%s'>" \
        % (input_bam, output_bam)

    PicardNode.__init__(self,
                        command=ParallelCmds([flt, annotate, calmd]),
                        description=description,
                        dependencies=dependencies)
def customize(cls, config, input_bams, output_bam, output_metrics=None,
              keep_dupes=False, dependencies=()):
    """Build the MarkDuplicates.jar command for `input_bams`.

    Outputs are `output_bam`, `swap_ext(output_bam, ".bai")`, and the
    metrics file (`output_metrics`, or `swap_ext(output_bam, ".metrics")`
    if falsy). REMOVE_DUPLICATES is set to "True" unless `keep_dupes` is
    true. Returns a dict holding the command builder ("command") and
    `dependencies` ("dependencies").
    """
    jar_file = os.path.join(config.jar_root, "MarkDuplicates.jar")
    picard = AtomicJavaCmdBuilder(jar_file, jre_options=config.jre_options)

    # Create .bai index, since it is required by a lot of other programs
    picard.set_option("CREATE_INDEX", "True", sep="=")

    picard.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
    picard.set_option("METRICS_FILE", "%(OUT_METRICS)s", sep="=")
    picard.add_multiple_options("I", input_bams, sep="=")

    if not keep_dupes:
        # Remove duplicates from output by default to save disk-space
        picard.set_option("REMOVE_DUPLICATES", "True",
                          sep="=", fixed=False)

    output_metrics = output_metrics or swap_ext(output_bam, ".metrics")
    bai_file = swap_ext(output_bam, ".bai")
    picard.set_kwargs(OUT_BAM=output_bam,
                      OUT_BAI=bai_file,
                      OUT_METRICS=output_metrics,
                      CHECK_JAR=_picard_version(config, jar_file))

    return {"command": picard, "dependencies": dependencies}