Пример #1
0
def concatenate_input_bams(config, input_bams, out = AtomicCmd.PIPE):
    """Merge several input BAMs into one stream, or pass a lone BAM through.

    When exactly one input file is given, no command is built and the
    function returns ([], filename). Otherwise a MergeSamFiles command is
    built and finalized, and the function returns ([cmd], cmd). In both
    cases the second element is intended to be usable as the IN_STDIN of
    another AtomicCmd (a filename or an AtomicCmd).

    If 'out' is AtomicCmd.PIPE the merged BAM is written to /dev/stdout and
    'out' is registered as OUT_STDOUT; any other value is used directly as
    the OUTPUT destination."""

    bam_files = safe_coerce_to_tuple(input_bams)
    if len(bam_files) == 1:
        # A single file needs no merging; hand the filename straight back.
        return [], bam_files[0]

    picard_jar = os.path.join(config.jar_root, "MergeSamFiles.jar")
    builder = AtomicJavaCmdBuilder(config, picard_jar)
    builder.set_kwargs(CHECK_JAR=_picard_version(picard_jar))

    if out == AtomicCmd.PIPE:
        builder.set_kwargs(OUT_STDOUT=out)
        destination = "/dev/stdout"
    else:
        destination = out
    builder.set_option("OUTPUT", destination, sep="=")

    # No index is requested and the compression level is set to 0.
    builder.set_option("CREATE_INDEX", "False", sep="=")
    builder.set_option("COMPRESSION_LEVEL", 0, sep="=")

    # One "I=" option per input; each refers to a numbered IN_BAM_xx key
    # (numbering starts at 1) that is bound to the actual filename.
    for (number, bam) in enumerate(safe_coerce_to_tuple(bam_files), 1):
        key = "IN_BAM_%02i" % number
        builder.add_option("I", "%%(%s)s" % key, sep="=")
        builder.set_kwargs(**{key: bam})

    builder.set_option("SO", "coordinate", sep="=", fixed=False)

    merge_cmd = builder.finalize()
    return [merge_cmd], merge_cmd
Пример #2
0
    def customize(cls, reference, infiles, outfile, options, dependencies = ()):
        """Build the GATK UnifiedGenotyper command for one or more BAM files.

        Arguments:
          reference    -- reference passed via -R; registered as IN_REFERENCE.
          infiles      -- a single BAM path or a sequence of BAM paths; each
                          is passed with its own -I option and registered as
                          IN_BAM_01, IN_BAM_02, ...
          outfile      -- VCF written via -o (OUT_VCFFILES); outfile + ".idx"
                          is registered as an additional output (OUT_VCF_IDX).
          options      -- object providing 'jar_root'; also handed to
                          AtomicJavaCmdBuilder.
          dependencies -- accepted but not used inside this function.

        Returns a dict of the form
          {"commands": {"unifiedgenotyper": <AtomicJavaCmdBuilder>}}.

        Raises AssertionError if any input BAM does not exist when this
        function is called.
        """
        infiles = safe_coerce_to_tuple(infiles)
        jar_file = os.path.join(options.jar_root, "GenomeAnalysisTK.jar")
        params = AtomicJavaCmdBuilder(options, jar_file)
        params.set_option("-R", "%(IN_REFERENCE)s")
        params.set_option("-T", "UnifiedGenotyper")

        kwargs = {}
        for (index, bam) in enumerate(infiles, start = 1):
            # Explicit raise rather than 'assert': assert statements are
            # removed under 'python -O', which would silently skip the check.
            # The exception type and message are kept unchanged.
            if not os.path.exists(bam):
                raise AssertionError("Couldn't find input BAM: {}".format(bam))

            # Register each BAM as a named IN_ file and reference it through
            # a template, the same way the reference (IN_REFERENCE) is
            # handled, instead of baking the raw path into the command.
            key = "IN_BAM_%02i" % index
            params.add_option("-I", "%%(%s)s" % key)
            kwargs[key] = bam

        params.set_option("-o", "%(OUT_VCFFILES)s")
        params.set_option("-stand_call_conf", "30.0")
        params.set_option("-stand_emit_conf", "10.0")
        params.set_option("-dcov", "200")
        #UnifiedGenotyper.set_option("-nct", "3")
        # NOTE(review): the interval below is hard-coded; presumably specific
        # to one reference genome -- confirm before reusing elsewhere.
        params.set_option("-L", "chrUn2:1-19213991")

        kwargs["IN_REFERENCE"] = reference
        kwargs["OUT_VCFFILES"] = outfile
        kwargs["OUT_VCF_IDX"] = outfile + ".idx"
        params.set_kwargs(**kwargs)

        return {
            "commands" : {
                "unifiedgenotyper" : params
            }
        }
Пример #3
0
    def customize(cls, config, input_bams, output_bam, output_metrics = None, dependencies = ()):
        """Assemble the parameters for a MarkDuplicates command.

        'input_bams' may be a single filename or a sequence of filenames;
        each becomes an "I=" option bound to a numbered IN_BAM_xx key
        (numbering starts at 0). The result is written to 'output_bam',
        with a ".bai" index registered alongside it. If 'output_metrics' is
        not given (or is otherwise false), the metrics path is derived from
        'output_bam' by swapping the extension for ".metrics".

        Returns a dict with the command builder under "command" and the
        'dependencies' argument passed through under "dependencies".
        """
        picard_jar = os.path.join(config.jar_root, "MarkDuplicates.jar")
        builder = AtomicJavaCmdBuilder(config, picard_jar)

        # A .bai index is requested, since many other programs require one.
        builder.set_option("CREATE_INDEX", "True", sep="=")

        builder.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        builder.set_option("METRICS_FILE", "%(OUT_METRICS)s", sep="=")

        for (number, bam) in enumerate(safe_coerce_to_tuple(input_bams)):
            key = "IN_BAM_%02i" % number
            builder.add_option("I", "%%(%s)s" % key, sep="=")
            builder.set_kwargs(**{key: bam})

        # Duplicates are removed by default to save disk-space; the option
        # is added with fixed=False.
        builder.set_option("REMOVE_DUPLICATES", "True", sep="=", fixed=False)

        index_path = swap_ext(output_bam, ".bai")
        metrics_path = output_metrics or swap_ext(output_bam, ".metrics")
        builder.set_kwargs(
            OUT_BAM=output_bam,
            OUT_BAI=index_path,
            OUT_METRICS=metrics_path,
            CHECK_JAR=_picard_version(picard_jar),
        )

        return {"command": builder, "dependencies": dependencies}
Пример #4
0
    def customize(cls,
                  config,
                  input_bams,
                  output_bam,
                  output_metrics=None,
                  dependencies=()):
        """Create the command builder for MarkDuplicates.

        Every entry of 'input_bams' (a filename or a sequence of filenames)
        is added as an "I=" option that refers to a numbered IN_BAM_xx key,
        counting from 0. Output goes to 'output_bam', with a matching ".bai"
        file registered as OUT_BAI. The metrics file is 'output_metrics' if
        that is truthy, and otherwise 'output_bam' with its extension
        swapped for ".metrics".

        Returns {"command": <builder>, "dependencies": dependencies}.
        """
        jar_path = os.path.join(config.jar_root, "MarkDuplicates.jar")
        cmd_builder = AtomicJavaCmdBuilder(config, jar_path)

        # Options are set in order; CREATE_INDEX asks for the .bai index
        # that a lot of other programs require.
        fixed_options = (
            ("CREATE_INDEX", "True"),
            ("OUTPUT", "%(OUT_BAM)s"),
            ("METRICS_FILE", "%(OUT_METRICS)s"),
        )
        for (option, value) in fixed_options:
            cmd_builder.set_option(option, value, sep="=")

        bam_files = safe_coerce_to_tuple(input_bams)
        for (position, bam_file) in enumerate(bam_files):
            placeholder = "IN_BAM_%02i" % position
            cmd_builder.add_option("I", "%(" + placeholder + ")s", sep="=")
            cmd_builder.set_kwargs(**{placeholder: bam_file})

        # Duplicates are removed from the output by default to save
        # disk-space; the option is added with fixed=False.
        cmd_builder.set_option("REMOVE_DUPLICATES", "True", sep="=",
                               fixed=False)

        file_kwargs = {
            "OUT_BAM": output_bam,
            "OUT_BAI": swap_ext(output_bam, ".bai"),
            "OUT_METRICS": output_metrics or swap_ext(output_bam, ".metrics"),
            "CHECK_JAR": _picard_version(jar_path),
        }
        cmd_builder.set_kwargs(**file_kwargs)

        return {"command": cmd_builder, "dependencies": dependencies}
Пример #5
0
def concatenate_input_bams(config, input_bams, out=AtomicCmd.PIPE):
    """Concatenate input BAMs only when there is more than one.

    Returns a 2-tuple. The first element is a list holding the merge
    command (empty when a single input was given); the second is either
    the lone input filename or the merge command itself, and is meant to
    be passed as the IN_STDIN of an AtomicCmd.

    With the default 'out' (AtomicCmd.PIPE) the merged output is written
    to /dev/stdout and 'out' is registered as OUT_STDOUT; otherwise 'out'
    is used as the OUTPUT value as-is."""

    bams = safe_coerce_to_tuple(input_bams)
    if len(bams) == 1:
        # Nothing to merge: avoid the overhead of an extra command.
        only_bam = bams[0]
        return [], only_bam

    jar_path = os.path.join(config.jar_root, "MergeSamFiles.jar")
    merge = AtomicJavaCmdBuilder(config, jar_path)
    merge.set_kwargs(CHECK_JAR=_picard_version(jar_path))

    if out == AtomicCmd.PIPE:
        merge.set_kwargs(OUT_STDOUT=out)
        output_target = "/dev/stdout"
    else:
        output_target = out
    merge.set_option("OUTPUT", output_target, sep="=")

    # No index is created and the compression level is set to 0.
    for (option, value) in (("CREATE_INDEX", "False"),
                            ("COMPRESSION_LEVEL", 0)):
        merge.set_option(option, value, sep="=")

    # Inputs are numbered from 1; each "I=" option points at its own
    # IN_BAM_xx key, which in turn is bound to the filename.
    numbered_bams = enumerate(safe_coerce_to_tuple(bams), start=1)
    for (position, bam_path) in numbered_bams:
        placeholder = "IN_BAM_%02i" % position
        merge.add_option("I", "%(" + placeholder + ")s", sep="=")
        merge.set_kwargs(**{placeholder: bam_path})

    merge.set_option("SO", "coordinate", sep="=", fixed=False)

    finalized = merge.finalize()
    return [finalized], finalized
Пример #6
0
    def customize(cls, config, input_bams, output_bam, dependencies=()):
        """Assemble the parameters for a MergeSamFiles command.

        Each item of 'input_bams' is added as an "I=" option bound to a
        numbered IN_BAM_xx key (numbering starts at 1). The merged file is
        written to 'output_bam', index creation is enabled, and the
        corresponding ".bai" file is registered as OUT_BAI.

        Returns a dict with the command builder under "command" and the
        'dependencies' argument passed through under "dependencies".
        """
        merge_jar = os.path.join(config.jar_root, "MergeSamFiles.jar")
        builder = AtomicJavaCmdBuilder(config, merge_jar)

        builder.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        builder.set_option("CREATE_INDEX", "True", sep="=")

        output_kwargs = {
            "OUT_BAM": output_bam,
            "OUT_BAI": swap_ext(output_bam, ".bai"),
            "CHECK_JAR": _picard_version(merge_jar),
        }
        builder.set_kwargs(**output_kwargs)

        # NOTE(review): 'input_bams' is iterated as given here, so it is
        # presumably expected to already be a sequence of filenames.
        for (number, bam) in enumerate(input_bams, 1):
            key = "IN_BAM_%02i" % number
            builder.add_option("I", "%%(%s)s" % key, sep="=")
            builder.set_kwargs(**{key: bam})

        builder.set_option("SO", "coordinate", sep="=", fixed=False)

        return {"command": builder, "dependencies": dependencies}
Пример #7
0
    def customize(cls, config, input_bams, output_bam, dependencies = ()):
        """Create the command builder for merging BAMs with MergeSamFiles.

        The output is written to 'output_bam' with CREATE_INDEX enabled, and
        'output_bam' with its extension swapped for ".bai" is registered as
        OUT_BAI. Every item of 'input_bams' contributes one "I=" option that
        refers to an IN_BAM_xx key, counting from 1.

        Returns {"command": <builder>, "dependencies": dependencies}.
        """
        jar_path = os.path.join(config.jar_root, "MergeSamFiles.jar")
        cmd_builder = AtomicJavaCmdBuilder(config, jar_path)

        for (option, value) in (("OUTPUT", "%(OUT_BAM)s"),
                                ("CREATE_INDEX", "True")):
            cmd_builder.set_option(option, value, sep = "=")

        bai_path = swap_ext(output_bam, ".bai")
        cmd_builder.set_kwargs(OUT_BAM = output_bam,
                               OUT_BAI = bai_path,
                               CHECK_JAR = _picard_version(jar_path))

        # NOTE(review): no tuple coercion is applied to 'input_bams' here;
        # presumably callers always pass a sequence of filenames.
        for (position, bam_file) in enumerate(input_bams, start = 1):
            placeholder = "IN_BAM_%02i" % position
            cmd_builder.add_option("I", "%(" + placeholder + ")s", sep = "=")
            cmd_builder.set_kwargs(**{placeholder : bam_file})

        cmd_builder.set_option("SO", "coordinate", sep = "=", fixed = False)

        return {"command"      : cmd_builder,
                "dependencies" : dependencies}