Exemplo n.º 1
0
    def customize(cls, input_file, output_file, algorithm = "auto", dependencies = ()):
        """Build the (unfinalized) command and parameters for this node.

        The base call is looked up in _PRESETS using the lower-cased
        'algorithm' name (an unknown name raises KeyError). 'input_file' is
        appended as the final argument, and STDOUT is sent to 'output_file'.

        Returns a dict with the keys "command" (the command builder) and
        "dependencies" (passed through unchanged).
        """
        preset_call = _PRESETS[algorithm.lower()]

        builder = AtomicCmdBuilder(preset_call)
        # The input FASTA is passed as a positional argument
        builder.add_value("%(IN_FASTA)s")
        builder.set_kwargs(IN_FASTA=input_file,
                           OUT_STDOUT=output_file,
                           CHECK_VERSION=MAFFT_VERSION)

        parameters = {}
        parameters["command"] = builder
        parameters["dependencies"] = dependencies
        return parameters
Exemplo n.º 2
0
    def customize(cls, input_file, output_file, algorithm = "auto", dependencies = ()):
        """Build the (unfinalized) command and parameters for this node.

        The base call is looked up in _PRESETS using the lower-cased
        'algorithm' name (an unknown name raises KeyError). 'input_file' is
        appended as the final argument, and STDOUT is sent to 'output_file'.

        Returns a dict with the keys "command" (the command builder) and
        "dependencies" (passed through unchanged).
        """
        preset_call = _PRESETS[algorithm.lower()]

        builder = AtomicCmdBuilder(preset_call)
        # The input FASTA is passed as a positional argument
        builder.add_value("%(IN_FASTA)s")
        builder.set_kwargs(IN_FASTA=input_file,
                           OUT_STDOUT=output_file,
                           CHECK_VERSION=MAFFT_VERSION)

        parameters = {}
        parameters["command"] = builder
        parameters["dependencies"] = dependencies
        return parameters
Exemplo n.º 3
0
    def __init__(self, input_file, k_groups, output_root,
                 samples=None, dependencies=()):
        """Set up an 'admixture' command for 'input_file' using K = 'k_groups'.

        Arguments:
          input_file -- Path used as IN_FILE_BED; the corresponding '.bim' and
                        '.fam' paths are derived by swapping the extension.
          k_groups -- Number of groups; must be 2 or 3 (asserted).
          output_root -- Folder in which the '<prefix>.<K>.P', '.Q' and '.log'
                         files are placed.
          samples -- Optional mapping whose values are rows that can be indexed
                     by "Group(<K>)"; if any such value differs from '-', the
                     '--supervised' option is enabled.
          dependencies -- Passed through to CommandNode.
        """
        self._samples = samples
        self._input_file = input_file
        self._k_groups = k_groups

        # Validate before 'k_groups' is used to build keys and filenames
        assert k_groups in (2, 3), k_groups

        group_key = "Group(%i)" % (k_groups,)
        # 'values()' exists on both Python 2 and Python 3 mappings, unlike
        # 'itervalues()'; bool() ensures that the flag is always True/False,
        # also when 'samples' is None or empty.
        self._supervised = bool(samples) and any(row[group_key] != '-'
                                                 for row in samples.values())

        prefix = os.path.splitext(os.path.basename(input_file))[0]
        output_prefix = os.path.join(output_root,
                                     "%s.%i" % (prefix, k_groups))

        cmd = AtomicCmdBuilder("admixture",
                               IN_FILE_BED=input_file,
                               IN_FILE_BIM=fileutils.swap_ext(input_file,
                                                              ".bim"),
                               IN_FILE_FAM=fileutils.swap_ext(input_file,
                                                              ".fam"),

                               # NOTE(review): these temporary files are not
                               # created in this method; presumably that is
                               # done in a setup step elsewhere -- confirm.
                               TEMP_OUT_FILE_BED=prefix + ".bed",
                               TEMP_OUT_FILE_BIM=prefix + ".bim",
                               TEMP_OUT_FILE_FAM=prefix + ".fam",
                               TEMP_OUT_FILE_POP=prefix + ".pop",

                               OUT_P=output_prefix + ".P",
                               OUT_Q=output_prefix + ".Q",
                               OUT_STDOUT=output_prefix + ".log",

                               CHECK_VERSION=ADMIXTURE_VERSION,
                               set_cwd=True)

        # A new random value in the range 0 .. 65535 is picked for every node
        cmd.set_option("-s", random.randint(0, 2 ** 16 - 1))

        if self._supervised:
            cmd.set_option("--supervised")

        # Positional arguments: the (temporary) BED file followed by K
        cmd.add_value("%(TEMP_OUT_FILE_BED)s")
        cmd.add_value(int(k_groups))

        CommandNode.__init__(self,
                             description="<Admixture -> '%s.*''>"
                             % (output_prefix,),
                             command=cmd.finalize(),
                             dependencies=dependencies)
Exemplo n.º 4
0
    def __init__(self, input_file, k_groups, output_root,
                 samples=None, dependencies=()):
        """Set up an 'admixture' command for 'input_file' using K = 'k_groups'.

        Arguments:
          input_file -- Path used as IN_FILE_BED; the corresponding '.bim' and
                        '.fam' paths are derived by swapping the extension.
          k_groups -- Number of groups; must be 2 or 3 (asserted).
          output_root -- Folder in which the '<prefix>.<K>.P', '.Q' and '.log'
                         files are placed.
          samples -- Optional mapping whose values are rows that can be indexed
                     by "Group(<K>)"; if any such value differs from '-', the
                     '--supervised' option is enabled.
          dependencies -- Passed through to CommandNode.
        """
        self._samples = samples
        self._input_file = input_file
        self._k_groups = k_groups

        # Validate before 'k_groups' is used to build keys and filenames
        assert k_groups in (2, 3), k_groups

        group_key = "Group(%i)" % (k_groups,)
        # 'values()' exists on both Python 2 and Python 3 mappings, unlike
        # 'itervalues()'; bool() ensures that the flag is always True/False,
        # also when 'samples' is None or empty.
        self._supervised = bool(samples) and any(row[group_key] != '-'
                                                 for row in samples.values())

        prefix = os.path.splitext(os.path.basename(input_file))[0]
        output_prefix = os.path.join(output_root,
                                     "%s.%i" % (prefix, k_groups))

        cmd = AtomicCmdBuilder("admixture",
                               IN_FILE_BED=input_file,
                               IN_FILE_BIM=fileutils.swap_ext(input_file,
                                                              ".bim"),
                               IN_FILE_FAM=fileutils.swap_ext(input_file,
                                                              ".fam"),

                               # NOTE(review): these temporary files are not
                               # created in this method; presumably that is
                               # done in a setup step elsewhere -- confirm.
                               TEMP_OUT_FILE_BED=prefix + ".bed",
                               TEMP_OUT_FILE_BIM=prefix + ".bim",
                               TEMP_OUT_FILE_FAM=prefix + ".fam",
                               TEMP_OUT_FILE_POP=prefix + ".pop",

                               OUT_P=output_prefix + ".P",
                               OUT_Q=output_prefix + ".Q",
                               OUT_STDOUT=output_prefix + ".log",

                               CHECK_VERSION=ADMIXTURE_VERSION,
                               set_cwd=True)

        # A new random value in the range 0 .. 65535 is picked for every node
        cmd.set_option("-s", random.randint(0, 2 ** 16 - 1))

        if self._supervised:
            cmd.set_option("--supervised")

        # Positional arguments: the (temporary) BED file followed by K
        cmd.add_value("%(TEMP_OUT_FILE_BED)s")
        cmd.add_value(int(k_groups))

        CommandNode.__init__(self,
                             description="<Admixture -> '%s.*''>"
                             % (output_prefix,),
                             command=cmd.finalize(),
                             dependencies=dependencies)
Exemplo n.º 5
0
    def __init__(self, config, reference, input_bam, output_bam, tags,
                 min_mapq=0, filter_unmapped=False, dependencies=()):
        """Set up a three-step BAM cleanup run as parallel commands.

        The steps are (1) 'samtools view' on 'input_bam', optionally filtering
        by 'min_mapq' (-q) and / or flag 0x4 (-F) if 'filter_unmapped' is set,
        (2) Picard 'AddOrReplaceReadGroups' reading from STDIN and setting the
        ID, SM, LB, PU, and PL values found in 'tags', and (3) 'samtools calmd'
        against 'reference', writing the result to 'output_bam'.
        """
        view_cmd = AtomicCmdBuilder(("samtools", "view", "-bu"),
                                    IN_BAM=input_bam,
                                    OUT_STDOUT=AtomicCmd.PIPE)

        if min_mapq:
            view_cmd.set_option("-q", min_mapq, sep="")
        if filter_unmapped:
            view_cmd.set_option("-F", "0x4", sep="")
        view_cmd.add_value("%(IN_BAM)s")

        fixed_options = (
            ("INPUT", "/dev/stdin"),
            # Output is written to a named pipe, since the JVM may, in some
            # cases, emit warning messages to stdout, resulting in a
            # malformed BAM.
            ("OUTPUT", "%(TEMP_OUT_BAM)s"),
            ("COMPRESSION_LEVEL", "0"),
            # Ensure that the BAM is sorted; this is required by the pipeline,
            # and needs to be done before calling calmd (avoiding pathologic
            # runtimes).
            ("SORT_ORDER", "coordinate"),
        )

        picard_cmd = picard.picard_command(config, "AddOrReplaceReadGroups")
        for key, value in fixed_options:
            picard_cmd.set_option(key, value, sep="=")

        # All tags are overwritten; ID is set since the default (e.g. '1')
        # causes problems with pysam due to type inference (is read as a length
        # 1 string, but written as a character).
        for tag_name in ("ID", "SM", "LB", "PU", "PL"):
            picard_cmd.set_option(tag_name, tags[tag_name], sep="=")

        picard_cmd.set_kwargs(IN_STDIN=view_cmd,
                              TEMP_OUT_BAM="bam.pipe")

        # Reads the file written by Picard ("bam.pipe") and writes to STDOUT
        calmd_call = ["samtools", "calmd", "-b",
                      "%(TEMP_IN_BAM)s", "%(IN_REF)s"]
        calmd_cmd = AtomicCmdBuilder(calmd_call,
                                     IN_REF=reference,
                                     TEMP_IN_BAM="bam.pipe",
                                     OUT_STDOUT=output_bam)

        finalized = []
        for builder in (view_cmd, picard_cmd, calmd_cmd):
            finalized.append(builder.finalize())

        PicardNode.__init__(self,
                            command=ParallelCmds(finalized),
                            description="<Cleanup BAM: %s -> '%s'>"
                            % (input_bam, output_bam),
                            dependencies=dependencies)
Exemplo n.º 6
0
def test_builder__finalize__calls_atomiccmd():
    """Check that finalize() instantiates AtomicCmd with the built call."""
    expected_args = (["echo", "-out", "%(OUT_FILE)s", "%(IN_FILE)s"],)
    expected_kwargs = {"IN_FILE": "/in/file",
                       "OUT_FILE": "/out/file",
                       "set_cwd": True}
    # Records each instantiation of the mock class
    invocations = []

    class _AtomicCmdMock:
        def __init__(self, *args, **kwargs):
            assert_equal(args, expected_args)
            assert_equal(kwargs, expected_kwargs)
            invocations.append(True)

    with Monkeypatch("paleomix.atomiccmd.builder.AtomicCmd", _AtomicCmdMock):
        builder = AtomicCmdBuilder("echo", set_cwd=True)
        builder.add_option("-out", "%(OUT_FILE)s")
        builder.add_value("%(IN_FILE)s")
        builder.set_kwargs(OUT_FILE="/out/file",
                           IN_FILE="/in/file")
        builder.finalize()

        assert invocations
Exemplo n.º 7
0
    def __init__(self,
                 input_file,
                 k_groups,
                 output_root,
                 groups,
                 dependencies=()):
        """Set up a supervised 'admixture' command for 'input_file'.

        The '.bim' and '.fam' inputs are derived from 'input_file' by swapping
        the extension, and the '.P', '.Q' and '.log' outputs are written to
        'output_root' using the prefix '<input basename>.<k_groups>'. The
        'groups' value is only stored on the node by this method.
        """
        self._input_file = input_file
        self._groups = groups

        stem, _ = os.path.splitext(os.path.basename(input_file))
        out_prefix = os.path.join(output_root, "%s.%i" % (stem, k_groups))

        input_files = {
            "IN_FILE_BED": input_file,
            "IN_FILE_BIM": fileutils.swap_ext(input_file, ".bim"),
            "IN_FILE_FAM": fileutils.swap_ext(input_file, ".fam"),
        }
        # NOTE(review): the temporary files are not created here; presumably
        # that happens in a setup step elsewhere in the class -- confirm.
        temp_files = {
            "TEMP_OUT_FILE_BED": stem + ".bed",
            "TEMP_OUT_FILE_BIM": stem + ".bim",
            "TEMP_OUT_FILE_FAM": stem + ".fam",
            "TEMP_OUT_FILE_POP": stem + ".pop",
        }
        output_files = {
            "OUT_P": out_prefix + ".P",
            "OUT_Q": out_prefix + ".Q",
            "OUT_STDOUT": out_prefix + ".log",
        }

        file_kwargs = {}
        for mapping in (input_files, temp_files, output_files):
            file_kwargs.update(mapping)

        builder = AtomicCmdBuilder("admixture",
                                   CHECK_VERSION=ADMIXTURE_VERSION,
                                   set_cwd=True,
                                   **file_kwargs)

        # A new random value in the range 0 .. 65535 is picked for every node
        seed = random.randint(0, 2**16 - 1)
        builder.set_option("-s", seed)
        builder.set_option("--supervised")

        # Positional arguments: the (temporary) BED file followed by K
        for value in ("%(TEMP_OUT_FILE_BED)s", int(k_groups)):
            builder.add_value(value)

        CommandNode.__init__(
            self,
            description="<Admixture -> '%s.*''>" % (out_prefix, ),
            command=builder.finalize(),
            dependencies=dependencies,
        )
Exemplo n.º 8
0
def test_builder__finalize__calls_atomiccmd():
    """Check that finalize() instantiates AtomicCmd with the built call."""
    expected_args = (["echo", "-out", "%(OUT_FILE)s", "%(IN_FILE)s"], )
    expected_kwargs = {
        "IN_FILE": "/in/file",
        "OUT_FILE": "/out/file",
        "set_cwd": True,
    }
    # Records each instantiation of the mock class
    invocations = []

    class _AtomicCmdMock(object):
        def __init__(self, *args, **kwargs):
            assert_equal(args, expected_args)
            assert_equal(kwargs, expected_kwargs)
            invocations.append(True)

    with Monkeypatch("paleomix.atomiccmd.builder.AtomicCmd", _AtomicCmdMock):
        builder = AtomicCmdBuilder("echo", set_cwd=True)
        builder.add_option("-out", "%(OUT_FILE)s")
        builder.add_value("%(IN_FILE)s")
        builder.set_kwargs(OUT_FILE="/out/file", IN_FILE="/in/file")
        builder.finalize()

        assert invocations
Exemplo n.º 9
0
def test_builder__add_value__two_values():
    """Check that values are appended to the call in the order added."""
    values = ("%(IN_FILE)s", "%(OUT_FILE)s")

    builder = AtomicCmdBuilder("ls")
    for value in values:
        builder.add_value(value)

    expected_call = ["ls", "%(IN_FILE)s", "%(OUT_FILE)s"]
    assert_equal(builder.call, expected_call)
Exemplo n.º 10
0
def test_builder__add_value__two_values():
    """Check that values are appended to the call in the order added."""
    values = ("%(IN_FILE)s", "%(OUT_FILE)s")

    builder = AtomicCmdBuilder("ls")
    for value in values:
        builder.add_value(value)

    expected_call = ["ls", "%(IN_FILE)s", "%(OUT_FILE)s"]
    assert_equal(builder.call, expected_call)