def customize(cls, input_file, output_file, algorithm="auto", dependencies=()):
    """Assemble the command builder and dependencies for a MAFFT run.

    The base call is looked up in ``_PRESETS`` using the lower-cased
    ``algorithm`` name; ``input_file`` is appended as the only positional
    value and ``output_file`` is bound to the command's stdout.

    Returns a dict with the keys "command" (the un-finalized builder) and
    "dependencies" (passed through unchanged).
    """
    preset_call = _PRESETS[algorithm.lower()]

    builder = AtomicCmdBuilder(preset_call)
    builder.add_value("%(IN_FASTA)s")
    builder.set_kwargs(
        IN_FASTA=input_file,
        OUT_STDOUT=output_file,
        CHECK_VERSION=MAFFT_VERSION,
    )

    return {
        "command": builder,
        "dependencies": dependencies,
    }
def __init__(self, input_file, k_groups, output_root, samples=None, dependencies=()):
    """Set up an 'admixture' command for ``input_file`` with K = ``k_groups``.

    input_file   -- path of the .bed file; the matching .bim and .fam
                    paths are derived by swapping the extension.
    k_groups     -- number of groups; must be 2 or 3 (checked by assert).
    output_root  -- folder receiving the .P, .Q and .log files.
    samples      -- optional mapping whose values are rows indexable by
                    "Group(<k_groups>)"; if any such value differs from
                    '-', the '--supervised' option is enabled.
    dependencies -- forwarded unchanged to CommandNode.
    """
    self._samples = samples
    self._input_file = input_file
    self._k_groups = k_groups

    column = "Group(%i)" % (k_groups,)
    if samples:
        self._supervised = any(
            row[column] != '-' for row in samples.itervalues()
        )
    else:
        # Mirror 'samples and ...': keep the falsy value itself
        self._supervised = samples

    assert k_groups in (2, 3), k_groups

    basename = os.path.basename(input_file)
    prefix, _ = os.path.splitext(basename)
    output_prefix = os.path.join(output_root, "%s.%i" % (prefix, k_groups))

    bim_file = fileutils.swap_ext(input_file, ".bim")
    fam_file = fileutils.swap_ext(input_file, ".fam")

    builder = AtomicCmdBuilder(
        "admixture",
        IN_FILE_BED=input_file,
        IN_FILE_BIM=bim_file,
        IN_FILE_FAM=fam_file,
        TEMP_OUT_FILE_BED=prefix + ".bed",
        TEMP_OUT_FILE_BIM=prefix + ".bim",
        TEMP_OUT_FILE_FAM=prefix + ".fam",
        TEMP_OUT_FILE_POP=prefix + ".pop",
        OUT_P=output_prefix + ".P",
        OUT_Q=output_prefix + ".Q",
        OUT_STDOUT=output_prefix + ".log",
        CHECK_VERSION=ADMIXTURE_VERSION,
        set_cwd=True,
    )

    # Random 16-bit value passed via '-s'
    max_seed = 2 ** 16 - 1
    builder.set_option("-s", random.randint(0, max_seed))

    if self._supervised:
        builder.set_option("--supervised")

    # Positional arguments: the temporary .bed file, then K
    builder.add_value("%(TEMP_OUT_FILE_BED)s")
    builder.add_value(int(k_groups))

    description = "<Admixture -> '%s.*''>" % (output_prefix,)
    CommandNode.__init__(
        self,
        description=description,
        command=builder.finalize(),
        dependencies=dependencies,
    )
def __init__(self, config, reference, input_bam, output_bam, tags,
             min_mapq=0, filter_unmapped=False, dependencies=()):
    """Set up a three-stage pipeline that cleans up ``input_bam``.

    The stages, run via ParallelCmds, are:
      1. 'samtools view -bu', optionally with '-q<min_mapq>' and '-F0x4'
         (the latter when ``filter_unmapped`` is set), writing to a pipe;
      2. Picard 'AddOrReplaceReadGroups', reading from stdin and writing
         to the temporary file 'bam.pipe';
      3. 'samtools calmd -b' on 'bam.pipe' against ``reference``, with
         stdout written to ``output_bam``.

    ``tags`` must provide the keys "ID", "SM", "LB", "PU" and "PL".
    """
    view = AtomicCmdBuilder(
        ("samtools", "view", "-bu"),
        IN_BAM=input_bam,
        OUT_STDOUT=AtomicCmd.PIPE,
    )

    if min_mapq:
        view.set_option("-q", min_mapq, sep="")

    if filter_unmapped:
        view.set_option("-F", "0x4", sep="")

    view.add_value("%(IN_BAM)s")

    add_groups = picard.picard_command(config, "AddOrReplaceReadGroups")

    fixed_options = (
        ("INPUT", "/dev/stdin"),
        # The result goes to a named pipe rather than stdout, because the
        # JVM may in some cases print warnings to stdout, which would
        # corrupt the BAM stream.
        ("OUTPUT", "%(TEMP_OUT_BAM)s"),
        ("COMPRESSION_LEVEL", "0"),
        # The pipeline requires a sorted BAM, and sorting has to happen
        # before calmd is run (avoiding pathologic runtimes).
        ("SORT_ORDER", "coordinate"),
    )
    for key, value in fixed_options:
        add_groups.set_option(key, value, sep="=")

    # Every tag is overwritten; ID is set explicitly because the default
    # (e.g. '1') trips up pysam's type inference (it is read as a length 1
    # string, but written as a character).
    for tag in ("ID", "SM", "LB", "PU", "PL"):
        add_groups.set_option(tag, tags[tag], sep="=")

    add_groups.set_kwargs(IN_STDIN=view, TEMP_OUT_BAM="bam.pipe")

    calmd = AtomicCmdBuilder(
        ["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
        IN_REF=reference,
        TEMP_IN_BAM="bam.pipe",
        OUT_STDOUT=output_bam,
    )

    stages = (view, add_groups, calmd)
    commands = [stage.finalize() for stage in stages]

    description = "<Cleanup BAM: %s -> '%s'>" % (input_bam, output_bam)

    PicardNode.__init__(
        self,
        command=ParallelCmds(commands),
        description=description,
        dependencies=dependencies,
    )
def test_builder__finalize__calls_atomiccmd():
    # finalize() is expected to instantiate AtomicCmd with the assembled
    # call as the only positional argument and the collected key-word
    # arguments (including 'set_cwd'); the mock records each instantiation.
    expected_args = (["echo", "-out", "%(OUT_FILE)s", "%(IN_FILE)s"],)
    expected_kwargs = {
        "IN_FILE": "/in/file",
        "OUT_FILE": "/out/file",
        "set_cwd": True,
    }
    invocations = []

    class _AtomicCmdMock:
        def __init__(self, *args, **kwargs):
            assert_equal(args, expected_args)
            assert_equal(kwargs, expected_kwargs)
            invocations.append(True)

    with Monkeypatch("paleomix.atomiccmd.builder.AtomicCmd", _AtomicCmdMock):
        builder = AtomicCmdBuilder("echo", set_cwd=True)
        builder.add_option("-out", "%(OUT_FILE)s")
        builder.add_value("%(IN_FILE)s")
        builder.set_kwargs(OUT_FILE="/out/file", IN_FILE="/in/file")

        builder.finalize()

    assert invocations
def __init__(self, input_file, k_groups, output_root, groups, dependencies=()):
    """Set up a supervised 'admixture' command for ``input_file``.

    input_file   -- path of the .bed file; the matching .bim and .fam
                    paths are derived by swapping the extension.
    k_groups     -- number of groups, passed as the last positional value.
    output_root  -- folder receiving the .P, .Q and .log files.
    groups       -- stored on the instance as ``_groups``; not otherwise
                    used by this constructor.
    dependencies -- forwarded unchanged to CommandNode.
    """
    self._groups = groups
    self._input_file = input_file

    basename = os.path.basename(input_file)
    prefix = os.path.splitext(basename)[0]
    output_prefix = os.path.join(output_root, "%s.%i" % (prefix, k_groups))

    files = {
        "IN_FILE_BED": input_file,
        "IN_FILE_BIM": fileutils.swap_ext(input_file, ".bim"),
        "IN_FILE_FAM": fileutils.swap_ext(input_file, ".fam"),
        "TEMP_OUT_FILE_BED": prefix + ".bed",
        "TEMP_OUT_FILE_BIM": prefix + ".bim",
        "TEMP_OUT_FILE_FAM": prefix + ".fam",
        "TEMP_OUT_FILE_POP": prefix + ".pop",
        "OUT_P": output_prefix + ".P",
        "OUT_Q": output_prefix + ".Q",
        "OUT_STDOUT": output_prefix + ".log",
    }

    builder = AtomicCmdBuilder(
        "admixture",
        CHECK_VERSION=ADMIXTURE_VERSION,
        set_cwd=True,
        **files
    )

    # Random 16-bit value passed via '-s'
    seed = random.randint(0, 2 ** 16 - 1)
    builder.set_option("-s", seed)
    builder.set_option("--supervised")

    # Positional arguments: the temporary .bed file, then K
    builder.add_value("%(TEMP_OUT_FILE_BED)s")
    builder.add_value(int(k_groups))

    description = "<Admixture -> '%s.*''>" % (output_prefix,)
    CommandNode.__init__(
        self,
        description=description,
        command=builder.finalize(),
        dependencies=dependencies,
    )
def test_builder__finalize__calls_atomiccmd():
    # Replaces AtomicCmd with a mock that verifies the arguments it is
    # constructed with, then checks that finalize() actually created one.
    observed = []

    want_args = (["echo", "-out", "%(OUT_FILE)s", "%(IN_FILE)s"],)
    want_kwargs = {
        "IN_FILE": "/in/file",
        "OUT_FILE": "/out/file",
        "set_cwd": True,
    }

    class _AtomicCmdMock(object):
        def __init__(self, *args, **kwargs):
            assert_equal(args, want_args)
            assert_equal(kwargs, want_kwargs)
            observed.append(True)

    with Monkeypatch("paleomix.atomiccmd.builder.AtomicCmd", _AtomicCmdMock):
        builder = AtomicCmdBuilder("echo", set_cwd=True)
        builder.add_option("-out", "%(OUT_FILE)s")
        builder.add_value("%(IN_FILE)s")
        builder.set_kwargs(OUT_FILE="/out/file", IN_FILE="/in/file")

        builder.finalize()

    assert observed
def test_builder__add_value__two_values():
    # Values added one after another must appear in the call in the same
    # order, following the executable.
    builder = AtomicCmdBuilder("ls")
    for value in ("%(IN_FILE)s", "%(OUT_FILE)s"):
        builder.add_value(value)

    expected_call = ["ls", "%(IN_FILE)s", "%(OUT_FILE)s"]
    assert_equal(builder.call, expected_call)