def _teardown(self, config, temp):
    """Remove per-user scratch folders from 'temp', then run the base teardown."""
    username = getpass.getuser()
    # Picard creates a folder named after the user in the temp-root, and
    # some JREs may create a folder for temporary performance counters
    for dirname in (username, "hsperfdata_" + username):
        try_rmtree(os.path.join(temp, dirname))

    CommandNode._teardown(self, config, temp)
def _setup(self, config, temp):
    """Clean stale files from 'temp' and resume from a checkpoint if possible.

    After the base-class setup, old pipes and the old ExaML info file are
    removed. If checkpoint files exist and are up to date relative to the
    input files, the one with the highest numeric suffix is passed to the
    command via '-R'; otherwise all checkpoint files are removed.
    """
    CommandNode._setup(self, config, temp)

    # The temp folder may contain old files:
    # Remove old pipes to prevent failure at _teardown
    for stale_pipe in glob.glob(os.path.join(temp, "pipe*")):
        fileutils.try_remove(stale_pipe)

    # ExaML refuses to overwrite old info files
    fileutils.try_remove(os.path.join(temp, "ExaML_info.Pypeline"))

    # Resume from last checkpoint, if one such was generated
    pattern = os.path.join(temp, "ExaML_binaryCheckpoint.Pypeline_*")
    checkpoints = glob.glob(pattern)
    if not checkpoints:
        return

    if not FileStatusCache().files_up_to_date(checkpoints, self.input_files):
        # Checkpoints are out of date; discard all of them
        for stale_checkpoint in checkpoints:
            fileutils.try_remove(stale_checkpoint)
        return

    def _checkpoint_number(fname):
        # The number follows the final underscore in the filename
        return int(fname.rsplit("_", 1)[-1])

    latest = sorted(checkpoints, key=_checkpoint_number)[-1]
    # FIXME: Less hacky solution to modifying AtomicCmds needed
    self._command._command.extend(("-R", latest))
def __init__(self, parameters):
    """Build the node from 'parameters' (input_bams, command, dependencies)."""
    label = "<MarkDuplicates: %s>" % (describe_files(parameters.input_bams),)
    node_args = {
        "command": parameters.command.finalize(),
        "description": label,
        "dependencies": parameters.dependencies,
    }
    CommandNode.__init__(self, **node_args)
def __init__(self, parameters):
    """Build the node from 'parameters'.

    The description reports the number of input BAMs and the output BAM;
    the command is the finalized 'parameters.command'.
    """
    num_inputs = len(parameters.input_bams)
    description = "<Merge BAMs: %i file(s) -> '%s'>" % (num_inputs,
                                                       parameters.output_bam)
    finalized = parameters.command.finalize()
    CommandNode.__init__(self,
                         command=finalized,
                         description=description,
                         dependencies=parameters.dependencies)
def __init__(self, config, reference, input_files, output_file, dependencies):
    """Build a two-step mapDamage node: a first run, then '--rescale-only'.

    Each step reads the concatenated input BAMs on STDIN; the two steps are
    run sequentially, and the second writes 'output_file'.
    """
    # Step 1: plain mapDamage run on the concatenated input
    train_cat_cmds, train_stdin = concatenate_input_bams(config, input_files)
    train_call = ["mapDamage",
                  "-n", _MAPDAMAGE_MAX_READS,
                  "-i", "-",
                  "-d", "%(TEMP_DIR)s",
                  "-r", reference]
    cmd_map = AtomicCmd(train_call,
                        IN_STDIN = train_stdin,
                        CHECK_VERSION = MAPDAMAGE_VERSION)
    train_cmds = ParallelCmds(train_cat_cmds + [cmd_map])

    # Step 2: rescale-only run, with its own concatenation commands
    scale_cat_cmds, scale_stdin = concatenate_input_bams(config, input_files)
    scale_call = ["mapDamage", "--rescale-only",
                  "-n", _MAPDAMAGE_MAX_READS,
                  "-i", "-",
                  "-d", "%(TEMP_DIR)s",
                  "-r", reference,
                  "--rescale-out", "%(OUT_BAM)s"]
    cmd_scale = AtomicCmd(scale_call,
                          IN_STDIN = scale_stdin,
                          OUT_BAM = output_file,
                          CHECK_VERSION = MAPDAMAGE_VERSION)
    rescale_cmds = ParallelCmds(scale_cat_cmds + [cmd_scale])

    description = "<mapDamageRescale: %i file(s) -> '%s'>" % (len(input_files), output_file)
    CommandNode.__init__(self,
                         command = SequentialCmds([train_cmds, rescale_cmds]),
                         description = description,
                         dependencies = dependencies)
def __init__(self, control_file, sequence_file, trees_file, output_prefix, dependencies = ()):
    """Build a node running 'codeml template.ctl' with the given inputs.

    The control, sequence and trees files and the output prefix are stored
    on the instance; the main output is 'output_prefix' + ".codeml".
    """
    self._control_file = control_file
    self._sequence_file = sequence_file
    self._trees_file = trees_file
    self._output_prefix = output_prefix

    # TEMP_OUT_* keyword arguments, collected here and passed to AtomicCmd
    temp_outputs = {
        "TEMP_OUT_CTL": "template.ctl",
        "TEMP_OUT_SEQS": "template.seqs",
        "TEMP_OUT_TREES": "template.trees",
        "TEMP_OUT_STDOUT": "template.stdout",
        "TEMP_OUT_STDERR": "template.stderr",
        "TEMP_OUT_2NG_DN": "2NG.dN",
        "TEMP_OUT_2NG_DS": "2NG.dS",
        "TEMP_OUT_2NG_T": "2NG.t",
        "TEMP_OUT_4FOLD": "4fold.nuc",
        "TEMP_OUT_LNF": "lnf",
        "TEMP_OUT_RST": "rst",
        "TEMP_OUT_RST1": "rst1",
        "TEMP_OUT_RUB": "rub",
    }

    command = AtomicCmd(["codeml", "template.ctl"],
                        IN_CONTROL_FILE = control_file,
                        IN_SEQUENCE_FILE = sequence_file,
                        IN_TREES_FILE = trees_file,
                        OUT_CODEML = output_prefix + ".codeml",
                        # Prevent prompts from blocking
                        IN_STDIN = "/dev/null",
                        set_cwd = True,
                        **temp_outputs)

    description = "<CodemlNode: '%s' -> '%s.*'>" % (sequence_file, output_prefix)
    CommandNode.__init__(self,
                         description = description,
                         command = command,
                         dependencies = dependencies)
def __init__(self, infile, outfile, genome, from_start = 0, from_end = 0, strand_relative = False, dependencies = ()):
    """Build a node running 'bedtools slop' on 'infile', writing to 'outfile'.

    'from_start' and 'from_end' are passed as '-l' and '-r'; '-s' is added
    if 'strand_relative' is set, and '-pct' if 'from_start' is a float.

    Raises ValueError if 'from_start' and 'from_end' differ in type.
    """
    if type(from_start) != type(from_end):
        raise ValueError("'from_start' and 'from_end' should be of same type!")

    optional_flags = []
    if strand_relative:
        optional_flags.append("-s")
    if type(from_start) is float:
        optional_flags.append("-pct")

    call = ["bedtools", "slop",
            "-i", "%(IN_FILE)s",
            "-g", "%(IN_GENOME)s",
            "-l", str(from_start),
            "-r", str(from_end)] + optional_flags

    command = AtomicCmd(call,
                        IN_FILE = infile,
                        IN_GENOME = genome,
                        OUT_STDOUT = outfile,
                        CHECK_VERSION = BEDTOOLS_VERSION)

    CommandNode.__init__(self,
                         description = "<SlopBed: '%s' -> '%s'>" % (infile, outfile),
                         command = command,
                         dependencies = dependencies)
def __init__(self, control_file, sequence_file, trees_file, output_tar, exclude_groups = (), dependencies = ()):
    """Build a node running 'codeml template.ctl', then tar'ing its files.

    The two commands run sequentially; the tar command ('tar cvzf') writes
    'output_tar' from the files named by CodemlNode._get_codeml_files.
    """
    self._exclude_groups = safe_coerce_to_frozenset(exclude_groups)
    self._control_file = control_file
    self._sequence_file = sequence_file
    self._trees_file = trees_file

    codeml_outputs = CodemlNode._get_codeml_files("TEMP_OUT_CODEML")
    paml_cmd = AtomicCmd(["codeml", "template.ctl"],
                         IN_CONTROL_FILE = control_file,
                         IN_SEQUENCE_FILE = sequence_file,
                         IN_TREES_FILE = trees_file,
                         TEMP_OUT_CTL = "template.ctl",
                         TEMP_OUT_SEQS = "template.seqs",
                         TEMP_OUT_TREES = "template.trees",
                         TEMP_OUT_STDOUT = "template.stdout",
                         TEMP_OUT_STDERR = "template.stderr",
                         TEMP_OUT_4FOLD = "4fold.nuc",
                         # Prevent prompts from blocking
                         IN_STDIN = "/dev/null",
                         set_cwd = True,
                         **codeml_outputs)

    tar_pairs = CodemlNode._get_codeml_files("TEMP_IN_CODEML")
    # One "%(KEY)s" placeholder per file handed to tar
    tar_call = ["tar", "cvzf", "%(OUT_FILE)s"]
    for key in tar_pairs:
        tar_call.append("%%(%s)s" % (key,))

    tar_cmd = AtomicCmd(tar_call,
                        OUT_FILE = output_tar,
                        set_cwd = True,
                        **tar_pairs)

    description = "<CodemlNode: %r -> %r>" % (sequence_file, output_tar)
    CommandNode.__init__(self,
                         description = description,
                         command = SequentialCmds([paml_cmd, tar_cmd]),
                         dependencies = dependencies)
def __init__(self, config, reference, intervals, infiles, outfile, dependencies = ()):
    """Build a node running the 'IndelRealigner' tool alongside 'samtools calmd'.

    The Java command is built from GenomeAnalysisTK.jar in 'config.jar_root';
    both commands are wrapped in a ParallelCmds.
    """
    self._basename = os.path.basename(outfile)
    infiles = safe_coerce_to_tuple(infiles)

    jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
    realigner = AtomicJavaCmdBuilder(config, jar_file)
    valued_options = (("-T", "IndelRealigner"),
                      ("-R", "%(IN_REFERENCE)s"),
                      ("-targetIntervals", "%(IN_INTERVALS)s"),
                      ("-o", "%(OUT_BAMFILE)s"),
                      ("--bam_compression", 0))
    for option, value in valued_options:
        realigner.set_option(option, value)
    realigner.set_option("--disable_bam_indexing")

    _set_input_files(realigner, infiles)
    realigner.set_kwargs(IN_REFERENCE = reference,
                         IN_REF_DICT = fileutils.swap_ext(reference, ".dict"),
                         IN_INTERVALS = intervals,
                         OUT_BAMFILE = outfile)

    calmd_call = ["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"]
    calmd = AtomicCmd(calmd_call,
                      TEMP_IN_BAM = self._basename,
                      IN_REF = reference,
                      TEMP_OUT_STDOUT = self._basename + ".calmd")

    description = "<Indel Realign: %i file(s) -> '%s'>" % (len(infiles), outfile)
    CommandNode.__init__(self,
                         description = description,
                         command = ParallelCmds([realigner.finalize(), calmd]),
                         dependencies = dependencies)
def __init__(self, config, reference, input_bam, output_bam, tags, min_mapq = 0, dependencies = ()):
    """Build a three-stage pipe: samtools view -> AddOrReplaceReadGroups -> calmd.

    Entries in 'tags' are passed to AddOrReplaceReadGroups as KEY=VALUE
    options, except that "PG", "Target" and "PU_cur" are skipped and
    "PU_src" is passed as "PU".
    """
    view_call = ["samtools", "view", "-bu", "-F0x4", "-q%i" % min_mapq, "%(IN_BAM)s"]
    flt = AtomicCmd(view_call,
                    IN_BAM = input_bam,
                    OUT_STDOUT = AtomicCmd.PIPE)

    jar_file = os.path.join(config.jar_root, "AddOrReplaceReadGroups.jar")
    params = AtomicJavaCmdBuilder(config, jar_file)
    fixed_options = (("INPUT", "/dev/stdin"),
                     ("OUTPUT", "/dev/stdout"),
                     ("QUIET", "true"),
                     ("COMPRESSION_LEVEL", "0"))
    for option, value in fixed_options:
        params.set_option(option, value, sep = "=")

    for (tag, value) in sorted(tags.iteritems()):
        if tag == "PU_src":
            params.set_option("PU", value, sep = "=")
        elif tag not in ("PG", "Target", "PU_cur"):
            params.set_option(tag, value, sep = "=")

    params.set_kwargs(IN_STDIN = flt,
                      OUT_STDOUT = AtomicCmd.PIPE)
    annotate = params.finalize()

    calmd = AtomicCmd(["samtools", "calmd", "-b", "-", "%(IN_REF)s"],
                      IN_REF = reference,
                      IN_STDIN = annotate,
                      OUT_STDOUT = output_bam)

    description = "<Cleanup BAM: %s -> '%s'>" % (input_bam, output_bam)
    CommandNode.__init__(self,
                         command = ParallelCmds([flt, annotate, calmd]),
                         description = description,
                         dependencies = dependencies)
def _setup(self, config, temp):
    """See CommandNode._setup.

    Symlinks the input file into 'temp' before running the base setup.
    """
    os.symlink(os.path.abspath(self._infile),
               reroot_path(temp, self._infile))

    CommandNode._setup(self, config, temp)
def __init__(self, parameters):
    """Build the node from 'parameters', keeping the command's kwargs.

    'parameters.command.kwargs' is stored as 'self._kwargs' before the
    command is finalized.
    """
    self._kwargs = parameters.command.kwargs

    command = parameters.command.finalize()
    description = "<RAxMLReduce: '%s' -> '%s'>" % (parameters.input_alignment,
                                                  parameters.output_alignment)
    CommandNode.__init__(self,
                         command = command,
                         description = description,
                         dependencies = parameters.dependencies)
def __init__(self, parameters):
    """Build the node from the "cat", "filter" and "bgzip" commands.

    The commands are finalized in that order and run as a ParallelCmds.
    """
    finalized = []
    for stage in ("cat", "filter", "bgzip"):
        finalized.append(parameters.commands[stage].finalize())

    CommandNode.__init__(
        self,
        description="<VCFFilter: '%s' -> '%s'>" % (parameters.infile, parameters.outfile),
        command=ParallelCmds(finalized),
        dependencies=parameters.dependencies,
    )
def __init__(self, parameters):
    """Build the node running the finalized command in 'parameters'.

    The description gives the count of 'parameters.input_bams' and the
    value of 'parameters.output_bam'.
    """
    summary = (len(parameters.input_bams), parameters.output_bam)
    description = "<Merge BAMs: %i file(s) -> '%s'>" % summary

    node_args = dict(command = parameters.command.finalize(),
                     description = description,
                     dependencies = parameters.dependencies)
    CommandNode.__init__(self, **node_args)
def _setup(self, config, temp):
    """Run the base setup, then symlink four files from 'self._directory' into 'temp'."""
    CommandNode._setup(self, config, temp)

    required_files = ("3pGtoA_freq.txt",
                      "5pCtoT_freq.txt",
                      "dnacomp.txt",
                      "misincorporation.txt")
    for fname in required_files:
        source = os.path.abspath(os.path.join(self._directory, fname))
        os.symlink(source, os.path.join(temp, fname))
def __init__(self, config, d_make, bedn, mappa, unique, dependencies=()):
    """Build a node running 'intersectmappabed.py' on one BED file.

    The output goes to '<basename>_MappaOnly<ext>' under 'config.temp_local',
    and 'd_make.bedfiles[bedn]' is updated to point at that output.
    """
    inbedfile = d_make.bedfiles[bedn]
    name_parts = os.path.splitext(os.path.basename(inbedfile))
    dest = os.path.join(config.temp_local,
                        "{}_MappaOnly{}".format(*name_parts))
    # Later users of 'd_make' see the new file in place of the input
    d_make.bedfiles[bedn] = dest

    script = os.path.join(PREFIX, "intersectmappabed.py")
    call1 = ["python", script,
             "%(IN_BED)s", "%(IN_MAPPA)s", str(unique), "%(OUT_DEST)s"]
    cmd = AtomicCmd(call1,
                    IN_BED=inbedfile,
                    IN_MAPPA=mappa,
                    OUT_DEST=dest,
                    CHECK_VERSION=PYTHON_VERSION)

    description = "<CLEANBEDFILES: '%s' -> '%s', Uniqueness: '%s'>" % (
        inbedfile, dest, unique)
    CommandNode.__init__(self,
                         description=description,
                         command=cmd,
                         dependencies=dependencies)
def __init__(self, parameters):
    """Build the node from the finalized 'fastq_dump' command in 'parameters'."""
    fastq_dump = parameters.commands['fastq_dump'].finalize()

    CommandNode.__init__(self,
                         description = "<Mapping Pipeline>",
                         command = ParallelCmds([fastq_dump]),
                         dependencies = parameters.dependencies)
def _setup(self, config, temp):
    """Check the input FASTQ files, create three FIFOs in 'temp', then run the base setup."""
    check_fastq_files(self.input_files, self._quality_offset)

    fifo_names = (self._basename + ".truncated",
                  self._basename + ".discarded",
                  "uncompressed_input")
    for name in fifo_names:
        os.mkfifo(os.path.join(temp, name))

    CommandNode._setup(self, config, temp)
def __init__(self, parameters):
    """Build the node from the "unicat" and "pileup" commands.

    The commands are finalized in that order and run as a ParallelCmds.
    """
    finalized = []
    for stage in ("unicat", "pileup"):
        finalized.append(parameters.commands[stage].finalize())

    label = "<VCFPileup: '%s' -> '%s'>" % (parameters.in_bam, parameters.outfile)
    CommandNode.__init__(self,
                         description = label,
                         command = ParallelCmds(finalized),
                         dependencies = parameters.dependencies)
def __init__(self, parameters):
    """Build the node running the finalized command in 'parameters'."""
    finalized = parameters.command.finalize()
    endpoints = (parameters.infile, parameters.outfile)

    CommandNode.__init__(self,
                         description = "<BuildRegions: '%s' -> '%s'>" % endpoints,
                         command = finalized,
                         dependencies = parameters.dependencies)
def __init__(self, parameters):
    """Build the node from 'parameters', storing 'parameters.directory' on the instance."""
    self._directory = parameters.directory

    label = "<mapDamage (model): %r>" % (parameters.directory,)
    node_args = {
        "command": parameters.command.finalize(),
        "description": label,
        "dependencies": parameters.dependencies,
    }
    CommandNode.__init__(self, **node_args)
def __init__(self, parameters):
    """Build the node; the description names the input and output files."""
    node_command = parameters.command.finalize()
    label = "<BuildRegions: '%s' -> '%s'>" % (parameters.infile,
                                             parameters.outfile)

    CommandNode.__init__(self,
                         description=label,
                         command=node_command,
                         dependencies=parameters.dependencies)
def _setup(self, config, temp):
    """Copy the alignment and partition files into 'temp', then run the base setup.

    The destination filenames are taken from the matching "TEMP_IN_*"
    entries in 'self._kwargs'.
    """
    kwargs = self._kwargs
    for key in ("IN_ALIGNMENT", "IN_PARTITION"):
        fileutils.copy_file(kwargs[key],
                            os.path.join(temp, kwargs["TEMP_" + key]))

    CommandNode._setup(self, config, temp)
def _setup(self, config, temp):
    """Symlink the alignment and partition files into 'temp', then run the base setup.

    The link names are taken from the matching "TEMP_IN_*" entries in
    'self._kwargs'; the targets are the absolute paths of the inputs.
    """
    kwargs = self._kwargs
    for key in ("IN_ALIGNMENT", "IN_PARTITION"):
        target = os.path.abspath(kwargs[key])
        link_name = os.path.join(temp, kwargs["TEMP_" + key])
        os.symlink(target, link_name)

    CommandNode._setup(self, config, temp)
def _teardown(self, config, temp):
    """Drop the RAxML info file and rename the parsimony tree, then run the base teardown."""
    info_file = os.path.join(temp, "RAxML_info.output")
    os.remove(info_file)

    # Move the tree to the name given by 'self._output_tree', within 'temp'
    fileutils.move_file(os.path.join(temp, "RAxML_parsimonyTree.output.0"),
                        fileutils.reroot_path(temp, self._output_tree))

    CommandNode._teardown(self, config, temp)
def __init__(self, parameters):
    """Build the node; the description names the input file and the index prefix."""
    finalized = parameters.command.finalize()
    label = "<Bowtie2 Index '%s' -> '%s.*'>" % (parameters.input_file,
                                               parameters.prefix)

    CommandNode.__init__(self,
                         command = finalized,
                         description = label,
                         dependencies = parameters.dependencies)
def _setup(self, config, temp):
    """Run the base setup, then symlink each file in 'self._symlinks' into 'temp'."""
    CommandNode._setup(self, config, temp)

    # Required to avoid the creation of files outside the temp folder
    for filename in self._symlinks:
        link_name = os.path.join(temp, os.path.basename(filename))
        os.symlink(os.path.abspath(filename), link_name)
def __init__(self, parameters):
    """Build the node after checking the BWA prefix.

    The commands in 'parameters.commands' are finalized in the order given
    by 'parameters.order' and run as a ParallelCmds.
    """
    _check_bwa_prefix(parameters.prefix)

    finalized = []
    for key in parameters.order:
        finalized.append(parameters.commands[key].finalize())
    command = ParallelCmds(finalized)

    label = "<PE_BWA (%i threads): '%s'>" % (parameters.threads,
                                            parameters.input_file_1)
    CommandNode.__init__(self,
                         command = command,
                         description = label,
                         threads = parameters.threads,
                         dependencies = parameters.dependencies)
def _teardown(self, config, temp):
    """Rename the bootstrap files inside 'temp', then run the base teardown.

    The (source, destination) pairs come from 'self._bootstraps'; each
    destination is re-rooted into 'temp'.
    """
    renames = self._bootstraps(self._output_template,
                               self._bootstrap_num,
                               self._bootstrap_start)
    for (src_name, dst_name) in renames:
        fileutils.move_file(os.path.join(temp, src_name),
                            fileutils.reroot_path(temp, dst_name))

    CommandNode._teardown(self, config, temp)
def __init__(self, parameters):
    """Build the node, recording the alignment to symlink and the output tree name.

    'self._symlinks' holds the absolute path of the input alignment and
    'self._output_tree' the basename of the output tree.
    """
    alignment = os.path.abspath(parameters.input_alignment)
    self._symlinks = [alignment]
    self._output_tree = os.path.basename(parameters.output_tree)

    command = parameters.command.finalize()
    label = "<Parsimonator: '%s' -> '%s'>" % (parameters.input_alignment,
                                             parameters.output_tree)
    CommandNode.__init__(self,
                         command = command,
                         description = label,
                         dependencies = parameters.dependencies)
def __init__(self, infile, dependencies = ()):
    """Build a node running 'samtools index' on 'infile'.

    The index is written next to the input, with the extension swapped
    to ".bai".
    """
    index_call = ["samtools", "index", "%(IN_BAM)s", "%(OUT_BAI)s"]
    index_path = swap_ext(infile, ".bai")
    cmd_index = AtomicCmd(index_call,
                          IN_BAM = infile,
                          OUT_BAI = index_path,
                          CHECK_SAM = SAMTOOLS_VERSION)

    label = "<BAMIndex: '%s'>" % (infile,)
    CommandNode.__init__(self,
                         description = label,
                         command = cmd_index,
                         dependencies = dependencies)
def _run(self, config, temp):
    """Run the command, adding extra detail to the error on failure.

    If the base-class run raises NodeError and 'self._command.join()'
    returns [1, None], the last line of 'template.stdout' in 'temp' is
    read; if it contains "Giving up.", that line is appended to the error
    message. The (possibly replaced) NodeError is then raised.
    """
    try:
        CommandNode._run(self, config, temp)
    except NodeError as error:
        # 'as' form is accepted by Python 2.6+ and 3; the comma form is not
        if self._command.join() == [1, None]:
            stdout_path = fileutils.reroot_path(temp, "template.stdout")
            with open(stdout_path) as handle:
                lines = handle.readlines()

            if lines and ("Giving up." in lines[-1]):
                error = NodeError("%s\n\n%s" % (error, lines[-1]))

        raise error
def _teardown(self, config, temp):
    """Rename RAxML output files in 'temp', then run the base teardown.

    Every file in 'temp' whose name starts with 'RAxML_<name>.Pypeline' is
    moved to 'self._template % (<name>,)' inside 'temp'.
    """
    # Raw string with an escaped dot, so that only a literal ".Pypeline"
    # matches; compiled once rather than on every iteration
    pattern = re.compile(r"RAxML_(.*)\.Pypeline")

    for filename in os.listdir(temp):
        match = pattern.match(filename)
        if match:
            source = os.path.join(temp, filename)
            destination = os.path.join(temp, self._template % match.groups())

            fileutils.move_file(source, destination)

    CommandNode._teardown(self, config, temp)
def __init__(self, parameters):
    """Build the node, storing the alignment, partitions and output tree paths."""
    self._input_alignment = parameters.input_alignment
    self._input_partitions = parameters.input_partitions
    self._output_tree = parameters.output_tree

    command = parameters.command.finalize()
    label = "<RAxMLParsimonyTree: '%s' -> '%s'>" % (parameters.input_alignment,
                                                   parameters.output_tree)
    CommandNode.__init__(self,
                         command = command,
                         description = label,
                         dependencies = parameters.dependencies)