def _run(self, _config, temp):
    """Read every input file into one table and write it to the output file.

    The table is written to the output path re-rooted in 'temp' first, and
    then moved to 'self._output_file'.
    """
    merged = {}
    for input_file in self.input_files:
        coverage.read_table(merged, input_file)

    temp_output = reroot_path(temp, self._output_file)
    coverage.write_table(merged, temp_output)
    move_file(temp_output, self._output_file)
def commit(self, temp):
    """Move "OUT_" files out of 'temp' and remove "TEMP_OUT_" files.

    Raises CmdError if 'ready()' is false, if 'self._running' is set, if
    'temp' is not the same folder as 'self._temp', or if any of the
    expected temporary files is not found in 'temp'. On success, the
    '_proc' and '_temp' attributes are reset to None.
    """
    if not self.ready():
        raise CmdError("Attempting to commit before command has completed")
    if self._running:
        raise CmdError("Called 'commit' before calling 'join'")
    if not os.path.samefile(self._temp, temp):
        message = "Mismatch between previous and current temp folders: %r != %s"
        raise CmdError(message % (self._temp, temp))

    expected = self.expected_temp_files
    missing_files = expected - set(os.listdir(temp))
    if missing_files:
        raise CmdError("Expected files not created: %s"
                       % (", ".join(missing_files)))

    temp = os.path.abspath(temp)
    generated = self._generate_filenames(self._files, temp)
    for (key, filename) in generated.iteritems():
        # Only plain filenames are handled; other values are skipped
        if not isinstance(filename, types.StringTypes):
            continue

        if key.startswith("OUT_"):
            fileutils.move_file(filename, self._files[key])
        elif key.startswith("TEMP_OUT_"):
            fileutils.try_remove(filename)

    self._proc = None
    self._temp = None
def test_move_file__overwrite(temp_folder):
    """Moving onto an existing file leaves only the destination, holding
    the contents of the source file."""
    source, destination = "file_1", "file_2"
    with SetWorkingDirectory(temp_folder):
        set_file_contents(source, "4")
        set_file_contents(destination, "5")

        move_file(source, destination)

        assert_equal(os.listdir("."), [destination])
        assert_equal(get_file_contents(destination), "4")
def _teardown(self, _config, temp):
    """Move the "<sequence>.fasta" file of every sequence from 'temp' into
    'self._destination'."""
    for name in self._sequences:
        fasta_name = name + ".fasta"
        fileutils.move_file(os.path.join(temp, fasta_name),
                            os.path.join(self._destination, fasta_name))
def commit(self, temp):
    """Move "OUT_" files out of 'temp' and remove "TEMP_OUT_" files.

    Raises CmdError if 'ready()' is false, if 'self._handles' is non-empty,
    if 'temp' is not the same folder as 'self._temp', or if any of the
    expected temporary files is not found in 'temp'. On success, the
    '_proc' and '_temp' attributes are reset to None.
    """
    if not self.ready():
        raise CmdError("Attempting to commit command before it has completed")
    if self._handles:
        raise CmdError("Called 'commit' before calling 'join'")
    if not os.path.samefile(self._temp, temp):
        folders = (self._temp, temp)
        raise CmdError("Mismatch between previous and current temp folders"
                       ": %r != %s" % folders)

    expected = self.expected_temp_files
    missing_files = expected - set(os.listdir(temp))
    if missing_files:
        names = ", ".join(missing_files)
        raise CmdError("Expected files not created: %s" % (names,))

    temp = os.path.abspath(temp)
    filenames = self._generate_filenames(self._files, temp)
    for (key, filename) in filenames.iteritems():
        # Only plain filenames are handled; other values are skipped
        if isinstance(filename, types.StringTypes):
            is_output = key.startswith("OUT_")
            if is_output:
                fileutils.move_file(filename, self._files[key])
            elif key.startswith("TEMP_OUT_"):
                fileutils.try_remove(filename)

    self._proc = None
    self._temp = None
def _teardown(self, config, temp):
    """Remove the input file and its ".bai" file from 'temp', move the output
    file to its final location, and then run Node._teardown."""
    temp_input = reroot_path(temp, self._input_file)
    for stale in (temp_input, temp_input + ".bai"):
        os.remove(stale)

    temp_output = reroot_path(temp, self._output_file)
    move_file(temp_output, self._output_file)

    Node._teardown(self, config, temp)
def test_move_file__move_to_different_folder(temp_folder):
    """Moving a file to "dst/file_1" leaves only "dst" in the working
    directory, containing the moved file with its contents intact."""
    destination = "dst/file_1"
    with SetWorkingDirectory(temp_folder):
        set_file_contents("file_1", "3")

        move_file("file_1", destination)

        assert_equal(os.listdir("."), ["dst"])
        assert_equal(os.listdir("dst"), ["file_1"])
        assert_equal(get_file_contents(destination), "3")
def _run(self, _config, temp):
    """Write one FASTA record per (contig, gene) found in the intervals file.

    For each gene, the regions of its BED records are fetched from the
    reference in order of start position and joined; the result is reverse
    complemented if the records are on the "-" strand. Records are written
    to "sequences.fasta" in 'temp', which is then moved to 'self._outfile'.
    """
    def _sort_key(record):
        return (record.contig, record.name, record.start)

    def _gene_name(record):
        return record.name

    def _on_minus_strand(record):
        return record.strand == "-"

    reference = pysam.Fastafile(self._reference)
    seqs = collections.defaultdict(list)
    with open(self._intervals) as bedfile:
        by_contig = text.parse_lines_by_contig(bedfile, pysam.asBed())
        for (contig, beds) in sorted(by_contig.items()):
            # groupby requires records of the same gene to be adjacent
            beds.sort(key=_sort_key)
            for (gene, grouped) in itertools.groupby(beds, _gene_name):
                gene_beds = tuple(grouped)

                fragments = seqs[(contig, gene)]
                for bed in gene_beds:
                    fragments.append(
                        reference.fetch(contig, bed.start, bed.end))
                seq = "".join(fragments)

                if any(_on_minus_strand(bed) for bed in gene_beds):
                    # Records of one gene are expected to share a strand
                    assert all(_on_minus_strand(bed) for bed in gene_beds)
                    seq = sequences.reverse_complement(seq)

                seqs[(contig, gene)] = seq

    temp_file = os.path.join(temp, "sequences.fasta")
    with open(temp_file, "w") as out_file:
        for (key, sequence) in sorted(seqs.items()):
            fasta.print_fasta(key[1], sequence, out_file)

    move_file(temp_file, self._outfile)
def _run(self, _config, temp):
    """Read every input file into one table and write it to the output file.

    The table is written to the output path re-rooted in 'temp' first, and
    then moved to 'self._output_file'.
    """
    combined = {}
    for path in self.input_files:
        read_table(combined, path)

    staged = reroot_path(temp, self._output_file)
    _write_table(combined, staged)
    move_file(staged, self._output_file)
def _run(self, _config, temp):
    """Extract the sequence of each gene listed in the intervals file.

    BED records are grouped by name within each contig; the regions of a
    group are fetched from the reference, concatenated by start position,
    and reverse complemented for "-" strand records. The resulting FASTA
    file is built as "sequences.fasta" in 'temp' and moved to
    'self._outfile'.
    """
    by_position = lambda bed: (bed.contig, bed.name, bed.start)
    by_name = lambda bed: bed.name

    fastafile = pysam.Fastafile(self._reference)
    seqs = collections.defaultdict(list)
    with open(self._intervals) as bedfile:
        parsed = text.parse_lines_by_contig(bedfile, pysam.asBed())
        intervals = sorted(parsed.items())
        for (contig, beds) in intervals:
            # Sorting places all records of a gene next to each other
            beds.sort(key=by_position)
            for (gene, records) in itertools.groupby(beds, by_name):
                records = tuple(records)
                key = (contig, gene)

                for record in records:
                    region = fastafile.fetch(contig, record.start, record.end)
                    seqs[key].append(region)
                joined = "".join(seqs[key])

                if any((record.strand == "-") for record in records):
                    # Mixed strands within one gene are not expected
                    assert all((record.strand == "-") for record in records)
                    joined = sequences.reverse_complement(joined)

                seqs[key] = joined

    temp_file = os.path.join(temp, "sequences.fasta")
    with open(temp_file, "w") as out_file:
        for ((_, gene), sequence) in sorted(seqs.items()):
            fasta.print_fasta(gene, sequence, out_file)

    move_file(temp_file, self._outfile)
def test_move_file__simple_move_in_cwd(temp_folder):
    """Moving a file between relative paths in the working directory
    renames it and keeps its contents."""
    source, destination = "file_1", "file_2"
    with SetWorkingDirectory(temp_folder):
        assert_equal(os.listdir("."), [])
        set_file_contents(source, "1")
        assert_equal(os.listdir("."), [source])

        move_file(source, destination)

        assert_equal(os.listdir("."), [destination])
        assert_equal(get_file_contents(destination), "1")
def _teardown(self, config, temp):
    """Remove "RAxML_info.output" from 'temp', rename the parsimony tree to
    the re-rooted output tree path, and then run CommandNode._teardown."""
    info_file = os.path.join(temp, "RAxML_info.output")
    os.remove(info_file)

    tree_source = os.path.join(temp, "RAxML_parsimonyTree.output.0")
    tree_destination = fileutils.reroot_path(temp, self._output_tree)
    fileutils.move_file(tree_source, tree_destination)

    CommandNode._teardown(self, config, temp)
def test_move_file__move_to_new_folder(temp_folder):
    """Moving a file into a "dst" folder that was not created beforehand
    empties the source folder and places the file in the destination."""
    src_dir = os.path.join(temp_folder, "src")
    dst_dir = os.path.join(temp_folder, "dst")
    assert make_dirs(src_dir)

    source = os.path.join(src_dir, "file_1")
    destination = os.path.join(dst_dir, "file_2")
    set_file_contents(source, "2")

    move_file(source, destination)

    assert_equal(os.listdir(os.path.dirname(source)), [])
    assert_equal(os.listdir(os.path.dirname(destination)), ["file_2"])
    assert_equal(get_file_contents(destination), "2")
def _run(self, _config, temp):
    """Apply 'filter_singletons' for every entry in 'self._filter_by' and
    write the resulting alignment to the output file via 'temp'."""
    filtered = MSA.from_file(self._input_file)
    for (name, groups) in self._filter_by.iteritems():
        filtered = filtered.filter_singletons(name, groups)

    staged = fileutils.reroot_path(temp, self._output_file)
    with open(staged, "w") as out_handle:
        filtered.to_file(out_handle)

    fileutils.move_file(staged, self._output_file)
def test_move_file__simple_move(temp_folder):
    """Moving a file between absolute paths in the same folder renames it
    and keeps its contents."""
    source = os.path.join(temp_folder, "file_1")
    destination = os.path.join(temp_folder, "file_2")

    assert_equal(os.listdir(temp_folder), [])
    set_file_contents(source, "1")
    assert_equal(os.listdir(temp_folder), ["file_1"])

    move_file(source, destination)

    assert_equal(os.listdir(temp_folder), ["file_2"])
    assert_equal(get_file_contents(destination), "1")
def _teardown(self, config, temp):
    """Rename each bootstrap file yielded by 'self._bootstraps' inside 'temp'
    to its re-rooted destination, and then run CommandNode._teardown."""
    renames = self._bootstraps(self._output_template,
                               self._bootstrap_num,
                               self._bootstrap_start)
    for (temp_name, final_name) in renames:
        source = os.path.join(temp, temp_name)
        destination = fileutils.reroot_path(temp, final_name)
        fileutils.move_file(source, destination)

    CommandNode._teardown(self, config, temp)
def _teardown(self, config, temp):
    """Rename "RAxML_<name>.Pypeline" files in 'temp' using 'self._template'.

    Every file in 'temp' whose name starts with "RAxML_<name>.Pypeline" is
    moved (within 'temp') to the name produced by filling 'self._template'
    with the captured <name>. CommandNode._teardown is run afterwards.
    """
    # The "." is escaped so that it only matches a literal dot; unescaped it
    # would match any character (e.g. "RAxML_infoXPypeline").
    pattern = re.compile(r"RAxML_(.*)\.Pypeline")
    for filename in os.listdir(temp):
        match = pattern.match(filename)
        if match:
            source = os.path.join(temp, filename)
            destination = os.path.join(temp, self._template % match.groups())
            fileutils.move_file(source, destination)

    CommandNode._teardown(self, config, temp)
def _teardown(self, config, temp):
    """Rename the RAxML info and "BS0" alignment files inside 'temp', remove
    the input alignment and partition files, and then run
    CommandNode._teardown."""
    info_name = fileutils.swap_ext(self._output_alignment, ".info")
    renames = (
        ("RAxML_info.Pypeline", info_name),
        ("input.alignment.BS0", self._output_alignment),
    )
    for (source, destination) in renames:
        fileutils.move_file(os.path.join(temp, source),
                            os.path.join(temp, destination))

    for leftover in ("input.alignment", "input.partition"):
        os.remove(os.path.join(temp, leftover))

    CommandNode._teardown(self, config, temp)
def _teardown(self, config, temp):
    """Give the RAxML info file and the "BS0" alignment their output names
    within 'temp', delete the input alignment and partition files, and then
    run CommandNode._teardown."""
    def _in_temp(name):
        return os.path.join(temp, name)

    info_source = _in_temp("RAxML_info.Pypeline")
    info_target = _in_temp(fileutils.swap_ext(self._output_alignment, ".info"))
    fileutils.move_file(info_source, info_target)

    alignment_source = _in_temp("input.alignment.BS0")
    alignment_target = _in_temp(self._output_alignment)
    fileutils.move_file(alignment_source, alignment_target)

    os.remove(_in_temp("input.alignment"))
    os.remove(_in_temp("input.partition"))

    CommandNode._teardown(self, config, temp)
def _teardown(self, config, temp):
    """Clean up "ExaML_<name>.Pypeline" files found in 'temp'.

    Files for which <name> is "binaryCheckpoint" are removed; all other
    matching files are moved (within 'temp') to the name produced by filling
    'self._template' with the captured <name>. CommandNode._teardown is run
    afterwards.
    """
    # The "." is escaped so that it only matches a literal dot; unescaped it
    # would match any character (e.g. "ExaML_infoXPypeline").
    pattern = re.compile(r"ExaML_(.*)\.Pypeline")
    for filename in os.listdir(temp):
        match = pattern.match(filename)
        if not match:
            continue

        source = os.path.join(temp, filename)
        if "binaryCheckpoint" in match.groups():
            os.remove(source)
        else:
            destination = os.path.join(temp, self._template % match.groups())
            fileutils.move_file(source, destination)

    CommandNode._teardown(self, config, temp)
def _run(self, _config, temp):
    """Add support from the support trees to every main tree, and write the
    resulting trees, one per line, to the output file via 'temp'."""
    main_trees = _read_tree_files(self._main_tree_files)
    support_trees = _read_tree_files(self._support_tree_files)

    rendered = [str(tree.add_support(support_trees)) for tree in main_trees]
    contents = "\n".join(rendered) + "\n"

    output_name = os.path.basename(self._output_file)
    temp_output_file = os.path.join(temp, output_name)
    with open(temp_output_file, "w") as out_handle:
        out_handle.write(contents)

    move_file(temp_output_file, self._output_file)
def _run(self, _config, temp):
    """Reroot every input tree, on 'self._reroot_on_taxa' if set and on the
    midpoint otherwise, and write the trees, one per line, to the output
    file via 'temp'."""
    rendered = []
    for tree in _read_tree_files(self._tree_files):
        taxa = self._reroot_on_taxa
        rerooted = (tree.reroot_on_taxa(taxa) if taxa
                    else tree.reroot_on_midpoint())
        rendered.append(str(rerooted))
    contents = "\n".join(rendered) + "\n"

    output_name = os.path.basename(self._output_file)
    temp_output_file = os.path.join(temp, output_name)
    with open(temp_output_file, "w") as out_handle:
        out_handle.write(contents)

    move_file(temp_output_file, self._output_file)
def _teardown(self, config, temp):
    """Promote the alignment and partition files to their output names.

    For each of "ALIGNMENT" and "PARTITION", the input file is moved to the
    ".reduced" name and the ".reduced" file to the output name, in that
    order. A move is only carried out if the destination does not exist;
    otherwise the source is removed, unless it is the destination itself.
    CommandNode._teardown is run afterwards.
    """
    for postfix in ("ALIGNMENT", "PARTITION"):
        temp_in = self._kwargs["TEMP_IN_" + postfix]
        reduced = self._kwargs["TEMP_IN_" + postfix] + ".reduced"
        final = self._kwargs["OUT_" + postfix]

        for (source, destination) in ((temp_in, reduced), (reduced, final)):
            source = fileutils.reroot_path(temp, source)
            destination = fileutils.reroot_path(temp, destination)

            if os.path.exists(destination):
                if source != destination:
                    os.remove(source)
            else:
                fileutils.move_file(source, destination)

    CommandNode._teardown(self, config, temp)
def _run(self, _config, temp):
    """Replace singleton nucleotides with 'n' and write the alignment.

    For every (sequence, groups) entry in 'self._filter_by', a position of
    that sequence is replaced with 'n' if its nucleotide is not one of "N",
    "n" or "-", and it occurs exactly once among the sequences in 'groups'
    at that position. The alignment is written via 'temp' and then moved to
    'self._output_file'.
    """
    alignment = msa.read_msa(self._input_file)

    for (to_filter, groups) in self._filter_by.iteritems():
        group_sequences = [alignment[group] for group in groups]
        filtered = list(alignment[to_filter])

        for (position, column) in enumerate(zip(*group_sequences)):
            current = filtered[position]
            if (current in "Nn-") or (column.count(current) != 1):
                continue

            filtered[position] = 'n'

        alignment[to_filter] = "".join(filtered)

    staged = fileutils.reroot_path(temp, self._output_file)
    msa.write_msa(alignment, staged)
    fileutils.move_file(staged, self._output_file)
def _teardown(self, config, temp):
    """Move alignment / partition files along to their output filenames.

    For both "ALIGNMENT" and "PARTITION", the files are processed as the
    chain input -> input + ".reduced" -> output. At each step the source is
    moved if the destination is missing; if the destination exists, the
    source is deleted instead, except when both paths are equal.
    CommandNode._teardown is run afterwards.
    """
    def _rerooted(path):
        return fileutils.reroot_path(temp, path)

    for postfix in ("ALIGNMENT", "PARTITION"):
        chain = [
            self._kwargs["TEMP_IN_" + postfix],
            self._kwargs["TEMP_IN_" + postfix] + ".reduced",
            self._kwargs["OUT_" + postfix],
        ]

        for step in (0, 1):
            source = _rerooted(chain[step])
            destination = _rerooted(chain[step + 1])

            destination_exists = os.path.exists(destination)
            if not destination_exists:
                fileutils.move_file(source, destination)
            elif source != destination:
                os.remove(source)

    CommandNode._teardown(self, config, temp)
def _teardown(self, _config, temp):
    """Move the output file into place and clean up helper files.

    After the output file has been moved, the files listed in 'self._pipes'
    and 'self._tables' are removed, as is "intervals.bed" in 'temp' if it
    exists. Any "cat" processes are committed, and the captured stdout file
    is removed unless 'self._print_stats' is set.
    """
    staged_output = reroot_path(temp, self._output_file)
    move_file(staged_output, self._output_file)

    for pipe_path in self._pipes.itervalues():
        os.remove(pipe_path)

    for table_entry in self._tables.itervalues():
        (table_path, _) = table_entry
        os.remove(table_path)

    intervals = os.path.join(temp, "intervals.bed")
    if os.path.exists(intervals):
        os.remove(intervals)

    cat_procs = self._procs.get("cat", ())
    for proc in cat_procs:
        proc.commit(temp)

    if self._print_stats:
        return

    stdout_name = "pipe_coverage_%i.stdout" % id(self)
    os.remove(os.path.join(temp, stdout_name))
def _run(self, _config, temp):
    """Write a bootstrapped version of the input PHYLIP file.

    The partitions and sequences are read from 'self._input_part' and
    'self._input_phy', the bootstrapped sequence fragments are obtained from
    'self._bootstrap_sequences', and the result is written as the header
    followed by one "<name> <fragments>" line per sequence. The file is
    built in 'temp' and then moved to 'self._output_phy'.
    """
    # Previously 'rng' was only assigned when a seed was given, so running
    # without a seed failed with an unbound local variable. Random(None)
    # seeds itself from the OS / system time, so one constructor call covers
    # both the seeded and the unseeded case.
    rng = random.Random(self._seed)

    partitions = _read_partitions(self._input_part)
    header, names, sequences = _read_sequences(self._input_phy)
    bootstraps = self._bootstrap_sequences(sequences, partitions, rng)

    temp_fpath = reroot_path(temp, self._output_phy)
    with open(temp_fpath, "w") as output_phy:
        output_phy.write(header)

        for (name, fragments) in zip(names, bootstraps):
            output_phy.write(name)
            output_phy.write(" ")
            for sequence in fragments:
                output_phy.write(sequence)
            output_phy.write("\n")

    move_file(temp_fpath, self._output_phy)
def _run(self, _config, temp):
    """Write one FASTA record per (contig, gene) found in the BED file.

    BED records are parsed as BEDRecord objects, sorted by contig, name and
    start, and grouped by name; the sequence for each group is obtained from
    'self._collect_sequence'. Records are written to "sequences.fasta" in
    'temp', which is then moved to 'self._outfile'.
    """
    def _sort_key(record):
        return (record.contig, record.name, record.start)

    def _gene_name(record):
        return record.name

    reference = pysam.Fastafile(self._reference)
    collected = {}
    with open(self._bedfile) as bedfile:
        by_contig = text.parse_lines_by_contig(bedfile, BEDRecord)
        for (contig, beds) in sorted(by_contig.iteritems()):
            # groupby requires records of the same gene to be adjacent
            beds.sort(key=_sort_key)
            for (gene, grouped) in itertools.groupby(beds, _gene_name):
                records = tuple(grouped)
                collected[(contig, gene)] = \
                    self._collect_sequence(reference, records)

    temp_file = os.path.join(temp, "sequences.fasta")
    with open(temp_file, "w") as out_file:
        for (key, sequence) in sorted(collected.items()):
            record = FASTA(key[1], None, sequence)
            record.write(out_file)

    fileutils.move_file(temp_file, self._outfile)
def _run(self, _config, temp):
    """Extract the sequence of each gene listed in the BED file.

    BED records are parsed with pysam.asBed(), sorted by contig, name and
    start, and grouped by name; 'self._collect_sequence' supplies the
    sequence for each group. The FASTA file is built as "sequences.fasta" in
    'temp' and moved to 'self._outfile'.
    """
    by_position = lambda bed: (bed.contig, bed.name, bed.start)
    by_name = lambda bed: bed.name

    fastafile = pysam.Fastafile(self._reference)
    sequences_by_gene = {}
    with open(self._bedfile) as bedfile:
        bedrecords = text.parse_lines_by_contig(bedfile, pysam.asBed())
        for (contig, beds) in sorted(bedrecords.iteritems()):
            # Sorting places all records of a gene next to each other
            beds.sort(key=by_position)
            for (gene, gene_beds) in itertools.groupby(beds, by_name):
                gene_beds = tuple(gene_beds)
                key = (contig, gene)
                sequences_by_gene[key] = \
                    self._collect_sequence(fastafile, gene_beds)

    temp_file = os.path.join(temp, "sequences.fasta")
    with open(temp_file, "w") as out_file:
        for ((_, gene), sequence) in sorted(sequences_by_gene.items()):
            FASTA(gene, None, sequence).write(out_file)

    fileutils.move_file(temp_file, self._outfile)
def _teardown(self, _config, temp):
    """Move the output file from its re-rooted path in 'temp' to
    'self._output_file'."""
    staged = fileutils.reroot_path(temp, self._output_file)
    fileutils.move_file(staged, self._output_file)
def _teardown(self, _config, temp):
    """Move every file in 'self.outputnames' from its re-rooted path in
    'temp' to its final path, and then run Node._teardown."""
    for final_path in self.outputnames:
        staged_path = reroot_path(temp, final_path)
        move_file(staged_path, final_path)

    Node._teardown(self, _config, temp)
def _teardown(self, _config, temp):
    """Move every file in 'self._outfiles', in sorted order, from its
    re-rooted path in 'temp' to its final path."""
    ordered = sorted(self._outfiles)
    for final_path in ordered:
        staged_path = fileutils.reroot_path(temp, final_path)
        fileutils.move_file(staged_path, final_path)
def _teardown(self, _config, temp):
    """Move the ".phy" and ".partitions" files for 'self._out_prefix' from
    their re-rooted paths in 'temp' to their final paths."""
    for extension in (".phy", ".partitions"):
        staged_path = reroot_path(temp, self._out_prefix + extension)
        move_file(staged_path, self._out_prefix + extension)
def _teardown(self, _config, temp):
    """Move 'self.dest' from its re-rooted path in 'temp' to its final path,
    and then run Node._teardown."""
    staged_path = reroot_path(temp, self.dest)
    move_file(staged_path, self.dest)

    Node._teardown(self, _config, temp)
def _teardown(self, _config, temp):
    """Move 'self._out_phy' from its re-rooted path in 'temp' to its final
    path."""
    staged_path = reroot_path(temp, self._out_phy)
    move_file(staged_path, self._out_phy)
def _teardown(self, _config, temp):
    """Move 'self._output_file' from its re-rooted path in 'temp' to its
    final path."""
    staged_path = reroot_path(temp, self._output_file)
    move_file(staged_path, self._output_file)