def _setup(self, config, temp):
    """Write "contigs.table" into temp, then run CommandNode._setup.

    One row is written for each contig listed by pysam.idxstats for
    self._input_file. Names are translated through self._mapping, and
    contigs that are not present in self._contigs are left out.
    """
    table_fname = os.path.join(temp, "contigs.table")
    with open(table_fname, "w") as handle:
        handle.write("ID\tSize\tNs\tHits\n")

        # Workaround for pysam < 0.9 returning list, >= 0.9 returning str
        idxstats = "".join(pysam.idxstats(self._input_file))
        for record in idxstats.split("\n"):
            record = record.strip()
            if record:
                # Exactly four tab-separated fields are required per record
                contig, _size, hits, _ = record.split("\t")
                contig = self._mapping.get(contig, contig)

                # Excluding contigs is allowed
                if contig in self._contigs:
                    known = self._contigs[contig]
                    handle.write(
                        "{ID}\t{Size}\t{Ns}\t{Hits}\n".format(
                            ID=contig,
                            Size=known["Size"],
                            Ns=known["Ns"],
                            Hits=hits,
                        )
                    )

    CommandNode._setup(self, config, temp)
def _setup(self, config, temp):
    """Clean leftovers from temp and resume from a checkpoint if possible.

    After CommandNode._setup, any "pipe*" files and the file
    "ExaML_info.Pypeline" are removed from temp. If checkpoint files exist
    and FileStatusCache does not report them as outdated relative to
    self.input_files, the one with the highest numeric suffix is appended
    to the command after "-R"; otherwise all checkpoints are removed.
    """
    CommandNode._setup(self, config, temp)

    # The temp folder may contain old files:
    # Remove old pipes to prevent failure at _teardown
    stale_pipes = glob.glob(os.path.join(temp, "pipe*"))
    for stale_pipe in stale_pipes:
        fileutils.try_remove(stale_pipe)

    # ExaML refuses to overwrite old info files
    fileutils.try_remove(os.path.join(temp, "ExaML_info.Pypeline"))

    # Resume from last checkpoint, if one such was generated
    pattern = os.path.join(temp, "ExaML_binaryCheckpoint.Pypeline_*")
    checkpoints = glob.glob(pattern)
    if checkpoints:
        cache = FileStatusCache()
        if cache.are_files_outdated(self.input_files, checkpoints):
            for stale_checkpoint in checkpoints:
                fileutils.try_remove(stale_checkpoint)
        else:
            # Order by the integer following the final underscore
            ordered = sorted(
                checkpoints,
                key=lambda fname: int(fname.rsplit("_", 1)[-1]),
            )

            # FIXME: Less hacky solution to modifying AtomicCmds needed
            self._command._command.append("-R")
            self._command._command.append(ordered[-1])
def _setup(self, config, temp):
    """Symlink the input files into temp and optionally write a .pop file.

    self._input_file and its ".bim" / ".fam" counterparts are symlinked
    into temp. When self._supervised is set, a ".pop" file is written in
    temp containing, for each line of the ".fam" file, the value stored in
    self._samples for that sample under "Group(<k>)", or "-" if unknown.
    """
    CommandNode._setup(self, config, temp)

    linked_files = [self._input_file]
    linked_files.extend(
        fileutils.swap_ext(self._input_file, ext) for ext in (".bim", ".fam")
    )

    for source in linked_files:
        destination = os.path.join(temp, os.path.basename(source))
        os.symlink(os.path.abspath(source), destination)

    if not self._supervised:
        return

    fam_filename = fileutils.swap_ext(self._input_file, ".fam")
    pop_filename = fileutils.reroot_path(
        temp, fileutils.swap_ext(fam_filename, ".pop")
    )

    key = "Group(%i)" % (self._k_groups,)
    with open(fam_filename) as fam_handle, open(pop_filename, "w") as pop_handle:
        for fam_line in fam_handle:
            # Each line must hold at least two whitespace-separated fields
            sample, _ = fam_line.split(None, 1)
            sample_info = self._samples.get(sample, {})
            pop_handle.write("%s\n" % (sample_info.get(key, "-"),))
def _setup(self, config, temp):
    """Run CommandNode._setup, then symlink four files into temp.

    The files are looked up in self._directory and linked by absolute
    path under the same name in temp.
    """
    CommandNode._setup(self, config, temp)

    filenames = (
        "3pGtoA_freq.txt",
        "5pCtoT_freq.txt",
        "dnacomp.txt",
        "misincorporation.txt",
    )
    for filename in filenames:
        source = os.path.abspath(os.path.join(self._directory, filename))
        os.symlink(source, os.path.join(temp, filename))
def _setup(self, config, temp):
    """Write "contigs.table" into temp, then run CommandNode._setup.

    One row is written for each contig listed by pysam.idxstats for
    self._input_file whose name, after contig_name_to_plink_name, is
    numeric or "X". Contigs that are not present in self._contigs are
    skipped rather than failing with a KeyError.

    Raises:
        NodeError: if the size listed for a contig by pysam.idxstats
            differs from the "Size" recorded in self._contigs.
    """
    with open(os.path.join(temp, "contigs.table"), "w") as handle:
        handle.write("ID\tSize\tNs\tHits\n")

        # Workaround for pysam < 0.9 returning list, >= 0.9 returning str
        for line in "".join(pysam.idxstats(self._input_file)).split('\n'):
            line = line.strip()
            if not line:
                continue

            name, size, hits, _ = line.split('\t')
            name = contig_name_to_plink_name(name)
            if name is None or not (name.isdigit() or name == 'X'):
                continue
            elif name not in self._contigs:
                # Excluding contigs is allowed
                continue

            contig = self._contigs[name]
            if int(size) != contig['Size']:
                raise NodeError(
                    "Size mismatch for contig %r: input file lists size %i, "
                    "but size %i was expected"
                    % (name, int(size), contig['Size']))

            row = {
                'ID': name,
                'Size': contig['Size'],
                'Ns': contig['Ns'],
                'Hits': hits,
            }
            handle.write('{ID}\t{Size}\t{Ns}\t{Hits}\n'.format(**row))

    CommandNode._setup(self, config, temp)
def _setup(self, config, temp):
    """See CommandNode._setup.

    Before delegating, self._infile is symlinked (by absolute path) to
    the location returned by reroot_path(temp, self._infile).
    """
    source = os.path.abspath(self._infile)
    os.symlink(source, reroot_path(temp, self._infile))

    CommandNode._setup(self, config, temp)
def _setup(self, config, temp):
    """Clean leftovers from temp and resume from a checkpoint if possible.

    After CommandNode._setup, any "pipe*" files and the file
    "ExaML_info.Pypeline" are removed from temp. If checkpoint files exist
    and FileStatusCache does not report them as outdated relative to
    self.input_files, the one with the highest numeric suffix is appended
    to the command after "-R"; otherwise all checkpoints are removed.
    """
    CommandNode._setup(self, config, temp)

    # The temp folder may contain old files:
    # Remove old pipes to prevent failure at _teardown
    stale_pipes = glob.glob(os.path.join(temp, "pipe*"))
    for stale_pipe in stale_pipes:
        fileutils.try_remove(stale_pipe)

    # ExaML refuses to overwrite old info files
    fileutils.try_remove(os.path.join(temp, "ExaML_info.Pypeline"))

    # Resume from last checkpoint, if one such was generated
    pattern = os.path.join(temp, "ExaML_binaryCheckpoint.Pypeline_*")
    checkpoints = glob.glob(pattern)
    if checkpoints:
        cache = FileStatusCache()
        if cache.are_files_outdated(self.input_files, checkpoints):
            for stale_checkpoint in checkpoints:
                fileutils.try_remove(stale_checkpoint)
        else:
            # Order by the integer following the final underscore
            ordered = sorted(
                checkpoints,
                key=lambda fname: int(fname.rsplit("_", 1)[-1]),
            )

            # FIXME: Less hacky solution to modifying AtomicCmds needed
            self._command._command.append("-R")
            self._command._command.append(ordered[-1])
def _setup(self, config, temp):
    """Symlink the input files into temp and optionally write a .pop file.

    self._input_file and its ".bim" / ".fam" counterparts are symlinked
    into temp. When self._supervised is set, a ".pop" file is written in
    temp containing, for each line of the ".fam" file, the value stored in
    self._samples for that sample under "Group(<k>)", or "-" if unknown.
    """
    CommandNode._setup(self, config, temp)

    linked_files = [self._input_file]
    linked_files.extend(
        fileutils.swap_ext(self._input_file, ext) for ext in (".bim", ".fam")
    )

    for source in linked_files:
        destination = os.path.join(temp, os.path.basename(source))
        os.symlink(os.path.abspath(source), destination)

    if not self._supervised:
        return

    fam_filename = fileutils.swap_ext(self._input_file, ".fam")
    pop_filename = fileutils.reroot_path(
        temp, fileutils.swap_ext(fam_filename, ".pop")
    )

    key = "Group(%i)" % (self._k_groups,)
    with open(fam_filename) as fam_handle, open(pop_filename, "w") as pop_handle:
        for fam_line in fam_handle:
            # Each line must hold at least two whitespace-separated fields
            sample, _ = fam_line.split(None, 1)
            sample_info = self._samples.get(sample, {})
            pop_handle.write("%s\n" % (sample_info.get(key, "-"),))
def _setup(self, config, temp):
    """Write "contigs.table" into temp, then run CommandNode._setup.

    One row is written for each contig listed by pysam.idxstats for
    self._input_file whose name, after contig_name_to_plink_name, is
    numeric or "X" and is present in self._contigs.

    Raises:
        NodeError: if the size listed for a contig by pysam.idxstats
            differs from the "Size" recorded in self._contigs.
    """
    table_fname = os.path.join(temp, "contigs.table")
    with open(table_fname, "w") as handle:
        handle.write("ID\tSize\tNs\tHits\n")

        # Workaround for pysam < 0.9 returning list, >= 0.9 returning str
        idxstats = "".join(pysam.idxstats(self._input_file))
        for record in idxstats.split("\n"):
            record = record.strip()
            if not record:
                continue

            contig, bam_size, hits, _ = record.split("\t")
            contig = contig_name_to_plink_name(contig)
            if contig is None:
                continue
            if not contig.isdigit() and contig != "X":
                continue
            if contig not in self._contigs:
                # Excluding contigs is allowed
                continue

            known = self._contigs[contig]
            if int(bam_size) != known["Size"]:
                raise NodeError(
                    "Size mismatch between database and BAM; "
                    "expected size %i, found %i for contig %r"
                    % (int(bam_size), known["Size"], contig)
                )

            handle.write(
                "{ID}\t{Size}\t{Ns}\t{Hits}\n".format(
                    ID=contig,
                    Size=known["Size"],
                    Ns=known["Ns"],
                    Hits=hits,
                )
            )

    CommandNode._setup(self, config, temp)
def _setup(self, config, temp):
    """Symlink the alignment and partition inputs, then delegate.

    For each of "IN_ALIGNMENT" and "IN_PARTITION", the path stored in
    self._kwargs is linked (by absolute path) to the filename stored under
    the matching "TEMP_"-prefixed key, inside temp.
    """
    for key in ("IN_ALIGNMENT", "IN_PARTITION"):
        os.symlink(
            os.path.abspath(self._kwargs[key]),
            os.path.join(temp, self._kwargs["TEMP_" + key]),
        )

    CommandNode._setup(self, config, temp)
def _setup(self, config, temp):
    """Run CommandNode._setup, then symlink self._symlinks into temp.

    Each file is linked by absolute path under its own basename.
    """
    CommandNode._setup(self, config, temp)

    # Required to avoid the creation of files outside the temp folder
    for path in self._symlinks:
        link_name = os.path.join(temp, os.path.basename(path))
        os.symlink(os.path.abspath(path), link_name)
def _setup(self, config, temp):
    """Run CommandNode._setup, then write "samples.clust" into temp.

    The first whitespace-separated field of every line in self._tfam is
    taken as a sample name; each name is written three times on its own
    line, separated by single spaces.
    """
    CommandNode._setup(self, config, temp)

    samples = []
    with open(self._tfam) as in_handle:
        for line in in_handle:
            samples.append(line.split(None, 1)[0])

    clust_fname = os.path.join(temp, "samples.clust")
    with open(clust_fname, "w") as handle:
        handle.writelines("{0} {0} {0}\n".format(name) for name in samples)
def _setup(self, config, temp):
    """Symlink the alignment and partitions into temp, then delegate.

    self._input_alignment is linked as "RAxML_alignment" and
    self._input_partitions as "RAxML_partitions", both by absolute path.
    """
    links = (
        (self._input_alignment, "RAxML_alignment"),
        (self._input_partitions, "RAxML_partitions"),
    )
    for source, link_name in links:
        os.symlink(os.path.abspath(source), os.path.join(temp, link_name))

    CommandNode._setup(self, config, temp)
def _setup(self, config, temp):
    """Derive the "-k" option from a summary file, then delegate.

    When self._k_file is set, the value of self._k_field is read from it
    via read_summary and used as the number of sites. k is
    ceil(self._snp_distance / (self._genome_size / sites)), with a minimum
    of 1; it is stored in self._param_k and appended to the command.
    """
    if self._k_file is not None:
        summary = read_summary(self._k_file)
        n_sites = float(summary[self._k_field])

        bases_per_site = self._genome_size / n_sites
        k = int(math.ceil(self._snp_distance / bases_per_site))
        if k < 1:
            k = 1

        self._param_k = k
        self._command._command.extend(["-k", str(k)])

    CommandNode._setup(self, config, temp)
def _setup(self, config, temp_root):
    """Run CommandNode._setup, then create the BAM input in temp_root.

    With more than one input file a FIFO is created at the pipe location;
    otherwise the single input file is symlinked there, together with its
    ".bai" file when self._bam_input.indexed is set.
    """
    CommandNode._setup(self, config, temp_root)

    bam_input = self._bam_input
    dst_fname = os.path.join(temp_root, bam_input.pipe)
    if len(bam_input.files) > 1:
        os.mkfifo(dst_fname)
        return

    # Exactly one file is expected here; unpacking fails otherwise
    (src_fname,) = bam_input.files
    os.symlink(os.path.join(os.getcwd(), src_fname), dst_fname)

    # NOTE(review): kept within the single-file case, since src_fname is
    # only bound there.
    if bam_input.indexed:
        index_fname = os.path.join(os.getcwd(), swap_ext(src_fname, ".bai"))
        os.symlink(index_fname, dst_fname + ".bai")
def _setup(self, config, temp):
    """Run CommandNode._setup, then create the BAM input in temp.

    With more than one input BAM a FIFO named self.PIPE_FILE is created;
    otherwise the single BAM is symlinked under that name, together with
    its index when self._index_format is set.
    """
    CommandNode._setup(self, config, temp)

    pipe_fname = os.path.join(temp, self.PIPE_FILE)
    if len(self._input_bams) > 1:
        os.mkfifo(pipe_fname)
        return

    source_fname = os.path.abspath(self._input_bams[0])
    os.symlink(source_fname, pipe_fname)

    # NOTE(review): kept within the single-file case, since source_fname
    # is only bound there.
    if self._index_format:
        index_source = swap_ext(source_fname, self._index_format)
        index_link = swap_ext(pipe_fname, self._index_format)
        os.symlink(index_source, index_link)
def _setup(self, config, temp):
    """Write a rerooted tree with support values, then delegate.

    Every line of self._bootstraps and the content of self._treefile are
    parsed with Newick.from_string. The tree is rerooted on its midpoint,
    annotated via add_support using the bootstrap trees, and written to
    "rerooted.newick" in temp.
    """
    bootstraps = []
    with open(self._bootstraps) as handle:
        for line in handle:
            bootstraps.append(Newick.from_string(line.strip()))

    with open(self._treefile) as handle:
        tree = Newick.from_string(handle.read().strip())

    rerooted = tree.reroot_on_midpoint()
    supported = rerooted.add_support(bootstraps, "{Percentage:.0f}")

    output_fname = os.path.join(temp, "rerooted.newick")
    with open(output_fname, "w") as handle:
        handle.write("{}\n".format(supported))

    CommandNode._setup(self, config, temp)
def _setup(self, config, temp):
    """Write "samples.txt" into temp in the order of self._order.

    self._samples is read as a tab-separated table whose first line is the
    header; rows are indexed by their "Name" column. The header and the
    rows named in self._order are then written out, after which
    CommandNode._setup is run.
    """
    rows_by_name = {}
    with open(self._samples) as handle:
        columns = handle.readline().strip().split("\t")
        for line in handle:
            values = line.strip().split("\t")
            record = dict(zip(columns, values))
            rows_by_name[record["Name"]] = record

    output_fname = os.path.join(temp, "samples.txt")
    with open(output_fname, "w") as handle:
        handle.write("{}\n".format("\t".join(columns)))

        for name in self._order:
            record = rows_by_name[name]
            fields = [record[column] for column in columns]
            handle.write("{}\n".format("\t".join(fields)))

    CommandNode._setup(self, config, temp)
def _setup(self, config, temp):
    """Run CommandNode._setup, then write "parameters.txt" into temp.

    The file contains one "name: value" setting per line. Input paths are
    built from the absolute path of self._input_prefix, output names from
    the basename of self._output_prefix, and "numchrom" is taken from
    self._nchroms.
    """
    CommandNode._setup(self, config, temp)

    input_prefix = os.path.abspath(self._input_prefix)
    output_prefix = os.path.basename(self._output_prefix)

    # Each setting must be written on its own line; emitting them as a
    # single space-separated line does not yield a usable parameter file.
    parameters = (
        ("genotypename", input_prefix + ".bed"),
        ("snpname", input_prefix + ".bim"),
        ("indivname", input_prefix + ".fam"),
        ("evecoutname", output_prefix + ".evec"),
        ("evaloutname", output_prefix + ".eval"),
        ("deletsnpoutname", output_prefix + ".deleted_snps"),
        ("altnormstyle", "NO"),
        ("numoutevec", 5),
        ("familynames", "YES"),
        ("numoutlieriter", 1),
        ("numchrom", self._nchroms),
    )

    with open(os.path.join(temp, "parameters.txt"), "w") as handle:
        for name, value in parameters:
            handle.write("{}: {}\n".format(name, value))
def _setup(self, config, temp):
    """Run CommandNode._setup, then write "parameters.txt" into temp.

    The file contains one "name: value" setting per line. Input paths are
    built from the absolute path of self._input_prefix, output names from
    the basename of self._output_prefix, "numchrom" is taken from
    self._nchroms, and "numthreads" is fixed at 1.
    """
    CommandNode._setup(self, config, temp)

    input_prefix = os.path.abspath(self._input_prefix)
    output_prefix = os.path.basename(self._output_prefix)

    # Each setting must be written on its own line; emitting them as a
    # single space-separated line does not yield a usable parameter file.
    parameters = (
        ("genotypename", input_prefix + ".bed"),
        ("snpname", input_prefix + ".bim"),
        ("indivname", input_prefix + ".fam"),
        ("evecoutname", output_prefix + ".evec"),
        ("evaloutname", output_prefix + ".eval"),
        ("deletsnpoutname", output_prefix + ".deleted_snps"),
        ("altnormstyle", "NO"),
        ("numoutevec", 5),
        ("familynames", "YES"),
        ("numoutlieriter", 1),
        ("numchrom", self._nchroms),
        ("numthreads", 1),
    )

    with open(os.path.join(temp, "parameters.txt"), "w") as handle:
        for name, value in parameters:
            handle.write("{}: {}\n".format(name, value))
def test_command_node__run():
    """CommandNode.run invokes its hooks and the command in a fixed order."""
    cfg_mock = Mock(temp_root=_DUMMY_TEMP_ROOT)
    mock = _build_cmd_mock()

    node_mock = CommandNode(mock)
    # Route the node's own hooks through the command mock, so that a single
    # call log records the relative order of all calls.
    hooks = mock._test_node_
    for hook_name in ("_create_temp_dir", "_setup", "_teardown", "_remove_temp_dir"):
        setattr(node_mock, hook_name, getattr(hooks, hook_name))
    node_mock._create_temp_dir.return_value = _DUMMY_TEMP

    node_mock.run(cfg_mock)

    expected_calls = [
        call._test_node_._create_temp_dir(cfg_mock),
        call._test_node_._setup(cfg_mock, _DUMMY_TEMP),
        call.run(_DUMMY_TEMP),
        call.join(),
        call._test_node_._teardown(cfg_mock, _DUMMY_TEMP),
        call._test_node_._remove_temp_dir(_DUMMY_TEMP),
    ]
    assert mock.mock_calls == expected_calls
def _setup(self, config, temp):
    """Create the "uncompressed_input" FIFO if needed, then delegate.

    The FIFO is only created in temp when self._multi_file_input is set.
    """
    if self._multi_file_input:
        fifo_fname = os.path.join(temp, "uncompressed_input")
        os.mkfifo(fifo_fname)

    CommandNode._setup(self, config, temp)
def _setup(self, config, temp):
    """Run CommandNode._setup, then create a FIFO in temp.

    The FIFO is named after self._basename.
    """
    CommandNode._setup(self, config, temp)

    fifo_fname = os.path.join(temp, self._basename)
    os.mkfifo(fifo_fname)
def test_commandnode_setup__files_missing(kwargs):
    """CommandNode._setup raises NodeError for the given mock settings."""
    node = CommandNode(_build_cmd_mock(**kwargs))

    pytest.raises(NodeError, node._setup, None, None)
def test_commandnode_setup__files_exist(kwargs):
    """CommandNode._setup completes without raising for the given settings."""
    command = _build_cmd_mock(**kwargs)

    CommandNode(command)._setup(None, None)
def _setup(self, config, temp):
    """Symlink the alignment and partitions into temp, then delegate.

    self._input_alignment is linked as "RAxML_alignment" and
    self._input_partitions as "RAxML_partitions", both by absolute path.
    """
    links = (
        (self._input_alignment, "RAxML_alignment"),
        (self._input_partitions, "RAxML_partitions"),
    )
    for source, link_name in links:
        os.symlink(os.path.abspath(source), os.path.join(temp, link_name))

    CommandNode._setup(self, config, temp)
def _do_test_commandnode_setup(kwargs):
    """Build a CommandNode around a command mock and call its _setup.

    kwargs are forwarded to _build_cmd_mock; _setup is called with None
    for both the config and the temp folder.
    """
    command = _build_cmd_mock(**kwargs)

    CommandNode(command)._setup(None, None)
def _setup(self, config, temp):
    """Run CommandNode._setup, then symlink one file into temp.

    "Stats_out_MCMC_correct_prob.csv" is looked up in self._directory and
    linked by absolute path under the same name in temp.
    """
    CommandNode._setup(self, config, temp)

    filenames = ("Stats_out_MCMC_correct_prob.csv",)
    for filename in filenames:
        source = os.path.abspath(os.path.join(self._directory, filename))
        os.symlink(source, os.path.join(temp, filename))