Пример #1
0
    def _setup(self, config, temp):
        """Write "contigs.table" into `temp`, then run CommandNode._setup.

        One row is written for each line of `pysam.idxstats` output for the
        input file whose (optionally re-mapped) contig name is present in
        `self._contigs`. Size and Ns are taken from `self._contigs`, while
        Hits is the third idxstats column.
        """
        table_path = os.path.join(temp, "contigs.table")
        with open(table_path, "w") as handle:
            handle.write("ID\tSize\tNs\tHits\n")

            # pysam < 0.9 returns a list of lines and pysam >= 0.9 returns a
            # single string; joining the result handles both cases
            idxstats = "".join(pysam.idxstats(self._input_file))
            for record in idxstats.split("\n"):
                record = record.strip()
                if record:
                    # The second and fourth columns are not used here
                    contig, _size, hits, _ = record.split("\t")
                    contig = self._mapping.get(contig, contig)

                    # Excluding contigs is allowed
                    if contig in self._contigs:
                        handle.write(
                            "{ID}\t{Size}\t{Ns}\t{Hits}\n".format(
                                ID=contig,
                                Size=self._contigs[contig]["Size"],
                                Ns=self._contigs[contig]["Ns"],
                                Hits=hits,
                            )
                        )

        CommandNode._setup(self, config, temp)
Пример #2
0
    def _setup(self, config, temp):
        """Run the base setup, clear stale files from `temp`, and resume from
        the newest checkpoint when the existing checkpoints are up to date.

        Checkpoints that are outdated relative to the input files are removed
        instead of being used.
        """
        CommandNode._setup(self, config, temp)

        # The temp folder may contain old files:
        # Remove old pipes to prevent failure at _teardown
        stale_pipes = glob.glob(os.path.join(temp, "pipe*"))
        for stale_pipe in stale_pipes:
            fileutils.try_remove(stale_pipe)
        # ExaML refuses to overwrite old info files
        fileutils.try_remove(os.path.join(temp, "ExaML_info.Pypeline"))

        # Resume from last checkpoint, if one such was generated
        pattern = os.path.join(temp, "ExaML_binaryCheckpoint.Pypeline_*")
        checkpoints = glob.glob(pattern)
        if checkpoints:
            cache = FileStatusCache()
            if cache.are_files_outdated(self.input_files, checkpoints):
                for outdated in checkpoints:
                    fileutils.try_remove(outdated)
            else:
                def _checkpoint_number(path):
                    # Checkpoints are numbered by the text after the last "_"
                    return int(path.rsplit("_", 1)[-1])

                newest = sorted(checkpoints, key=_checkpoint_number)[-1]

                # FIXME: Less hacky solution to modifying AtomicCmds needed
                self._command._command.append("-R")
                self._command._command.append(newest)
Пример #3
0
    def _setup(self, config, temp):
        """Run the base setup, symlink the input file and its ".bim" / ".fam"
        companions into `temp`, and, for supervised runs, write a ".pop" file
        containing one group label per line of the ".fam" file.

        Samples without a group for the current number of groups are written
        as "-".
        """
        CommandNode._setup(self, config, temp)

        sources = [self._input_file]
        sources.append(fileutils.swap_ext(self._input_file, ".bim"))
        sources.append(fileutils.swap_ext(self._input_file, ".fam"))
        for source in sources:
            link_name = os.path.join(temp, os.path.basename(source))
            os.symlink(os.path.abspath(source), link_name)

        if not self._supervised:
            return

        fam_path = fileutils.swap_ext(self._input_file, ".fam")
        pop_path = fileutils.swap_ext(fam_path, ".pop")
        pop_path = fileutils.reroot_path(temp, pop_path)

        group_key = "Group(%i)" % (self._k_groups,)
        with open(fam_path) as fam_handle, open(pop_path, "w") as pop_handle:
            for line in fam_handle:
                # Only the first whitespace-separated column is used
                sample, _ = line.split(None, 1)
                sample_info = self._samples.get(sample, {})

                pop_handle.write("%s\n" % (sample_info.get(group_key, "-"),))
Пример #4
0
 def _setup(self, config, temp):
     """Run CommandNode._setup, then symlink four files from
     `self._directory` into `temp` under their original names.
     """
     CommandNode._setup(self, config, temp)

     filenames = (
         "3pGtoA_freq.txt",
         "5pCtoT_freq.txt",
         "dnacomp.txt",
         "misincorporation.txt",
     )
     for filename in filenames:
         source = os.path.abspath(os.path.join(self._directory, filename))
         os.symlink(source, os.path.join(temp, filename))
Пример #5
0
    def _setup(self, config, temp):
        """Write "contigs.table" into `temp`, then run CommandNode._setup.

        One row is written for each line of `pysam.idxstats` output for the
        input file whose converted contig name is numeric or 'X' and is
        present in `self._contigs`. Size and Ns are taken from
        `self._contigs`, while Hits is the third idxstats column.

        Raises:
            NodeError: If the size reported for a contig in the input file
                differs from the size recorded in `self._contigs`.
        """
        with open(os.path.join(temp, "contigs.table"), "w") as handle:
            handle.write("ID\tSize\tNs\tHits\n")

            # Workaround for pysam < 0.9 returning list, >= 0.9 returning str
            for line in "".join(pysam.idxstats(self._input_file)).split('\n'):
                line = line.strip()
                if not line:
                    continue

                name, size, hits, _ = line.split('\t')
                name = contig_name_to_plink_name(name)
                if name is None or not (name.isdigit() or name == 'X'):
                    continue
                elif name not in self._contigs:
                    # Excluding contigs is allowed; skipping them here avoids
                    # an uninformative KeyError on the lookups below
                    continue

                contig = self._contigs[name]
                if int(size) != contig['Size']:
                    raise NodeError(
                        "Size mismatch between database and BAM for contig "
                        "%r; database has size %i, BAM has size %i"
                        % (name, contig['Size'], int(size)))

                row = {
                    'ID': name,
                    'Size': contig['Size'],
                    'Ns': contig['Ns'],
                    'Hits': hits,
                }

                handle.write('{ID}\t{Size}\t{Ns}\t{Hits}\n'.format(**row))

        CommandNode._setup(self, config, temp)
Пример #6
0
    def _setup(self, config, temp):
        """See CommandNode._setup."""
        # Link the input file at the location that reroot_path derives from
        # `temp` and the input filename, before running the base setup
        link_name = reroot_path(temp, self._infile)
        os.symlink(os.path.abspath(self._infile), link_name)

        CommandNode._setup(self, config, temp)
Пример #7
0
    def _setup(self, config, temp):
        """Run the base setup, remove leftovers from earlier runs in `temp`,
        and pass the most recent checkpoint to the command via "-R" if the
        checkpoints are not outdated relative to the input files.

        Outdated checkpoints are deleted rather than reused.
        """
        def _by_checkpoint_number(filename):
            # The checkpoint number follows the final "_" in the filename
            return int(filename.rsplit("_", 1)[-1])

        CommandNode._setup(self, config, temp)

        # The temp folder may contain old files:
        # Remove old pipes to prevent failure at _teardown
        for old_pipe in glob.glob(os.path.join(temp, "pipe*")):
            fileutils.try_remove(old_pipe)
        # ExaML refuses to overwrite old info files
        info_file = os.path.join(temp, "ExaML_info.Pypeline")
        fileutils.try_remove(info_file)

        # Resume from last checkpoint, if one such was generated
        found = glob.glob(
            os.path.join(temp, "ExaML_binaryCheckpoint.Pypeline_*"))
        if not found:
            return

        status = FileStatusCache()
        if status.are_files_outdated(self.input_files, found):
            for old_checkpoint in found:
                fileutils.try_remove(old_checkpoint)
            return

        found.sort(key=_by_checkpoint_number)

        # FIXME: Less hacky solution to modifying AtomicCmds needed
        self._command._command.append("-R")
        self._command._command.append(found[-1])
Пример #8
0
 def _setup(self, config, temp):
     """Run CommandNode._setup and make four files located in
     `self._directory` available in `temp` through symlinks.
     """
     CommandNode._setup(self, config, temp)

     required = ["3pGtoA_freq.txt", "5pCtoT_freq.txt"]
     required += ["dnacomp.txt", "misincorporation.txt"]
     for name in required:
         target = os.path.abspath(os.path.join(self._directory, name))
         link_name = os.path.join(temp, name)
         os.symlink(target, link_name)
Пример #9
0
    def _setup(self, config, temp):
        """Run the base setup and symlink the input file together with its
        ".bim" and ".fam" counterparts into `temp`.

        In supervised mode a ".pop" file is also written, with one line per
        line of the ".fam" file giving the group of that sample, or "-" if no
        group is recorded for it.
        """
        CommandNode._setup(self, config, temp)

        to_link = (
            self._input_file,
            fileutils.swap_ext(self._input_file, ".bim"),
            fileutils.swap_ext(self._input_file, ".fam"),
        )
        for path in to_link:
            destination = os.path.join(temp, os.path.basename(path))
            os.symlink(os.path.abspath(path), destination)

        if self._supervised:
            fam_filename = fileutils.swap_ext(self._input_file, ".fam")
            pop_filename = fileutils.reroot_path(
                temp, fileutils.swap_ext(fam_filename, ".pop"))

            key = "Group(%i)" % (self._k_groups, )
            with open(fam_filename) as fam_handle, \
                    open(pop_filename, "w") as pop_handle:
                for record in fam_handle:
                    # The remainder of the line is not needed
                    sample, _ = record.split(None, 1)
                    groups = self._samples.get(sample, {})

                    pop_handle.write("%s\n" % (groups.get(key, "-"), ))
Пример #10
0
    def _setup(self, config, temp):
        """See CommandNode._setup."""
        # The symlink is created before the base setup is run
        source = os.path.abspath(self._infile)
        os.symlink(source, reroot_path(temp, self._infile))

        CommandNode._setup(self, config, temp)
Пример #11
0
    def _setup(self, config, temp):
        """Write "contigs.table" into `temp`, then run CommandNode._setup.

        Only contigs whose converted name is numeric or 'X', and which are
        present in `self._contigs`, are written. Size and Ns are taken from
        `self._contigs`, while Hits is the third idxstats column.

        Raises:
            NodeError: If the size listed by idxstats for a contig differs
                from the size recorded in `self._contigs`.
        """
        table_path = os.path.join(temp, "contigs.table")
        with open(table_path, "w") as handle:
            handle.write("ID\tSize\tNs\tHits\n")

            # pysam < 0.9 returns a list of lines and pysam >= 0.9 returns a
            # single string; joining the result handles both cases
            idxstats = "".join(pysam.idxstats(self._input_file))
            for record in idxstats.split('\n'):
                record = record.strip()
                if not record:
                    continue

                contig, bam_size, hits, _ = record.split('\t')
                contig = contig_name_to_plink_name(contig)
                if contig is None:
                    continue
                if not contig.isdigit() and contig != 'X':
                    continue
                if contig not in self._contigs:
                    # Excluding contigs is allowed
                    continue

                if int(bam_size) != self._contigs[contig]['Size']:
                    raise NodeError(
                        "Size mismatch between database and BAM; "
                        "expected size %i, found %i for contig %r" %
                        (int(bam_size), self._contigs[contig]['Size'], contig))

                handle.write(
                    '{ID}\t{Size}\t{Ns}\t{Hits}\n'.format(
                        ID=contig,
                        Size=self._contigs[contig]['Size'],
                        Ns=self._contigs[contig]['Ns'],
                        Hits=hits))

        CommandNode._setup(self, config, temp)
Пример #12
0
    def _setup(self, config, temp):
        """Symlink the "IN_ALIGNMENT" and "IN_PARTITION" files into `temp`
        under the names stored for the matching "TEMP_" keys, then run
        CommandNode._setup.
        """
        for key in ("IN_ALIGNMENT", "IN_PARTITION"):
            original = os.path.abspath(self._kwargs[key])
            link_name = os.path.join(temp, self._kwargs["TEMP_" + key])
            os.symlink(original, link_name)

        CommandNode._setup(self, config, temp)
Пример #13
0
    def _setup(self, config, temp):
        """Run CommandNode._setup, then symlink every file listed in
        `self._symlinks` into `temp` under its basename.
        """
        CommandNode._setup(self, config, temp)

        # Required to avoid the creation of files outside the temp folder
        for path in self._symlinks:
            link_name = os.path.join(temp, os.path.basename(path))
            os.symlink(os.path.abspath(path), link_name)
Пример #14
0
    def _setup(self, config, temp):
        """Run CommandNode._setup, then write "samples.clust" into `temp`.

        The file contains one line per line of `self._tfam`, consisting of
        the first whitespace-separated field of that line repeated three
        times.
        """
        CommandNode._setup(self, config, temp)

        # All names are collected before the output file is opened
        names = []
        with open(self._tfam) as in_handle:
            for line in in_handle:
                names.append(line.split(None, 1)[0])

        clust_path = os.path.join(temp, "samples.clust")
        with open(clust_path, "w") as handle:
            handle.writelines("{0} {0} {0}\n".format(name) for name in names)
Пример #15
0
    def _setup(self, config, temp):
        """Run the base setup and create, for each entry in
        `self._symlinks`, a symlink in `temp` named after the entry's
        basename and pointing at its absolute path.
        """
        CommandNode._setup(self, config, temp)

        # Required to avoid the creation of files outside the temp folder
        for entry in self._symlinks:
            target = os.path.abspath(entry)
            os.symlink(target, os.path.join(temp, os.path.basename(entry)))
Пример #16
0
    def _setup(self, config, temp):
        """Run the base setup and generate "samples.clust" in `temp`.

        Every line of `self._tfam` contributes one output line, in which the
        first whitespace-separated field of the input line appears three
        times, separated by single spaces.
        """
        CommandNode._setup(self, config, temp)

        with open(self._tfam) as in_handle:
            first_fields = [record.split(None, 1)[0] for record in in_handle]

        # The input is read in full before the output file is created
        output_lines = ["{0} {0} {0}\n".format(field) for field in first_fields]
        with open(os.path.join(temp, "samples.clust"), "w") as handle:
            for output_line in output_lines:
                handle.write(output_line)
Пример #17
0
 def _setup(self, config, temp):
     """Symlink the input alignment and partitions into `temp` as
     "RAxML_alignment" and "RAxML_partitions", then run CommandNode._setup.
     """
     def _link(source, link_name):
         # Point `link_name` in `temp` at the absolute path of `source`
         os.symlink(os.path.abspath(source), os.path.join(temp, link_name))

     _link(self._input_alignment, "RAxML_alignment")
     _link(self._input_partitions, "RAxML_partitions")
     CommandNode._setup(self, config, temp)
Пример #18
0
    def _setup(self, config, temp):
        """Derive the "-k" parameter from a summary file, if one was given,
        and then run CommandNode._setup.

        The value is the SNP distance divided by the genome size per site,
        rounded up and never less than 1. It is stored in `self._param_k` and
        appended to the command as "-k <value>".
        """
        if self._k_file is not None:
            summary = read_summary(self._k_file)
            n_sites = float(summary[self._k_field])

            bases_per_site = self._genome_size / n_sites
            k = int(math.ceil(self._snp_distance / bases_per_site))
            if k < 1:
                k = 1

            self._param_k = k
            self._command._command.extend(("-k", str(k)))

        CommandNode._setup(self, config, temp)
Пример #19
0
    def _setup(self, config, temp):
        """Compute and append the "-k" option when a summary file is set,
        then run the base setup.

        The number of sites is read from field `self._k_field` of the
        summary; "-k" is the ceiling of the SNP distance divided by
        (genome size / number of sites), with a minimum of 1. The value is
        also saved as `self._param_k`.
        """
        have_k_file = self._k_file is not None
        if have_k_file:
            n_sites = float(read_summary(self._k_file)[self._k_field])
            ratio = self._snp_distance / (self._genome_size / n_sites)
            k = max(1, int(math.ceil(ratio)))

            self._param_k = k
            self._command._command.extend(("-k", str(k)))

        CommandNode._setup(self, config, temp)
Пример #20
0
    def _setup(self, config, temp_root):
        """Run the base setup and prepare the BAM input path in `temp_root`.

        With more than one input file a FIFO is created at the pipe path.
        Otherwise the single input file is symlinked there, along with its
        ".bai" index when the input is marked as indexed.
        """
        CommandNode._setup(self, config, temp_root)

        pipe_path = os.path.join(temp_root, self._bam_input.pipe)
        if len(self._bam_input.files) > 1:
            os.mkfifo(pipe_path)
            return

        # Exactly one file is expected here; the unpacking fails otherwise
        (bam_path,) = self._bam_input.files
        os.symlink(os.path.join(os.getcwd(), bam_path), pipe_path)

        if self._bam_input.indexed:
            index_path = os.path.join(os.getcwd(), swap_ext(bam_path, ".bai"))
            os.symlink(index_path, pipe_path + ".bai")
Пример #21
0
    def _setup(self, config, temp):
        """Run the base setup and prepare `self.PIPE_FILE` in `temp`.

        With more than one input BAM a FIFO is created. Otherwise the first
        input BAM is symlinked in its place, together with its index when an
        index format is set.
        """
        CommandNode._setup(self, config, temp)

        destination = os.path.join(temp, self.PIPE_FILE)
        if len(self._input_bams) > 1:
            os.mkfifo(destination)
            return

        source = os.path.abspath(self._input_bams[0])
        os.symlink(source, destination)

        if self._index_format:
            index_source = swap_ext(source, self._index_format)
            index_destination = swap_ext(destination, self._index_format)
            os.symlink(index_source, index_destination)
Пример #22
0
    def _setup(self, config, temp):
        """Run the base setup, then create either a FIFO or a symlink at the
        `self.PIPE_FILE` path in `temp`.

        A FIFO is used when there are several input BAMs. Otherwise the first
        input BAM is linked directly, and its index is linked as well if
        `self._index_format` is set.
        """
        CommandNode._setup(self, config, temp)

        pipe_path = os.path.join(temp, self.PIPE_FILE)
        multiple_inputs = len(self._input_bams) > 1
        if multiple_inputs:
            os.mkfifo(pipe_path)
        else:
            bam_path = os.path.abspath(self._input_bams[0])
            os.symlink(bam_path, pipe_path)

            index_format = self._index_format
            if index_format:
                os.symlink(swap_ext(bam_path, index_format),
                           swap_ext(pipe_path, index_format))
Пример #23
0
    def _setup(self, config, temp):
        """Write "rerooted.newick" into `temp`, then run CommandNode._setup.

        The tree read from `self._treefile` is rerooted on its midpoint and
        annotated with support values computed from the trees found in
        `self._bootstraps`, one tree per line.
        """
        bootstraps = []
        with open(self._bootstraps) as handle:
            for line in handle:
                bootstraps.append(Newick.from_string(line.strip()))

        with open(self._treefile) as handle:
            newick_text = handle.read().strip()

        rerooted = Newick.from_string(newick_text).reroot_on_midpoint()
        supported = rerooted.add_support(bootstraps, "{Percentage:.0f}")

        output_path = os.path.join(temp, "rerooted.newick")
        with open(output_path, "w") as handle:
            handle.write("{}\n".format(supported))

        CommandNode._setup(self, config, temp)
Пример #24
0
    def _setup(self, config, temp):
        """Reroot the input tree, add bootstrap support, and save the result
        as "rerooted.newick" in `temp` before running the base setup.

        Bootstrap trees are parsed from `self._bootstraps`, one per line, and
        the main tree from the full contents of `self._treefile`.
        """
        with open(self._bootstraps) as bootstrap_handle:
            replicates = list(
                Newick.from_string(record.strip())
                for record in bootstrap_handle
            )

        with open(self._treefile) as tree_handle:
            tree = Newick.from_string(tree_handle.read().strip())

        tree = tree.reroot_on_midpoint().add_support(
            replicates, "{Percentage:.0f}")

        with open(os.path.join(temp, "rerooted.newick"), "w") as out_handle:
            out_handle.write("{}\n".format(tree))

        CommandNode._setup(self, config, temp)
Пример #25
0
    def _setup(self, config, temp):
        """Write "samples.txt" into `temp`, then run CommandNode._setup.

        The tab-separated table in `self._samples` is read and indexed by its
        "Name" column. The output repeats the header, followed by the rows
        for the names in `self._order`, in that order.
        """
        rows_by_name = {}
        with open(self._samples) as handle:
            header = handle.readline().strip().split('\t')
            for line in handle:
                fields = line.strip().split('\t')
                record = dict(zip(header, fields))
                rows_by_name[record["Name"]] = record

        output_path = os.path.join(temp, "samples.txt")
        with open(output_path, "w") as handle:
            handle.write("{}\n".format("\t".join(header)))

            for name in self._order:
                record = rows_by_name[name]
                values = [record[key] for key in header]
                handle.write("{}\n".format("\t".join(values)))

        CommandNode._setup(self, config, temp)
Пример #26
0
    def _setup(self, config, temp):
        """Create a reordered copy of the samples table as "samples.txt" in
        `temp`, then run the base setup.

        Rows of the tab-separated `self._samples` file are looked up by their
        "Name" column and written in the order given by `self._order`, after
        the original header line.
        """
        with open(self._samples) as in_handle:
            header = in_handle.readline().strip().split('\t')

            table = {}
            for record in in_handle:
                row = dict(zip(header, record.strip().split('\t')))
                table[row["Name"]] = row

        with open(os.path.join(temp, "samples.txt"), "w") as out_handle:
            header_line = "\t".join(header)
            out_handle.write("{}\n".format(header_line))

            for name in self._order:
                selected = table[name]
                row_line = "\t".join(selected[column] for column in header)
                out_handle.write("{}\n".format(row_line))

        CommandNode._setup(self, config, temp)
Пример #27
0
    def _setup(self, config, temp):
        CommandNode._setup(self, config, temp)

        with open(os.path.join(temp, "parameters.txt"), "w") as handle:
            handle.write("""
genotypename:      {input_prefix}.bed
snpname:           {input_prefix}.bim
indivname:         {input_prefix}.fam
evecoutname:       {output_prefix}.evec
evaloutname:       {output_prefix}.eval
deletsnpoutname:   {output_prefix}.deleted_snps
altnormstyle:      NO
numoutevec:        5
familynames:       YES
numoutlieriter:    1
numchrom:          {nchroms}
""".format(input_prefix=os.path.abspath(self._input_prefix),
           output_prefix=os.path.basename(self._output_prefix),
           nchroms=self._nchroms))
Пример #28
0
    def _setup(self, config, temp):
        CommandNode._setup(self, config, temp)

        with open(os.path.join(temp, "parameters.txt"), "w") as handle:
            handle.write("""
genotypename:      {input_prefix}.bed
snpname:           {input_prefix}.bim
indivname:         {input_prefix}.fam
evecoutname:       {output_prefix}.evec
evaloutname:       {output_prefix}.eval
deletsnpoutname:   {output_prefix}.deleted_snps
altnormstyle:      NO
numoutevec:        5
familynames:       YES
numoutlieriter:    1
numchrom:          {nchroms}
numthreads:        1
""".format(input_prefix=os.path.abspath(self._input_prefix),
            output_prefix=os.path.basename(self._output_prefix),
            nchroms=self._nchroms))
Пример #29
0
def test_command_node__run():
    """CommandNode.run is expected to create the temp dir, run setup, run and
    join the command, run teardown, and remove the temp dir, in that order.
    """
    config = Mock(temp_root=_DUMMY_TEMP_ROOT)
    cmd_mock = _build_cmd_mock()

    node = CommandNode(cmd_mock)
    # Route the node's own steps through the command mock, so that a single
    # call list records the order of every step
    steps = cmd_mock._test_node_
    node._create_temp_dir = steps._create_temp_dir
    node._create_temp_dir.return_value = _DUMMY_TEMP
    node._setup = steps._setup
    node._teardown = steps._teardown
    node._remove_temp_dir = steps._remove_temp_dir

    node.run(config)

    expected_calls = [
        call._test_node_._create_temp_dir(config),
        call._test_node_._setup(config, _DUMMY_TEMP),
        call.run(_DUMMY_TEMP),
        call.join(),
        call._test_node_._teardown(config, _DUMMY_TEMP),
        call._test_node_._remove_temp_dir(_DUMMY_TEMP),
    ]
    assert cmd_mock.mock_calls == expected_calls
Пример #30
0
    def _setup(self, config, temp):
        """Create the "uncompressed_input" FIFO in `temp` when
        `self._multi_file_input` is set, then run CommandNode._setup.
        """
        if self._multi_file_input:
            fifo_path = os.path.join(temp, "uncompressed_input")
            os.mkfifo(fifo_path)

        CommandNode._setup(self, config, temp)
Пример #31
0
 def _setup(self, config, temp):
     """Run CommandNode._setup, then create a FIFO named `self._basename`
     in `temp`.
     """
     CommandNode._setup(self, config, temp)

     fifo_path = os.path.join(temp, self._basename)
     os.mkfifo(fifo_path)
Пример #32
0
def test_commandnode_setup__files_missing(kwargs):
    """CommandNode._setup must raise NodeError for a command mock built from
    `kwargs`.
    """
    node = CommandNode(_build_cmd_mock(**kwargs))
    with pytest.raises(NodeError):
        node._setup(None, None)
Пример #33
0
def test_commandnode_setup__files_exist(kwargs):
    """CommandNode._setup must complete without raising for a command mock
    built from `kwargs`.
    """
    node = CommandNode(_build_cmd_mock(**kwargs))
    node._setup(None, None)
Пример #34
0
 def _setup(self, config, temp):
     """Symlink the input alignment and partitions into `temp` as
     "RAxML_alignment" and "RAxML_partitions", then run the base setup.
     """
     alignment_link = os.path.join(temp, "RAxML_alignment")
     os.symlink(os.path.abspath(self._input_alignment), alignment_link)

     partitions_link = os.path.join(temp, "RAxML_partitions")
     os.symlink(os.path.abspath(self._input_partitions), partitions_link)

     CommandNode._setup(self, config, temp)
Пример #35
0
 def _do_test_commandnode_setup(kwargs):
     """Build a command mock from `kwargs`, wrap it in a CommandNode, and
     call the node's _setup with no config and no temp folder.
     """
     node = CommandNode(_build_cmd_mock(**kwargs))
     node._setup(None, None)
Пример #36
0
 def _setup(self, config, temp):
     """Run CommandNode._setup, then symlink
     "Stats_out_MCMC_correct_prob.csv" from `self._directory` into `temp`.
     """
     CommandNode._setup(self, config, temp)

     filename = "Stats_out_MCMC_correct_prob.csv"
     source = os.path.abspath(os.path.join(self._directory, filename))
     os.symlink(source, os.path.join(temp, filename))