Exemplo n.º 1
0
def modified_after(younger, older):
    """Return True if any of the files expected to be 'younger' has a
    modification time later than that of any of the files expected to
    be 'older'."""
    newest = max(os.path.getmtime(fname)
                 for fname in safe_coerce_to_frozenset(younger))
    oldest = min(os.path.getmtime(fname)
                 for fname in safe_coerce_to_frozenset(older))

    return newest > oldest
Exemplo n.º 2
0
def modified_after(younger, older):
    """Return True if the most recently changed 'younger' file was
    modified after the least recently changed 'older' file."""
    def _mtimes(filenames):
        return [os.path.getmtime(fname) for fname in filenames]

    younger_mtimes = _mtimes(safe_coerce_to_frozenset(younger))
    older_mtimes = _mtimes(safe_coerce_to_frozenset(older))

    return max(younger_mtimes) > min(older_mtimes)
Exemplo n.º 3
0
    def __init__(self, nodes, cache_factory=FileStatusCache):
        """Build the reverse-dependency graph for 'nodes' and run the
        start-up checks (file dependencies, required executables, and
        version requirements), then determine initial node states.

        cache_factory -- factory for file-status caches; stored for
                         later use (defaults to FileStatusCache).
        """
        self._cache_factory = cache_factory
        # Observers notified when node states change
        self._state_observers = []
        # node -> current state; populated by refresh_states()
        self._states = {}

        nodes = safe_coerce_to_frozenset(nodes)

        self._logger = logging.getLogger(__name__)
        # node -> set of nodes that depend on it (reverse edges)
        self._reverse_dependencies = collections.defaultdict(set)
        self._collect_reverse_dependencies(nodes, self._reverse_dependencies,
                                           set())
        self._intersections = {}
        # Top nodes are those no other node depends on
        # NOTE: dict.iteritems is Python 2 only
        self._top_nodes = [
            node
            for (node, rev_deps) in self._reverse_dependencies.iteritems()
            if not rev_deps
        ]

        self._logger.info("  - Checking file dependencies ...")
        self._check_file_dependencies(self._reverse_dependencies)
        self._logger.info("  - Checking for required executables ...")
        self._check_required_executables(self._reverse_dependencies)
        self._logger.info("  - Checking version requirements ...")
        self._check_version_requirements(self._reverse_dependencies)
        self._logger.info("  - Determining states ...")
        self.refresh_states()
        self._logger.info("  - Ready ...\n")
Exemplo n.º 4
0
 def _validate_requirements(cls, requirements):
     """Coerce 'requirements' to a frozenset, rejecting any entry
     that is not callable."""
     requirements = safe_coerce_to_frozenset(requirements)
     for entry in requirements:
         if not isinstance(entry, collections.Callable):
             raise TypeError("'requirements' must be callable, not %r"
                             % (type(entry),))
     return requirements
Exemplo n.º 5
0
 def _validate_files(cls, files):
     """Coerce 'files' to a frozenset, verifying that every entry is
     a string."""
     files = safe_coerce_to_frozenset(files)
     for fname in files:
         if isinstance(fname, types.StringTypes):
             continue
         raise TypeError('Files must be strings, not %r' %
                         fname.__class__.__name__)
     return files
Exemplo n.º 6
0
    def __init__(self, fasta_files, sequences, destination, dependencies=()):
        """Collects the named sequences from a set of FASTA files into
        per-sequence FASTA files under 'destination'.

        fasta_files -- { taxon_name_1 : filename_1, ... }
        sequences   -- { interval_name_1, ... }
        destination -- output folder; one '<name>.fasta' per sequence.
        """

        self._infiles = copy.deepcopy(fasta_files)
        self._sequences = utilities.safe_coerce_to_frozenset(sequences)
        self._destination = copy.copy(destination)
        # One output FASTA per collected sequence name
        self._outfiles = [
            os.path.join(destination, name + ".fasta")
            for name in self._sequences
        ]

        # Inputs are the FASTA files plus their ".fai" index files
        # NOTE: dict.itervalues is Python 2 only
        input_files = list(self._infiles.itervalues())
        for filename in self._infiles.itervalues():
            input_files.append(filename + ".fai")

        desc = "<CollectSequences: %i sequences from %i files -> '%s'>" \
               % (len(self._sequences), len(self._infiles), self._destination)
        Node.__init__(self,
                      description=desc,
                      input_files=input_files,
                      output_files=self._outfiles,
                      dependencies=dependencies)
Exemplo n.º 7
0
    def __init__(self, control_file, sequence_file, trees_file, output_tar, exclude_groups = (), dependencies = ()):
        """Runs codeml on the given control/sequence/trees files and
        collects all codeml output files into the tarball 'output_tar'.
        The two commands are executed sequentially in a temp dir."""
        self._exclude_groups = safe_coerce_to_frozenset(exclude_groups)
        self._control_file   = control_file
        self._sequence_file  = sequence_file
        self._trees_file     = trees_file

        # codeml reads a generated "template.ctl" in the working dir;
        # all of its outputs are produced as temp files.
        paml_cmd = AtomicCmd(["codeml", "template.ctl"],
                             IN_CONTROL_FILE  = control_file,
                             IN_SEQUENCE_FILE = sequence_file,
                             IN_TREES_FILE    = trees_file,
                             TEMP_OUT_CTL     = "template.ctl",
                             TEMP_OUT_SEQS    = "template.seqs",
                             TEMP_OUT_TREES   = "template.trees",
                             TEMP_OUT_STDOUT  = "template.stdout",
                             TEMP_OUT_STDERR  = "template.stderr",
                             TEMP_OUT_4FOLD   = "4fold.nuc",
                             IN_STDIN         = "/dev/null", # Prevent prompts from blocking
                             set_cwd          = True,
                             **CodemlNode._get_codeml_files("TEMP_OUT_CODEML"))

        # Tar/gzip the codeml output files into 'output_tar'; the file
        # list is built from AtomicCmd %(KEY)s placeholders.
        tar_pairs = CodemlNode._get_codeml_files("TEMP_IN_CODEML")
        tar_files = ["%%(%s)s" % (key,) for key in tar_pairs]
        tar_cmd  = AtomicCmd(["tar", "cvzf", "%(OUT_FILE)s"] + tar_files,
                             OUT_FILE = output_tar,
                             set_cwd  = True,
                             **tar_pairs)

        CommandNode.__init__(self,
                             description  = "<CodemlNode: %r -> %r>" % (sequence_file, output_tar),
                             command      = SequentialCmds([paml_cmd, tar_cmd]),
                             dependencies = dependencies)
Exemplo n.º 8
0
 def _validate_requirements(cls, requirements):
     """Validate that every requirement is callable; returns the
     requirements as a frozenset."""
     requirements = safe_coerce_to_frozenset(requirements)
     for requirement in requirements:
         if isinstance(requirement, collections.Callable):
             continue
         raise TypeError("'requirements' must be callable, not %r"
                         % (type(requirement),))
     return requirements
Exemplo n.º 9
0
    def __init__(self, infiles, out_prefix, exclude_groups=(), reduce=False,
                 dependencies=(), file_dependencies=()):
        """Converts FASTA files into a partitioned PHYLIP file pair
        ('<out_prefix>.phy' and '<out_prefix>.partitions').

        infiles = {names : {"partitions" : ..., "filenames" : [...]}}
        """
        if not (isinstance(infiles, dict)
                and all(isinstance(dd, dict) for dd in infiles.values())):
            raise TypeError("'infiles' must be a dictionary of dictionaries")

        input_filenames = []
        # NOTE: dict.iteritems is Python 2 only
        for (name, subdd) in infiles.iteritems():
            if set(subdd) - _VALID_KEYS:
                raise ValueError("Invalid keys found for %r: %s"
                                 % (name, ", ".join(set(subdd) - _VALID_KEYS)))
            elif not isinstance(subdd["filenames"], list):
                raise ValueError("filenames must be a list of strings")
            input_filenames.extend(subdd["filenames"])
        # Optional file dependencies; used to depend on the list of sequences
        input_filenames.extend(safe_coerce_to_tuple(file_dependencies))

        self._reduce = bool(reduce)
        self._infiles = copy.deepcopy(infiles)
        self._out_prefix = out_prefix
        self._excluded = safe_coerce_to_frozenset(exclude_groups)

        description = "<FastaToPartitionedPhy%s: %i file(s) -> '%s.*'>" % \
            (" (reducing)" if reduce else "", len(infiles), out_prefix)

        Node.__init__(self,
                      description=description,
                      input_files=input_filenames,
                      output_files=[out_prefix + ".phy",
                                    out_prefix + ".partitions"],
                      dependencies=dependencies)
Exemplo n.º 10
0
def missing_files(filenames):
    """Return the (unique) filenames that do not exist on disk; makes
    no distinction between files and directories."""
    return [filename
            for filename in safe_coerce_to_frozenset(filenames)
            if not os.path.exists(filename)]
Exemplo n.º 11
0
def missing_files(filenames):
    """Given filenames, return those that are absent from the
    filesystem; folders and files are treated alike."""
    missing = []
    for fname in safe_coerce_to_frozenset(filenames):
        if os.path.exists(fname):
            continue
        missing.append(fname)
    return missing
Exemplo n.º 12
0
    def __init__(self, input_file, output_file, filter_by, dependencies):
        """Filters singletons from 'input_file' into 'output_file'.

        filter_by -- {taxon_to_filter: group_of_taxa, ...}; each taxon
        is filtered relative to its group, from which the taxon itself
        is removed below.
        """
        self._input_file = input_file
        self._output_file = output_file
        self._filter_by = dict(filter_by)
        for (to_filter, groups) in self._filter_by.items():
            # The taxa to be filtered is implied to be part of the group,
            # but is not needed when actually carrying out the filtering
            groups = utilities.safe_coerce_to_frozenset(groups) - utilities.safe_coerce_to_frozenset(to_filter)

            if not groups:
                raise RuntimeError("Singleton filtering must involve at least " "one other taxa")
            self._filter_by[to_filter] = groups

        Node.__init__(
            self,
            description="<FilterSingleton: '%s' -> '%s'>" % (input_file, output_file),
            input_files=[input_file],
            output_files=[output_file],
            dependencies=dependencies,
        )
Exemplo n.º 13
0
    def reroot_on_taxa(self, taxa):
        """Re-root the tree using the named taxa as the outgroup."""
        taxa = safe_coerce_to_frozenset(taxa)
        if not taxa:
            raise ValueError("No taxa in outgroup")

        clades = self._collect_clades()
        outgroup_nodes = self._collect_nodes_from_names(taxa)
        # Because None is the id of the root atm: # pylint: disable=W1111
        new_root = self._create_root_with_clade(clades, outgroup_nodes)

        return self.rebuild_tree(new_root, new_root)
Exemplo n.º 14
0
    def reroot_on_taxa(self, taxa):
        """Re-root this tree on the clade spanned by 'taxa'."""
        outgroup = safe_coerce_to_frozenset(taxa)
        if not outgroup:
            raise ValueError("No taxa in outgroup")

        # Because None is the id of the root atm: # pylint: disable=W1111
        root = self._create_root_with_clade(
            self._collect_clades(),
            self._collect_nodes_from_names(outgroup))

        return self.rebuild_tree(root, root)
Exemplo n.º 15
0
    def __init__(self, input_file, output_file, filter_by, dependencies):
        """Filters singletons from 'input_file' into 'output_file'.

        filter_by -- {taxon_to_filter: group_of_taxa, ...}; the taxon
        itself is stripped from its group below, as it is implied.
        """
        self._input_file = input_file
        self._output_file = output_file
        self._filter_by = dict(filter_by)
        for (to_filter, groups) in self._filter_by.items():
            # The taxa to be filtered is implied to be part of the group,
            # but is not needed when actually carrying out the filtering
            groups = utilities.safe_coerce_to_frozenset(groups) \
                - utilities.safe_coerce_to_frozenset(to_filter)

            if not groups:
                raise RuntimeError("Singleton filtering must involve at least "
                                   "one other taxa")
            self._filter_by[to_filter] = groups

        Node.__init__(self,
                      description="<FilterSingleton: '%s' -> '%s'>" %
                      (input_file, output_file),
                      input_files=[input_file],
                      output_files=[output_file],
                      dependencies=dependencies)
Exemplo n.º 16
0
    def _collect_nodes(self, nodes, description):
        """Coerce 'nodes' to a frozenset, verifying that every entry is
        a Node; None yields an empty frozenset."""
        if nodes is None:
            return frozenset()

        nodes = safe_coerce_to_frozenset(nodes)
        non_nodes = [repr(item) for item in nodes
                     if not isinstance(item, Node)]

        if non_nodes:
            raise TypeError(
                "%s-list contain non-Node objects:\n\t- Command: %s\n\t- Objects: %s"
                % (description, self, "\n\t           ".join(non_nodes)))

        return nodes
Exemplo n.º 17
0
    def _collect_nodes(self, nodes, description):
        """Return 'nodes' as a frozenset; raises TypeError if any entry
        is not a Node instance. A None argument yields an empty set."""
        if nodes is None:
            return frozenset()

        nodes = safe_coerce_to_frozenset(nodes)
        invalid = [node for node in nodes if not isinstance(node, Node)]

        if invalid:
            listing = "\n\t           ".join(repr(node) for node in invalid)
            raise TypeError("%s-list contain non-Node objects:\n\t- Command: %s\n\t- Objects: %s"
                            % (description, self, listing))

        return nodes
Exemplo n.º 18
0
    def __init__(self, nodes):
        """Build the reverse-dependency graph for 'nodes', check file
        and executable requirements (progress printed to STDERR), and
        determine initial node states."""
        nodes = safe_coerce_to_frozenset(nodes)
        # node -> set of nodes that depend on it (reverse edges)
        self._reverse_dependencies = collections.defaultdict(set)
        self._collect_reverse_dependencies(nodes, self._reverse_dependencies)
        self._intersections = self._calculate_intersections()
        # Top nodes are those no other node depends on
        # NOTE: dict.iteritems is Python 2 only
        self._top_nodes = [node for (node, rev_deps) in self._reverse_dependencies.iteritems() if not rev_deps]

        ui.print_info("  - Checking file dependencies ...", file = sys.stderr)
        self._check_file_dependencies(self._reverse_dependencies)
        ui.print_info("  - Checking for required executables ...", file = sys.stderr)
        self._check_required_executables(self._reverse_dependencies)
        ui.print_info("", file = sys.stderr)

        # node -> current state; populated by refresh_states()
        self._states = {}
        self.refresh_states()
Exemplo n.º 19
0
    def _group(self, selection, extra = None):
        """Partition the records into (included, excluded, other).

        selection -- names of records to include; must all be present
                     in self.names().
        extra     -- optional name returned separately as 'other';
                     must not also appear in 'selection'.
        """
        selection = safe_coerce_to_frozenset(selection)
        if (extra in selection):
            raise MSAError("Key used for multiple selections: %r" % extra)
        elif not selection:
            raise ValueError("No FASTA names given")

        # Every selected name must exist among the records
        missing_keys = selection - self.names()
        if missing_keys:
            raise KeyError("Key(s) not found: %r" % (", ".join(map(str, missing_keys))))

        included, excluded, other = [], [], None
        for record in self:
            if record.name in selection:
                included.append(record)
            elif record.name != extra:
                excluded.append(record)
            else:
                other = record

        return included, excluded, other
Exemplo n.º 20
0
    def __init__(self,
                 infiles,
                 out_prefix,
                 exclude_groups=(),
                 reduce=False,
                 dependencies=(),
                 file_dependencies=()):
        """Converts FASTA files into a partitioned PHYLIP file pair
        ('<out_prefix>.phy' and '<out_prefix>.partitions').

        infiles = {names : {"partitions" : ..., "filenames" : [...]}}
        """
        if not (isinstance(infiles, dict)
                and all(isinstance(dd, dict) for dd in infiles.values())):
            raise TypeError("'infiles' must be a dictionary of dictionaries")

        input_filenames = []
        # NOTE: dict.iteritems is Python 2 only
        for (name, subdd) in infiles.iteritems():
            if set(subdd) - _VALID_KEYS:
                raise ValueError("Invalid keys found for %r: %s" %
                                 (name, ", ".join(set(subdd) - _VALID_KEYS)))
            elif not isinstance(subdd["filenames"], list):
                raise ValueError("filenames must be a list of strings")
            input_filenames.extend(subdd["filenames"])
        # Optional file dependencies; used to depend on the list of sequences
        input_filenames.extend(safe_coerce_to_tuple(file_dependencies))

        self._reduce = bool(reduce)
        self._infiles = copy.deepcopy(infiles)
        self._out_prefix = out_prefix
        self._excluded = safe_coerce_to_frozenset(exclude_groups)

        description = "<FastaToPartitionedPhy%s: %i file(s) -> '%s.*'>" % \
            (" (reducing)" if reduce else "", len(infiles), out_prefix)

        Node.__init__(
            self,
            description=description,
            input_files=input_filenames,
            output_files=[out_prefix + ".phy", out_prefix + ".partitions"],
            dependencies=dependencies)
Exemplo n.º 21
0
    def __init__(self, nodes, cache_factory=FileStatusCache):
        """Build the reverse-dependency graph for 'nodes', run the
        start-up checks (files, executables, version requirements),
        and determine initial node states."""
        self._cache_factory = cache_factory
        # Observers notified when node states change
        self._state_observers = []
        # node -> current state; populated by refresh_states()
        self._states = {}

        nodes = safe_coerce_to_frozenset(nodes)

        self._logger = logging.getLogger(__name__)
        # node -> set of nodes that depend on it (reverse edges)
        self._reverse_dependencies = collections.defaultdict(set)
        self._collect_reverse_dependencies(nodes, self._reverse_dependencies, set())
        self._intersections = {}
        # Top nodes are those no other node depends on (iteritems: Python 2 only)
        self._top_nodes = [node for (node, rev_deps) in self._reverse_dependencies.iteritems() if not rev_deps]

        self._logger.info("  - Checking file dependencies ...")
        self._check_file_dependencies(self._reverse_dependencies)
        self._logger.info("  - Checking for required executables ...")
        self._check_required_executables(self._reverse_dependencies)
        self._logger.info("  - Checking version requirements ...")
        self._check_version_requirements(self._reverse_dependencies)
        self._logger.info("  - Determining states ...")
        self.refresh_states()
        self._logger.info("  - Ready ...\n")
Exemplo n.º 22
0
    def __init__(self, fasta_files, sequences, destination, dependencies=()):
        """Collects the named sequences from a set of FASTA files into
        per-sequence FASTA files under 'destination'.

        fasta_files -- { taxon_name_1 : filename_1, ... }
        sequences   -- { interval_name_1, ... }
        """

        self._infiles = copy.deepcopy(fasta_files)
        self._sequences = utilities.safe_coerce_to_frozenset(sequences)
        self._destination = copy.copy(destination)
        # One output FASTA per collected sequence name
        self._outfiles = [os.path.join(destination, name + ".fasta") for name in self._sequences]

        # Inputs: each FASTA plus its ".fai" index (itervalues: Python 2 only)
        input_files = list(self._infiles.itervalues())
        for filename in self._infiles.itervalues():
            input_files.append(filename + ".fai")

        desc = "<CollectSequences: %i sequences from %i files -> '%s'>" % (
            len(self._sequences),
            len(self._infiles),
            self._destination,
        )
        Node.__init__(
            self, description=desc, input_files=input_files, output_files=self._outfiles, dependencies=dependencies
        )
Exemplo n.º 23
0
def test_safe_coerce_to_frozenset__tuple():
    # Tuples are consumed element-wise into a frozenset
    result = utils.safe_coerce_to_frozenset((1, 3, 2))
    assert_equal(result, frozenset([1, 3, 2]))
Exemplo n.º 24
0
def test_safe_coerce_to_frozenset__list():
    # Lists are consumed element-wise into a frozenset
    result = utils.safe_coerce_to_frozenset([1, 3, 2])
    assert_equal(result, frozenset([1, 3, 2]))
Exemplo n.º 25
0
def test_safe_coerce_to_frozenset__int():
    # A bare int is wrapped in a single-element frozenset
    expected = frozenset([17])
    assert_equal(utils.safe_coerce_to_frozenset(17), expected)
Exemplo n.º 26
0
def test_safe_coerce_to_frozenset__unicode():
    # Unicode strings are treated as atomic, not iterated char-by-char
    result = utils.safe_coerce_to_frozenset(u"foo")
    assert_equal(result, frozenset([u"foo"]))
Exemplo n.º 27
0
def test_safe_coerce_to_frozenset__str():
    # Byte strings are treated as atomic, not iterated char-by-char
    result = utils.safe_coerce_to_frozenset("foo")
    assert_equal(result, frozenset(["foo"]))
Exemplo n.º 28
0
 def _validate_files(cls, files):
     """Return 'files' as a frozenset; raises TypeError if any entry
     is not a string."""
     files = safe_coerce_to_frozenset(files)
     for entry in files:
         if not isinstance(entry, types.StringTypes):
             raise TypeError('Files must be strings, not %r'
                             % entry.__class__.__name__)
     return files
Exemplo n.º 29
0
def test_safe_coerce_to_frozenset__iterable():
    # Arbitrary iterables are consumed into a frozenset
    result = utils.safe_coerce_to_frozenset(xrange(3))
    assert_equal(result, frozenset([0, 1, 2]))
Exemplo n.º 30
0
def missing_executables(filenames):
    """Return the (unique) filenames for which no matching executable
    can be found."""
    return [fname
            for fname in safe_coerce_to_frozenset(filenames)
            if not executable_exists(fname)]
Exemplo n.º 31
0
def test_safe_coerce_to_frozenset__tuple():
    # Tuple elements become frozenset members
    coerced = utils.safe_coerce_to_frozenset((1, 3, 2))
    assert_equal(coerced, frozenset({1, 2, 3}))
Exemplo n.º 32
0
def test_safe_coerce_to_frozenset__unicode():
    # A unicode string coerces to a one-element frozenset
    coerced = utils.safe_coerce_to_frozenset(u"foo")
    assert_equal(coerced, frozenset({u"foo"}))
Exemplo n.º 33
0
def test_safe_coerce_to_frozenset__iterable():
    # A generator-like iterable is fully consumed
    coerced = utils.safe_coerce_to_frozenset(xrange(3))
    assert_equal(coerced, frozenset({0, 1, 2}))
Exemplo n.º 34
0
def missing_executables(filenames):
    """Given executable names, return those that cannot be located."""
    missing = []
    for fname in safe_coerce_to_frozenset(filenames):
        if executable_exists(fname):
            continue
        missing.append(fname)
    return missing
Exemplo n.º 35
0
def test_safe_coerce_to_frozenset__dict():
    # Only verifies that coercing a dict does not raise
    utils.safe_coerce_to_frozenset({1: 2, 3: 4})
Exemplo n.º 36
0
def test_safe_coerce_to_frozenset__int():
    # Integers coerce to a single-element frozenset
    coerced = utils.safe_coerce_to_frozenset(17)
    assert_equal(coerced, frozenset({17}))
Exemplo n.º 37
0
 def _do_test_constructor__single_value(key, value):
     """A Node built with one overridden keyword must expose the
     frozenset-coerced value under that attribute."""
     kwargs = dict(input_files=_EMPTY_FILE)
     kwargs[key] = value
     result = Node(**kwargs)
     expected = safe_coerce_to_frozenset(value)
     assert_equal(getattr(result, key), expected)
Exemplo n.º 38
0
def test_safe_coerce_to_frozenset__dict():
    # Coercing a dict must not raise; the result is not inspected
    mapping = {1: 2, 3: 4}
    utils.safe_coerce_to_frozenset(mapping)
Exemplo n.º 39
0
def test_safe_coerce_to_frozenset__list():
    # List elements become frozenset members
    coerced = utils.safe_coerce_to_frozenset([1, 3, 2])
    assert_equal(coerced, frozenset({1, 2, 3}))
Exemplo n.º 40
0
 def _do_test_constructor__single_value(key, value):
     """A Node built with a single keyword must expose the
     frozenset-coerced value under that attribute."""
     constructed = Node(**{key: value})
     assert_equal(getattr(constructed, key),
                  safe_coerce_to_frozenset(value))
Exemplo n.º 41
0
 def _do_test_constructor__single_value(key, value):
     """Construct a Node with the default input_files plus a single
     overridden keyword, and check the coerced attribute value."""
     kwargs = {"input_files": _EMPTY_FILE}
     kwargs[key] = value
     constructed = Node(**kwargs)
     assert_equal(getattr(constructed, key),
                  safe_coerce_to_frozenset(value))
Exemplo n.º 42
0
def test_safe_coerce_to_frozenset__str():
    # A byte string coerces to a one-element frozenset
    coerced = utils.safe_coerce_to_frozenset("foo")
    assert_equal(coerced, frozenset({"foo"}))