Example No. 1
 def test_comments(self):
     input_str = "([the quick]apple[brown],([fox]banjo,([jumps]cucumber[over the],[really]dogwood)[lazy]eggplant)) rhubarb[dog];"
     expected_comments = {
         "apple": ["the quick", "brown"],
         "banjo": ["fox"],
         "cucumber": ["jumps", "over the"],
         "dogwood": ["really"],
         "eggplant": ["lazy"],
         "rhubarb": ["dog"],
     }
     expected_tokens = [
         "(", "apple", ",", "(", "banjo", ",", "(", "cucumber", ",",
         "dogwood", ")", "eggplant", ")", ")", "rhubarb", ";"
     ]
     src = StringIO(input_str)
     observed_tokens = []
     tk = nexusprocessing.NexusTokenizer(src=src)
     for token in tk:
         if token in expected_comments:
             # Bracketed comments are captured as tokens are read and
             # retrieved through pull_captured_comments().
             expected_comment = expected_comments[token]
             observed_comment = tk.pull_captured_comments()
             self.assertEqual(expected_comment, observed_comment)
             del expected_comments[token]
         observed_tokens.append(token)
     self.assertEqual(expected_comments, {})
     self.assertEqual(observed_tokens, expected_tokens)
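A standalone sketch of the same comment-capture pattern outside a test case, assuming the tokenizer comes from DendroPy's dendropy.dataio.nexusprocessing module; the NEWICK string and variable names are illustrative:

 from io import StringIO

 from dendropy.dataio import nexusprocessing

 src = StringIO("([a comment]taxon1,taxon2);")
 tk = nexusprocessing.NexusTokenizer(src=src)
 for token in tk:
     # pull_captured_comments() returns the comments gathered since the
     # last pull; assumed falsy when nothing was captured.
     comments = tk.pull_captured_comments()
     if comments:
         print(token, comments)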
Example No. 2
    def tree_iter(self,
            stream,
            taxon_symbol_mapper,
            tree_factory):
        """
        Iterator that yields trees in NEWICK-formatted source.

        Parameters
        ----------
        stream : file or file-like object
            A file or file-like object opened for reading.
        taxon_symbol_mapper : ``NexusTaxonSymbolMapper``
            Mapping of taxon symbols in the source to operational
            taxonomic units; its ``require_taxon_for_symbol`` method is
            used to resolve each symbol encountered.
        tree_factory : function object
            A function that returns a new |Tree| object when called
            without arguments.

        Returns
        -------
        iter : :py:class:`collections.Iterator` [|Tree|]
            An iterator yielding |Tree| objects constructed based on
            data in ``stream``.
        """
        nexus_tokenizer = nexusprocessing.NexusTokenizer(stream,
                preserve_unquoted_underscores=self.preserve_unquoted_underscores)
        while True:
            tree = self._parse_tree_statement(
                    nexus_tokenizer=nexus_tokenizer,
                    tree_factory=tree_factory,
                    taxon_symbol_map_fn=taxon_symbol_mapper.require_taxon_for_symbol)
            yield tree
            if tree is None:
                # PEP 479: raising StopIteration inside a generator is a
                # RuntimeError on modern Python; a plain return ends the
                # iteration instead.
                return
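A minimal consumption sketch for the iterator above (hedged: the reader, stream, and mapper objects are assumed to be set up elsewhere; Tree is the tree factory also used in Example No. 4). Note that a trailing None is yielded just before termination, so callers filter it out:

 def read_trees(reader, stream, taxon_symbol_mapper):
     # Hypothetical driver: collect every tree yielded by tree_iter,
     # skipping the sentinel None that precedes termination.
     trees = []
     for tree in reader.tree_iter(
             stream=stream,
             taxon_symbol_mapper=taxon_symbol_mapper,
             tree_factory=Tree):
         if tree is not None:
             trees.append(tree)
     return trees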
Example No. 3
 def f():
     for src_path in src_paths:
         if verbose:
             sys.stderr.write("  .. {}\n".format(src_path))
         # "rU" (universal-newline) mode was removed in Python 3.11;
         # plain "r" already performs newline translation.
         with open(src_path, "r") as src:
             nt = nexusprocessing.NexusTokenizer(src)
             # Exhaust the tokenizer to tokenize the entire file.
             for token in nt:
                 pass
Example No. 4
 def f():
     for src_path in src_paths:
         if verbose:
             sys.stderr.write("  .. {}\n".format(src_path))
         # As above, "r" replaces the removed "rU" mode.
         with open(src_path, "r") as src:
             nt = nexusprocessing.NexusTokenizer(src)
             np = newickreader.NewickTreeParser()
             # Parse successive tree statements; a None return signals
             # that the source is exhausted.
             while True:
                 t = np.parse_tree_statement(nt, tree_factory=Tree)
                 if t is None:
                     break
Example No. 5
 def _yield_items_from_stream(self, stream):
     nexus_tokenizer = nexusprocessing.NexusTokenizer(
         stream,
         preserve_unquoted_underscores=self.newick_reader.preserve_unquoted_underscores)
     taxon_symbol_mapper = nexusprocessing.NexusTaxonSymbolMapper(
         taxon_namespace=self.attached_taxon_namespace,
         enable_lookup_by_taxon_number=False,
         case_sensitive=self.newick_reader.case_sensitive_taxon_labels)
     while True:
         tree = self.newick_reader._parse_tree_statement(
             nexus_tokenizer=nexus_tokenizer,
             tree_factory=self.tree_factory,
             taxon_symbol_map_fn=taxon_symbol_mapper.require_taxon_for_symbol)
         if tree is None:
             break
         yield tree
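A hedged driver sketch for the generator above; the yielder object, file name, and per-tree callback are hypothetical stand-ins:

 # Hypothetical: `yielder` is an object exposing _yield_items_from_stream,
 # e.g. a DendroPy-style tree yielder; "trees.newick" is a stand-in path.
 with open("trees.newick") as stream:
     for tree in yielder._yield_items_from_stream(stream):
         handle_tree(tree)  # hypothetical per-tree callback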
Example No. 6
 def check_tokenization(self, input_str, expected_tokens):
     src = StringIO(input_str)
     observed = []
     for token in nexusprocessing.NexusTokenizer(src=src):
         observed.append(token)
     self.assertEqual(observed, expected_tokens)
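A hedged usage sketch for the helper above; the expected tokens mirror the splitting and comment-stripping behavior shown in Example No. 1:

 def test_simple_statement(self):
     # Hypothetical test method built on check_tokenization; bracketed
     # comments are captured by the tokenizer and do not appear as tokens.
     self.check_tokenization(
         "([comment]apple,banjo)rhubarb;",
         ["(", "apple", ",", "banjo", ")", "rhubarb", ";"])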