Exemplo n.º 1
0
def setup_module():
    """Fill the module-level tree file path and reference tables.

    For every schema in ``schema_extension_map``, the source path of each
    titled tree file is stored in ``_TREE_FILEPATHS[schema][title]``. The
    "json" file of each title is then loaded into ``_TREE_REFERENCES``, and
    for titles containing "annotated" the "nexus-metadata-comments" file is
    loaded into ``_NEXUS_METADATA_COMMENTS``.
    """
    for schema in schema_extension_map:
        extension = schema_extension_map[schema]
        paths_by_title = {}
        _TREE_FILEPATHS[schema] = paths_by_title
        for title in tree_file_titles:
            filename = "{}.{}".format(title, extension)
            paths_by_title[title] = pathmap.tree_source_path(filename)
    for title in tree_file_titles:
        with open(_TREE_FILEPATHS["json"][title]) as reference_src:
            _TREE_REFERENCES[title] = json.load(reference_src)
        if "annotated" not in title:
            continue
        comments_path = _TREE_FILEPATHS["nexus-metadata-comments"][title]
        with open(comments_path) as comments_src:
            _NEXUS_METADATA_COMMENTS[title] = json.load(comments_src)
 def test_out_of_range_collection_offset_newick_get(self):
     """Expect ``IndexError`` when ``collection_offset=1`` is requested.

     The same Newick source is offered as a file path, an open stream and
     a string, and each ``dendropy.Tree`` getter must raise.
     """
     title = 'dendropy-test-trees-n33-unrooted-x10a'
     newick_path = self.schema_tree_filepaths[title]
     with open(newick_path, "r") as handle:
         newick_text = handle.read()
     with open(newick_path, "r") as newick_stream:
         getters_and_sources = (
             (dendropy.Tree.get_from_path, newick_path),
             (dendropy.Tree.get_from_stream, newick_stream),
             (dendropy.Tree.get_from_string, newick_text),
         )
         for getter, source in getters_and_sources:
             self.assertRaises(
                 IndexError,
                 getter,
                 source,
                 "newick",
                 collection_offset=1,
                 tree_offset=0)
 def test_out_of_range_collection_offset_newick_get(self):
     """Check that asking for ``collection_offset=1`` raises ``IndexError``.

     Exercised once per access route: by path, by stream and by string.
     """
     tree_filepath = self.schema_tree_filepaths[
         'dendropy-test-trees-n33-unrooted-x10a']
     with open(tree_filepath, "r") as text_src:
         tree_string = text_src.read()
     offsets = {"collection_offset": 1, "tree_offset": 0}
     with open(tree_filepath, "r") as tree_stream:
         for getter, source in (
                 (dendropy.Tree.get_from_path, tree_filepath),
                 (dendropy.Tree.get_from_stream, tree_stream),
                 (dendropy.Tree.get_from_string, tree_string)):
             with self.assertRaises(IndexError):
                 getter(source, "newick", **offsets)
 def test_out_of_range_tree_offset_newick_get(self):
     """Expect ``IndexError`` for a tree offset equal to the tree count.

     The count is the "num_trees" entry of the reference data for the
     file. The request is made by path, by stream and by string.
     """
     title = 'dendropy-test-trees-n33-unrooted-x10a'
     newick_path = self.schema_tree_filepaths[title]
     tree_count = standard_file_test_trees._TREE_REFERENCES[title]["num_trees"]
     with open(newick_path, "r") as handle:
         newick_text = handle.read()
     with open(newick_path, "r") as newick_stream:
         getters_and_sources = (
             (dendropy.Tree.get_from_path, newick_path),
             (dendropy.Tree.get_from_stream, newick_stream),
             (dendropy.Tree.get_from_string, newick_text),
         )
         for getter, source in getters_and_sources:
             self.assertRaises(
                 IndexError,
                 getter,
                 source,
                 "newick",
                 collection_offset=0,
                 tree_offset=tree_count)
def get_splits_reference(splits_filename, splits_dir=None, key_column_index=0):
    """Load a tab-delimited splits reference file into an ordered mapping.

    Every data row must hold exactly five tab-separated columns:

        0   : PAUP* bipartition string representation '....**...' etc.
        1   : unnormalized split bitmask (for rooted trees) == leafset_bitmask for all trees and split_bitmask for rooted trees
        2   : normalized split bitmask (for unrooted trees) == split_bitmask for unrooted trees
        3   : (weighted) counts
        4   : (weighted) frequencies

    Anything after a "#" on a row is ignored. Rows that are empty or hold
    only whitespace once the comment is removed are skipped.

    Parameters
    ----------
    splits_filename : str
        Name of the splits file.
    splits_dir : str, optional
        Directory holding the file. When ``None`` the path is resolved with
        ``pathmap.splits_source_path``.
    key_column_index : int
        Index of the (type-converted) column used as the mapping key.

    Returns
    -------
    collections.OrderedDict
        Maps each row key, in file order, to a dict with the entries
        "bipartition_string", "unnormalized_split_bitmask",
        "normalized_split_bitmask", "count" and "frequency" (column 4
        divided by 100).

    Raises
    ------
    AssertionError
        If a data row does not have exactly five columns.
    """
    if splits_dir is not None:
        splits_filepath = os.path.join(splits_dir, splits_filename)
    else:
        splits_filepath = pathmap.splits_source_path(splits_filename)
    d = collections.OrderedDict()
    with open(splits_filepath, "r") as src:
        for row in src:
            content = row.split("#")[0]
            # A bare newline or indented comment leaves only whitespace,
            # which is truthy; test the stripped text so such rows are
            # skipped instead of tripping the column-count assertion.
            if not content.strip():
                continue
            fields = content.split("\t")
            assert len(fields) == 5, "{}: {}".format(content, fields)
            fields = [
                _SPLITS_REFERENCE_FIELD_TYPES[idx](raw)
                for idx, raw in enumerate(fields)
            ]
            d[fields[key_column_index]] = {
                "bipartition_string": fields[0],
                "unnormalized_split_bitmask": fields[1],
                "normalized_split_bitmask": fields[2],
                "count": fields[3],
                "frequency": fields[4] / 100,
            }
    return d
Exemplo n.º 6
0
def get_splits_reference(
        splits_filename,
        splits_dir=None,
        key_column_index=0):
    """Read a splits reference table and index its rows by one column.

    The file is tab-delimited with five columns per data row:

        0   : PAUP* bipartition string representation '....**...' etc.
        1   : unnormalized split bitmask (for rooted trees) == leafset_bitmask for all trees and split_bitmask for rooted trees
        2   : normalized split bitmask (for unrooted trees) == split_bitmask for unrooted trees
        3   : (weighted) counts
        4   : (weighted) frequencies

    Text from a "#" to the end of a row is discarded, and rows with no
    non-whitespace text left are ignored.

    Parameters
    ----------
    splits_filename : str
        Name of the splits file.
    splits_dir : str, optional
        Directory containing the file; if ``None``,
        ``pathmap.splits_source_path`` supplies the full path.
    key_column_index : int
        Column whose converted value becomes the key of each row.

    Returns
    -------
    collections.OrderedDict
        One entry per data row, in file order. Each value is a dict with
        keys "bipartition_string", "unnormalized_split_bitmask",
        "normalized_split_bitmask", "count" and "frequency", where
        "frequency" is column 4 divided by 100.

    Raises
    ------
    AssertionError
        If a data row does not split into exactly five columns.
    """
    if splits_dir is None:
        splits_filepath = pathmap.splits_source_path(splits_filename)
    else:
        splits_filepath = os.path.join(splits_dir, splits_filename)
    d = collections.OrderedDict()
    with open(splits_filepath, "r") as src:
        for row in src:
            content = row.split("#")[0]
            if content.strip() == "":
                # Blank rows and rows holding only a comment carry no
                # data; the previous emptiness test let whitespace-only
                # rows through to the assertion below.
                continue
            fields = content.split("\t")
            assert len(fields) == 5, "{}: {}".format(content, fields)
            for idx in range(len(fields)):
                fields[idx] = _SPLITS_REFERENCE_FIELD_TYPES[idx](fields[idx])
            key = fields[key_column_index]
            d[key] = {
                "bipartition_string": fields[0],
                "unnormalized_split_bitmask": fields[1],
                "normalized_split_bitmask": fields[2],
                "count": fields[3],
                "frequency": fields[4] / 100,
            }
    return d
 def test_basic_parsing(self):
     """Parse the curated Newick string under every suppression setting.

     ``suppress_internal_node_taxa``, ``suppress_leaf_node_taxa`` and
     ``suppress_edge_lengths`` are each tried as omitted (``None``),
     ``False`` and ``True``. For an omitted keyword the test expects
     ``True`` for internal node taxa and ``False`` for the other two.
     Every combination is read with ``dendropy.Tree.get`` by path, by open
     file and by string, then checked with ``verify_curated_tree``.
     """
     newick_text = self.get_newick_string()
     tri_state = (None, False, True)
     with pathmap.SandboxedFile() as sandbox:
         sandbox.write(newick_text)
         sandbox.flush()
         sandbox_path = sandbox.name
         for internal_opt in tri_state:
             for leaf_opt in tri_state:
                 for edge_opt in tri_state:
                     requested = (
                         ("suppress_internal_node_taxa", internal_opt),
                         ("suppress_leaf_node_taxa", leaf_opt),
                         ("suppress_edge_lengths", edge_opt),
                     )
                     # Omitted options are left out of the call entirely.
                     reader_kwargs = dict(
                         (name, value) for name, value in requested
                         if value is not None)
                     expected = {
                         "suppress_internal_node_taxa":
                             True if internal_opt is None else internal_opt,
                         "suppress_leaf_node_taxa":
                             False if leaf_opt is None else leaf_opt,
                         "suppress_edge_lengths":
                             False if edge_opt is None else edge_opt,
                     }
                     with open(sandbox_path, "r") as tree_stream:
                         sources = (
                             {"path": sandbox_path},
                             {"file": tree_stream},
                             {"data": newick_text},
                         )
                         for call_kwargs in sources:
                             call_kwargs.update(reader_kwargs)
                             call_kwargs["schema"] = "newick"
                             tree = dendropy.Tree.get(**call_kwargs)
                             self.verify_curated_tree(tree, **expected)
 def test_tree_offset_newick_get(self):
     """Fetch single trees at assorted offsets and compare to the reference.

     Twelve distinct offsets are used: the four boundary values (0,
     n - 1, -1, -n, where n is the reference "num_trees"), then random
     positive offsets until there are eight, then random negative offsets
     until there are twelve. Negative offsets are converted to a
     non-negative index before the reader is called; those reaching
     before the first tree become 0. Each offset is read by path, by
     stream and by string.
     """
     tree_file_title = "dendropy-test-trees-n33-unrooted-x100a"
     # The reference lookup does not depend on the offset, so do it once.
     tree_reference = standard_file_test_trees._TREE_REFERENCES[
         tree_file_title]
     expected_number_of_trees = tree_reference["num_trees"]
     tree_offsets = set(
         [0, expected_number_of_trees - 1, -1, -expected_number_of_trees])
     while len(tree_offsets) < 8:
         tree_offsets.add(random.randint(1, expected_number_of_trees - 2))
     while len(tree_offsets) < 12:
         tree_offsets.add(random.randint(-expected_number_of_trees - 2, -2))
     tree_filepath = self.schema_tree_filepaths[tree_file_title]
     with open(tree_filepath, "r") as src:
         tree_string = src.read()
     reader_kwargs = {
         "collection_offset": 0,
         "suppress_internal_node_taxa": True,
         "suppress_leaf_node_taxa": False,
         "rooting": "default-unrooted",
     }
     for tree_offset in tree_offsets:
         if tree_offset < 0:
             # Count back from the end, clamping at the first tree.
             tree_offset = max(0, expected_number_of_trees + tree_offset)
         # A fresh stream is needed for every offset because the
         # previous read consumed it.
         with open(tree_filepath, "r") as tree_stream:
             approaches = (
                 (dendropy.Tree.get_from_path, tree_filepath),
                 (dendropy.Tree.get_from_stream, tree_stream),
                 (dendropy.Tree.get_from_string, tree_string),
             )
             for method, src in approaches:
                 tree = method(src,
                               "newick",
                               tree_offset=tree_offset,
                               **reader_kwargs)
                 self.compare_to_reference_by_title_and_index(
                     tree=tree,
                     tree_file_title=tree_file_title,
                     reference_tree_idx=tree_offset)
 def test_out_of_range_tree_offset_newick_get(self):
     """Check that a tree offset one past the last index raises.

     The offset passed is the reference "num_trees" value, and
     ``IndexError`` is required from the path, stream and string getters.
     """
     tree_file_title = 'dendropy-test-trees-n33-unrooted-x10a'
     tree_filepath = self.schema_tree_filepaths[tree_file_title]
     num_trees = standard_file_test_trees._TREE_REFERENCES[
         tree_file_title]["num_trees"]
     with open(tree_filepath, "r") as text_src:
         tree_string = text_src.read()
     offsets = {"collection_offset": 0, "tree_offset": num_trees}
     with open(tree_filepath, "r") as tree_stream:
         for getter, source in (
                 (dendropy.Tree.get_from_path, tree_filepath),
                 (dendropy.Tree.get_from_stream, tree_stream),
                 (dendropy.Tree.get_from_string, tree_string)):
             with self.assertRaises(IndexError):
                 getter(source, "newick", **offsets)
 def test_tree_offset_without_collection_offset_newick_get(self):
     """Read the tree at offset 2 without passing ``collection_offset``.

     The tree is fetched by path, by stream and by string, and each
     result is compared with reference tree index 2 of the same file.
     """
     tree_file_title = 'dendropy-test-trees-n33-unrooted-x10a'
     tree_filepath = self.schema_tree_filepaths[tree_file_title]
     # The offset is the same for every approach, so set it once.
     tree_offset = 2
     with open(tree_filepath, "r") as src:
         tree_string = src.read()
     with open(tree_filepath, "r") as tree_stream:
         approaches = (
                 (dendropy.Tree.get_from_path, tree_filepath),
                 (dendropy.Tree.get_from_stream, tree_stream),
                 (dendropy.Tree.get_from_string, tree_string),
                 )
         for method, source in approaches:
             tree = method(source, "newick", tree_offset=tree_offset)
             self.compare_to_reference_by_title_and_index(
                     tree=tree,
                     tree_file_title=tree_file_title,
                     reference_tree_idx=tree_offset)
 def test_tree_offset_newick_get(self):
     """Read individual trees by offset and check them against references.

     The offsets tried are 0, n - 1, -1 and -n (n being the reference
     "num_trees"), plus random positive values until eight offsets exist
     and random negative values until twelve exist. A negative offset is
     translated to ``n + offset`` before use, or to 0 when its magnitude
     exceeds n. Each resulting index is requested by path, by stream and
     by string.
     """
     tree_file_title = "dendropy-test-trees-n33-unrooted-x100a"
     # Looked up once; the value is the same for every offset.
     expected_number_of_trees = standard_file_test_trees._TREE_REFERENCES[tree_file_title]["num_trees"]
     tree_offsets = set([0, expected_number_of_trees-1, -1, -expected_number_of_trees])
     while len(tree_offsets) < 8:
         tree_offsets.add(random.randint(1, expected_number_of_trees-2))
     while len(tree_offsets) < 12:
         tree_offsets.add(random.randint(-expected_number_of_trees-2, -2))
     tree_filepath = self.schema_tree_filepaths[tree_file_title]
     with open(tree_filepath, "r") as src:
         tree_string = src.read()
     for tree_offset in tree_offsets:
         if tree_offset >= 0:
             reference_tree_idx = tree_offset
         elif -tree_offset > expected_number_of_trees:
             reference_tree_idx = 0
         else:
             reference_tree_idx = expected_number_of_trees + tree_offset
         # Reopen the file per offset: the stream reader consumes it.
         with open(tree_filepath, "r") as tree_stream:
             approaches = (
                     (dendropy.Tree.get_from_path, tree_filepath),
                     (dendropy.Tree.get_from_stream, tree_stream),
                     (dendropy.Tree.get_from_string, tree_string),
                     )
             for method, src in approaches:
                 tree = method(
                         src,
                         "newick",
                         collection_offset=0,
                         tree_offset=reference_tree_idx,
                         suppress_internal_node_taxa=True,
                         suppress_leaf_node_taxa=False,
                         rooting="default-unrooted")
                 self.compare_to_reference_by_title_and_index(
                         tree=tree,
                         tree_file_title=tree_file_title,
                         reference_tree_idx=reference_tree_idx)
 def test_read_metadata(self):
     """Read annotated Newick files with ``extract_comment_metadata=True``.

     Each file is loaded as a ``dendropy.TreeList`` by path, by stream and
     by string, and the result is passed to ``verify_standard_trees``.
     """
     annotated_titles = (
         "dendropy-test-trees-multifurcating-rooted-annotated",
         "dendropy-test-trees-n33-unrooted-annotated-x10a",
     )
     for title in annotated_titles:
         newick_path = standard_file_test_trees._TREE_FILEPATHS["newick"][title]
         with open(newick_path, "r") as handle:
             newick_text = handle.read()
         with open(newick_path, "r") as newick_stream:
             getters_and_sources = (
                 (dendropy.TreeList.get_from_path, newick_path),
                 (dendropy.TreeList.get_from_stream, newick_stream),
                 (dendropy.TreeList.get_from_string, newick_text),
             )
             for getter, source in getters_and_sources:
                 loaded = getter(
                     source, "newick", extract_comment_metadata=True)
                 self.verify_standard_trees(
                     tree_list=loaded, tree_file_title=title)
Exemplo n.º 13
0
 def test_read_metadata(self):
     """Load annotated Newick tree lists while extracting comment metadata.

     Both annotated files are read through the path, stream and string
     getters of ``dendropy.TreeList`` and verified with
     ``verify_standard_trees``.
     """
     read_options = {"extract_comment_metadata": True}
     for tree_file_title in [
             "dendropy-test-trees-multifurcating-rooted-annotated",
             "dendropy-test-trees-n33-unrooted-annotated-x10a"]:
         tree_filepath = standard_file_test_trees._TREE_FILEPATHS["newick"][
             tree_file_title]
         with open(tree_filepath, "r") as text_src:
             tree_string = text_src.read()
         with open(tree_filepath, "r") as tree_stream:
             for getter, source in (
                     (dendropy.TreeList.get_from_path, tree_filepath),
                     (dendropy.TreeList.get_from_stream, tree_stream),
                     (dendropy.TreeList.get_from_string, tree_string)):
                 tree_list = getter(source, "newick", **read_options)
                 self.verify_standard_trees(tree_list=tree_list,
                                            tree_file_title=tree_file_title)
 def check(self,
         title,
         src_prefix,
         to_retain=False):
     """Prune or retain taxa on source trees and compare with reference trees.

     The source trees come from ``<src_prefix>.pre-pruned.nex`` and the
     reference trees from ``<src_prefix>.paup-pruned.nex``. Row ``i`` of
     the taxa file lists the whitespace-separated taxon indexes to apply
     to tree list ``i``.

     Parameters
     ----------
     title : str
         Label used in the debug log messages.
     src_prefix : str
         File name prefix shared by the data files.
     to_retain : bool
         If true, indexes are read from ``<src_prefix>.retained_taxa.txt``
         and applied with ``retain_taxa``. Otherwise they are read from
         ``<src_prefix>.pruned_taxa.txt`` and applied with ``prune_taxa``.

     Each processed source tree must have a symmetric difference of 0
     from the reference tree at the same position.
     """
     input_ds = dendropy.DataSet.get_from_path(
             src=pathmap.tree_source_path(src_prefix + ".pre-pruned.nex"),
             schema='nexus')
     tns1 = dendropy.TaxonNamespace()
     input_ds.attach_taxon_namespace(tns1)
     input_taxa = input_ds.taxon_namespaces[0]
     output_ds = dendropy.DataSet.get_from_path(
             src=pathmap.tree_source_path(src_prefix + ".paup-pruned.nex"),
             schema='nexus',
             taxon_namespace=input_taxa)
     tns2 = dendropy.TaxonNamespace()
     output_ds.attach_taxon_namespace(tns2)
     if to_retain:
         taxa_filename = src_prefix + ".retained_taxa.txt"
     else:
         taxa_filename = src_prefix + ".pruned_taxa.txt"
     # Read the indexes up front inside a context manager so the file is
     # closed even when an assertion below fails.
     with open(pathmap.tree_source_path(taxa_filename), "r") as taxf:
         taxon_idxs_list = [[int(i) for i in row.split()] for row in taxf]
     num_sets = len(input_ds.tree_lists)
     for set_idx, src_trees in enumerate(input_ds.tree_lists):
         ref_trees = output_ds.tree_lists[set_idx]
         taxon_idxs = taxon_idxs_list[set_idx]
         sub_taxa = [src_trees.taxon_namespace[i] for i in taxon_idxs]
         for tree_idx, src_tree in enumerate(src_trees):
             _LOG.debug("%s Set %d/%d, Tree %d/%d" % (title, set_idx+1, num_sets, tree_idx+1, len(src_trees)))
             ref_tree = ref_trees[tree_idx]
             if to_retain:
                 src_tree.retain_taxa(sub_taxa)
             else:
                 src_tree.prune_taxa(sub_taxa)
             self.assertEqual(treecompare.symmetric_difference(src_tree, ref_tree), 0)
 def test_tree_offset_without_collection_offset_newick_get(self):
     """Request tree offset 2 while leaving ``collection_offset`` unset.

     The request is issued by path, by stream and by string. Every tree
     returned is compared with the reference tree at index 2 for the
     same file title.
     """
     tree_file_title = 'dendropy-test-trees-n33-unrooted-x10a'
     tree_filepath = self.schema_tree_filepaths[tree_file_title]
     # Constant across approaches, so it lives outside the loop.
     tree_offset = 2
     with open(tree_filepath, "r") as src:
         tree_string = src.read()
     with open(tree_filepath, "r") as tree_stream:
         approaches = (
             (dendropy.Tree.get_from_path, tree_filepath),
             (dendropy.Tree.get_from_stream, tree_stream),
             (dendropy.Tree.get_from_string, tree_string),
         )
         for method, source in approaches:
             tree = method(source,
                           "newick",
                           tree_offset=tree_offset)
             self.compare_to_reference_by_title_and_index(
                 tree=tree,
                 tree_file_title=tree_file_title,
                 reference_tree_idx=tree_offset)
 def test_unsupported_keyword_arguments(self):
     """Expect ``TypeError`` when unknown reader keywords are supplied.

     The keywords passed are a misspelling of
     ``suppress_internal_node_taxa`` and a nonsense name. The path, stream
     and string getters are each checked.
     """
     newick_path = pathmap.tree_source_path('dendropy-test-trees-n12-x2.newick')
     newick_text = self.get_newick_string()
     bad_kwargs = {
         "suppress_internal_taxa": True,  # should be suppress_internal_node_taxa
         "gobbledegook": False,
     }
     with open(newick_path, "r") as newick_stream:
         getters_and_sources = (
             (dendropy.Tree.get_from_path, newick_path),
             (dendropy.Tree.get_from_stream, newick_stream),
             (dendropy.Tree.get_from_string, newick_text),
         )
         for getter, source in getters_and_sources:
             self.assertRaises(
                 TypeError, getter, source, "newick", **bad_kwargs)
 def test_unsupported_keyword_arguments(self):
     """Check that unrecognized reader keywords are rejected.

     ``TypeError`` must be raised by each of the path, stream and string
     getters when given a misspelled and a made-up keyword.
     """
     tree_string = self.get_newick_string()
     tree_filepath = pathmap.tree_source_path(
         'dendropy-test-trees-n12-x2.newick')
     # "suppress_internal_taxa" should be suppress_internal_node_taxa.
     reader_kwargs = dict(suppress_internal_taxa=True, gobbledegook=False)
     with open(tree_filepath, "r") as tree_stream:
         for getter, source in (
                 (dendropy.Tree.get_from_path, tree_filepath),
                 (dendropy.Tree.get_from_stream, tree_stream),
                 (dendropy.Tree.get_from_string, tree_string)):
             with self.assertRaises(TypeError):
                 getter(source, "newick", **reader_kwargs)
Exemplo n.º 18
0
 def iterate_over_file(self, current_file):
     """Yield every item produced from a single file.

     Parameters
     ----------
     current_file : str or file-like object
         A path, which is opened for reading here, or an already open
         stream, which is used as given.

     Yields
     ------
     Each item produced by ``self._yield_items_from_stream``.

     While iterating, ``self._current_file`` holds the stream and
     ``self._current_file_name`` holds the path (or the stream's ``name``
     attribute, or ``None`` if it has none). A stream that supports the
     context-manager protocol is used as one, whether it was opened here
     or supplied by the caller. ``self._current_file`` is set back to
     ``None`` when iteration ends, including when it is abandoned early or
     fails with an exception.
     """
     if textprocessing.is_str_type(current_file):
         self._current_file = open(current_file, "r")
         self._current_file_name = current_file
     else:
         self._current_file = current_file
         # Read the name from the stream itself; not every file-like
         # object has one.
         self._current_file_name = getattr(current_file, "name", None)
     stream = self._current_file
     try:
         if hasattr(stream, "__exit__"):
             with stream:
                 for item in self._yield_items_from_stream(stream=stream):
                     yield item
         else:
             # StringIO does not support ``with``
             for item in self._yield_items_from_stream(stream=stream):
                 yield item
     finally:
         # Runs on normal exhaustion, on an exception and when the
         # generator is closed before being exhausted.
         self._current_file = None
 def test_basic_parsing(self):
     """Read the curated Newick tree with each mix of suppression options.

     The three ``suppress_*`` reader keywords are each given as omitted
     (``None``), ``False`` and ``True``. An omitted keyword is expected
     to behave as ``True`` for internal node taxa and as ``False`` for
     leaf node taxa and edge lengths. For every mix the tree is read by
     path, by open file and by string through ``dendropy.Tree.get`` and
     verified with ``verify_curated_tree``.
     """
     def _apply(kwargs, name, value, default):
         # Set or drop ``name`` in ``kwargs``; return the expected setting.
         if value is None:
             kwargs.pop(name, None)
             return default
         kwargs[name] = value
         return value

     tree_string = self.get_newick_string()
     choices = (None, False, True)
     reader_kwargs = {}
     with pathmap.SandboxedFile() as tempf:
         tempf.write(tree_string)
         tempf.flush()
         tree_filepath = tempf.name
         for internal_choice in choices:
             expect_internal = _apply(
                 reader_kwargs, "suppress_internal_node_taxa",
                 internal_choice, True)
             for leaf_choice in choices:
                 expect_leaf = _apply(
                     reader_kwargs, "suppress_leaf_node_taxa",
                     leaf_choice, False)
                 for edge_choice in choices:
                     expect_edge = _apply(
                         reader_kwargs, "suppress_edge_lengths",
                         edge_choice, False)
                     with open(tree_filepath, "r") as tree_stream:
                         for source_kwargs in (
                                 {"path": tree_filepath},
                                 {"file": tree_stream},
                                 {"data": tree_string}):
                             source_kwargs.update(reader_kwargs)
                             source_kwargs["schema"] = "newick"
                             parsed = dendropy.Tree.get(**source_kwargs)
                             self.verify_curated_tree(
                                 parsed,
                                 suppress_internal_node_taxa=expect_internal,
                                 suppress_leaf_node_taxa=expect_leaf,
                                 suppress_edge_lengths=expect_edge)
Exemplo n.º 20
0
 def test_basic(self):
     """Yield trees from several files in sequence and check each one.

     The files are handed to ``dendropy.Tree.yield_from_files`` as paths,
     read with the "nexus" schema and a shared ``TaxonNamespace``. For
     each yielded tree the source's ``current_file_name`` must equal the
     path the tree is expected to come from. Afterwards the number of
     trees, their taxon namespace and their content are compared with
     the reference data.
     """
     tree_file_titles = [
         # Titles currently left out of this test:
         # "dendropy-test-trees-multifurcating-rooted-annotated",
         # "dendropy-test-trees-multifurcating-rooted",
         # "dendropy-test-trees-multifurcating-unrooted",
         # "dendropy-test-trees-n10-rooted-treeshapes",
         "dendropy-test-trees-n12-x2",
         "dendropy-test-trees-n33-unrooted-x10a",
         "dendropy-test-trees-n33-unrooted-x10b",
         "dendropy-test-trees-n33-unrooted-annotated-x10a",
         "dendropy-test-trees-n33-unrooted-annotated-x10a",
     ]
     expected_file_names = []
     expected_tree_references = []
     tree_files = []
     for tree_file_title in tree_file_titles:
         tree_filepath = self.schema_tree_filepaths[tree_file_title]
         # Only paths are supplied; the former open-file branch was
         # guarded by ``False`` and could never run.
         tree_files.append(tree_filepath)
         reference = self.tree_references[tree_file_title]
         for tree_idx in range(reference["num_trees"]):
             expected_file_names.append(tree_filepath)
             expected_tree_references.append(reference[str(tree_idx)])
     collected_trees = []
     tns = dendropy.TaxonNamespace()
     tree_sources = dendropy.Tree.yield_from_files(
             files=tree_files,
             schema="nexus",
             taxon_namespace=tns)
     for tree_idx, tree in enumerate(tree_sources):
         self.assertEqual(tree_sources.current_file_name, expected_file_names[tree_idx])
         tree.current_file_name = tree_sources.current_file_name
         collected_trees.append(tree)
     self.assertEqual(len(collected_trees), len(expected_tree_references))
     for tree, ref_tree in zip(collected_trees, expected_tree_references):
         self.assertIs(tree.taxon_namespace, tns)
         self.compare_to_reference_tree(tree, ref_tree)
Exemplo n.º 21
0
 def read_expected_sfs(self, filename):
     """Return the comma-separated integers stored in ``filename``.

     The file is located with ``pathmap.char_source_path``. Surrounding
     whitespace is stripped before the text is split on commas.
     """
     with open(pathmap.char_source_path(filename)) as src:
         text = src.read()
     return [int(token) for token in text.strip().split(",")]
Exemplo n.º 22
0
 def read_expected_sfs(self, filename):
     """Read a list of integers from a comma-separated file.

     ``filename`` is resolved through ``pathmap.char_source_path``. The
     whole file is read, stripped and split on "," into integers.
     """
     source_path = pathmap.char_source_path(filename)
     with open(source_path) as src:
         tokens = src.read().strip().split(",")
     return list(map(int, tokens))