def testBoundTaxonNamespaceDefault(self):
     """Reads into a data set with an attached namespace must reuse it.

     An empty ``TaxonNamespace`` is attached to an empty ``DataSet`` and
     four sources are read one after another. After each read the data
     set is expected to hold exactly one namespace, and the size of that
     namespace is compared with the count expected at that step.
     """
     dataset = dendropy.DataSet()
     namespace = dendropy.TaxonNamespace()
     dataset.attach_taxon_namespace(namespace)
     self.assertEqual(len(dataset.taxon_namespaces), 1)
     self.assertIs(dataset.taxon_namespaces[0],
                   dataset.attached_taxon_namespace)
     # Each step: (path resolver, file name, read() keywords, expected size
     # of the single namespace once that read has finished).
     steps = (
         (pathmap.mixed_source_path,
          'reference_single_taxonset_dataset.nex',
          {"schema": "nexus"}, 33),
         (pathmap.tree_source_path,
          'pythonidae.mle.nex',
          {"schema": "nexus"}, 33),
         (pathmap.tree_source_path,
          'pythonidae.reference-trees.newick',
          {"schema": "newick"}, 33),
         (pathmap.char_source_path,
          'caenophidia_mos.chars.fasta',
          {"schema": "fasta", "data_type": "protein"}, 147),
     )
     for resolve_path, filename, read_kwargs, expected_size in steps:
         dataset.read(path=resolve_path(filename), **read_kwargs)
         self.assertEqual(len(dataset.taxon_namespaces), 1)
         self.assertEqual(len(dataset.taxon_namespaces[0]), expected_size)
 def testAttachTaxonNamespaceOnGet(self):
     """A namespace given to ``get_from_path`` is attached until detached.

     The data set is created with an explicit ``taxon_namespace``; the
     test checks that this object is the attached namespace and the only
     one present, and that two further tree reads leave it that way.
     After ``detach_taxon_namespace()`` a FASTA read is expected to add a
     second namespace.
     """
     namespace = dendropy.TaxonNamespace()
     dataset = dendropy.DataSet.get_from_path(
         pathmap.mixed_source_path('reference_single_taxonset_dataset.nex'),
         "nexus",
         taxon_namespace=namespace)
     self.assertEqual(len(dataset.taxon_namespaces), 1)
     self.assertIsNotNone(dataset.attached_taxon_namespace)
     self.assertIs(dataset.taxon_namespaces[0],
                   dataset.attached_taxon_namespace)
     self.assertIs(dataset.attached_taxon_namespace, namespace)
     self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

     # Tree reads performed while the namespace is still attached.
     attached_reads = (
         ('pythonidae.mle.nex', "nexus"),
         ('pythonidae.reference-trees.newick', "newick"),
     )
     for filename, schema in attached_reads:
         dataset.read(path=pathmap.tree_source_path(filename),
                      schema=schema)
         self.assertEqual(len(dataset.taxon_namespaces), 1)
         self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

     dataset.detach_taxon_namespace()
     fasta_path = pathmap.char_source_path('caenophidia_mos.chars.fasta')
     dataset.read_from_path(fasta_path,
                            schema="fasta",
                            data_type="protein")
     self.assertEqual(len(dataset.taxon_namespaces), 2)
     self.assertEqual(len(dataset.taxon_namespaces[0]), 33)
     self.assertEqual(len(dataset.taxon_namespaces[1]), 114)
 def check(self, title, src_prefix):
     """Assert each DendroPy-pruned tree matches its PAUP-pruned reference.

     Reads ``<src_prefix>.dendropy-pruned.nex`` and
     ``<src_prefix>.paup-pruned.nex`` (both resolved through
     ``pathmap.tree_source_path``). The second file is read with the
     first taxon namespace of the first data set. Trees are paired by
     tree-list position and by position within the list, and each pair
     must have a symmetric difference of zero.

     Args:
         title: Label placed at the start of every debug log line.
         src_prefix: File-name prefix shared by the two input files.
     """
     tns = dendropy.TaxonNamespace()
     input_ds = dendropy.DataSet.get_from_path(
         src=pathmap.tree_source_path(src_prefix + ".dendropy-pruned.nex"),
         schema='nexus',
         attached_taxon_namespace=tns)
     input_taxa = input_ds.taxon_namespaces[0]
     output_ds = dendropy.DataSet.get_from_path(
         src=pathmap.tree_source_path(src_prefix + ".paup-pruned.nex"),
         schema='nexus',
         taxon_namespace=input_taxa)
     num_sets = len(input_ds.tree_lists)
     for set_idx, src_trees in enumerate(input_ds.tree_lists):
         ref_trees = output_ds.tree_lists[set_idx]
         num_trees = len(src_trees)
         for tree_idx, src_tree in enumerate(src_trees):
             _LOG.debug("%s Set %d/%d, Tree %d/%d" %
                        (title, set_idx + 1, num_sets,
                         tree_idx + 1, num_trees))
             # Indexing (rather than zip) keeps the IndexError if the
             # reference list is shorter than the source list.
             ref_tree = ref_trees[tree_idx]
             self.assertEqual(
                 treecompare.symmetric_difference(src_tree, ref_tree), 0)
示例#4
0
 def setUp(self):
     """Store the fixture paths and the burn-in value on the test case.

     Three tree file names are resolved through
     ``pathmap.tree_source_path`` and kept as attributes, together with
     ``self.burnin``.
     """
     resolve = pathmap.tree_source_path
     self.support_trees_path = resolve("primates.beast.mcmc.trees")
     self.target_tree_path = resolve("primates.beast.mcct.noedgelens.tree")
     self.expected_tree_path = resolve("primates.beast.mcct.medianh.tre")
     self.burnin = 40
 def test_encoding(self):
     """Check bipartition bitmasks against the stored reference values.

     ``self.reference`` is walked as a nested mapping keyed, in order, by
     source file name, rooting, a ``collapse_unrooted_basal_bifurcation``
     description, a ``suppress_unifurcations`` description and finally
     the tree index as a string. For each combination the trees are read
     again, bipartitions are encoded with the two flags, and every edge's
     ``leafset_bitmask`` and ``split_bitmask`` are compared with the
     reference entry stored under the label of the edge's head-node
     taxon.

     Raises:
         ValueError: If a description key contains neither the ``=True``
             nor the ``=False`` form of its flag.
     """
     for source_name in self.reference:
         # The path depends only on the source name, so resolve it once.
         source_path = pathmap.tree_source_path(source_name)
         rooting_refs = self.reference[source_name]
         for rooting in rooting_refs:
             collapse_refs = rooting_refs[rooting]
             for collapse_desc in collapse_refs:
                 if "collapse_unrooted_basal_bifurcation=True" in collapse_desc:
                     collapse_unrooted_basal_bifurcation = True
                 elif "collapse_unrooted_basal_bifurcation=False" in collapse_desc:
                     collapse_unrooted_basal_bifurcation = False
                 else:
                     raise ValueError(collapse_desc)
                 suppress_refs = collapse_refs[collapse_desc]
                 for suppress_desc in suppress_refs:
                     if "suppress_unifurcations=True" in suppress_desc:
                         suppress_unifurcations = True
                     elif "suppress_unifurcations=False" in suppress_desc:
                         suppress_unifurcations = False
                     else:
                         raise ValueError(suppress_desc)
                     trees_bipartitions_ref = suppress_refs[suppress_desc]
                     # Trees are re-read for every flag combination.
                     trees = dendropy.TreeList.get_from_path(
                             source_path,
                             "nexus",
                             rooting=rooting,
                             suppress_leaf_node_taxa=False,
                             suppress_internal_node_taxa=False,
                             )
                     for tree_idx, tree in enumerate(trees):
                         tree_bipartitions_ref = trees_bipartitions_ref[str(tree_idx)]
                         # The return value is not needed here; the loop
                         # below reads ``edge.bipartition`` from each edge
                         # (presumably populated by this call).
                         tree.encode_bipartitions(
                                 suppress_unifurcations=suppress_unifurcations,
                                 collapse_unrooted_basal_bifurcation=collapse_unrooted_basal_bifurcation,
                                 )
                         for edge in tree.postorder_edge_iter():
                             bipartition = edge.bipartition
                             # unittest assertions are used instead of bare
                             # ``assert`` so the checks survive ``python -O``.
                             self.assertIsNotNone(edge.head_node.taxon)
                             self.assertIsNotNone(edge.head_node.taxon.label)
                             label = edge.head_node.taxon.label
                             expected_leafset_bitmask = int(tree_bipartitions_ref[label]["leafset_bitmask"])
                             self.assertEqual(bipartition.leafset_bitmask, expected_leafset_bitmask)
                             expected_split_bitmask = int(tree_bipartitions_ref[label]["split_bitmask"])
                             self.assertEqual(bipartition.split_bitmask, expected_split_bitmask)
示例#6
0
 def setUp(self):
     """Build the tree list and the consensus tree used by the tests.

     Files ``pythonidae.mb.run1.t`` to ``pythonidae.mb.run4.t`` are read
     into a single ``TreeList`` with ``collection_offset=0`` and
     ``tree_offset=25``. ``pythonidae.mb.con`` is then read with the tree
     list's taxon namespace and its bipartitions are encoded.
     """
     self.tree_list = dendropy.TreeList()
     for run_number in (1, 2, 3, 4):
         run_path = pathmap.tree_source_path(
             'pythonidae.mb.run%d.t' % run_number)
         self.tree_list.read_from_path(
             run_path, 'nexus', collection_offset=0, tree_offset=25)
     consensus_path = pathmap.tree_source_path("pythonidae.mb.con")
     self.mb_con_tree = dendropy.Tree.get_from_path(
         consensus_path,
         schema="nexus",
         taxon_namespace=self.tree_list.taxon_namespace)
     self.mb_con_tree.encode_bipartitions()
 def setUp(self):
     """Build the tree list and the consensus tree used by the tests.

     Files ``pythonidae.mb.run1.t`` to ``pythonidae.mb.run4.t`` are read
     into a single ``TreeList`` with ``collection_offset=0`` and
     ``tree_offset=25``. ``pythonidae.mb.con`` is then read with the tree
     list's taxon namespace and its bipartitions are encoded.
     """
     self.tree_list = dendropy.TreeList()
     for run_number in (1, 2, 3, 4):
         run_path = pathmap.tree_source_path(
             'pythonidae.mb.run%d.t' % run_number)
         self.tree_list.read_from_path(
             run_path, 'nexus', collection_offset=0, tree_offset=25)
     consensus_path = pathmap.tree_source_path("pythonidae.mb.con")
     self.mb_con_tree = dendropy.Tree.get_from_path(
         consensus_path,
         schema="nexus",
         taxon_namespace=self.tree_list.taxon_namespace)
     self.mb_con_tree.encode_bipartitions()
 def testMultiTaxonNamespace(self):
     """Each read into an unattached data set should add one namespace.

     Four sources are read into an empty ``DataSet``. After the read at
     position ``i`` the data set is expected to hold ``i + 1`` namespaces,
     and the namespace at index ``i`` is expected to have the listed size.
     """
     dataset = dendropy.DataSet()
     # Each step: (path resolver, file name, read() keywords, expected size
     # of the namespace found at this step's index).
     steps = (
         (pathmap.mixed_source_path,
          'reference_single_taxonset_dataset.nex',
          {"schema": "nexus"}, 33),
         (pathmap.tree_source_path,
          'pythonidae.mle.nex',
          {"schema": "nexus"}, 33),
         (pathmap.tree_source_path,
          'pythonidae.reference-trees.newick',
          {"schema": "newick"}, 33),
         (pathmap.char_source_path,
          'caenophidia_mos.chars.fasta',
          {"schema": "fasta", "data_type": "protein"}, 114),
     )
     for position, step in enumerate(steps):
         resolve_path, filename, read_kwargs, expected_size = step
         dataset.read(path=resolve_path(filename), **read_kwargs)
         self.assertEqual(len(dataset.taxon_namespaces), position + 1)
         self.assertEqual(len(dataset.taxon_namespaces[position]),
                          expected_size)
示例#9
0
 def testBasicEst(self):
     """Fit a pure-birth model to each reference tree in two ways.

     For every tree in ``pythonidae.reference-trees.newick`` the model is
     fitted once from the tree itself and once from the tree's internal
     node ages. Both results are compared, to five decimal places, with
     the expected birth rate and log-likelihood for that tree.
     """
     # One (birth rate, log-likelihood) pair per tree, in file order.
     expected_results = (
         (0.02879745490817826186758, -59.41355682054444287132355),
         (0.03074708092192806122012, -57.38280732060526645454956),
         (0.02539588437187430269848, -63.31025321526630023072357),
         (0.02261951969802362960582, -66.89924384677527768872096),
         (0.02804607815688910446572, -60.23314120509648716961237),
         (0.02748663302756114423797, -60.85775993426526042640035),
         (0.02816256618562208019485, -60.10465085978295007862471),
         (0.03592126646048716259729, -52.56123967307649991198559),
         (0.02905144990609926855529, -59.14133401672411594063306),
         (0.02703739196351075124714, -61.36860953277779628933786),
         (0.01981322730236481297061, -71.00561162515919022553135),
     )
     source = pathmap.tree_source_path("pythonidae.reference-trees.newick")
     trees = dendropy.TreeList.get_from_path(source, "newick")
     self.assertEqual(len(trees), len(expected_results))
     for tree, (expected_rate, expected_lnl) in zip(trees, expected_results):
         from_tree = birthdeath.fit_pure_birth_model(
             tree=tree, ultrametricity_precision=1e-5)
         node_ages = tree.internal_node_ages(ultrametricity_precision=1e-5)
         from_ages = birthdeath.fit_pure_birth_model(
             internal_node_ages=node_ages)
         for estimate in (from_tree, from_ages):
             self.assertAlmostEqual(
                 estimate["birth_rate"], expected_rate, places=5)
             self.assertAlmostEqual(
                 estimate["log_likelihood"], expected_lnl, places=5)
示例#10
0
 def testTrees(self):
     """Rebuild each tree from its bipartition encoding and compare.

     Every listed file is read with the given rooting, its bipartitions
     are encoded, and a new tree is created from that encoding with
     ``Tree.from_bipartition_encoding``. The rebuilt tree must have a
     symmetric difference of zero from the original. The rootedness of
     the parsed tree is also compared with the value in the fixture table.
     """
     # (file name, rooting keyword passed to the reader, expected is_rooted)
     tree_files = [
         ("dendropy-test-trees-n33-unrooted-x100a.nexus", "force-unrooted",
          False),
         ("dendropy-test-trees-multifurcating-unrooted.nexus",
          "force-unrooted", False),
         ("pythonidae.beast.summary.tre", "force-rooted", True),
         ("primates.beast.mcct.medianh.tre", "force-rooted", True),
     ]
     for tree_file, rooting, is_rooted in tree_files:
         ref_tree = dendropy.Tree.get_from_path(
             pathmap.tree_source_path(tree_file), "nexus", rooting=rooting)
         # The expected rootedness was previously unpacked but never used.
         self.assertEqual(ref_tree.is_rooted, is_rooted)
         bipartition_encoding = ref_tree.encode_bipartitions()
         t_tree = dendropy.Tree.from_bipartition_encoding(
             bipartition_encoding,
             taxon_namespace=ref_tree.taxon_namespace,
             is_rooted=ref_tree.is_rooted)
         _LOG.debug("--\n       File: {} ({})".format(
             tree_file, ref_tree.is_rooted))
         _LOG.debug("     Original: {}".format(
             ref_tree.as_string("newick")))
         _LOG.debug("Reconstructed: {}".format(t_tree.as_string("newick")))
         self.assertEqual(
             treecompare.symmetric_difference(ref_tree, t_tree), 0)
示例#11
0
 def testBasicEst(self):
     """Fit a pure-birth model to each reference tree in two ways.

     For every tree in ``pythonidae.reference-trees.newick`` the model is
     fitted once from the tree itself and once from the tree's internal
     node ages. Both results are compared, to five decimal places, with
     the expected birth rate and log-likelihood for that tree.
     """
     # One (birth rate, log-likelihood) pair per tree, in file order.
     expected_results = (
         (0.02879745490817826186758, -59.41355682054444287132355),
         (0.03074708092192806122012, -57.38280732060526645454956),
         (0.02539588437187430269848, -63.31025321526630023072357),
         (0.02261951969802362960582, -66.89924384677527768872096),
         (0.02804607815688910446572, -60.23314120509648716961237),
         (0.02748663302756114423797, -60.85775993426526042640035),
         (0.02816256618562208019485, -60.10465085978295007862471),
         (0.03592126646048716259729, -52.56123967307649991198559),
         (0.02905144990609926855529, -59.14133401672411594063306),
         (0.02703739196351075124714, -61.36860953277779628933786),
         (0.01981322730236481297061, -71.00561162515919022553135),
     )
     source = pathmap.tree_source_path("pythonidae.reference-trees.newick")
     trees = dendropy.TreeList.get_from_path(source, "newick")
     self.assertEqual(len(trees), len(expected_results))
     for tree, (expected_rate, expected_lnl) in zip(trees, expected_results):
         from_tree = birthdeath.fit_pure_birth_model(
             tree=tree, ultrametricity_precision=1e-5)
         node_ages = tree.internal_node_ages(ultrametricity_precision=1e-5)
         from_ages = birthdeath.fit_pure_birth_model(
             internal_node_ages=node_ages)
         for estimate in (from_tree, from_ages):
             self.assertAlmostEqual(
                 estimate["birth_rate"], expected_rate, places=5)
             self.assertAlmostEqual(
                 estimate["log_likelihood"], expected_lnl, places=5)
示例#12
0
        def check_splits_counting(self,
                tree_filename,
                taxa_definition_filepath,
                splits_filename,
                paup_as_rooted,
                paup_use_tree_weights,
                paup_burnin,
                expected_taxon_labels,
                expected_is_rooted,
                expected_num_trees,
                ):
            """Compare PAUP split counts for a tree file with a reference.

            Runs ``PaupService.count_splits_from_files`` on the tree file
            and checks the returned taxon namespace, tree count, rooting
            and per-split counts and frequencies against the expected
            values and the reference loaded from ``splits_filename``.

            Args:
                tree_filename: Tree file name, resolved through
                    ``pathmap.tree_source_path``.
                taxa_definition_filepath: Passed through to PAUP as the
                    taxa definition file.
                splits_filename: Passed to
                    ``paupsplitsreference.get_splits_reference``.
                paup_as_rooted: Passed to PAUP as ``is_rooted``.
                paup_use_tree_weights: Passed to PAUP as
                    ``use_tree_weights``.
                paup_burnin: Passed to PAUP as ``burnin``.
                expected_taxon_labels: Labels expected in the returned
                    namespace, in order.
                expected_is_rooted: Expected ``is_rooted`` result
                    (compared by identity).
                expected_num_trees: Expected ``num_trees`` result.
            """
            tree_filepath = pathmap.tree_source_path(tree_filename)
            paup_service = paup.PaupService()
            result = paup_service.count_splits_from_files(
                    tree_filepaths=[tree_filepath],
                    taxa_definition_filepath=taxa_definition_filepath,
                    is_rooted=paup_as_rooted,
                    use_tree_weights=paup_use_tree_weights,
                    burnin=paup_burnin,
                    )
            num_trees = result["num_trees"]
            bipartition_counts = result["bipartition_counts"]
            bipartition_freqs = result["bipartition_freqs"]
            taxon_namespace = result["taxon_namespace"]
            is_rooted = result["is_rooted"]

            # check taxon namespace
            self.assertEqual(len(taxon_namespace), len(expected_taxon_labels))
            for taxon, expected_label in zip(taxon_namespace, expected_taxon_labels):
                self.assertEqual(taxon.label, expected_label)

            # check general tree state
            self.assertEqual(num_trees, expected_num_trees)
            self.assertIs(is_rooted, expected_is_rooted)

            splits_ref = paupsplitsreference.get_splits_reference(
                    splits_filename=splits_filename,
                    key_column_index=0,
                    )
            self.assertEqual(len(splits_ref), len(bipartition_counts))
            self.assertEqual(len(splits_ref), len(bipartition_freqs))
            # Rooted results are keyed by the unnormalized bitmask,
            # unrooted results by the normalized one.
            if is_rooted:
                bitmask_field = "unnormalized_split_bitmask"
            else:
                bitmask_field = "normalized_split_bitmask"
            splits_ref_bitmasks = {splits_ref[x][bitmask_field] for x in splits_ref}
            counts_keys = set(bipartition_counts.keys())
            self.assertEqual(len(counts_keys), len(splits_ref_bitmasks))
            self.assertEqual(counts_keys, splits_ref_bitmasks, "\n    {}\n\n    {}\n\n".format(sorted(counts_keys), sorted(splits_ref_bitmasks)))
            to_bitmask = paup.PaupService.bipartition_groups_to_split_bitmask
            for split_str_rep in splits_ref:
                ref = splits_ref[split_str_rep]
                self.assertEqual(split_str_rep, ref["bipartition_string"])
                self.assertEqual(to_bitmask(split_str_rep, normalized=False),
                        ref["unnormalized_split_bitmask"])
                self.assertEqual(to_bitmask(split_str_rep, normalized=True),
                        ref["normalized_split_bitmask"])
                split_bitmask = to_bitmask(split_str_rep, normalized=not is_rooted)
                self.assertEqual(bipartition_counts[split_bitmask], ref["count"])
                # Only two decimal places are compared: PAUP* 4.10b output
                # is not very precise.
                self.assertAlmostEqual(bipartition_freqs[split_bitmask], ref["frequency"], 2)
 def test_multiple_trees1(self):
     """``multitreeblocks.nex`` should give three tree lists of three trees."""
     dataset = dendropy.DataSet.get_from_path(
         pathmap.tree_source_path("multitreeblocks.nex"), "nexus")
     self.assertEqual(len(dataset.tree_lists), 3)
     for tree_list in dataset.tree_lists:
         self.assertEqual(len(tree_list), 3)
 def test_multiple_trees1(self):
     """``multitreeblocks.nex`` should give three tree lists of three trees."""
     dataset = dendropy.DataSet.get_from_path(
         pathmap.tree_source_path("multitreeblocks.nex"), "nexus")
     self.assertEqual(len(dataset.tree_lists), 3)
     for tree_list in dataset.tree_lists:
         self.assertEqual(len(tree_list), 3)
 def test_with_translate(self):
     """Read two curated NEXUS files and verify every tree in each.

     Each source is read with its own ``suppress_internal_node_taxa``
     setting. The single resulting tree list must contain trees labelled
     "1", "2" and "3", and each tree is passed to
     ``self.verify_curated_tree`` with the same setting.
     """
     # (file name, value passed as suppress_internal_node_taxa)
     srcs = (
         ("curated-with-translate-block-and-internal-taxa.nex", False),
         ("curated-with-translate-block-and-untranslated-internal-taxa.nex",
          True),
     )
     # Expected labels are the same for both sources, so build them once.
     tree_labels = ("1", "2", "3")
     for src_filename, suppress_internal_taxa in srcs:
         src_path = pathmap.tree_source_path(src_filename)
         ds = dendropy.DataSet.get_from_path(
             src_path,
             "nexus",
             suppress_internal_node_taxa=suppress_internal_taxa)
         self.assertEqual(len(ds.tree_lists), 1)
         tree_list = ds.tree_lists[0]
         self.assertEqual(len(tree_list), len(tree_labels))
         for tree, label in zip(tree_list, tree_labels):
             self.assertEqual(tree.label, label)
             self.verify_curated_tree(
                 tree=tree,
                 suppress_internal_node_taxa=suppress_internal_taxa,
                 suppress_leaf_node_taxa=False,
                 suppress_edge_lengths=False,
                 node_taxon_label_map=None)
 def testBoundTaxonNamespaceDefault(self):
     """Reads into a data set with an attached namespace must reuse it.

     An empty ``TaxonNamespace`` is attached to an empty ``DataSet`` and
     four sources are read one after another. After each read the data
     set is expected to hold exactly one namespace, and the size of that
     namespace is compared with the count expected at that step.
     """
     dataset = dendropy.DataSet()
     namespace = dendropy.TaxonNamespace()
     dataset.attach_taxon_namespace(namespace)
     self.assertEqual(len(dataset.taxon_namespaces), 1)
     self.assertIs(dataset.taxon_namespaces[0],
                   dataset.attached_taxon_namespace)
     # Each step: (path resolver, file name, read() keywords, expected size
     # of the single namespace once that read has finished).
     steps = (
         (pathmap.mixed_source_path,
          'reference_single_taxonset_dataset.nex',
          {"schema": "nexus"}, 33),
         (pathmap.tree_source_path,
          'pythonidae.mle.nex',
          {"schema": "nexus"}, 33),
         (pathmap.tree_source_path,
          'pythonidae.reference-trees.newick',
          {"schema": "newick"}, 33),
         (pathmap.char_source_path,
          'caenophidia_mos.chars.fasta',
          {"schema": "fasta", "data_type": "protein"}, 147),
     )
     for resolve_path, filename, read_kwargs, expected_size in steps:
         dataset.read(path=resolve_path(filename), **read_kwargs)
         self.assertEqual(len(dataset.taxon_namespaces), 1)
         self.assertEqual(len(dataset.taxon_namespaces[0]), expected_size)
 def test_multiple_trees2(self):
     """``multitreeblocks2.nex``: two tree lists sharing one namespace.

     The data set must hold one taxon namespace and two tree lists; each
     list must contain two trees and use that namespace.
     """
     dataset = dendropy.DataSet.get_from_path(
         pathmap.tree_source_path("multitreeblocks2.nex"), "nexus")
     self.assertEqual(len(dataset.taxon_namespaces), 1)
     self.assertEqual(len(dataset.tree_lists), 2)
     for tree_list in dataset.tree_lists:
         self.assertEqual(len(tree_list), 2)
         self.assertIs(tree_list.taxon_namespace,
                       dataset.taxon_namespaces[0])
 def test_with_translate_but_no_taxa_block(self):
     """Read the curated file into a ``TreeList`` and verify each tree.

     The list must contain trees labelled "1", "2" and "3"; each tree is
     passed to ``self.verify_curated_tree`` with default settings.
     """
     src_filename = "curated-with-translate-block-and-no-taxa-block-and-untranslated-internal-taxa.nex"
     src_path = pathmap.tree_source_path(src_filename)
     tree_list = dendropy.TreeList.get_from_path(src_path, "nexus")
     tree_labels = ("1", "2", "3")
     self.assertEqual(len(tree_list), len(tree_labels))
     # The position index was never used, so plain zip() is enough.
     for tree, label in zip(tree_list, tree_labels):
         self.assertEqual(tree.label, label)
         self.verify_curated_tree(tree=tree)
 def test_multiple_trees2(self):
     """``multitreeblocks2.nex``: two tree lists sharing one namespace.

     The data set must hold one taxon namespace and two tree lists; each
     list must contain two trees and use that namespace.
     """
     dataset = dendropy.DataSet.get_from_path(
         pathmap.tree_source_path("multitreeblocks2.nex"), "nexus")
     self.assertEqual(len(dataset.taxon_namespaces), 1)
     self.assertEqual(len(dataset.tree_lists), 2)
     for tree_list in dataset.tree_lists:
         self.assertEqual(len(tree_list), 2)
         self.assertIs(tree_list.taxon_namespace,
                       dataset.taxon_namespaces[0])
示例#20
0
 def setUp(self):
     """Load the fixture trees and count their splits.

     Trees are read from ``issue_mth_2009-02-03.rooted.nexus``; a
     ``SplitDistribution`` on the same taxon namespace then counts the
     splits of every tree with ``is_bipartitions_updated=False``.
     """
     source = pathmap.tree_source_path("issue_mth_2009-02-03.rooted.nexus")
     self.trees = dendropy.TreeList.get_from_path(source, "nexus")
     self.split_distribution = dendropy.SplitDistribution(
         taxon_namespace=self.trees.taxon_namespace)
     count_splits = self.split_distribution.count_splits_on_tree
     for current_tree in self.trees:
         count_splits(current_tree, is_bipartitions_updated=False)
 def check_split_counting(
     self,
     tree_filename,
     test_as_rooted,
     parser_rooting_interpretation,
     test_ignore_tree_weights=False,
     dp_ignore_tree_weights=False,
 ):
     """Compare DendroPy split counts with those obtained through PAUP.

     A split distribution is obtained from ``PaupService`` for the tree
     file. A second one is built by reading the same file with DendroPy
     (using PAUP's taxon namespace) and counting splits tree by tree.
     Every non-trivial split counted by DendroPy must be present in the
     PAUP result with the same count, and once matched and trivial splits
     are removed the PAUP result must be empty.

     Args:
         tree_filename: Tree file name, resolved through
             ``pathmap.tree_source_path``.
         test_as_rooted: Passed to PAUP as ``is_rooted``.
         parser_rooting_interpretation: ``rooting`` argument for the
             DendroPy reader.
         test_ignore_tree_weights: PAUP is given
             ``use_tree_weights=not test_ignore_tree_weights``.
         dp_ignore_tree_weights: Assigned to
             ``ignore_tree_weights`` on the DendroPy distribution.
     """
     tree_filepath = pathmap.tree_source_path(tree_filename)
     ps = paup.PaupService()
     paup_sd = ps.get_split_distribution_from_files(
         tree_filepaths=[tree_filepath],
         is_rooted=test_as_rooted,
         use_tree_weights=not test_ignore_tree_weights,
         burnin=0,
         taxa_definition_filepath=tree_filepath)
     taxon_namespace = paup_sd.taxon_namespace
     dp_sd = dendropy.SplitDistribution(taxon_namespace=taxon_namespace)
     dp_sd.ignore_edge_lengths = True
     dp_sd.ignore_node_ages = True
     dp_sd.ignore_tree_weights = dp_ignore_tree_weights
     # The namespace is frozen before the trees are read into it.
     taxon_namespace.is_mutable = False
     trees = dendropy.TreeList.get_from_path(
         tree_filepath,
         "nexus",
         rooting=parser_rooting_interpretation,
         taxon_namespace=taxon_namespace)
     for tree in trees:
         self.assertIs(tree.taxon_namespace, taxon_namespace)
         self.assertIs(tree.taxon_namespace, dp_sd.taxon_namespace)
         dp_sd.count_splits_on_tree(tree, is_bipartitions_updated=False)
     self.assertEqual(dp_sd.total_trees_counted,
                      paup_sd.total_trees_counted)
     # Computed once, here; an earlier assignment made before the trees
     # were read was overwritten without ever being used.
     taxa_mask = taxon_namespace.all_taxa_bitmask()
     for split in dp_sd.split_counts:
         if not dendropy.Bipartition.is_trivial_bitmask(
                 split, taxa_mask):
             self.assertIn(split, paup_sd.split_counts,
                           "split not found")
             self.assertEqual(dp_sd.split_counts[split],
                              paup_sd.split_counts[split],
                              "incorrect split frequency")
             # Matched splits are removed so leftovers can be detected.
             del paup_sd.split_counts[split]
     # Iterate over a copy of the keys because entries are deleted.
     remaining_splits = list(paup_sd.split_counts.keys())
     for split in remaining_splits:
         if dendropy.Bipartition.is_trivial_bitmask(split, taxa_mask):
             del paup_sd.split_counts[split]
     self.assertEqual(len(paup_sd.split_counts), 0)
 def testMidpointRooting(self):
     """Midpoint-reroot each test tree and compare it with its reference.

     Test and reference trees are read into one shared namespace with
     ``rooting="force-rooted"``. After ``reroot_at_midpoint`` each test
     tree must have a symmetric difference of zero from the reference at
     the same index, and every edge not leading to the seed node must
     match the reference edge length to three decimal places.
     """
     taxa = dendropy.TaxonNamespace()
     reader_options = {"taxon_namespace": taxa, "rooting": "force-rooted"}
     test_trees = dendropy.TreeList.get_from_path(
         pathmap.tree_source_path(
             'pythonidae.random.bd0301.randomly-rooted.tre'),
         "nexus",
         **reader_options)
     expected_trees = dendropy.TreeList.get_from_path(
         pathmap.tree_source_path(
             'pythonidae.random.bd0301.midpoint-rooted.tre'),
         "nexus",
         **reader_options)
     for idx, test_tree in enumerate(test_trees):
         expected_tree = expected_trees[idx]
         test_tree.reroot_at_midpoint(update_bipartitions=True)
         distance = treecompare.symmetric_difference(test_tree, expected_tree)
         self.assertEqual(distance, 0)
         for bipartition in test_tree.bipartition_encoding:
             test_edge = test_tree.bipartition_edge_map[bipartition]
             # The edge whose head is the seed node is not compared.
             if test_edge.head_node is not test_tree.seed_node:
                 expected_edge = expected_tree.bipartition_edge_map[bipartition]
                 self.assertAlmostEqual(
                     test_edge.length, expected_edge.length, 3)
 def test_shared_taxon_namespace(self):
     """Read tree files repeatedly into one namespace and track its size.

     Each file is read three times into ``common_taxon_namespace``. After
     every read the namespace size must equal the value listed for that
     file, and before moving to the next file it must still equal the
     previous file's value.
     """
     # (file name, expected namespace size after reading this file);
     # the trailing comments give the taxon count noted for each file.
     tree_filenames = [
             ("pythonidae.reference-trees.newick", 33), # ntax = 33
             ("pythonidae.reference-trees.newick", 33), # ntax = 33
             ("bird_orders.newick", 56), # ntax = 23
             ("pythonidae.reference-trees.taxon-numbers-only.newick", 89), # ntax = 33
             ("pythonidae.reference-trees.newick", 89), # ntax = 33
             ("bird_orders.newick", 89), # ntax = 23
     ]
     common_taxon_namespace = dendropy.TaxonNamespace()
     prev_expected_ntax = 0
     for tree_filename, expected_ntax in tree_filenames:
         self.assertEqual(len(common_taxon_namespace), prev_expected_ntax)
         # Resolved once per file and reused for all three reads.
         tree_filepath = pathmap.tree_source_path(tree_filename)
         for _ in range(3):
             # Only the effect on the shared namespace matters here, so
             # the returned tree list is not kept.
             dendropy.TreeList.get_from_path(
                     tree_filepath,
                     "newick",
                     taxon_namespace=common_taxon_namespace)
             self.assertEqual(len(common_taxon_namespace), expected_ntax)
         prev_expected_ntax = expected_ntax
 def testAttachTaxonNamespaceOnGet(self):
     """A namespace given to ``get_from_path`` is attached until detached.

     The data set is created with an explicit ``taxon_namespace``; the
     test checks that this object is the attached namespace and the only
     one present, and that two further tree reads leave it that way.
     After ``detach_taxon_namespace()`` a FASTA read is expected to add a
     second namespace.
     """
     namespace = dendropy.TaxonNamespace()
     dataset = dendropy.DataSet.get_from_path(
         pathmap.mixed_source_path('reference_single_taxonset_dataset.nex'),
         "nexus",
         taxon_namespace=namespace)
     self.assertEqual(len(dataset.taxon_namespaces), 1)
     self.assertIsNotNone(dataset.attached_taxon_namespace)
     self.assertIs(dataset.taxon_namespaces[0],
                   dataset.attached_taxon_namespace)
     self.assertIs(dataset.attached_taxon_namespace, namespace)
     self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

     # Tree reads performed while the namespace is still attached.
     attached_reads = (
         ('pythonidae.mle.nex', "nexus"),
         ('pythonidae.reference-trees.newick', "newick"),
     )
     for filename, schema in attached_reads:
         dataset.read(path=pathmap.tree_source_path(filename),
                      schema=schema)
         self.assertEqual(len(dataset.taxon_namespaces), 1)
         self.assertEqual(len(dataset.taxon_namespaces[0]), 33)

     dataset.detach_taxon_namespace()
     fasta_path = pathmap.char_source_path('caenophidia_mos.chars.fasta')
     dataset.read_from_path(fasta_path,
                            schema="fasta",
                            data_type="protein")
     self.assertEqual(len(dataset.taxon_namespaces), 2)
     self.assertEqual(len(dataset.taxon_namespaces[0]), 33)
     self.assertEqual(len(dataset.taxon_namespaces[1]), 114)
 def testMultiTaxonNamespace(self):
     """Each read into an unattached data set should add one namespace.

     Four sources are read into an empty ``DataSet``. After the read at
     position ``i`` the data set is expected to hold ``i + 1`` namespaces,
     and the namespace at index ``i`` is expected to have the listed size.
     """
     dataset = dendropy.DataSet()
     # Each step: (path resolver, file name, read() keywords, expected size
     # of the namespace found at this step's index).
     steps = (
         (pathmap.mixed_source_path,
          'reference_single_taxonset_dataset.nex',
          {"schema": "nexus"}, 33),
         (pathmap.tree_source_path,
          'pythonidae.mle.nex',
          {"schema": "nexus"}, 33),
         (pathmap.tree_source_path,
          'pythonidae.reference-trees.newick',
          {"schema": "newick"}, 33),
         (pathmap.char_source_path,
          'caenophidia_mos.chars.fasta',
          {"schema": "fasta", "data_type": "protein"}, 114),
     )
     for position, step in enumerate(steps):
         resolve_path, filename, read_kwargs, expected_size = step
         dataset.read(path=resolve_path(filename), **read_kwargs)
         self.assertEqual(len(dataset.taxon_namespaces), position + 1)
         self.assertEqual(len(dataset.taxon_namespaces[position]),
                          expected_size)
 def check(self, title, src_prefix):
     """Assert each DendroPy-pruned tree matches its PAUP-pruned reference.

     Reads ``<src_prefix>.dendropy-pruned.nex`` and
     ``<src_prefix>.paup-pruned.nex`` (both resolved through
     ``pathmap.tree_source_path``). The second file is read with the
     first taxon namespace of the first data set. Trees are paired by
     tree-list position and by position within the list, and each pair
     must have a symmetric difference of zero.

     Args:
         title: Label placed at the start of every debug log line.
         src_prefix: File-name prefix shared by the two input files.
     """
     tns = dendropy.TaxonNamespace()
     input_ds = dendropy.DataSet.get_from_path(
             src=pathmap.tree_source_path(src_prefix + ".dendropy-pruned.nex"),
             schema='nexus',
             attached_taxon_namespace=tns)
     input_taxa = input_ds.taxon_namespaces[0]
     output_ds = dendropy.DataSet.get_from_path(
             src=pathmap.tree_source_path(src_prefix + ".paup-pruned.nex"),
             schema='nexus',
             taxon_namespace=input_taxa)
     num_sets = len(input_ds.tree_lists)
     for set_idx, src_trees in enumerate(input_ds.tree_lists):
         ref_trees = output_ds.tree_lists[set_idx]
         num_trees = len(src_trees)
         for tree_idx, src_tree in enumerate(src_trees):
             _LOG.debug("%s Set %d/%d, Tree %d/%d" % (title, set_idx+1, num_sets, tree_idx+1, num_trees))
             # Indexing (rather than zip) keeps the IndexError if the
             # reference list is shorter than the source list.
             ref_tree = ref_trees[tree_idx]
             self.assertEqual(treecompare.symmetric_difference(src_tree, ref_tree), 0)
示例#27
0
 def test_shared_taxon_namespace(self):
     """Read tree files repeatedly into one namespace and track its size.

     Each file is read three times into ``common_taxon_namespace``. After
     every read the namespace size must equal the value listed for that
     file, and before moving to the next file it must still equal the
     previous file's value.
     """
     # (file name, expected namespace size after reading this file);
     # the trailing comments give the taxon count noted for each file.
     tree_filenames = [
         ("pythonidae.reference-trees.newick", 33),  # ntax = 33
         ("pythonidae.reference-trees.newick", 33),  # ntax = 33
         ("bird_orders.newick", 56),  # ntax = 23
         ("pythonidae.reference-trees.taxon-numbers-only.newick",
          89),  # ntax = 33
         ("pythonidae.reference-trees.newick", 89),  # ntax = 33
         ("bird_orders.newick", 89),  # ntax = 23
     ]
     common_taxon_namespace = dendropy.TaxonNamespace()
     prev_expected_ntax = 0
     for tree_filename, expected_ntax in tree_filenames:
         self.assertEqual(len(common_taxon_namespace), prev_expected_ntax)
         # Resolved once per file and reused for all three reads.
         tree_filepath = pathmap.tree_source_path(tree_filename)
         for _ in range(3):
             # Only the effect on the shared namespace matters here, so
             # the returned tree list is not kept.
             dendropy.TreeList.get_from_path(
                 tree_filepath,
                 "newick",
                 taxon_namespace=common_taxon_namespace)
             self.assertEqual(len(common_taxon_namespace), expected_ntax)
         prev_expected_ntax = expected_ntax
 def check_split_counting(self,
         tree_filename,
         test_as_rooted,
         parser_rooting_interpretation,
         test_ignore_tree_weights=False,
         dp_ignore_tree_weights=False,
         ):
     """
     Compare DendroPy's split counts for a tree file against PAUP's.

     The split distribution reported by ``paup.PaupService`` for
     ``tree_filename`` is the reference. The same file is then parsed by
     DendroPy (with ``rooting=parser_rooting_interpretation``) into the
     reference's taxon namespace. Every non-trivial split DendroPy counts
     must be present in the reference with an equal count, and any
     reference split left unmatched must be trivial.
     """
     source_path = pathmap.tree_source_path(tree_filename)
     reference_sd = paup.PaupService().get_split_distribution_from_files(
             tree_filepaths=[source_path],
             is_rooted=test_as_rooted,
             use_tree_weights=not test_ignore_tree_weights,
             burnin=0,
             taxa_definition_filepath=source_path
             )
     namespace = reference_sd.taxon_namespace

     observed_sd = dendropy.SplitDistribution(taxon_namespace=namespace)
     observed_sd.ignore_edge_lengths = True
     observed_sd.ignore_node_ages = True
     observed_sd.ignore_tree_weights = dp_ignore_tree_weights

     all_taxa = namespace.all_taxa_bitmask()
     # NOTE(review): presumably freezes the namespace so that parsing below
     # cannot introduce taxa unknown to the reference -- confirm.
     namespace.is_mutable = False
     parsed_trees = dendropy.TreeList.get_from_path(source_path,
             "nexus",
             rooting=parser_rooting_interpretation,
             taxon_namespace=namespace)
     for parsed_tree in parsed_trees:
         self.assertIs(parsed_tree.taxon_namespace, namespace)
         self.assertIs(parsed_tree.taxon_namespace, observed_sd.taxon_namespace)
         observed_sd.count_splits_on_tree(
                 parsed_tree,
                 is_bipartitions_updated=False)
     self.assertEqual(observed_sd.total_trees_counted, reference_sd.total_trees_counted)

     all_taxa = namespace.all_taxa_bitmask()
     is_trivial = dendropy.Bipartition.is_trivial_bitmask
     reference_counts = reference_sd.split_counts
     for bitmask in observed_sd.split_counts:
         if is_trivial(bitmask, all_taxa):
             continue
         self.assertIn(bitmask, reference_counts, "split not found")
         self.assertEqual(observed_sd.split_counts[bitmask], reference_counts[bitmask], "incorrect split frequency")
         # Matched splits are removed so that only unmatched ones remain.
         del reference_counts[bitmask]

     # Snapshot the keys first: entries are deleted while walking them.
     for bitmask in list(reference_counts.keys()):
         if is_trivial(bitmask, all_taxa):
             del reference_counts[bitmask]
     self.assertEqual(len(reference_counts), 0)
 def test_compatibility(self):
     """
     Cross-check pairwise and tree-level bipartition compatibility.

     For every generated bipartition and every tree, compatibility between
     that bipartition and each bipartition of the tree must be symmetric,
     and ``tree.is_compatible_with_bipartition`` must agree with the
     pairwise results: compatible with the tree means no pairwise
     conflicts, incompatible means at least one.
     """
     regimes = (
         ("dendropy-test-trees-n12-x2.nexus", "all"),
         ("dendropy-test-trees-n33-unrooted-x100a.nexus", "from-trees"),
         ("dendropy-test-trees-n10-rooted-treeshapes.nexus", "all"),
     )
     false_positive_msg = "Tree {} of '{}': bipartition {} (leafset = {}, index = {}) found compatible with tree, but is incompatible with following bipartitions on tree: {}"
     false_negative_msg = "Tree {} of '{}': bipartition {} (leafset = {}, index = {}) found incompatible with tree, but is compatible with all bipartitions on tree: {}"
     for trees_filename, bipartition_generation_mode in regimes:
         trees = dendropy.TreeList.get_from_path(
                 pathmap.tree_source_path(trees_filename),
                 "nexus",)
         candidates = generate_bipartitions(trees, bipartition_generation_mode, is_rooted=trees[0].is_rooted)
         for candidate_idx, candidate in enumerate(candidates):
             for tree_idx, tree in enumerate(trees):

                 def describe(template, members):
                     # Fill a failure message for the current tree/candidate pair.
                     return template.format(
                             tree_idx,
                             trees_filename,
                             candidate.split_as_bitstring(),
                             candidate.leafset_as_bitstring(),
                             candidate_idx,
                             [b.split_as_bitstring() for b in members],
                             )

                 agreeing = set()
                 conflicting = set()
                 tree_bipartitions = tree.encode_bipartitions()
                 for tree_bipartition in tree_bipartitions:
                     # The relation has to hold in both directions.
                     if tree_bipartition.is_compatible_with(candidate):
                         self.assertTrue(candidate.is_compatible_with(tree_bipartition))
                         agreeing.add(tree_bipartition)
                     else:
                         self.assertFalse(candidate.is_compatible_with(tree_bipartition))
                         conflicting.add(tree_bipartition)
                 tree_verdict = tree.is_compatible_with_bipartition(candidate)
                 self.assertEqual(len(agreeing) + len(conflicting), len(tree_bipartitions))
                 if not tree_verdict:
                     self.assertTrue(len(conflicting) > 0,
                             describe(false_negative_msg, agreeing))
                     continue
                 self.assertEqual(len(conflicting), 0,
                         describe(false_positive_msg, conflicting))
                 self.assertEqual(len(agreeing), len(tree_bipartitions))
 def check(self,
         title,
         src_prefix,
         to_retain=False):
     """
     Prune (or retain) taxa on source trees and compare with PAUP's result.

     Parameters
     ----------
     title : str
         Label prefixed to the debug log line emitted for every tree.
     src_prefix : str
         Filename stem. ``.pre-pruned.nex`` and ``.paup-pruned.nex`` are
         appended for the input and reference trees, and either
         ``.retained_taxa.txt`` or ``.pruned_taxa.txt`` for the taxon
         index file.
     to_retain : bool
         If true, the listed taxa are kept via ``retain_taxa``; otherwise
         they are removed via ``prune_taxa``.

     The taxon index file is read as one row of whitespace-separated
     integer indexes per tree list; each index selects a taxon from that
     tree list's namespace. After the operation every source tree must
     have a symmetric difference of 0 to the reference tree at the same
     position.
     """
     input_ds = dendropy.DataSet.get_from_path(
             src=pathmap.tree_source_path(src_prefix + ".pre-pruned.nex"),
             schema='nexus')
     tns1 = dendropy.TaxonNamespace()
     input_ds.attach_taxon_namespace(tns1)
     input_taxa = input_ds.taxon_namespaces[0]
     output_ds = dendropy.DataSet.get_from_path(
             src=pathmap.tree_source_path(src_prefix + ".paup-pruned.nex"),
             schema='nexus',
             taxon_namespace=input_taxa)
     tns2 = dendropy.TaxonNamespace()
     output_ds.attach_taxon_namespace(tns2)
     if to_retain:
         taxa_suffix = ".retained_taxa.txt"
     else:
         taxa_suffix = ".pruned_taxa.txt"
     # Read the index file up front inside a context manager so the handle
     # is released even if parsing or a later assertion fails.
     with open(pathmap.tree_source_path(src_prefix + taxa_suffix), "r") as taxf:
         taxon_idxs_list = [[int(i) for i in row.split()] for row in taxf]
     for set_idx, src_trees in enumerate(input_ds.tree_lists):
         ref_trees = output_ds.tree_lists[set_idx]
         taxon_idxs = taxon_idxs_list[set_idx]
         sub_taxa = [src_trees.taxon_namespace[i] for i in taxon_idxs]
         for tree_idx, src_tree in enumerate(src_trees):
             _LOG.debug("%s Set %d/%d, Tree %d/%d" % (title, set_idx+1, len(input_ds.tree_lists), tree_idx+1, len(src_trees)))
             ref_tree = ref_trees[tree_idx]
             if to_retain:
                 src_tree.retain_taxa(sub_taxa)
             else:
                 src_tree.prune_taxa(sub_taxa)
             self.assertEqual(treecompare.symmetric_difference(src_tree, ref_tree), 0)
 def test_multiple_trees2(self):
     """
     Read ``multitreeblocks2.nex`` as one tree list and check its taxa.

     Four trees are expected, sharing a single namespace whose labels are
     ``x2.1`` .. ``x2.4``; on every tree the number of nodes carrying a
     taxon must equal the namespace size.
     """
     expected_labels = ["x2.1","x2.2","x2.3","x2.4"]
     tree_list = dendropy.TreeList.get_from_path(
             pathmap.tree_source_path("multitreeblocks2.nex"),
             "nexus")
     self.assertEqual(len(tree_list), 4)
     self.assertEqual([taxon.label for taxon in tree_list.taxon_namespace], expected_labels)
     for tree in tree_list:
         self.assertIs(tree.taxon_namespace, tree_list.taxon_namespace)
         taxa_on_nodes = 0
         for node in tree:
             if node.taxon is None:
                 continue
             taxa_on_nodes += 1
             self.assertIn(node.taxon, tree.taxon_namespace)
         self.assertEqual(taxa_on_nodes, len(tree.taxon_namespace))
 def test_unsupported_keyword_arguments(self):
     """
     Unknown reader keywords must raise ``TypeError``.

     The same unsupported keyword arguments are passed to
     ``Tree.get_from_path``, ``Tree.get_from_stream`` and
     ``Tree.get_from_string``; each call is expected to fail.
     """
     newick_path = pathmap.tree_source_path('dendropy-test-trees-n12-x2.newick')
     newick_text = self.get_newick_string()
     bad_kwargs = dict(
             suppress_internal_taxa=True,  # should be suppress_internal_node_taxa
             gobbledegook=False,
     )
     with open(newick_path, "r") as newick_stream:
         reader_and_source = (
                 (dendropy.Tree.get_from_path, newick_path),
                 (dendropy.Tree.get_from_stream, newick_stream),
                 (dendropy.Tree.get_from_string, newick_text),
         )
         for reader, source in reader_and_source:
             with self.assertRaises(TypeError):
                 reader(source, "newick", **bad_kwargs)
 def test_unsupported_keyword_arguments(self):
     """
     Each ``Tree.get_from_*`` factory must reject unknown keyword arguments.

     A path, an open stream and a string source are each passed to the
     matching factory together with two unsupported keywords, and a
     ``TypeError`` is required every time.
     """
     path_source = pathmap.tree_source_path(
         'dendropy-test-trees-n12-x2.newick')
     string_source = self.get_newick_string()
     unsupported = {
         # should be suppress_internal_node_taxa
         "suppress_internal_taxa": True,
         "gobbledegook": False,
     }
     with open(path_source, "r") as stream_source:
         factories = (
             dendropy.Tree.get_from_path,
             dendropy.Tree.get_from_stream,
             dendropy.Tree.get_from_string,
         )
         sources = (path_source, stream_source, string_source)
         for factory, source in zip(factories, sources):
             with self.assertRaises(TypeError):
                 factory(source, "newick", **unsupported)
 def testTrees(self):
     """
     Round-trip trees through their bipartition encoding.

     Each reference tree is read with the given rooting directive, rebuilt
     with ``Tree.from_bipartition_encoding`` from its own encoding, and the
     rebuilt tree must have a symmetric difference of 0 to the original.
     """
     # (filename, rooting directive for the parser, flag not used by this test)
     cases = (
             ("dendropy-test-trees-n33-unrooted-x100a.nexus", "force-unrooted", False),
             ("dendropy-test-trees-multifurcating-unrooted.nexus", "force-unrooted", False),
             ("pythonidae.beast.summary.tre", "force-rooted", True),
             ("primates.beast.mcct.medianh.tre", "force-rooted", True),
             )
     for filename, rooting_directive, _unused_flag in cases:
         original = dendropy.Tree.get_from_path(
                 pathmap.tree_source_path(filename),
                 "nexus",
                 rooting=rooting_directive)
         rebuilt = dendropy.Tree.from_bipartition_encoding(
                 original.encode_bipartitions(),
                 taxon_namespace=original.taxon_namespace,
                 is_rooted=original.is_rooted)
         _LOG.debug("--\n       File: {} ({})".format(filename, original.is_rooted))
         _LOG.debug("     Original: {}".format(original.as_string("newick")))
         _LOG.debug("Reconstructed: {}".format(rebuilt.as_string("newick")))
         distance = treecompare.symmetric_difference(original, rebuilt)
         self.assertEqual(distance, 0)
示例#35
0
 def verify_pscores(self, char_fname, trees_fname, gaps_as_missing, expected_scores):
     """
     Score every tree with ``fitch_down_pass`` and compare to expectations.

     The character matrix is read from ``char_fname`` and the trees from
     ``trees_fname`` into the same data set and taxon namespace.
     ``expected_scores`` must have one entry per tree, indexed by the
     tree's position in the first tree list.
     """
     ds = dendropy.DataSet.get_from_path(
             pathmap.char_source_path(char_fname),
             "nexus")
     ds.read_from_path(
             pathmap.tree_source_path(trees_fname),
             schema='NEXUS',
             taxon_namespace=ds.taxon_namespaces[0])
     state_sets = ds.char_matrices[0].taxon_state_sets_map(gaps_as_missing=gaps_as_missing)
     candidate_trees = ds.tree_lists[0]
     self.assertEqual(len(expected_scores), len(candidate_trees))
     for position, candidate in enumerate(candidate_trees):
         observed = fitch_down_pass(
                 candidate.postorder_node_iter(),
                 taxon_state_sets_map=state_sets)
         self.assertEqual(expected_scores[position], observed)
 def test_with_translate(self):
     """
     Read curated NEXUS files and verify each parsed tree.

     Every source is expected to yield exactly one tree list holding three
     trees labelled ``1``, ``2`` and ``3``; each tree is then handed to
     ``verify_curated_tree`` with the internal-taxa setting used for
     parsing.
     """
     # (filename, value passed as ``suppress_internal_node_taxa``)
     cases = (
         ("curated-with-translate-block-and-internal-taxa.nex", False),
         ("curated-with-translate-block-and-untranslated-internal-taxa.nex", True),
         )
     expected_tree_labels = ("1", "2", "3")
     for filename, suppress_internal in cases:
         ds = dendropy.DataSet.get_from_path(
                 pathmap.tree_source_path(filename),
                 "nexus",
                 suppress_internal_node_taxa=suppress_internal)
         self.assertEqual(len(ds.tree_lists), 1)
         parsed_trees = ds.tree_lists[0]
         self.assertEqual(len(parsed_trees), len(expected_tree_labels))
         for parsed_tree, expected_label in zip(parsed_trees, expected_tree_labels):
             self.assertEqual(parsed_tree.label, expected_label)
             self.verify_curated_tree(
                     tree=parsed_tree,
                     suppress_internal_node_taxa=suppress_internal,
                     suppress_leaf_node_taxa=False,
                     suppress_edge_lengths=False,
                     node_taxon_label_map=None)
示例#37
0
 def verify_pscores(self, char_fname, trees_fname, gaps_as_missing,
                    expected_scores):
     """
     Check Fitch down-pass scores for all trees in ``trees_fname``.

     Characters (``char_fname``) and trees are loaded into one data set
     that shares a taxon namespace. The score computed by
     ``fitch_down_pass`` for the tree at position ``n`` must equal
     ``expected_scores[n]``.
     """
     combined = dendropy.DataSet.get_from_path(
         pathmap.char_source_path(char_fname), "nexus")
     combined.read_from_path(pathmap.tree_source_path(trees_fname),
                             schema='NEXUS',
                             taxon_namespace=combined.taxon_namespaces[0])
     matrix = combined.char_matrices[0]
     state_sets_by_taxon = matrix.taxon_state_sets_map(
         gaps_as_missing=gaps_as_missing)
     scored_trees = combined.tree_lists[0]
     self.assertEqual(len(expected_scores), len(scored_trees))
     for tree_number, scored_tree in enumerate(scored_trees):
         postorder_nodes = scored_tree.postorder_node_iter()
         computed = fitch_down_pass(
             postorder_nodes,
             taxon_state_sets_map=state_sets_by_taxon)
         self.assertEqual(expected_scores[tree_number], computed)
    def verify_pscores(self,
            trees_fname,
            chars_fname,
            matrix_type,
            gaps_as_missing,
            expected_scores,
            expected_per_site_scores):
        """
        Check ``treescore.parsimony_score`` totals and per-character scores.

        The matrix (read via ``matrix_type.get``) and the trees share one
        taxon namespace. For the tree at each position the total must equal
        ``expected_scores``, the per-character list must match
        ``expected_per_site_scores`` element by element and sum to the
        total, and a second call without the per-character list must give
        the same total.
        """
        shared_namespace = dendropy.TaxonNamespace()
        matrix = matrix_type.get(
                path=pathmap.char_source_path(chars_fname),
                schema="nexus",
                taxon_namespace=shared_namespace)
        tree_list = dendropy.TreeList.get(
                path=pathmap.tree_source_path(trees_fname),
                schema="nexus",
                taxon_namespace=shared_namespace)
        self.assertEqual(len(expected_scores), len(tree_list))
        for position, tree in enumerate(tree_list):
            # Filled in place by parsimony_score.
            per_site = []
            total = treescore.parsimony_score(
                    tree,
                    matrix,
                    gaps_as_missing=gaps_as_missing,
                    score_by_character_list=per_site)
            self.assertEqual(total, expected_scores[position])
            expected_sites = expected_per_site_scores[position]
            self.assertEqual(len(per_site), len(expected_sites))
            for observed_site, expected_site in zip(per_site, expected_sites):
                self.assertEqual(observed_site, expected_site)
            self.assertEqual(sum(per_site), total)

            # The per-character list is optional; the total must not change.
            total_without_list = treescore.parsimony_score(
                    tree,
                    matrix,
                    gaps_as_missing=gaps_as_missing)
            self.assertEqual(total_without_list, expected_scores[position])
 def test_multiple_trees1(self):
     """Reading ``multitreeblocks.nex`` as a single tree list must yield nine trees."""
     parsed_trees = dendropy.TreeList.get_from_path(
             pathmap.tree_source_path("multitreeblocks.nex"),
             "nexus")
     self.assertEqual(len(parsed_trees), 9)
示例#40
0
 def test_encoding(self):
     """
     Check bipartition bitmasks against ``self.reference``.

     ``self.reference`` is walked as nested lookups keyed, in order, by
     source filename, rooting, a description containing
     ``collapse_unrooted_basal_bifurcation=True|False``, a description
     containing ``suppress_unifurcations=True|False``, the tree index as a
     string, and finally the taxon label of an edge's head node. The leaf
     entry supplies ``leafset_bitmask`` and ``split_bitmask``, which are
     converted with ``int`` and compared with the edge's bipartition.

     Raises
     ------
     ValueError
         If a description key contains neither the ``=True`` nor the
         ``=False`` form of its flag.
     """

     def flag_from_description(flag_name, description):
         # Reference keys embed the option as "<name>=True" / "<name>=False".
         if "{}=True".format(flag_name) in description:
             return True
         if "{}=False".format(flag_name) in description:
             return False
         raise ValueError(description)

     for source_name in self.reference:
         # The path depends only on the source, so resolve it once here.
         source_path = pathmap.tree_source_path(source_name)
         by_rooting = self.reference[source_name]
         for rooting in by_rooting:
             by_collapse = by_rooting[rooting]
             for collapse_desc in by_collapse:
                 collapse_unrooted_basal_bifurcation = flag_from_description(
                     "collapse_unrooted_basal_bifurcation", collapse_desc)
                 by_suppress = by_collapse[collapse_desc]
                 for suppress_desc in by_suppress:
                     suppress_unifurcations = flag_from_description(
                         "suppress_unifurcations", suppress_desc)
                     trees_bipartitions_ref = by_suppress[suppress_desc]
                     # Trees are re-read for every option combination, as
                     # encoding is applied to the parsed trees themselves.
                     trees = dendropy.TreeList.get_from_path(
                         source_path,
                         "nexus",
                         rooting=rooting,
                         suppress_leaf_node_taxa=False,
                         suppress_internal_node_taxa=False,
                     )
                     for tree_idx, tree in enumerate(trees):
                         tree_bipartitions_ref = trees_bipartitions_ref[
                             str(tree_idx)]
                         # Return value is not needed; the loop below reads
                         # ``edge.bipartition``, presumably set by this call.
                         tree.encode_bipartitions(
                             suppress_unifurcations=suppress_unifurcations,
                             collapse_unrooted_basal_bifurcation=
                             collapse_unrooted_basal_bifurcation,
                         )
                         for edge in tree.postorder_edge_iter():
                             taxon = edge.head_node.taxon
                             # unittest assertions instead of bare ``assert``
                             # so the checks survive ``python -O``.
                             self.assertIsNotNone(taxon)
                             self.assertIsNotNone(taxon.label)
                             edge_ref = tree_bipartitions_ref[taxon.label]
                             bipartition = edge.bipartition
                             self.assertEqual(
                                 bipartition.leafset_bitmask,
                                 int(edge_ref["leafset_bitmask"]))
                             self.assertEqual(
                                 bipartition.split_bitmask,
                                 int(edge_ref["split_bitmask"]))
示例#41
0
    def check_splits_distribution(self,
            tree_filename,
            splits_filename,
            use_tree_weights,
            is_rooted,
            expected_num_trees,
            ):
        """
        Compare a DendroPy ``SplitDistribution`` with a PAUP splits reference.

        Trees from ``tree_filename`` are counted into a split distribution,
        whose counts and frequencies must agree (to 2 decimal places) with
        the reference loaded from ``splits_filename``. Any split observed
        by DendroPy but absent from the reference must be trivial.
        ``expected_num_trees`` is accepted but not used by this check.
        """
        # Rooted trees are keyed by the leafset (unnormalized split) bitmask
        # in column 1; unrooted trees -- and ``is_rooted=None``, which
        # defaults to unrooted -- by the normalized split bitmask in column 2.
        key_column_index = 1 if is_rooted else 2
        reference = paupsplitsreference.get_splits_reference(
                splits_filename=splits_filename,
                key_column_index=key_column_index,
                )
        tree_list = dendropy.TreeList.get_from_path(
                pathmap.tree_source_path(tree_filename),
                "nexus",
                store_tree_weights=use_tree_weights)
        distribution = dendropy.SplitDistribution(
                taxon_namespace=tree_list.taxon_namespace,
                use_tree_weights=use_tree_weights)
        for tree in tree_list:
            distribution.count_splits_on_tree(tree)

        # Every tree has been counted, but no frequencies computed yet.
        self.assertEqual(distribution.total_trees_counted, len(tree_list))
        self.assertEqual(distribution._trees_counted_for_freqs, 0)
        self.assertFalse(distribution.is_mixed_rootings_counted())
        if not is_rooted:
            self.assertFalse(distribution.is_all_counted_trees_rooted())
            self.assertTrue(distribution.is_all_counted_trees_treated_as_unrooted() or distribution.is_all_counted_trees_strictly_unrooted())
        else:
            self.assertTrue(distribution.is_all_counted_trees_rooted())

        # The distribution also counts trivial splits, so sizes are not
        # compared directly; reference splits are checked one by one.
        expected_splits = list(reference.keys())
        unmatched = set(distribution.split_counts.keys())
        matched = []
        all_taxa_bitmask = distribution.taxon_namespace.all_taxa_bitmask()
        for split in expected_splits:
            failure_note = "{} (using '{}'): {}".format(tree_filename, splits_filename, split)
            expected = reference[split]
            self.assertAlmostEqual(distribution.split_counts[split], expected["count"], 2,
                    failure_note)
            self.assertAlmostEqual(distribution[split], expected["frequency"], 2,
                    failure_note)
            self.assertAlmostEqual(distribution.split_frequencies[split], expected["frequency"], 2,
                    failure_note)
            unmatched.discard(split)
            matched.append(split)
        self.assertEqual(len(matched), len(expected_splits))

        # Splits missing from the PAUP file must be the trivial ones.
        for split in unmatched:
            self.assertTrue(dendropy.Bipartition.is_trivial_bitmask(split, all_taxa_bitmask))
示例#42
0
 def test_compatibility(self):
     """
     Verify that tree-level compatibility matches pairwise compatibility.

     Each query bipartition is tested against every bipartition of every
     tree. The pairwise relation must be symmetric, and the result of
     ``tree.is_compatible_with_bipartition`` must be consistent with it:
     no pairwise conflicts when the tree accepts the query, at least one
     when it does not.
     """
     regimes = (
         ("dendropy-test-trees-n12-x2.nexus", "all"),
         ("dendropy-test-trees-n33-unrooted-x100a.nexus", "from-trees"),
         ("dendropy-test-trees-n10-rooted-treeshapes.nexus", "all"),
     )
     for trees_filename, generation_mode in regimes:
         tree_list = dendropy.TreeList.get_from_path(
             pathmap.tree_source_path(trees_filename),
             "nexus",
         )
         queries = generate_bipartitions(tree_list,
                                         generation_mode,
                                         is_rooted=tree_list[0].is_rooted)
         for query_idx, query in enumerate(queries):
             for tree_idx, tree in enumerate(tree_list):
                 compatible, incompatible = set(), set()
                 encoding = tree.encode_bipartitions()
                 for member in encoding:
                     if member.is_compatible_with(query):
                         self.assertTrue(query.is_compatible_with(member))
                         compatible.add(member)
                     else:
                         self.assertFalse(query.is_compatible_with(member))
                         incompatible.add(member)
                 tree_accepts_query = tree.is_compatible_with_bipartition(
                     query)
                 self.assertEqual(
                     len(compatible) + len(incompatible), len(encoding))
                 # Leading message fields shared by both failure messages.
                 context = (
                     tree_idx,
                     trees_filename,
                     query.split_as_bitstring(),
                     query.leafset_as_bitstring(),
                     query_idx,
                 )
                 if tree_accepts_query:
                     conflicts = [
                         b.split_as_bitstring() for b in incompatible
                     ]
                     self.assertEqual(
                         len(incompatible), 0,
                         "Tree {} of '{}': bipartition {} (leafset = {}, index = {}) found compatible with tree, but is incompatible with following bipartitions on tree: {}"
                         .format(*(context + (conflicts,))))
                     self.assertEqual(len(compatible), len(encoding))
                 else:
                     agreements = [
                         b.split_as_bitstring() for b in compatible
                     ]
                     self.assertTrue(
                         len(incompatible) > 0,
                         "Tree {} of '{}': bipartition {} (leafset = {}, index = {}) found incompatible with tree, but is compatible with all bipartitions on tree: {}"
                         .format(*(context + (agreements,))))
示例#43
0
 def get_regime(self,
                is_rooted,
                is_multifurcating,
                is_weighted,
                tree_offset=0,
                taxon_namespace=None,
                num_trees=500):
     """
     Build a randomly sampled set of test trees with known frequencies.

     A source tree file is chosen from ``is_multifurcating`` and
     ``is_rooted`` and read into ``taxon_namespace`` (a new namespace is
     created when none is given). ``num_trees`` trees are then drawn from
     the source trees with ``random.choice``; each drawn tree gets random
     edge lengths and is serialized as a newick string. Draws made while
     fewer than ``tree_offset`` strings have been collected are still
     serialized but do not contribute to the counts or weights.

     When ``is_weighted`` is true each draw is assigned a weight picked at
     random from a fixed list; otherwise the tree's weight is set to
     ``None`` and the draw counts as 1.0.

     Returns a 3-tuple:

     - the source tree list, with ``key``, ``actual_count``,
       ``total_weighted_count`` and ``frequency`` set on every tree and
       ``total_weight`` set on the list itself;
     - a dict mapping each tree's ``key`` (frozenset of its bipartition
       encoding) to its frequency;
     - the sampled newick strings joined by newlines.

     NOTE(review): if ``tree_offset >= num_trees`` no draw is ever
     counted, ``total_weight`` stays 0.0, and the frequency computation
     raises ``ZeroDivisionError`` for a non-empty source list.
     """
     if taxon_namespace is None:
         taxon_namespace = dendropy.TaxonNamespace()
     if is_multifurcating:
         if is_rooted:
             tree_filename = "dendropy-test-trees-multifurcating-rooted.nexus"
         else:
             tree_filename = "dendropy-test-trees-multifurcating-unrooted.nexus"
     else:
         if is_rooted:
             tree_filename = "dendropy-test-trees-n10-rooted-treeshapes.nexus"
         else:
             tree_filename = "dendropy-test-trees-n14-unrooted-treeshapes.nexus"
     source_trees = dendropy.TreeList.get_from_path(
         pathmap.tree_source_path(tree_filename),
         "nexus",
         taxon_namespace=taxon_namespace)
     # Attach per-tree bookkeeping: an identity key derived from the
     # bipartition encoding, plus zeroed sampling tallies.
     for tree in source_trees:
         tree.encode_bipartitions()
         tree.key = frozenset(tree.bipartition_encoding)
         tree.total_weighted_count = 0.0
         tree.actual_count = 0
     # if is_weighted:
     #     weights = []
     #     for tree in source_trees:
     #         w = random.uniform(0.1, 10)
     #         tree.weight = w
     #         weights.append(w)
     # else:
     #     weights = [1.0 for i in len(source_trees)]
     test_tree_strings = []
     total_weight = 0.0
     # Sample with replacement until ``num_trees`` strings are collected.
     while len(test_tree_strings) < num_trees:
         tree = random.choice(source_trees)
         # Draws before ``tree_offset`` strings exist are not tallied.
         if len(test_tree_strings) >= tree_offset:
             tree.actual_count += 1
         if is_weighted:
             weight = random.choice([
                 0.25,
                 1.0,
                 2.8,
                 5.6,
                 11.0,
             ])
             # The weight is set on the shared source tree object, so it is
             # overwritten each time the same tree is drawn again.
             tree.weight = weight
             if len(test_tree_strings) >= tree_offset:
                 tree.total_weighted_count += weight
                 total_weight += weight
         else:
             tree.weight = None
             if len(test_tree_strings) >= tree_offset:
                 tree.total_weighted_count += 1.0
                 total_weight += 1.0
         # Randomize edge lengths on every draw before serializing.
         for nd in tree:
             nd.edge.length = random.uniform(0, 100)
         test_tree_strings.append(
             tree.as_string(
                 schema="newick",
                 store_tree_weights=is_weighted,
                 suppress_edge_lengths=False,
                 suppress_internal_node_labels=True,
                 suppress_internal_taxon_labels=True,
             ))
     test_trees_string = "\n".join(test_tree_strings)
     bipartition_encoding_freqs = {}
     source_trees.total_weight = total_weight
     # Convert tallies to frequencies relative to the total counted weight.
     for tree in source_trees:
         tree.frequency = float(tree.total_weighted_count) / total_weight
         bipartition_encoding_freqs[tree.key] = tree.frequency
     return source_trees, bipartition_encoding_freqs, test_trees_string
示例#44
0
 def test_group1(self):
     cetacean_taxon_labels = [
         "Bos taurus",
         "Balaena mysticetus",
         "Balaenoptera physalus",
         "Cephalorhynchus eutropia",
         "Delphinapterus leucas",
         "Delphinus delphis",
         "Eschrichtius robustus",
         "Globicephala melas",
         "Inia geoffrensis",
         "Kogia breviceps",
         "Kogia simus",
         "Lagenorhynchus albirostris",
         "Lagenorhynchus obscurus",
         "Lissodelphis peronii",
         "Megaptera novaeangliae",
         "Mesoplodon europaeus",
         "Mesoplodon peruvianus",
         "Phocoena phocoena",
         "Phocoena spinipinnis",
         "Physeter catodon",
         "Tursiops truncatus",
         "Ziphius cavirostris",
     ]
     issue_mth_taxon_labels = [
         "T{:02d}".format(i) for i in range(1, 60)
     ]
     sources = [
         ("cetaceans.mb.no-clock.mcmc.trees", 251, False,
          False),  # Trees explicitly unrooted
         ("cetaceans.mb.no-clock.mcmc.weighted-01.trees", 251, False,
          True),  # Weighted
         ("cetaceans.mb.no-clock.mcmc.weighted-02.trees", 251, False,
          True),  # Weighted
         ("cetaceans.mb.no-clock.mcmc.weighted-03.trees", 251, False,
          True),  # Weighted
         ("cetaceans.mb.strict-clock.mcmc.trees", 251, True,
          False),  # Trees explicitly rooted
         ("cetaceans.mb.strict-clock.mcmc.weighted-01.trees", 251, True,
          True),  # Weighted
         ("cetaceans.mb.strict-clock.mcmc.weighted-02.trees", 251, True,
          True),  # Weighted
         ("cetaceans.mb.strict-clock.mcmc.weighted-03.trees", 251, True,
          True),  # Weighted
         (
             "cetaceans.raxml.bootstraps.trees", 250, True, False
         ),  # No tree rooting statement; PAUP defaults to rooted, DendroPy defaults to unrooted
         (
             "cetaceans.raxml.bootstraps.weighted-01.trees", 250, True,
             False
         ),  # No tree rooting statement; PAUP defaults to rooted, DendroPy defaults to unrooted
         (
             "cetaceans.raxml.bootstraps.weighted-02.trees", 250, True,
             False
         ),  # No tree rooting statement; PAUP defaults to rooted, DendroPy defaults to unrooted
         (
             "cetaceans.raxml.bootstraps.weighted-03.trees", 250, True,
             False
         ),  # No tree rooting statement; PAUP defaults to rooted, DendroPy defaults to unrooted
         ("issue_mth_2009-02-03.rooted.nexus", 100, True,
          False),  # 100 trees (frequency column not reported by PAUP)
         ("issue_mth_2009-02-03.unrooted.nexus", 100, False,
          False),  # 100 trees (frequency column not reported by PAUP)
     ]
     splits_filename_template = "{stemname}.is-rooted-{is_rooted}.use-tree-weights-{use_weights}.burnin-{burnin}.splits.txt"
     for tree_filename, num_trees, treefile_is_rooted, treefile_is_weighted in sources:
         stemname = tree_filename
         if "cetacean" in tree_filename:
             expected_taxon_labels = cetacean_taxon_labels
             taxa_definition_filepath = pathmap.tree_source_path(
                 "cetaceans.taxa.nex")
         else:
             expected_taxon_labels = issue_mth_taxon_labels
             taxa_definition_filepath = pathmap.tree_source_path(
                 "issue_mth_2009-02-03.unrooted.nexus")
         for use_weights in (False, True, None):
             for paup_read_as_rooted in (None, True, False):
                 for paup_burnin in (0, 150):
                     if tree_filename.startswith(
                             "issue_mth") and paup_burnin > 0:
                         continue
                     if paup_read_as_rooted is None:
                         expected_is_rooted = treefile_is_rooted
                     elif paup_read_as_rooted:
                         expected_is_rooted = True
                     else:
                         expected_is_rooted = False
                     splits_filename = splits_filename_template.format(
                         stemname=stemname,
                         is_rooted=paup_read_as_rooted,
                         use_weights=use_weights,
                         burnin=paup_burnin)
                     self.check_splits_counting(
                         tree_filename=tree_filename,
                         taxa_definition_filepath=
                         taxa_definition_filepath,
                         splits_filename=splits_filename,
                         paup_as_rooted=paup_read_as_rooted,
                         paup_use_tree_weights=use_weights,
                         paup_burnin=paup_burnin,
                         expected_taxon_labels=expected_taxon_labels,
                         expected_is_rooted=expected_is_rooted,
                         expected_num_trees=num_trees - paup_burnin)
示例#45
0
        def check_splits_counting(
            self,
            tree_filename,
            taxa_definition_filepath,
            splits_filename,
            paup_as_rooted,
            paup_use_tree_weights,
            paup_burnin,
            expected_taxon_labels,
            expected_is_rooted,
            expected_num_trees,
        ):
            """
            Count splits in one tree file through ``paup.PaupService`` and
            compare the outcome with a stored splits reference.

            Parameters:
                tree_filename: name resolved with ``pathmap.tree_source_path``.
                taxa_definition_filepath: forwarded unchanged to
                    ``count_splits_from_files``.
                splits_filename: reference file name handed to
                    ``paupsplitsreference.get_splits_reference``.
                paup_as_rooted: forwarded as ``is_rooted``.
                paup_use_tree_weights: forwarded as ``use_tree_weights``.
                paup_burnin: forwarded as ``burnin``.
                expected_taxon_labels: labels the returned taxon namespace
                    must carry, compared pairwise in order.
                expected_is_rooted: value the reported ``is_rooted`` flag
                    must be (identity comparison).
                expected_num_trees: number of trees the service must report.

            Fails through the usual ``unittest`` assertions when any of the
            comparisons does not hold.
            """
            tree_filepath = pathmap.tree_source_path(tree_filename)
            paup_service = paup.PaupService()
            result = paup_service.count_splits_from_files(
                tree_filepaths=[tree_filepath],
                taxa_definition_filepath=taxa_definition_filepath,
                is_rooted=paup_as_rooted,
                use_tree_weights=paup_use_tree_weights,
                burnin=paup_burnin,
            )
            num_trees = result["num_trees"]
            bipartition_counts = result["bipartition_counts"]
            bipartition_freqs = result["bipartition_freqs"]
            taxon_namespace = result["taxon_namespace"]
            is_rooted = result["is_rooted"]

            # check taxon namespace
            self.assertEqual(len(taxon_namespace), len(expected_taxon_labels))
            for taxon, expected_label in zip(taxon_namespace,
                                             expected_taxon_labels):
                self.assertEqual(taxon.label, expected_label)

            # check general tree state
            self.assertEqual(num_trees, expected_num_trees)
            self.assertIs(is_rooted, expected_is_rooted)

            splits_ref = paupsplitsreference.get_splits_reference(
                splits_filename=splits_filename,
                key_column_index=0,
            )
            self.assertEqual(len(splits_ref), len(bipartition_counts))
            self.assertEqual(len(splits_ref), len(bipartition_freqs))

            # Rooted results are keyed by the unnormalized bitmask, unrooted
            # results by the normalized one.
            if is_rooted:
                bitmask_field = "unnormalized_split_bitmask"
            else:
                bitmask_field = "normalized_split_bitmask"
            splits_ref_bitmasks = {
                splits_ref[x][bitmask_field] for x in splits_ref
            }
            counts_keys = set(bipartition_counts.keys())
            freqs_keys = set(bipartition_freqs.keys())
            self.assertEqual(len(counts_keys), len(splits_ref_bitmasks))
            self.assertEqual(
                counts_keys, splits_ref_bitmasks,
                "\n    {}\n\n    {}\n\n".format(sorted(counts_keys),
                                                sorted(splits_ref_bitmasks)))
            # The frequency table must use the same keys as the reference;
            # checking here reports a readable diff instead of a KeyError in
            # the per-split loop below.
            self.assertEqual(
                freqs_keys, splits_ref_bitmasks,
                "\n    {}\n\n    {}\n\n".format(sorted(freqs_keys),
                                                sorted(splits_ref_bitmasks)))

            to_bitmask = paup.PaupService.bipartition_groups_to_split_bitmask
            for split_str_rep in splits_ref:
                ref = splits_ref[split_str_rep]
                self.assertEqual(split_str_rep, ref["bipartition_string"])
                # The string-to-bitmask conversion must agree with the
                # reference in both normalization modes.
                self.assertEqual(
                    to_bitmask(split_str_rep, normalized=False),
                    ref["unnormalized_split_bitmask"])
                self.assertEqual(
                    to_bitmask(split_str_rep, normalized=True),
                    ref["normalized_split_bitmask"])
                split_bitmask = to_bitmask(
                    split_str_rep, normalized=not is_rooted)
                self.assertEqual(bipartition_counts[split_bitmask],
                                 ref["count"])
                # Compared to 2 decimal places only (PAUP* 4.10b: not very
                # precise).
                self.assertAlmostEqual(bipartition_freqs[split_bitmask],
                                       ref["frequency"],
                                       2)
示例#46
0
 def get_trees(self):
     """Read ``issue_mth_2009-02-03.rooted.nexus`` and return its trees."""
     source_path = pathmap.tree_source_path(
         "issue_mth_2009-02-03.rooted.nexus")
     return dendropy.TreeList.get_from_path(source_path, "nexus")
示例#47
0
 @classmethod
 def setUpClass(cls):
     """
     Load the bipartition-encoding JSON fixture and store it on the class
     as ``cls.reference``.

     Declared as a classmethod because unittest invokes ``setUpClass`` on
     the class itself, without passing any argument explicitly.
     """
     ref_path = pathmap.tree_source_path(
         "bipartition_encoding_fixture.json")
     with open(ref_path, "r") as src:
         cls.reference = json.load(src)
 def get_trees(self):
     """Return the trees parsed from the rooted ``issue_mth`` NEXUS file."""
     nexus_path = pathmap.tree_source_path(
         "issue_mth_2009-02-03.rooted.nexus")
     return dendropy.TreeList.get_from_path(nexus_path, "nexus")
 def setUp(self):
     """Resolve the tree fixture paths and set the burn-in for each test."""
     fixture_files = (
         ("support_trees_path", "primates.beast.mcmc.trees"),
         ("target_tree_path", "primates.beast.mcct.noedgelens.tree"),
         ("expected_tree_path", "primates.beast.mcct.medianh.tre"),
     )
     for attr_name, file_name in fixture_files:
         setattr(self, attr_name, pathmap.tree_source_path(file_name))
     self.burnin = 40
 @classmethod
 def setUpClass(cls):
     """
     Load the bipartition-encoding JSON fixture and store it on the class
     as ``cls.reference``.

     Declared as a classmethod because unittest invokes ``setUpClass`` on
     the class itself, without passing any argument explicitly.
     """
     ref_path = pathmap.tree_source_path("bipartition_encoding_fixture.json")
     with open(ref_path, "r") as src:
         cls.reference = json.load(src)
 def get_regime(self,
         is_rooted,
         is_multifurcating,
         is_weighted,
         tree_offset=0,
         taxon_namespace=None,
         num_trees=500):
     """
     Build a random sample of test trees together with the expected
     per-topology frequencies.

     A source tree file is chosen from ``is_multifurcating`` and
     ``is_rooted``. Trees are then drawn at random (with replacement) from
     it until ``num_trees`` newick strings have been produced; every draw
     gets fresh random edge lengths and, when ``is_weighted`` is true, a
     random weight. Only draws at position ``tree_offset`` or later
     contribute to the counts and to the total weight.

     Returns a tuple ``(source_trees, bipartition_encoding_freqs,
     test_trees_string)``: the source trees (annotated with ``key``,
     ``actual_count``, ``total_weighted_count`` and ``frequency``), a dict
     mapping each tree's ``key`` to its frequency, and the drawn trees as
     newline-joined newick strings.

     NOTE(review): the frequency is a division by the accumulated weight,
     so a run in which no draw is counted (total weight 0.0) raises
     ZeroDivisionError when there is at least one source tree.
     """
     if taxon_namespace is None:
         taxon_namespace = dendropy.TaxonNamespace()

     # Source file indexed by (is_multifurcating, is_rooted).
     filename_by_regime = {
         (True, True): "dendropy-test-trees-multifurcating-rooted.nexus",
         (True, False): "dendropy-test-trees-multifurcating-unrooted.nexus",
         (False, True): "dendropy-test-trees-n10-rooted-treeshapes.nexus",
         (False, False): "dendropy-test-trees-n14-unrooted-treeshapes.nexus",
     }
     tree_filename = filename_by_regime[
         (bool(is_multifurcating), bool(is_rooted))]
     source_trees = dendropy.TreeList.get_from_path(
             pathmap.tree_source_path(tree_filename),
             "nexus",
             taxon_namespace=taxon_namespace)

     # Give every source tree a hashable identity and zeroed tallies.
     for src_tree in source_trees:
         src_tree.encode_bipartitions()
         src_tree.key = frozenset(src_tree.bipartition_encoding)
         src_tree.total_weighted_count = 0.0
         src_tree.actual_count = 0

     candidate_weights = (0.25, 1.0, 2.8, 5.6, 11.0)
     newick_strings = []
     weight_sum = 0.0
     while len(newick_strings) < num_trees:
         drawn = random.choice(source_trees)
         # Draws before the offset are emitted but not tallied.
         is_counted = len(newick_strings) >= tree_offset
         if is_counted:
             drawn.actual_count += 1
         if is_weighted:
             contribution = random.choice(candidate_weights)
             drawn.weight = contribution
         else:
             contribution = 1.0
             drawn.weight = None
         if is_counted:
             drawn.total_weighted_count += contribution
             weight_sum += contribution
         for node in drawn:
             node.edge.length = random.uniform(0, 100)
         newick = drawn.as_string(
             schema="newick",
             store_tree_weights=is_weighted,
             suppress_edge_lengths=False,
             suppress_internal_node_labels=True,
             suppress_internal_taxon_labels=True,
             )
         newick_strings.append(newick)
     test_trees_string = "\n".join(newick_strings)

     source_trees.total_weight = weight_sum
     for src_tree in source_trees:
         src_tree.frequency = float(src_tree.total_weighted_count) / weight_sum
     bipartition_encoding_freqs = {
         src_tree.key: src_tree.frequency for src_tree in source_trees
     }
     return source_trees, bipartition_encoding_freqs, test_trees_string
 def get_trees(self, taxon_namespace=None):
     """
     Read ``pythonidae.reference-trees.nexus`` and return its trees,
     passing ``taxon_namespace`` through to the reader.
     """
     source_path = pathmap.tree_source_path(
             "pythonidae.reference-trees.nexus")
     return dendropy.TreeList.get_from_path(
             source_path,
             "nexus",
             taxon_namespace=taxon_namespace)
 def test_multiple_trees(self):
     src_filename = "multitreeblocks.nex"
     src_path = pathmap.tree_source_path(src_filename)
     ds = dendropy.DataSet.get_from_path(src_path, "nexus")
     self.assertEqual(len(ds.taxon_namespaces), 1)
     self.assertEqual(len(ds.tree_lists), 3)