def tree_from_splits(self, split_distribution, min_freq=0.5, include_edge_lengths=True):
    """Return a consensus tree built from the splits in `split_distribution`.

    A split is used if `min_freq` is None, if its frequency is strictly
    greater than `min_freq`, or if both `min_freq` and the frequency are
    within 1e-7 of 1.0 (so that a threshold of 1.0 still admits splits
    whose frequency is 1.0 up to rounding). The selected splits are passed
    to `treesplit.tree_from_splits` sorted by (frequency, split) in
    descending order.

    Frequencies are read from `weighted_split_frequencies` when
    `self.weighted_splits` is true and from `split_frequencies` otherwise.
    Every node of the resulting tree whose split bitmask is present in
    those frequencies has its support recorded through
    `self.map_split_support_to_node`.

    If `include_edge_lengths` is true, each edge whose split has an entry
    in `split_distribution.split_edge_lengths` is given the mean of the
    recorded lengths, or None when the entry is empty. The sample variance
    returned by `mean_and_sample_variance` is not stored anywhere.

    Raises:
        Exception: if `self.support_as_edge_lengths` and
            `include_edge_lengths` are both true, because the edge length
            cannot hold both the support value and the mean length.
    """
    taxon_set = split_distribution.taxon_set
    if self.weighted_splits:
        split_freqs = split_distribution.weighted_split_frequencies
    else:
        split_freqs = split_distribution.split_frequencies
    is_rooted = split_distribution.is_rooted

    if self.support_as_edge_lengths and include_edge_lengths:
        raise Exception("Cannot map support as edge lengths if edge lengths are to be set on consensus tree")

    def _almost_one(x):
        # Absolute tolerance so that a frequency of 1.0 survives float noise.
        return abs(x - 1.0) <= 0.0000001

    # `.items()` works on both Python 2 and Python 3 mappings.
    candidates = [
        (freq, split)
        for split, freq in split_freqs.items()
        if (min_freq is None)
        or (freq > min_freq)
        or (_almost_one(min_freq) and _almost_one(freq))
    ]
    # Highest-frequency splits first; ties fall back to the split value.
    candidates.sort(reverse=True)
    splits_for_tree = [split for _, split in candidates]

    con_tree = treesplit.tree_from_splits(splits=splits_for_tree,
                                          taxon_set=taxon_set,
                                          is_rooted=is_rooted)
    treesplit.encode_splits(con_tree)

    # Only touch the edge-length table when it is actually needed, and look
    # it up once instead of once per node.
    if include_edge_lengths:
        edge_length_samples = split_distribution.split_edge_lengths
    else:
        edge_length_samples = None

    for node in con_tree.postorder_node_iter():
        split = node.edge.split_bitmask
        if split in split_freqs:
            self.map_split_support_to_node(node=node, split_support=split_freqs[split])
        if include_edge_lengths and split in edge_length_samples:
            lengths = edge_length_samples[split]
            if len(lengths) > 0:
                mean, _var = mean_and_sample_variance(lengths)
                node.edge.length = mean
            else:
                node.edge.length = None
    return con_tree
def testReferenceTree(self):
    """Rebuild every reference tree from its own splits and compare topologies.

    For each tree returned by `datagen.reference_tree_list()`, the splits
    are encoded, a new tree is constructed from the keys of `split_edges`,
    and the symmetric difference between the two trees must be zero.
    """
    reference_trees = datagen.reference_tree_list()
    # The result is never read below; the construction itself is retained.
    _unused_tree_list = dendropy.TreeList()
    for original in reference_trees:
        treesplit.encode_splits(original)
        rebuilt = treesplit.tree_from_splits(
            splits=original.split_edges.keys(),
            taxon_set=reference_trees.taxon_set,
            is_rooted=original.is_rooted)
        self.assertEqual(original.symmetric_difference(rebuilt), 0)
def calc_tree_freqs(self, taxon_set, is_rooted=False):
    """Map reconstructed trees to their (count, proportion) statistics.

    Each entry of `self.calc_hash_freqs()` pairs a topology hash with a
    `(count, freq)` tuple. The hash is passed as the `splits` argument of
    `treesplit.tree_from_splits` to build a tree, which becomes the key of
    the returned `OrderedDict`. Entries keep the iteration order of
    `calc_hash_freqs()`.
    """
    def _tree_for(topology_hash):
        # One tree per topology hash, on the caller-supplied taxon set.
        return treesplit.tree_from_splits(splits=topology_hash,
                                          taxon_set=taxon_set,
                                          is_rooted=is_rooted)

    return OrderedDict(
        (_tree_for(topology_hash), (count, freq))
        for topology_hash, (count, freq) in self.calc_hash_freqs().items()
    )
def testUltrametricTrees(self):
    """Round-trip the listed tree files through their split sets.

    Each file is read as a rooted NEXUS tree and its splits are encoded.
    A second tree is then built from the keys of `split_edges`, encoded in
    turn, and asserted to have zero symmetric difference from the first.
    """
    for filename in ("pythonidae.beast.summary.tre",
                     "primates.beast.mcct.medianh.tre"):
        source_path = pathmap.tree_source_path(filename)
        expected = dendropy.Tree.get_from_path(source_path, "nexus", as_rooted=True)
        treesplit.encode_splits(expected)
        rebuilt = treesplit.tree_from_splits(
            splits=expected.split_edges.keys(),
            taxon_set=expected.taxon_set,
            is_rooted=expected.is_rooted)
        treesplit.encode_splits(rebuilt)
        self.assertEqual(expected.symmetric_difference(rebuilt), 0)
def testUltrametricTrees(self):
    """Check that trees rebuilt from their own splits match the source trees.

    Every listed file is loaded as a rooted NEXUS tree. After encoding its
    splits, a tree is reconstructed from the `split_edges` keys with the
    same taxon set and rooting, encoded as well, and compared to the loaded
    tree; the symmetric difference is expected to be 0.
    """
    tree_files = (
        "pythonidae.beast.summary.tre",
        "primates.beast.mcct.medianh.tre",
    )
    for name in tree_files:
        loaded = dendropy.Tree.get_from_path(
            pathmap.tree_source_path(name), "nexus", as_rooted=True)
        treesplit.encode_splits(loaded)
        split_keys = loaded.split_edges.keys()
        reconstructed = treesplit.tree_from_splits(splits=split_keys,
                                                   taxon_set=loaded.taxon_set,
                                                   is_rooted=loaded.is_rooted)
        treesplit.encode_splits(reconstructed)
        distance = loaded.symmetric_difference(reconstructed)
        self.assertEqual(distance, 0)