def get_backbone_tree(tree1, tree2):
    """Constrain a copy of tree1 to the leaf set it shares with tree2

    The input trees are left untouched; all pruning is done on a deep
    copy of tree1.

    Parameters
    ----------
    tree1 : dendropy tree object
        tree to be restricted
    tree2 : dendropy tree object
        tree whose leaf set defines which leaves of tree1 are kept

    Returns
    -------
    backbone : dendropy tree object
        copy of tree1 restricted to the shared leaves, moved onto a fresh
        taxon namespace built from those leaves, with its bipartitions
        encoded
    """
    backbone = deepcopy(tree1)

    # Labels present in both trees
    common_labels = list(
        njmergepair.get_leaf_set(backbone).intersection(
            njmergepair.get_leaf_set(tree2)))
    common_namespace = dendropy.TaxonNamespace(common_labels)

    # Prune to the shared leaves, then switch to the reduced namespace
    backbone.retain_taxa_with_labels(common_labels)
    backbone.migrate_taxon_namespace(common_namespace)
    backbone.encode_bipartitions()

    return backbone
def map_splits_to_node_list(big_tree, lil_tree):
    """Group the nodes of big tree by the split they induce in little tree

    For every node of big tree, the leaves below it are intersected with
    the leaf set of little tree. Nodes whose intersection is non-empty are
    grouped under the bitmask of that intersection, computed with the
    taxon namespace of little tree.

    NOTE: Because little tree is *contained* within big tree, more than
    one node in big tree can be mapped to the same split.

    Parameters
    ----------
    big_tree : dendropy tree object
    lil_tree : dendropy tree object

    Returns
    -------
    nodes_by_split : python dictionary
        keys - splits, as returned by taxa_bitmask on the taxon
               namespace of little tree
        values - lists of nodes in big tree, in post-order
    """
    lil_labels = njmergepair.get_leaf_set(lil_tree)

    nodes_by_split = {}
    for big_node in big_tree.postorder_node_iter():
        common = list(
            njmergepair.get_leaf_set(big_node).intersection(lil_labels))
        if len(common) == 0:
            # Nothing below this node belongs to little tree
            continue

        key = lil_tree.taxon_namespace.taxa_bitmask(labels=common)
        nodes_by_split.setdefault(key, []).append(big_node)

    return nodes_by_split
def dscmcombine(workdir, trees, mstmat, outfile):
    """Merge pairwise trees along the edges of a graph and write the result

    The graph built from mstmat is walked edge by edge. For each edge
    (i, j) the tree stored in the corresponding tree-pair file is read
    and merged into the running combined tree with
    combine_two_trees_via_dscm. Each edge is removed once processed, and
    a node is dropped once it has no edges left.

    Parameters
    ----------
    workdir : str
        working or output directory; passed to name_treepair_file to
        locate the tree-pair files
    trees : sequence
        indexed by graph node; trees[i] and trees[j] are passed to
        name_treepair_file to name the file for the pair (i, j)
    mstmat : matrix
        passed to networkx.Graph to build the graph whose edges decide
        which pairs are merged (presumably the adjacency matrix of a
        minimum spanning tree -- confirm against caller)
    outfile : str
        path of the file the combined tree is written to, as newick

    Returns
    -------
    Nothing

    Raises
    ------
    ValueError
        if the graph has no edges, so that no tree was ever read
    """
    # Turn matrix into graph
    graph = networkx.Graph(mstmat)

    # Merge trees
    combined_tree = None
    nodes = list(graph.nodes())
    root = None
    next_roots = nodes[:1]
    while len(nodes) > 0:
        if root is None:
            root = next_roots[0]

        # Materialize the neighbors: newer networkx releases return an
        # iterator here, which supports neither len() nor indexing
        neighbors = list(graph.neighbors(root))
        next_roots = list(set(next_roots).union(set(neighbors)))

        if len(neighbors) == 0:
            # All edges of this node are done -- move on to another node
            nodes.remove(root)
            next_roots.remove(root)
            root = None
            continue

        neighbor = neighbors[0]
        sys.stdout.write("Combining %d and %d...\n" % (root, neighbor))

        if root < neighbor:
            i = root
            j = neighbor
        else:
            i = neighbor
            j = root

        # The pair file may have been named with either ordering
        tijfile = name_treepair_file(workdir, trees[i], trees[j])
        if not os.path.exists(tijfile):
            tijfile = name_treepair_file(workdir, trees[j], trees[i])

        if combined_tree is None:
            combined_tree = dendropy.Tree.get(path=tijfile,
                                              schema="newick")
        else:
            tij = dendropy.Tree.get(path=tijfile, schema="newick")
            combine_two_trees_via_dscm(tij, combined_tree)
            combined_tree.update_bipartitions()

        sys.stdout.write("...combined tree has %d leaves!\n" %
                         len(njmergepair.get_leaf_set(combined_tree)))

        graph.remove_edge(root, neighbor)

    if combined_tree is None:
        raise ValueError("No tree pairs to combine: graph has no edges!\n")

    # Drop the leading rooting token only when the writer emitted one;
    # blindly slicing five characters would eat the start of the tree
    newick = combined_tree.as_string(schema="newick")
    for token in ("[&R] ", "[&U] "):
        if newick.startswith(token):
            newick = newick[len(token):]
            break

    with open(outfile, 'w') as f:
        f.write(newick)
def combine_two_trees_via_dscm(tree_AB, tree_BC):
    """Combines two trees via distance-based strict consensus merger

    A is the subset of leaves only in tree AB
    C is the subset of leaves only in tree BC
    B is the subset of leaves in both tree AB and tree BC

    Tree AB and tree BC must be equivalent on their shared leaf set B

    Both inputs are modified in place. Tree BC receives the subtrees
    that hang off tree AB, and tree AB is rerooted and has those subtrees
    taken out of it, so it should not be reused afterwards.

    NOTE(review): the code visible here has no return statement, so
    callers should rely on tree BC having been updated in place.

    Parameters
    ----------
    tree_AB : dendropy tree object
    tree_BC : dendropy tree object
        tree BC; on exit it has leaves from set A added

    Raises
    ------
    Exception
        if no backbone tree is obtained, or if a collision occurs on a
        path whose total length is zero in either tree
    SystemExit
        (via sys.exit) if a node on a mapped path has more than one
        sibling, i.e. the tree is not binary there
    """
    # Shared leaves (set B) and a taxon namespace built from them;
    # tree BC is migrated onto this namespace at the very end
    data = list(
        njmergepair.get_leaf_set(tree_AB).intersection(
            njmergepair.get_leaf_set(tree_BC)))
    taxa = dendropy.TaxonNamespace(data)

    # NOTE: the compatibility check below is currently disabled
    # [incompatible] = are_two_trees_incompatible(tree_AB, tree_BC)
    # if incompatible:
    #     sys.exit("Input trees are not compatible!\n")

    # Copy of tree AB restricted to the shared leaf set
    backbone_tree = get_backbone_tree(tree_AB, tree_BC)
    if backbone_tree is None:
        raise Exception("Unable to extract a backbone tree!\n")

    # Root all trees at the same shared leaf --
    # required for split mapping and post-order traversal to work!
    root = backbone_tree.taxon_namespace[0].label
    node_XX = backbone_tree.find_node_with_taxon_label(root)
    node_AB = tree_AB.find_node_with_taxon_label(root)
    node_BC = tree_BC.find_node_with_taxon_label(root)

    # Half of the rooting leaf's edge length, recorded before rerooting
    elen_AB = node_AB.edge.length / 2.0
    elen_BC = node_BC.edge.length / 2.0

    backbone_tree.is_rooted = True
    tree_AB.is_rooted = True
    tree_BC.is_rooted = True
    backbone_tree.reroot_at_edge(node_XX.edge)
    tree_AB.reroot_at_edge(node_AB.edge)
    tree_BC.reroot_at_edge(node_BC.edge)

    # Give the rooting leaf and its first sibling half of the original
    # edge length each
    node_AB.edge.length = elen_AB
    node_BC.edge.length = elen_BC
    node_AB.sibling_nodes()[0].edge.length = elen_AB
    node_BC.sibling_nodes()[0].edge.length = elen_BC

    # Map nodes based on splits in shared leaf set
    map_AB = map_splits_to_node_list(tree_AB, backbone_tree)
    map_BC = map_splits_to_node_list(tree_BC, backbone_tree)

    # Add missing taxa from AB **into** BC using the
    # distance-based SCM strategy to handle collisions
    nodes = [n for n in backbone_tree.postorder_node_iter()]
    # The last node in post-order (the backbone root) is skipped
    for node in nodes[:-1]:
        clade = njmergepair.get_leaf_set(node)
        split = backbone_tree.taxon_namespace.taxa_bitmask(labels=clade)

        # Nodes of each tree that map to this backbone split; the number
        # of nodes is the number of edges the backbone edge expands to
        node_path_AB = map_AB[split]
        node_path_BC = map_BC[split]
        num_edges_AB = len(node_path_AB)
        num_edges_BC = len(node_path_BC)

        # First sibling of every node on the path in tree AB
        sibs_path_AB = []
        for n in node_path_AB:
            s = n.sibling_nodes()
            if len(s) > 1:
                sys.exit("Tree AB is not binary!\n")
            sibs_path_AB.append(s[0])

        # First sibling of every node on the path in tree BC
        sibs_path_BC = []
        for n in node_path_BC:
            s = n.sibling_nodes()
            if len(s) > 1:
                sys.exit("Tree BC is not binary!\n")
            sibs_path_BC.append(s[0])

        if num_edges_AB > 1 and num_edges_BC > 1:
            # Found a collision -- add edges from tree AB to tree BC

            # Find normalization factor for path in AB
            # ALSO compute the point of attachment for edges in the path
            # IN ORDER to identify the ORDER in which edges should be added
            # (elen_path_AB holds cumulative lengths along the path;
            # elen_AB ends up as the total path length)
            elen_AB = node_path_AB[0].edge.length
            elen_path_AB = [elen_AB]
            i_AB = 1
            for node_AB in node_path_AB[1:]:
                elen_AB = elen_AB + node_AB.edge.length
                elen_path_AB.append(elen_path_AB[i_AB - 1] +
                                    node_AB.edge.length)
                i_AB = i_AB + 1

            # Same cumulative lengths for the path in tree BC
            elen_BC = node_path_BC[0].edge.length
            elen_path_BC = [elen_BC]
            i_BC = 1
            for node_BC in node_path_BC[1:]:
                elen_BC = elen_BC + node_BC.edge.length
                elen_path_BC.append(elen_path_BC[i_BC - 1] +
                                    node_BC.edge.length)
                i_BC = i_BC + 1

            if elen_AB == 0.0 or elen_BC == 0.0:
                raise Exception("Collision on path of length zero!\n")

            # Rescale the AB attachment points so that the AB path has
            # the same total length as the BC path
            norm_AB = elen_BC / elen_AB
            for i in range(len(elen_path_AB)):
                elen_path_AB[i] = elen_path_AB[i] * norm_AB

            # Extract components of tree BC
            node_BC = node_path_BC[-1]
            sibs_BC = sibs_path_BC[-1]
            parent_BC = node_BC.parent_node
            parent_BC.clear_child_nodes()

            # Get node in tree BC and update branch length
            # (the shorter of the rescaled AB edge and the BC edge)
            child1 = node_path_BC[0]
            elen_AB = node_path_AB[0].edge.length * norm_AB
            elen_BC = node_path_BC[0].edge.length
            if elen_AB < elen_BC:
                child1.edge.length = elen_AB
            else:
                child1.edge.length = elen_BC

            # Walk both paths at once, always attaching the sibling whose
            # attachment point comes first; start tracks the position of
            # the most recently attached node along the path
            i_AB = 0
            i_BC = 0
            start = None
            while (True):
                # dothis: 1 = attach from AB, 2 = attach from BC,
                # 3 = attach from both at the same point
                dothis = None
                if i_AB < num_edges_AB - 1 and i_BC < num_edges_BC - 1:
                    if elen_path_AB[i_AB] == elen_path_BC[i_BC]:
                        # Add a new node created from AB and BC
                        dothis = 3
                    elif elen_path_AB[i_AB] < elen_path_BC[i_BC]:
                        # Add remaining edges from tree AB
                        dothis = 1
                    else:
                        # Add remaining edges from tree BC
                        dothis = 2
                elif i_AB < num_edges_AB - 1:
                    # Add remaining edges from tree AB
                    dothis = 1
                elif i_BC < num_edges_BC - 1:
                    # Add remaining edges from tree BC
                    dothis = 2
                else:
                    # No more edges to add!
                    break

                if dothis == 1:
                    # Adding AB only
                    child2 = sibs_path_AB[i_AB]
                    if start is None:
                        start = child1.edge.length
                    else:
                        stop = elen_path_AB[i_AB]
                        child1.edge.length = stop - start
                        start = stop
                    i_AB = i_AB + 1
                elif dothis == 2:
                    # Add BC only
                    child2 = sibs_path_BC[i_BC]
                    if start is None:
                        start = child1.edge.length
                    else:
                        stop = elen_path_BC[i_BC]
                        child1.edge.length = stop - start
                        start = stop
                    i_BC = i_BC + 1
                else:
                    # Add both AB and BC
                    # (the two siblings are grouped under a new node
                    # with a zero-length edge)
                    child2 = dendropy.Node()
                    child2.set_child_nodes(
                        [sibs_path_AB[i_AB], sibs_path_BC[i_BC]])
                    child2.edge.length = 0.0
                    if start is None:
                        start = child1.edge.length
                    else:
                        stop = elen_path_BC[i_BC]
                        child1.edge.length = stop - start
                        start = stop
                    i_AB = i_AB + 1
                    i_BC = i_BC + 1

                child2.parent_node = None

                # Combine nodes from tree BC (node 1) and
                # tree AB or tree BC (node 2)
                next_node = dendropy.Node()
                next_node.set_child_nodes([child1, child2])

                # Set node in tree BC as next_node
                child1 = next_node

            # Recombine the three components of tree BC
            # (the top edge gets whatever is left of the BC path length)
            next_node.edge.length = elen_path_BC[-1] - start
            parent_BC.set_child_nodes([next_node] + [sibs_BC])
        elif num_edges_AB > 1:
            # Found edges in tree AB not in tree BC --
            # add edges from tree AB to tree BC!

            # Find normalization factor for path in AB
            # (here elen_path_AB holds the individual edge lengths,
            # not cumulative ones)
            elen_BC = node_path_BC[0].edge.length
            elen_AB = 0.0
            elen_path_AB = []
            for node_AB in node_path_AB:
                elen_AB = elen_AB + node_AB.edge.length
                elen_path_AB.append(node_AB.edge.length)

            if elen_AB == 0.0:
                # AB path has no length: split the BC edge evenly
                xxxx_AB = elen_BC / num_edges_AB
                for i in range(num_edges_AB):
                    elen_path_AB[i] = xxxx_AB
            else:
                # Rescale the AB edges so they sum to the BC edge length
                norm_AB = elen_BC / elen_AB
                for i in range(num_edges_AB):
                    elen_path_AB[i] = elen_path_AB[i] * norm_AB

            # Extract components of tree BC
            node_BC = node_path_BC[0]
            sibs_BC = sibs_path_BC[0]
            parent_BC = node_BC.parent_node
            parent_BC.clear_child_nodes()

            # Get node in tree BC and update branch length to match tree AB
            child1 = node_path_BC[0]
            child1.edge.length = elen_path_AB[0]

            # Add each nodes in tree AB to tree BC
            for i_AB in range(1, num_edges_AB):
                # Remove parent from child 1
                child1.parent_node = None

                # Get node being added from tree AB!
                child2 = sibs_path_AB[i_AB - 1]
                child2.parent_node = None

                # Combine nodes from tree BC (node 1) and tree AB (node 2)
                next_node = dendropy.Node()
                next_node.set_child_nodes([child1, child2])
                next_node.edge.length = elen_path_AB[i_AB]

                # Set node in tree BC as next_node
                child1 = next_node

            # Recombine the three components of tree BC
            parent_BC.set_child_nodes([next_node] + [sibs_BC])
        elif num_edges_BC > 1:
            # Found edges in tree BC not in tree AB
            # (nothing to do -- they are already in tree BC)
            pass
        else:
            # Found one edge in tree BC and one edge in tree AB
            pass

    # Move tree BC onto the namespace created from the shared leaves
    tree_BC.migrate_taxon_namespace(taxa)