def _collapse_paths_not_found(f, s, other_dict=None): to_del = [] for masked_split, path in f.iteritems(): if masked_split not in s: for edge in path: if other_dict: del other_dict[edge.split_bitmask] collapse_edge(edge) to_del.append(masked_split) for k in to_del: del f[k]
def long_branch_symmdiff(trees_to_compare, edge_len_threshold, copy_trees=False, rooted=False):
    """Returns matrix of the symmetric_differences between trees after all
    internal edges with lengths < `edge_len_threshold` have been collapsed.

    The result is a square list of lists with one row and one column per
    tree, in the order of `trees_to_compare`; entry [i][j] (and [j][i]) is
    `symmetric_difference(tree_i, tree_j)` and the diagonal is 0.  With
    fewer than two trees nothing is collapsed or compared and an all-zero
    matrix of the matching size is returned (`[]` or `[[0]]`).

    If `copy_trees` is True then the trees will be copied (with `copy.copy`)
    first; if False, then the trees will have their short edges collapsed on
    exit, although their `is_rooted` flags are restored.

    While comparing, every tree has `is_rooted` set to `bool(rooted)` and its
    splits re-encoded accordingly.
    """
    if copy_trees:
        # NOTE(review): copy.copy is a shallow copy -- confirm that it is
        # enough to shield the caller's trees from the collapse_edge calls
        # below.
        tree_list = [copy.copy(i) for i in trees_to_compare]
    else:
        tree_list = list(trees_to_compare)

    n_trees = len(tree_list)
    _LOG.debug('%d Trees to compare:\n%s\n' % (n_trees, '\n'.join([str(i) for i in tree_list])))

    # Every path returns a real n x n matrix (previously a single tree
    # produced the flat list [0]).
    sd_mat = [[0] * n_trees for _ in tree_list]
    if n_trees < 2:
        return sd_mat

    original_rooting = []
    for tree in tree_list:
        encode_splits(tree)
        # Collect first and collapse afterwards so that the tree is not
        # restructured while its edges are being iterated.
        to_collapse = []
        for edge in tree.preorder_edge_iter(filter_fn=Edge.is_internal):
            elen = edge.length
            if elen is not None and elen < edge_len_threshold:
                to_collapse.append(edge)
        for edge in to_collapse:
            collapse_edge(edge)
        original_rooting.append(tree.is_rooted)
        tree.is_rooted = bool(rooted)
        # Re-encode: both the topology and the rooting may have changed.
        encode_splits(tree)

    # Only the upper triangle is computed; each value is mirrored.
    for i in range(n_trees - 1):
        tree_one = tree_list[i]
        for j in range(i + 1, n_trees):
            sd = symmetric_difference(tree_one, tree_list[j])
            sd_mat[i][j] = sd
            sd_mat[j][i] = sd

    if not copy_trees:
        # The caller's own tree objects were used: put their rooting back.
        for was_rooted, tree in zip(original_rooting, tree_list):
            tree.is_rooted = was_rooted
    return sd_mat
def long_branch_symmdiff(trees_to_compare, edge_len_threshold, copy_trees=False, rooted=False):
    """Returns matrix of the symmetric_differences between trees after all
    internal edges with lengths < `edge_len_threshold` have been collapsed.

    The result is a square list of lists with one row and one column per
    tree, in the order of `trees_to_compare`; entry [i][j] (and [j][i]) is
    `symmetric_difference(tree_i, tree_j)` and the diagonal is 0.  With
    fewer than two trees nothing is collapsed or compared and an all-zero
    matrix of the matching size is returned (`[]` or `[[0]]`).

    If `copy_trees` is True then the trees will be copied (with `copy.copy`)
    first; if False, then the trees will have their short edges collapsed on
    exit, although their `is_rooted` flags are restored.

    While comparing, every tree has `is_rooted` set to `bool(rooted)` and its
    splits re-encoded accordingly.
    """
    if copy_trees:
        # NOTE(review): copy.copy is a shallow copy -- confirm that it is
        # enough to shield the caller's trees from the collapse_edge calls
        # below.
        tree_list = [copy.copy(i) for i in trees_to_compare]
    else:
        tree_list = list(trees_to_compare)

    n_trees = len(tree_list)
    _LOG.debug('%d Trees to compare:\n%s\n' % (n_trees, '\n'.join([str(i) for i in tree_list])))

    # Every path returns a real n x n matrix (previously a single tree
    # produced the flat list [0]).
    sd_mat = [[0]*n_trees for _ in tree_list]
    if n_trees < 2:
        return sd_mat

    rooting_before = []
    for tree in tree_list:
        encode_splits(tree)
        # Collect first and collapse afterwards so that the tree is not
        # restructured while its edges are being iterated.
        to_collapse = []
        for edge in tree.preorder_edge_iter(filter_fn=Edge.is_internal):
            elen = edge.length
            if elen is not None and elen < edge_len_threshold:
                to_collapse.append(edge)
        for edge in to_collapse:
            collapse_edge(edge)
        rooting_before.append(tree.is_rooted)
        tree.is_rooted = bool(rooted)
        # Re-encode: both the topology and the rooting may have changed.
        encode_splits(tree)

    # Only the upper triangle is computed; each value is mirrored.
    for i in range(n_trees - 1):
        tree_one = tree_list[i]
        for j in range(i + 1, n_trees):
            sd = symmetric_difference(tree_one, tree_list[j])
            sd_mat[i][j] = sd
            sd_mat[j][i] = sd

    if not copy_trees:
        # The caller's own tree objects were used: put their rooting back.
        for was_rooted, tree in zip(rooting_before, tree_list):
            tree.is_rooted = was_rooted
    return sd_mat
def deroot(self):
    """Converts a degree-2 node at the root to a degree-3 node.

    Nothing happens if the tree has no seed node, if the seed node does not
    have exactly two children, or if neither of those children has at least
    two children of its own.  Otherwise the edge leading to one such child
    is collapsed with `collapse_edge`, after its length has been added to
    the edge of the other child.  Returns None.
    """
    seed_node = self.seed_node
    if not seed_node:
        return
    child_nodes = seed_node.child_nodes()
    if len(child_nodes) != 2:
        return

    # The child whose edge is collapsed must itself have >= 2 children; the
    # second child is preferred when both qualify.
    if len(child_nodes[1].child_nodes()) >= 2:
        to_keep, to_del = child_nodes
    elif len(child_nodes[0].child_nodes()) >= 2:
        to_del, to_keep = child_nodes
    else:
        return
    to_del_edge = to_del.edge
    try:
        to_keep.edge.length += to_del_edge.length
    except (TypeError, AttributeError):
        # Best effort only: a missing (e.g. None) length on either edge
        # leaves the kept edge's length untouched.  Anything else, such as
        # KeyboardInterrupt, is no longer silently swallowed.
        pass
    # Imported here rather than at module level -- presumably to avoid a
    # circular import with dendropy.treemanip; confirm before moving it.
    from dendropy.treemanip import collapse_edge
    collapse_edge(to_del_edge)
def add_to_scm(to_modify, to_consume, rooted=False, gordons_supertree=False):
    """Adds the tree `to_consume` to the tree `to_modify` in a strict consensus
    merge operation.  Both trees must have had encode_splits called on them.

    Both trees must reference the very same `taxon_set` object.  The merge
    is done in place: nodes of `to_consume` are re-attached to `to_modify`,
    edges of both trees may be collapsed, and both trees may be rerooted, so
    `to_consume` should not be relied upon afterwards.

    `rooted` must be false; the rooted form is not implemented.

    `gordons_supertree` selects an alternative treatment of "collisions"
    (a path of more than one edge in both trees for the same masked split)
    in which the clades hanging off the colliding paths are collapsed as
    well.

    Returns None.  When the trees share exactly two leaves the result is a
    polytomy and `to_modify.split_edges` is rebuilt by hand; otherwise
    `encode_splits(to_modify)` is called at the end.

    Raises NotImplementedError if `rooted` is true, ValueError if the trees
    have fewer than two leaves in common, and AssertionError if an internal
    consistency check fails.
    """
    assert (to_modify.taxon_set is to_consume.taxon_set)
    taxon_set = to_consume.taxon_set
    if rooted:
        raise NotImplementedError("rooted form of add_to_scm not implemented")
    to_mod_root = to_modify.seed_node
    to_mod_split = to_mod_root.edge.split_bitmask

    to_consume_root = to_consume.seed_node
    to_consume_split = to_consume_root.edge.split_bitmask

    # Bits set in both root splits: the leaves that the two trees share.
    leaf_intersection = to_mod_split & to_consume_split
    if _IS_DEBUG_LOGGING:
        _LOG.debug("add_to_scm:\n %s\n + %s\n%s" %
                   (str(to_modify), str(to_consume),
                    format_split(leaf_intersection, taxon_set=taxon_set)))

    n_common_leaves = count_bits(leaf_intersection)
    if n_common_leaves < 2:
        _LOG.error('trees must have at least 2 common leaves')
        raise ValueError('trees must have at least 2 common leaves')
    if n_common_leaves == 2:
        # SCM with 2 leaves in common results in a polytomy
        collapse_clade(to_mod_root)
        collapse_clade(to_consume_root)
        # Only the children of to_consume that are not shared are moved.
        leaves_to_steal = [
            c for c in to_consume_root.child_nodes()
            if not (leaf_intersection & c.edge.split_bitmask)
        ]
        for leaf in leaves_to_steal:
            to_mod_root.add_child(leaf)
            to_mod_root.edge.split_bitmask |= leaf.edge.split_bitmask
        # Rebuild the split table directly: the root plus each of its
        # children.
        to_modify.split_edges = {
            to_mod_root.edge.split_bitmask: to_mod_root.edge
        }
        for child in to_mod_root.child_nodes():
            to_modify.split_edges[child.edge.split_bitmask] = child.edge
        return

    # at least 3 leaves in common
    # Alias to the split table of to_modify; entries for edges collapsed
    # below are deleted from it by _collapse_paths_not_found.
    tmse = to_modify.split_edges

    # masked split -> path (list of edges) in the respective tree
    to_mod_relevant_splits = {}
    to_consume_relevant_splits = {}
    # Always true at this point: the rooted case raised above.
    if not rooted:
        if _IS_DEBUG_LOGGING:
            to_modify.debug_check_tree(check_splits=True, logger_obj=_LOG)
            to_consume.debug_check_tree(check_splits=True, logger_obj=_LOG)

        reroot_on_lowest_common_index_path(to_modify, leaf_intersection)
        reroot_on_lowest_common_index_path(to_consume, leaf_intersection)

        if _IS_DEBUG_LOGGING:
            to_modify.debug_check_tree(check_splits=True, logger_obj=_LOG)
            to_consume.debug_check_tree(check_splits=True, logger_obj=_LOG)

        # Rerooting may have changed the seed nodes, so fetch them again;
        # the root splits themselves are expected to be unchanged.
        to_mod_root = to_modify.seed_node
        assert (to_mod_root.edge.split_bitmask == to_mod_split)
        to_consume_root = to_consume.seed_node
        assert (to_consume_root.edge.split_bitmask == to_consume_split)

    # Group the edges of each tree by the part of their split that lies in
    # the common leaf set.  Edges whose masked split is empty or covers the
    # whole intersection are skipped.
    for s, e in tmse.iteritems():
        # The bitmask stored on the edge is used instead of the dict key.
        # NOTE(review): presumably the key can be a normalized form of the
        # split -- confirm.
        s = e.split_bitmask
        masked = s & leaf_intersection
        if masked and masked != leaf_intersection:
            e_list = to_mod_relevant_splits.setdefault(masked, [])
            e_list.append((s, e))

    for s, e in to_consume.split_edges.iteritems():
        s = e.split_bitmask
        masked = s & leaf_intersection
        if masked and masked != leaf_intersection:
            e_list = to_consume_relevant_splits.setdefault(masked, [])
            e_list.append((s, e))

    # Because each of these paths radiates away from the root (none of the paths
    # cross the root), the split_bitmasks for deeper edges will be supersets
    # of the split_bitmasks for shallower nodes. Thus if we reverse sort we
    # get the edges in the order root->tip
    for split, path in to_mod_relevant_splits.iteritems():
        path.sort(reverse=True)
        # Replace the (split, edge) pairs by the bare edges, in place.
        t = [i[1] for i in path]
        del path[:]
        path.extend(t)
    for split, path in to_consume_relevant_splits.iteritems():
        path.sort(reverse=True)
        t = [i[1] for i in path]
        del path[:]
        path.extend(t)
    if _IS_DEBUG_LOGGING:
        to_modify.debug_check_tree(check_splits=True, logger_obj=_LOG)
        to_consume.debug_check_tree(check_splits=True, logger_obj=_LOG)

    # first we'll collapse all paths in the common leafset in to_modify that
    # are not in to_consume
    _collapse_paths_not_found(to_mod_relevant_splits,
                              to_consume_relevant_splits, tmse)
    # Now we'll collapse all paths in the common leafset in to_consume that
    # are not in to_modify
    _collapse_paths_not_found(to_consume_relevant_splits,
                              to_mod_relevant_splits)

    # first we'll deal with subtrees that are:
    # - not in the leaf intersection set, and
    # - attached to "relevant" nodes
    # We simply move these subtrees from the to_consume tree to the appropriate
    # node in to_modify
    to_steal = [
        i for i in to_consume_root.child_nodes()
        if (i.edge.split_bitmask & leaf_intersection) == 0
    ]
    for child in to_steal:
        to_mod_root.add_child(child)
        to_mod_root.edge.split_bitmask |= child.edge.split_bitmask

    # Now merge path by path; after the collapsing above every remaining
    # masked split of to_consume is expected to exist in to_modify too.
    for masked_split, to_consume_path in to_consume_relevant_splits.iteritems(
    ):
        to_mod_path = to_mod_relevant_splits.get(masked_split)
        # Dump diagnostics before the assertion below fails.
        if _IS_DEBUG_LOGGING and to_mod_path is None:
            _LOG.debug("%s = mask" %
                       format_split(leaf_intersection, taxon_set=taxon_set))
            _LOG.debug("%s = masked" %
                       format_split(masked_split, taxon_set=taxon_set))
            _LOG.debug("%s = raw" % format_split(
                to_consume_path[-1].split_bitmask, taxon_set=taxon_set))
            for k, v in to_mod_relevant_splits.iteritems():
                _LOG.debug("%s in to_mod_relevant_splits" %
                           format_split(k, taxon_set=taxon_set))

        assert to_mod_path is not None
        # Tip-most nodes of the two corresponding paths.
        to_mod_head = to_mod_path[-1].head_node
        to_mod_head_edge = to_mod_head.edge
        to_consume_head = to_consume_path[-1].head_node
        for child in to_consume_head.child_nodes():
            if (child.edge.split_bitmask & leaf_intersection) == 0:
                # child is the root of a subtree that has no children in the leaf_intersection
                to_mod_head.add_child(child)
                to_mod_head_edge.split_bitmask |= child.edge.split_bitmask
        if len(to_consume_path) > 1:
            if len(to_mod_path) > 1:
                # collision
                if gordons_supertree:
                    for edge in to_mod_path[2:]:
                        p = edge.tail_node
                        c = edge.head_node
                        sibs = p.child_nodes()
                        for sib in sibs:
                            _LOG.debug("sib is %s" % (sib.compose_newick()))
                            if sib is not c:
                                if not sib.is_leaf():
                                    collapse_clade(sib)
                                    collapse_edge(sib.edge)
                        collapse_edge(p.edge)
                    mid_node = to_mod_path[0].head_node
                    for edge in to_consume_path[1:]:
                        p = edge.tail_node
                        avoid = edge.head_node
                        # Move everything hanging off the path (but not the
                        # path itself) onto mid_node, flattened.
                        for child in p.child_nodes():
                            _LOG.debug("child is %s" %
                                       (child.compose_newick()))
                            if child is not avoid:
                                mid_node.add_child(child)
                                collapse_clade(child)
                                if not child.is_leaf():
                                    collapse_edge(child.edge)
                                mid_node.edge.split_bitmask |= child.edge.split_bitmask
                else:
                    # Collapse the interior of the to_modify path, then hang
                    # the side branches of the to_consume path on mid_node.
                    for edge in to_mod_path[1:-1]:
                        collapse_edge(edge)
                    mid_node = to_mod_path[0].head_node
                    for edge in to_consume_path[1:]:
                        p = edge.tail_node
                        avoid = edge.head_node
                        for child in p.child_nodes():
                            if child is not avoid:
                                mid_node.add_child(child)
                                mid_node.edge.split_bitmask |= child.edge.split_bitmask
            else:
                # we have to move the subtrees from to_consume to to_modify
                to_mod_edge = to_mod_path[0]
                to_mod_tail, to_mod_head = to_mod_edge.tail_node, to_mod_edge.head_node
                deepest_edge_to_move = to_consume_path[0]
                deepest_node_to_move = deepest_edge_to_move.head_node
                tipmost_edge_to_move = to_consume_path[-1]
                tipmost_node_to_move = tipmost_edge_to_move.tail_node
                prev_head = tipmost_edge_to_move.head_node

                # Splice the to_consume path in place of the single
                # to_modify edge: its deepest node hangs where to_mod_head
                # was, and to_mod_head replaces the old tip of the path.
                to_mod_tail.add_child(deepest_node_to_move)
                to_mod_tail.remove_child(to_mod_head)
                tipmost_node_to_move.add_child(to_mod_head)
                tipmost_node_to_move.remove_child(prev_head)
    # Recompute every split of the merged tree from scratch.
    encode_splits(to_modify)
def add_to_scm(to_modify, to_consume, rooted=False, gordons_supertree=False):
    """Adds the tree `to_consume` to the tree `to_modify` in a strict consensus
    merge operation.  Both trees must have had encode_splits called on them.

    Both trees must reference the very same `taxon_set` object.  The merge
    is done in place: nodes of `to_consume` are re-attached to `to_modify`,
    edges of both trees may be collapsed, and both trees may be rerooted, so
    `to_consume` should not be relied upon afterwards.

    `rooted` must be false; the rooted form is not implemented.

    `gordons_supertree` selects an alternative treatment of "collisions"
    (a path of more than one edge in both trees for the same masked split)
    in which the clades hanging off the colliding paths are collapsed as
    well.

    Returns None.  When the trees share exactly two leaves the result is a
    polytomy and `to_modify.split_edges` is rebuilt by hand; otherwise
    `encode_splits(to_modify)` is called at the end.

    Raises NotImplementedError if `rooted` is true, ValueError if the trees
    have fewer than two leaves in common, and AssertionError if an internal
    consistency check fails.
    """
    assert(to_modify.taxon_set is to_consume.taxon_set)
    taxon_set = to_consume.taxon_set
    if rooted:
        raise NotImplementedError("rooted form of add_to_scm not implemented")
    to_mod_root = to_modify.seed_node
    to_mod_split = to_mod_root.edge.split_bitmask

    to_consume_root = to_consume.seed_node
    to_consume_split = to_consume_root.edge.split_bitmask

    # Bits set in both root splits: the leaves that the two trees share.
    leaf_intersection = to_mod_split & to_consume_split
    if _IS_DEBUG_LOGGING:
        _LOG.debug("add_to_scm:\n %s\n + %s\n%s" % (str(to_modify), str(to_consume), format_split(leaf_intersection, taxon_set=taxon_set)))

    n_common_leaves = count_bits(leaf_intersection)
    if n_common_leaves < 2:
        _LOG.error('trees must have at least 2 common leaves')
        raise ValueError('trees must have at least 2 common leaves')
    if n_common_leaves == 2:
        # SCM with 2 leaves in common results in a polytomy
        collapse_clade(to_mod_root)
        collapse_clade(to_consume_root)
        # Only the children of to_consume that are not shared are moved.
        leaves_to_steal = [c for c in to_consume_root.child_nodes() if not (leaf_intersection & c.edge.split_bitmask)]
        for leaf in leaves_to_steal:
            to_mod_root.add_child(leaf)
            to_mod_root.edge.split_bitmask |= leaf.edge.split_bitmask
        # Rebuild the split table directly: the root plus each of its
        # children.
        to_modify.split_edges = {to_mod_root.edge.split_bitmask : to_mod_root.edge}
        for child in to_mod_root.child_nodes():
            to_modify.split_edges[child.edge.split_bitmask] = child.edge
        return

    # at least 3 leaves in common
    # Alias to the split table of to_modify; entries for edges collapsed
    # below are deleted from it by _collapse_paths_not_found.
    tmse = to_modify.split_edges

    # masked split -> path (list of edges) in the respective tree
    to_mod_relevant_splits = {}
    to_consume_relevant_splits = {}
    # Always true at this point: the rooted case raised above.
    if not rooted:
        if _IS_DEBUG_LOGGING:
            to_modify.debug_check_tree(check_splits=True, logger_obj=_LOG)
            to_consume.debug_check_tree(check_splits=True, logger_obj=_LOG)

        reroot_on_lowest_common_index_path(to_modify, leaf_intersection)
        reroot_on_lowest_common_index_path(to_consume, leaf_intersection)

        if _IS_DEBUG_LOGGING:
            to_modify.debug_check_tree(check_splits=True, logger_obj=_LOG)
            to_consume.debug_check_tree(check_splits=True, logger_obj=_LOG)

        # Rerooting may have changed the seed nodes, so fetch them again;
        # the root splits themselves are expected to be unchanged.
        to_mod_root = to_modify.seed_node
        assert(to_mod_root.edge.split_bitmask == to_mod_split)
        to_consume_root = to_consume.seed_node
        assert(to_consume_root.edge.split_bitmask == to_consume_split)

    # Group the edges of each tree by the part of their split that lies in
    # the common leaf set.  Edges whose masked split is empty or covers the
    # whole intersection are skipped.
    for s, e in tmse.iteritems():
        # The bitmask stored on the edge is used instead of the dict key.
        # NOTE(review): presumably the key can be a normalized form of the
        # split -- confirm.
        s = e.split_bitmask
        masked = s & leaf_intersection
        if masked and masked != leaf_intersection:
            e_list = to_mod_relevant_splits.setdefault(masked, [])
            e_list.append((s, e))

    for s, e in to_consume.split_edges.iteritems():
        s = e.split_bitmask
        masked = s & leaf_intersection
        if masked and masked != leaf_intersection:
            e_list = to_consume_relevant_splits.setdefault(masked, [])
            e_list.append((s, e))

    # Because each of these paths radiates away from the root (none of the paths
    # cross the root), the split_bitmasks for deeper edges will be supersets
    # of the split_bitmasks for shallower nodes. Thus if we reverse sort we
    # get the edges in the order root->tip
    for split, path in to_mod_relevant_splits.iteritems():
        path.sort(reverse=True)
        # Replace the (split, edge) pairs by the bare edges, in place.
        t = [i[1] for i in path]
        del path[:]
        path.extend(t)
    for split, path in to_consume_relevant_splits.iteritems():
        path.sort(reverse=True)
        t = [i[1] for i in path]
        del path[:]
        path.extend(t)
    if _IS_DEBUG_LOGGING:
        to_modify.debug_check_tree(check_splits=True, logger_obj=_LOG)
        to_consume.debug_check_tree(check_splits=True, logger_obj=_LOG)

    # first we'll collapse all paths in the common leafset in to_modify that
    # are not in to_consume
    _collapse_paths_not_found(to_mod_relevant_splits, to_consume_relevant_splits, tmse)
    # Now we'll collapse all paths in the common leafset in to_consume that
    # are not in to_modify
    _collapse_paths_not_found(to_consume_relevant_splits, to_mod_relevant_splits)

    # first we'll deal with subtrees that are:
    # - not in the leaf intersection set, and
    # - attached to "relevant" nodes
    # We simply move these subtrees from the to_consume tree to the appropriate
    # node in to_modify
    to_steal = [i for i in to_consume_root.child_nodes() if (i.edge.split_bitmask & leaf_intersection) == 0]
    for child in to_steal:
        to_mod_root.add_child(child)
        to_mod_root.edge.split_bitmask |= child.edge.split_bitmask

    # Now merge path by path; after the collapsing above every remaining
    # masked split of to_consume is expected to exist in to_modify too.
    for masked_split, to_consume_path in to_consume_relevant_splits.iteritems():
        to_mod_path = to_mod_relevant_splits.get(masked_split)
        # Dump diagnostics before the assertion below fails.
        if _IS_DEBUG_LOGGING and to_mod_path is None:
            _LOG.debug("%s = mask" % format_split(leaf_intersection, taxon_set=taxon_set))
            _LOG.debug("%s = masked" % format_split(masked_split, taxon_set=taxon_set))
            _LOG.debug("%s = raw" % format_split(to_consume_path[-1].split_bitmask, taxon_set=taxon_set))
            for k, v in to_mod_relevant_splits.iteritems():
                _LOG.debug("%s in to_mod_relevant_splits" % format_split(k, taxon_set=taxon_set))

        assert to_mod_path is not None
        # Tip-most nodes of the two corresponding paths.
        to_mod_head = to_mod_path[-1].head_node
        to_mod_head_edge = to_mod_head.edge
        to_consume_head = to_consume_path[-1].head_node
        for child in to_consume_head.child_nodes():
            if (child.edge.split_bitmask & leaf_intersection) == 0:
                # child is the root of a subtree that has no children in the leaf_intersection
                to_mod_head.add_child(child)
                to_mod_head_edge.split_bitmask |= child.edge.split_bitmask
        if len(to_consume_path) > 1:
            if len(to_mod_path) > 1:
                # collision
                if gordons_supertree:
                    for edge in to_mod_path[2:]:
                        p = edge.tail_node
                        c = edge.head_node
                        sibs = p.child_nodes()
                        for sib in sibs:
                            _LOG.debug("sib is %s" % (sib.compose_newick()))
                            if sib is not c:
                                if not sib.is_leaf():
                                    collapse_clade(sib)
                                    collapse_edge(sib.edge)
                        collapse_edge(p.edge)
                    mid_node = to_mod_path[0].head_node
                    for edge in to_consume_path[1:]:
                        p = edge.tail_node
                        avoid = edge.head_node
                        # Move everything hanging off the path (but not the
                        # path itself) onto mid_node, flattened.
                        for child in p.child_nodes():
                            _LOG.debug("child is %s" % (child.compose_newick()))
                            if child is not avoid:
                                mid_node.add_child(child)
                                collapse_clade(child)
                                if not child.is_leaf():
                                    collapse_edge(child.edge)
                                mid_node.edge.split_bitmask |= child.edge.split_bitmask
                else:
                    # Collapse the interior of the to_modify path, then hang
                    # the side branches of the to_consume path on mid_node.
                    for edge in to_mod_path[1:-1]:
                        collapse_edge(edge)
                    mid_node = to_mod_path[0].head_node
                    for edge in to_consume_path[1:]:
                        p = edge.tail_node
                        avoid = edge.head_node
                        for child in p.child_nodes():
                            if child is not avoid:
                                mid_node.add_child(child)
                                mid_node.edge.split_bitmask |= child.edge.split_bitmask
            else:
                # we have to move the subtrees from to_consume to to_modify
                to_mod_edge = to_mod_path[0]
                to_mod_tail, to_mod_head = to_mod_edge.tail_node, to_mod_edge.head_node
                deepest_edge_to_move = to_consume_path[0]
                deepest_node_to_move = deepest_edge_to_move.head_node
                tipmost_edge_to_move = to_consume_path[-1]
                tipmost_node_to_move = tipmost_edge_to_move.tail_node
                prev_head = tipmost_edge_to_move.head_node

                # Splice the to_consume path in place of the single
                # to_modify edge: its deepest node hangs where to_mod_head
                # was, and to_mod_head replaces the old tip of the path.
                to_mod_tail.add_child(deepest_node_to_move)
                to_mod_tail.remove_child(to_mod_head)
                tipmost_node_to_move.add_child(to_mod_head)
                tipmost_node_to_move.remove_child(prev_head)
    # Recompute every split of the merged tree from scratch.
    encode_splits(to_modify)