def decompose_trees(tree, max_subset_size):
    """Split ``tree`` into subtrees by repeated centroid-edge bisection.

    The trees are processed in rounds.  Each tree of the current round is
    cut in two with ``bisect_tree(..., breaking_edge_style="centroid")``.
    A half with more than ``max_subset_size`` leaves is scheduled for the
    next round; a half with at least 5 leaves (and no more than
    ``max_subset_size``) is kept as a finished subset.

    The input tree is always bisected at least once, even when it already
    has ``max_subset_size`` leaves or fewer.

    Args:
        tree: Tree object accepted by ``bisect_tree``; the halves it
            returns must expose an ``n_leaves`` attribute.
        max_subset_size: Largest number of leaves a finished subset may have.

    Returns:
        List of finished subtrees, in the order they were produced.

    Raises:
        SystemExit: via ``sys.exit`` as soon as a half with fewer than
            5 leaves is encountered.
    """
    finished = []
    pending = [tree]
    while pending:
        # Start a new round: everything queued so far is split now, and
        # oversized halves are collected for the following round.
        current_round, pending = pending, []
        for parent in current_round:
            left, right = bisect_tree(parent,
                                      breaking_edge_style="centroid")
            # Leaf counts of both halves are read before either is judged.
            halves = (
                (left, left.n_leaves, "T1 has fewer than 5 leaves!"),
                (right, right.n_leaves, "T2 has fewer than 5 leaves!"),
            )
            for half, leaf_count, too_small_message in halves:
                if leaf_count > max_subset_size:
                    pending.append(half)
                elif leaf_count >= 5:
                    finished.append(half)
                else:
                    sys.exit(too_small_message)
    return finished
Exemplo n.º 2
0
    def _do_test_longest(self, t, level="1"):
        """Recursively check that 'longest' bisection conserves branch length.

        ``t`` is split with ``bisect_tree(t, 'longest')``.  The sum of the
        edge lengths of ``t`` with its single largest value removed must
        equal, to within ``TOL``, the combined edge-length sums of the two
        halves.  Each half with more than two leaves is then checked the
        same way.

        Trees with fewer than 3 leaves are skipped.

        Args:
            t: Tree wrapper exposing ``n_leaves``, ``_tree`` and
                ``compose_newick()``.
            level: Dotted path ("1", "1.1", "1.2", ...) identifying the
                position in the recursion; used in debug output and in the
                failure message.
        """
        if t.n_leaves < 3:
            return

        # Edges whose length is None or zero are skipped; they would not
        # contribute to the sums compared below.
        before_br_len = [
            e.length for e in t._tree.preorder_edge_iter() if e.length
        ]
        _LOG.debug("code=%s\n before = %s" % (level, t.compose_newick()))
        _LOG.debug(" after len(before_br_len) = %d" % (len(before_br_len)))

        t1, t2 = bisect_tree(t, 'longest')
        after_1_br_len = [
            e.length for e in t1._tree.preorder_edge_iter() if e.length
        ]
        after_2_br_len = [
            e.length for e in t2._tree.preorder_edge_iter() if e.length
        ]

        # The branch counts below are only reported in the debug log.
        # NOTE: an edge-count assertion that used them was already disabled
        # in this helper; only the branch-length check is enforced.
        num_branches_1 = len(after_1_br_len) or 1
        num_branches_2 = len(after_2_br_len) or 1
        # cherries are rooted, so they make 1 edge look like 2
        if len(t1._tree.seed_node.child_nodes()) == 2:
            num_branches_1 -= 1
        if len(t2._tree.seed_node.child_nodes()) == 2:
            num_branches_2 -= 1

        _LOG.debug(" after 1 = %s" % (t1.compose_newick()))
        _LOG.debug(" after num_branches_1 = %d" % (num_branches_1))
        _LOG.debug(" after 2 = %s" % (t2.compose_newick()))
        _LOG.debug(" after num_branches_2 = %d" % (num_branches_2))

        # Discard the largest edge length: the check assumes this is the
        # edge that the 'longest' bisection removed.
        before_br_len.sort(reverse=True)
        before_br_len.pop(0)
        before_sum = sum(before_br_len)
        after_sum = sum(after_1_br_len) + sum(after_2_br_len)
        diff = before_sum - after_sum
        self.assertLess(
            abs(diff), TOL,
            "code=%s: branch-length sum changed by %r after bisection "
            "(before=%r, after=%r)" % (level, diff, before_sum, after_sum))

        if t1.n_leaves > 2:
            self._do_test_longest(t1, level=level + ".1")
        if t2.n_leaves > 2:
            self._do_test_longest(t2, level=level + ".2")
Exemplo n.º 3
0
    def _do_test_longest(self, t, level="1"):
        """Verify, recursively, that 'longest' bisection keeps total branch length.

        After ``bisect_tree(t, 'longest')`` the edge lengths of the two
        halves must add up (within ``TOL``) to the edge lengths of ``t``
        minus its single largest value.  Halves with more than two leaves
        are verified in turn.  Trees with fewer than 3 leaves are skipped.

        Args:
            t: Tree wrapper exposing ``n_leaves``, ``_tree`` and
                ``compose_newick()``.
            level: Dotted recursion path ("1", "1.1", ...) shown in the debug
                log and in the assertion message.
        """
        if t.n_leaves < 3:
            return

        # ``if e.length`` drops edges with a None or zero length; neither
        # affects the sums that are compared.
        before_br_len = [e.length for e in t._tree.preorder_edge_iter() if e.length]
        _LOG.debug("code=%s\n before = %s" % (level, t.compose_newick()))
        _LOG.debug(" after len(before_br_len) = %d" % (len(before_br_len)))

        t1, t2 = bisect_tree(t, 'longest')
        after_1_br_len = [e.length for e in t1._tree.preorder_edge_iter() if e.length]
        after_2_br_len = [e.length for e in t2._tree.preorder_edge_iter() if e.length]

        # Branch counts are informational only (debug log).  The edge-count
        # assertion that once consumed them was already commented out, so
        # the locals that existed solely for it have been dropped.
        num_branches_1 = len(after_1_br_len) or 1
        num_branches_2 = len(after_2_br_len) or 1
        # cherries are rooted, so they make 1 edge look like 2
        if len(t1._tree.seed_node.child_nodes()) == 2:
            num_branches_1 -= 1
        if len(t2._tree.seed_node.child_nodes()) == 2:
            num_branches_2 -= 1

        _LOG.debug(" after 1 = %s" % (t1.compose_newick()))
        _LOG.debug(" after num_branches_1 = %d" % (num_branches_1))
        _LOG.debug(" after 2 = %s" % (t2.compose_newick()))
        _LOG.debug(" after num_branches_2 = %d" % (num_branches_2))

        # Remove the largest edge length, which the check treats as the
        # edge cut by the bisection.
        before_br_len.sort(reverse=True)
        before_br_len.pop(0)
        before_sum = sum(before_br_len)
        after_sum = sum(after_1_br_len) + sum(after_2_br_len)
        diff = before_sum - after_sum
        # assertLess reports both operands on failure, unlike assertTrue.
        self.assertLess(
            abs(diff), TOL,
            "code=%s: branch-length sum changed by %r after bisection "
            "(before=%r, after=%r)" % (level, diff, before_sum, after_sum))

        if t1.n_leaves > 2:
            self._do_test_longest(t1, level=level + ".1")
        if t2.n_leaves > 2:
            self._do_test_longest(t2, level=level + ".2")
Exemplo n.º 4
0
def decompose_trees(tree, max_subset_size):
    """Split ``tree`` into subtrees by repeated centroid-edge bisection.

    Trees are processed in rounds.  Each tree of the current round is cut
    in two with ``bisect_tree(..., breaking_edge_style="centroid")``.  A
    half with more than ``max_subset_size`` leaves is queued for the next
    round; a half with at least 5 leaves (and no more than
    ``max_subset_size``) is kept as a finished subset.

    The input tree is always bisected at least once, even when it already
    has ``max_subset_size`` leaves or fewer.

    Args:
        tree: Tree object accepted by ``bisect_tree``; the halves it
            returns must expose an ``n_leaves`` attribute.
        max_subset_size: Largest number of leaves a finished subset may have.

    Returns:
        List of finished subtrees, in the order they were produced.

    Raises:
        SystemExit: via ``sys.exit`` as soon as a half with fewer than
            5 leaves is encountered.
    """
    next_trees = [tree]
    done_trees = []

    while next_trees:
        trees, next_trees = next_trees, []
        for current in trees:
            t1, t2 = bisect_tree(current, breaking_edge_style="centroid")
            # NOTE: this function used to build a full phylogenetic distance
            # matrix for each half and scan every taxon pair to obtain the
            # half's diameter.  The result was never used (only referenced
            # from commented-out prints), so that quadratic work per
            # bisection has been removed.
            for label, subtree in (("T1", t1), ("T2", t2)):
                n_leaves = subtree.n_leaves
                if n_leaves > max_subset_size:
                    next_trees.append(subtree)
                elif n_leaves >= 5:
                    done_trees.append(subtree)
                else:
                    sys.exit(label + " has fewer than 5 leaves!")
    return done_trees
Exemplo n.º 5
0
def decompose_phylogeny(phy, max_size, min_size):
    """Break ``phy`` into pieces by repeated midpoint bisection.

    Trees are taken from a stack (last in, first out) and split with
    ``bisect_tree(..., breaking_edge_style='midpoint', max_size=max_size)``.
    A deep copy of each half is pushed back on the stack when the half has
    more than ``min_size`` leaves, and is otherwise added to the result.

    Note that, as written, ``min_size`` is the threshold above which a half
    is split again, while ``max_size`` is only forwarded to ``bisect_tree``.

    Args:
        phy: Tree to decompose; passed to ``bisect_tree`` as ``tree``.
        max_size: Forwarded unchanged to ``bisect_tree``.
        min_size: Halves with more than this many leaves are split further.

    Returns:
        List of deep copies of the halves that were not split further.
    """
    finished = []
    stack = [phy]
    while stack:
        current = stack.pop()
        first, second = bisect_tree(tree=current,
                                    breaking_edge_style='midpoint',
                                    max_size=max_size)
        for half in (first, second):
            # Still too large -> back on the stack; otherwise it is done.
            destination = stack if half.count_leaves() > min_size else finished
            destination.append(deepcopy(half))
    return finished
Exemplo n.º 6
0
def main(args):
    """Bisect the input tree once and write each half's leaf names to a file.

    The Newick tree at ``args.input_tree_file`` is read with DendroPy, its
    polytomies are resolved, and it is wrapped in ``PhylogeneticTree`` and
    split with ``bisect_tree`` (default arguments).  The leaf names of the
    first half are written to ``<args.output>/A.lab`` and those of the
    second half to ``<args.output>/B.lab``, one name per line with no
    trailing newline.

    Args:
        args: Object with ``input_tree_file`` (path to a Newick file) and
            ``output`` (path prefix of the directory receiving the
            ``.lab`` files) attributes.
    """
    # Step 1: Decompose tree
    tree = dendropy.Tree.get(path=args.input_tree_file, schema="newick")
    tree.resolve_polytomies(limit=2, update_bipartitions=True)
    tree = PhylogeneticTree(tree)
    t1, t2 = bisect_tree(tree)

    # Step 2: Write out leaf subsets
    for file_name, subtree in (("A.lab", t1), ("B.lab", t2)):
        leaf_names = subtree.leaf_node_names()
        with open(args.output + "/" + file_name, "w") as f:
            f.write("\n".join(leaf_names))
Exemplo n.º 7
0
    def _do_test_centroid(self, t, level="1"):
        """Recursively check that centroid bisection conserves the leaf count.

        Trees with fewer than 5 leaves are skipped.  Otherwise
        ``t.calc_splits()`` is called, ``t`` is split with
        ``bisect_tree(t, 'centroid')`` and the leaf counts of the two halves
        must add up to the leaf count of ``t``.  Each half with more than
        two leaves is then checked the same way.

        Args:
            t: Tree wrapper exposing ``n_leaves`` and ``calc_splits()``.
            level: Dotted recursion path ("1", "1.1", "1.2", ...) included
                in the failure message.
        """
        if t.n_leaves < 5:
            return

        t.calc_splits()

        t1, t2 = bisect_tree(t, 'centroid')
        n1 = t1.n_leaves
        n2 = t2.n_leaves
        # A unittest assertion keeps working under ``python -O`` (which
        # strips bare ``assert`` statements) and reports the counts.
        # NOTE(review): assumes this helper lives on a unittest.TestCase
        # -- confirm.
        self.assertEqual(
            n1 + n2, t.n_leaves,
            "code=%s: halves have %d + %d leaves but the input tree has %d"
            % (level, n1, n2, t.n_leaves))

        if t1.n_leaves > 2:
            self._do_test_centroid(t1, level=level + ".1")
        if t2.n_leaves > 2:
            self._do_test_centroid(t2, level=level + ".2")
Exemplo n.º 8
0
    def _do_test_centroid(self, t, level="1"):
        """Verify, recursively, that centroid bisection loses no leaves.

        For a tree with at least 5 leaves, ``t.calc_splits()`` is called and
        the tree is split with ``bisect_tree(t, 'centroid')``; the two
        halves together must have exactly as many leaves as ``t``.  Halves
        with more than two leaves are verified in turn.  Smaller trees
        return immediately.

        Args:
            t: Tree wrapper exposing ``n_leaves`` and ``calc_splits()``.
            level: Dotted recursion path ("1", "1.1", ...) shown in the
                failure message.
        """
        if t.n_leaves < 5:
            return

        t.calc_splits()

        t1, t2 = bisect_tree(t, 'centroid')
        leaves_1 = t1.n_leaves
        leaves_2 = t2.n_leaves
        # Use a TestCase assertion rather than a bare ``assert``: it is not
        # removed by ``python -O`` and it reports the mismatching counts.
        # NOTE(review): assumes ``self`` is a unittest.TestCase -- confirm.
        self.assertEqual(
            leaves_1 + leaves_2, t.n_leaves,
            "code=%s: halves have %d + %d leaves but the input tree has %d"
            % (level, leaves_1, leaves_2, t.n_leaves))

        if t1.n_leaves > 2:
            self._do_test_centroid(t1, level=level + ".1")
        if t2.n_leaves > 2:
            self._do_test_centroid(t2, level=level + ".2")