Пример #1
0
    def test_extend(self):
        """Extend should graft several nodes onto a parent in one call.

        Covers extending with freshly parsed subtrees and extending with
        the children list of another tree, which must end up empty.
        """
        z1_subtree = TreeNode.read(StringIO(u"(x1,y1)z1;"))
        z2_subtree = TreeNode.read(StringIO(u"(x2,y2)z2;"))
        receiver = TreeNode.read(StringIO(u"(x1,y1)z1;"))
        donor = TreeNode.read(StringIO(u"(x2,y2)z2;"))
        self.simple_t.extend([z1_subtree, z2_subtree])

        # Handing over another node's children must strip them from the donor.
        receiver.extend(donor.children)
        self.assertEqual(0, len(donor.children))
        for position, label in enumerate(("x1", "y1", "x2", "y2")):
            self.assertEqual(receiver.children[position].name, label)

        # The two original children stay in front of the grafted subtrees.
        for position, label in enumerate(("i1", "i2", "z1", "z2")):
            self.assertEqual(self.simple_t.children[position].name, label)
        self.assertEqual(len(self.simple_t.children), 4)

        # The grafted subtrees keep their own children ...
        for position, labels in ((2, ("x1", "y1")), (3, ("x2", "y2"))):
            for offset, label in enumerate(labels):
                grafted = self.simple_t.children[position]
                self.assertEqual(grafted.children[offset].name, label)

        # ... and now point back at their new parent.
        self.assertIs(z1_subtree.parent, self.simple_t)
        self.assertIs(z2_subtree.parent, self.simple_t)
Пример #2
0
    def setUp(self):
        """Build the tree fixtures and list-mutating helpers used by tests."""
        self.simple_t = TreeNode.read(StringIO(u"((a,b)i1,(c,d)i2)root;"))

        # Hand-wired tree: a -> (b -> (c -> (d, e, f -> (g))), h)
        by_name = {label: TreeNode(label) for label in "abcdefgh"}
        for parent, child in ("ab", "bc", "cd", "ce", "cf", "fg", "ah"):
            by_name[parent].append(by_name[child])
        self.TreeNode = by_name
        self.TreeRoot = by_name["a"]

        def rev_f(items):
            # Reverse the list in place.
            items.reverse()

        def rotate_f(items):
            # Move the last element to the front, in place.
            items.insert(0, items.pop())

        self.rev_f = rev_f
        self.rotate_f = rotate_f
        self.complex_tree = TreeNode.read(StringIO(
            u"(((a,b)int1,(x,y,(w,z)int2,(c,d)int3)int4),(e,f)int5);"))
Пример #3
0
    def test_extend(self):
        """Extend a node with several children at once.

        Checks both a list of new subtrees and the children list borrowed
        from another tree, which is expected to be left without children.
        """
        def parse(newick):
            return TreeNode.read(io.StringIO(newick))

        def check_child_names(parent, expected):
            # Compare child names position by position.
            for idx, expected_name in enumerate(expected):
                self.assertEqual(parent.children[idx].name, expected_name)

        grafted_a = parse("(x1,y1)z1;")
        grafted_b = parse("(x2,y2)z2;")
        target = parse("(x1,y1)z1;")
        source = parse("(x2,y2)z2;")
        self.simple_t.extend([grafted_a, grafted_b])

        target.extend(source.children)
        self.assertEqual(0, len(source.children))
        check_child_names(target, ['x1', 'y1', 'x2', 'y2'])

        check_child_names(self.simple_t, ['i1', 'i2', 'z1', 'z2'])
        self.assertEqual(len(self.simple_t.children), 4)
        check_child_names(self.simple_t.children[2], ['x1', 'y1'])
        check_child_names(self.simple_t.children[3], ['x2', 'y2'])
        self.assertIs(grafted_a.parent, self.simple_t)
        self.assertIs(grafted_b.parent, self.simple_t)
Пример #4
0
 def test_reformat_riatahgt(self):
     """ reformat_riatahgt() should write the expected NEXUS lines
     """
     species_tree = TreeNode.read(self.species_tree_fp, format='newick')
     gene_tree_1 = TreeNode.read(self.gene_tree_1_fp, format='newick')
     output_tree_fp = join(self.working_dir, "joined_trees.nex")
     reformat_riatahgt(gene_tree_1, species_tree, output_tree_fp)

     # Expected "Tree ..." lines, one per input tree.
     species_line = (
         "Tree speciesTree = "
         "(((((((SE001:2.1494877,SE010:1.08661):3.7761166,SE008:"
         "0.86305436):0.21024487,(SE006:0.56704221,SE009:0.5014676):"
         "0.90294223):0.20542323,SE005:3.0992506):0.37145632,SE004:"
         "1.8129133):0.72933621,SE003:1.737411):0.24447835,(SE002:"
         "1.6606127,SE007:0.70000178):1.6331374):1.594016;\n")
     gene_line = (
         "Tree geneTree = "
         "(((((((SE001:2.1494876,SE010:2.1494876):"
         "3.7761166,SE008:5.9256042):0.2102448,(SE006:"
         "5.2329068,SE009:5.2329068):0.9029422):0.2054233,"
         "SE005:6.3412723):0.3714563,SE004:6.7127286):"
         "0.7293362,SE003:7.4420648):0.2444784,((SE002:"
         "6.0534057,SE007:6.0534057):0.4589905,((((SE001:"
         "2.1494876,SE010:2.1494876):3.7761166,SE008:"
         "5.9256042):0.2102448,(SE006:5.2329068,SE009:"
         "5.2329068):0.9029422):0.2054233,SE005:6.3412723):"
         "0.1711239):1.174147):1.594016;\n")
     reformat_tree_exp = [
         "#NEXUS\n",
         "BEGIN TREES;\n",
         species_line,
         gene_line,
         "END;\n",
         "BEGIN PHYLONET;\n",
         "RIATAHGT speciesTree {geneTree};\n",
         "END;\n",
     ]

     with open(output_tree_fp, 'r') as written:
         reformat_tree_act = written.readlines()
     self.assertListEqual(reformat_tree_exp, reformat_tree_act)
Пример #5
0
    def test_tip_tip_distances_missing_length(self):
        """Missing branch lengths should warn and match explicit zeros."""
        incomplete = TreeNode.read(io.StringIO("((a,b:6)c:4,(d,e:0)f);"))
        filled_in = TreeNode.read(
            io.StringIO("((a:0,b:6)c:4,(d:0,e:0)f:0);"))
        expected_dm = filled_in.tip_tip_distances()

        # assert_warns hands back whatever the wrapped callable returns.
        observed_dm = npt.assert_warns(RepresentationWarning,
                                       incomplete.tip_tip_distances)
        self.assertEqual(observed_dm, expected_dm)
Пример #6
0
    def setUp(self):
        """Create the shared trees and the in-place list helpers."""
        self.simple_t = TreeNode.read(io.StringIO("((a,b)i1,(c,d)i2)root;"))

        nodes = {}
        for label in 'abcdefgh':
            nodes[label] = TreeNode(label)

        # (parent, child) pairs, attached in this exact order.
        edges = [('a', 'b'), ('b', 'c'), ('c', 'd'), ('c', 'e'),
                 ('c', 'f'), ('f', 'g'), ('a', 'h')]
        for parent_label, child_label in edges:
            nodes[parent_label].append(nodes[child_label])
        self.TreeNode = nodes
        self.TreeRoot = nodes['a']

        def rev_f(items):
            # In-place reversal.
            items.reverse()

        def rotate_f(items):
            # In-place rotation: the last element becomes the first.
            last = items.pop()
            items.insert(0, last)

        self.rev_f = rev_f
        self.rotate_f = rotate_f

        complex_newick = (
            "(((a,b)int1,(x,y,(w,z)int2,(c,d)int3)int4),(e,f)int5);")
        self.complex_tree = TreeNode.read(io.StringIO(complex_newick))
Пример #7
0
 def test_species_gene_mapping_check_species_labels(self):
     """ species_gene_mapping() should raise ValueError for these trees
     """
     species_tree = TreeNode.read(self.species_tree_2_fp, format='newick')
     gene_tree_3 = TreeNode.read(self.gene_tree_3_fp, format='newick')
     with self.assertRaises(ValueError):
         species_gene_mapping(gene_tree=gene_tree_3,
                              species_tree=species_tree)
Пример #8
0
    def test_index_tree(self):
        """index_tree should produce correct index and node map"""
        # Each case: newick, expected postorder ids, expected child index.
        cases = [
            # first tree: contains singleton outgroup
            ('(((a,b),c),(d,e));',
             [0, 1, 2, 3, 6, 4, 5, 7, 8],
             [[2, 0, 1], [6, 2, 3], [7, 4, 5], [8, 6, 7]]),
            # second tree: strictly bifurcating
            ('(((a,b),(c,d)),(e,f));',
             [0, 1, 4, 2, 3, 5, 8, 6, 7, 9, 10],
             [[4, 0, 1], [5, 2, 3], [8, 4, 5], [9, 6, 7], [10, 8, 9]]),
            # third tree: contains trifurcation and single-child parent
            ('(((a,b,c),(d)),(e,f));',
             [0, 1, 2, 4, 3, 5, 8, 6, 7, 9, 10],
             [[4, 0, 2], [5, 3, 3], [8, 4, 5], [9, 6, 7], [10, 8, 9]]),
        ]

        # Parse every tree up front, then index and check them in turn.
        trees = [TreeNode.read(io.StringIO(newick)) for newick, _, _ in cases]

        for tree, (_, exp_order, exp_children) in zip(trees, cases):
            _, child_index = tree.index_tree()
            postorder_ids = [
                node.id
                for node in tree.traverse(self_before=False, self_after=True)
            ]
            self.assertEqual(postorder_ids, exp_order)
            npt.assert_equal(child_index, np.array(exp_children))
Пример #9
0
    def test_biom_match_tips_intersect_columns(self):
        """match_tips should handle a table with fewer columns than tips.

        The table only carries observations 'a', 'b' and 'd', while the
        tree also has a tip 'c'. The expected result is a table reordered
        to 'd', 'a', 'b' and a tree without 'c'.
        """
        # table has fewer columns than tree tips
        # (the original built this identical table twice; once is enough)
        table = Table(
            np.array([[0, 0, 1],
                      [2, 3, 4],
                      [5, 5, 3],
                      [0, 0, 1]]).T,
            ['a', 'b', 'd'],
            ['s1', 's2', 's3', 's4'])

        tree = TreeNode.read([u"(((a,b)f, c),d)r;"])

        exp_table = Table(
            np.array([[1, 0, 0],
                      [4, 2, 3],
                      [3, 5, 5],
                      [1, 0, 0]]).T,
            ['d', 'a', 'b'],
            ['s1', 's2', 's3', 's4'])

        exp_tree = TreeNode.read([u"(d,(a,b)f)r;"])
        res_table, res_tree = match_tips(table, tree)
        self.assertEqual(exp_table, res_table)
        self.assertEqual(str(exp_tree), str(res_tree))
Пример #10
0
    def test_compare_subsets(self):
        """compare_subsets should score 0 for a tree against itself.

        Also checks partially overlapping trees, with and without
        exclude_absent_taxa, and the comparison against self.TreeRoot.
        """
        balanced = TreeNode.read(io.StringIO('((H,G),(R,M));'))
        ladder = TreeNode.read(io.StringIO('(((H,G),R),M);'))
        other_taxa = TreeNode.read(io.StringIO('(((H,G),(O,R)),X);'))

        # A tree compared with itself.
        self.assertEqual(balanced.compare_subsets(balanced), 0)
        self.assertEqual(ladder.compare_subsets(ladder), 0)

        # Same tips, different topology.
        self.assertEqual(balanced.compare_subsets(ladder), 0.5)

        # Partially different tip sets.
        self.assertEqual(balanced.compare_subsets(other_taxa), 1 - 2. / 5)
        self.assertEqual(
            balanced.compare_subsets(other_taxa, exclude_absent_taxa=True),
            1 - 2. / 3)

        # Against the hand-built fixture tree.
        self.assertEqual(
            balanced.compare_subsets(self.TreeRoot,
                                     exclude_absent_taxa=True),
            1)
        self.assertEqual(balanced.compare_subsets(self.TreeRoot), 1)
Пример #11
0
    def test_commonname_promotion(self):
        """correctly promote names if possible"""
        consensus_tree = TreeNode.read(
            StringIO(u"(((s1,s2)g1,(s3,s4)g2,(s5,s6)g3)f1)o1;"))
        # Rank is keyed on the first letter of each consensus node name.
        rank_lookup = {'s': 6, 'g': 5, 'f': 4, 'o': 3, 'c': 2, 'p': 1, 'k': 0}
        for node in consensus_tree.traverse(include_self=True):
            node.Rank = rank_lookup[node.name[0]]
        data = StringIO(u"((((1)s1,(2)s2),((3)s3,(4)s5)))o1;")
        lookup = {node.name: node
                  for node in consensus_tree.traverse(include_self=True)}
        exp = "((((1)s1,(2)s2)g1,((3)'g2; s3',(4)'g3; s5')))'o1; f1';"

        t = TreeNode.read(data)
        t.Rank = 3
        # The unnamed node under the root and its two unnamed children
        # carry no rank; the four named nodes below them get rank 6.
        wrapper = t.children[0]
        wrapper.Rank = None
        for group in wrapper.children[:2]:
            group.Rank = None
        for group in wrapper.children[:2]:
            for named in group.children[:2]:
                named.Rank = 6

        backfill_names_gap(t, lookup)
        commonname_promotion(t)

        written = StringIO()
        t.write(written)

        self.assertEqual(written.getvalue().strip(), exp)
Пример #12
0
    def test_majority_rule(self):
        """majority_rule should build one consensus tree with supports.

        Runs the consensus unweighted and with every tree weighted by 2,
        checking topology and node supports in both cases, and checks that
        a weights vector of the wrong length raises ValueError.
        """
        trees = [
            TreeNode.read(StringIO("(A,(B,(H,(D,(J,(((G,E),(F,I)),C))))));")),
            TreeNode.read(StringIO("(A,(B,(D,((J,H),(((G,E),(F,I)),C)))));")),
            TreeNode.read(StringIO("(A,(B,(D,(H,(J,(((G,E),(F,I)),C))))));")),
            TreeNode.read(StringIO("(A,(B,(E,(G,((F,I),((J,(H,D)),C))))));")),
            TreeNode.read(StringIO("(A,(B,(E,(G,((F,I),(((J,H),D),C))))));")),
            TreeNode.read(StringIO("(A,(B,(E,((F,I),(G,((J,(H,D)),C))))));")),
            TreeNode.read(StringIO("(A,(B,(E,((F,I),(G,(((J,H),D),C))))));")),
            TreeNode.read(StringIO("(A,(B,(E,((G,(F,I)),((J,(H,D)),C)))));")),
            TreeNode.read(StringIO("(A,(B,(E,((G,(F,I)),(((J,H),D),C)))));"))]

        exp = TreeNode.read(StringIO("(((E,(G,(F,I),(C,(D,J,H)))),B),A);"))
        obs = majority_rule(trees)
        self.assertEqual(exp.compare_subsets(obs[0]), 0.0)
        self.assertEqual(len(obs), 1)

        tree = obs[0]
        exp_supports = sorted([9.0, 9.0, 9.0, 6.0, 6.0, 6.0])
        obs_supports = sorted([n.support for n in tree.non_tips()])
        self.assertEqual(obs_supports, exp_supports)

        # Same trees, each weighted by 2.
        obs = majority_rule(trees, weights=np.ones(len(trees)) * 2)
        self.assertEqual(exp.compare_subsets(obs[0]), 0.0)
        self.assertEqual(len(obs), 1)

        tree = obs[0]
        exp_supports = sorted([18.0, 18.0, 12.0, 18.0, 12.0, 12.0])
        obs_supports = sorted([n.support for n in tree.non_tips()])
        # The original computed both lists but never compared them.
        self.assertEqual(obs_supports, exp_supports)

        # Two weights for nine trees must be rejected.
        with self.assertRaises(ValueError):
            majority_rule(trees, weights=[1, 2])
Пример #13
0
def _main(gene_tree_fp, species_tree_fp, gene_msa_fa_fp, output_tree_fp, output_msa_phy_fp, method):
    """ Reformat trees into the input expected by an HGT detection method.

    Species tree can be multifurcating, however will be converted to
    bifurcating trees for software that require them. Leaf labels of
    species tree and gene tree must match, however the label
    SPECIES_GENE is acceptable for multiple genes in the gene
    tree. Leaf labels must also be at most 10 characters long (for
    PHYLIP manipulations).

    Both trees are read as Newick. ``method`` selects the reformatting
    function; a value that matches none of the known methods results in
    no output being written.
    """

    # TODO: add function to check where tree is multifurcating and the
    # labeling is correct
    gene_tree = TreeNode.read(gene_tree_fp, format="newick")
    species_tree = TreeNode.read(species_tree_fp, format="newick")

    # tree-puzzle is the only method that also needs the alignment paths.
    if method == "tree-puzzle":
        reformat_treepuzzle(
            gene_tree=gene_tree,
            species_tree=species_tree,
            gene_msa_fa_fp=gene_msa_fa_fp,
            output_tree_fp=output_tree_fp,
            output_msa_phy_fp=output_msa_phy_fp,
        )
        return

    # Methods whose reformatting function takes only the two trees and
    # the output path.
    tree_only_reformatters = {
        "ranger-dtl": reformat_rangerdtl,
        "trex": reformat_trex,
        "riata-hgt": reformat_riatahgt,
        "jane4": reformat_jane4,
    }
    reformat = tree_only_reformatters.get(method)
    if reformat is not None:
        reformat(gene_tree=gene_tree, species_tree=species_tree, output_tree_fp=output_tree_fp)
Пример #14
0
    def setUp(self):
        """Create the table, trees, metadata and regression inputs.

        Also creates the "results" directory in the current working
        directory.
        """
        sample_ids = ['s1', 's2', 's3', 's4', 's5']

        # Column k of the dict is [1, k]; transposed so samples are rows.
        self.table = pd.DataFrame(
            {sid: np.array([1., float(k)])
             for k, sid in enumerate(sample_ids, 1)},
            index=['Y2', 'Y1']).T
        self.tree = TreeNode.read(['(c, (b,a)Y2)Y1;'])
        self.metadata = pd.DataFrame(
            {'lame': [1, 1, 1, 1, 1], 'real': [1, 2, 3, 4, 5]},
            index=sample_ids)

        # Seed first so the noise term below is reproducible.
        np.random.seed(0)
        n = 15
        coef = np.array([1, 4.2, 5.3, -2.2, 8])
        x1 = np.linspace(.01, 0.1, n)
        x2 = np.logspace(0, 0.01, n)
        x3 = np.exp(np.linspace(0, 0.01, n))
        x4 = x1 ** 2
        self.x = pd.DataFrame({'x1': x1, 'x2': x2, 'x3': x3, 'x4': x4})

        noise_free = (coef[0] + coef[1]*x1 + coef[2]*x2 + coef[3]*x3 +
                      coef[4]*x4)
        y = noise_free + np.random.normal(size=n)
        self.y = pd.DataFrame(np.vstack((-y/10, -y)).T,
                              columns=['y0', 'y1'])
        self.t2 = TreeNode.read([r"((a,b)y1,c)y0;"])

        self.results = "results"
        os.mkdir(self.results)
Пример #15
0
    def test_validate_otu_ids_and_tree(self):
        """Valid counts / otu_ids / tree combinations should return None."""
        newick = (u"(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:"
                  u"0.75,OTU5:0.75):1.25):0.0)root;")
        scenarios = [
            # basic valid input
            ([1, 1, 1],
             ["OTU1", "OTU2", "OTU3"]),
            # all tips observed
            ([1, 1, 1, 1, 1],
             ["OTU1", "OTU2", "OTU3", "OTU4", "OTU5"]),
            # no tips observed
            ([],
             []),
            # all counts zero
            ([0, 0, 0, 0, 0],
             ["OTU1", "OTU2", "OTU3", "OTU4", "OTU5"]),
        ]

        for counts, otu_ids in scenarios:
            # Every scenario gets a freshly parsed tree.
            t = TreeNode.read(StringIO(newick))
            outcome = _validate_otu_ids_and_tree(counts, otu_ids, t)
            self.assertTrue(outcome is None)
Пример #16
0
 def test_reformat_jane4(self):
     """ reformat_jane4() should write the expected NEXUS lines
     """
     species_tree = TreeNode.read(self.species_tree_fp, format='newick')
     gene_tree_1 = TreeNode.read(self.gene_tree_1_fp, format='newick')
     output_tree_fp = join(self.working_dir, "joined_trees.nex")
     reformat_jane4(gene_tree_1, species_tree, output_tree_fp)

     # Expected content of the host, parasite and distribution blocks.
     host_line = (
         "tree host = "
         "(((((((SE001,SE010),SE008),(SE006,SE009)),SE005),SE004),SE003),"
         "(SE002,SE007));\n")
     parasite_line = (
         "tree parasite = "
         "(((((((SE001_01623,SE010_01623),SE008_01623),(SE006_01623,"
         "SE009_01623)),SE005_01623),SE004_01623),SE003_01623),"
         "((SE002_01623,SE007_01623),((((SE001_04123,SE010_04123),"
         "SE008_04123),(SE006_04123,SE009_04123)),SE005_04123)));\n")
     range_line = (
         "Range SE010_01623:SE010, SE010_04123:SE010, SE009_01623:SE009, "
         "SE009_04123:SE009, SE008_01623:SE008, SE008_04123:SE008, "
         "SE007_01623:SE007, SE006_01623:SE006, SE006_04123:SE006, "
         "SE005_01623:SE005, SE005_04123:SE005, SE004_01623:SE004, "
         "SE003_01623:SE003, SE002_01623:SE002, SE001_01623:SE001, "
         "SE001_04123:SE001;\n")
     reformat_tree_exp = [
         "#NEXUS\n",
         "begin host;\n",
         host_line,
         "\n",
         "endblock;\n",
         "begin parasite;\n",
         parasite_line,
         "\n",
         "endblock;\n",
         "begin distribution;\n",
         range_line,
         "endblock;\n",
     ]

     with open(output_tree_fp, 'r') as written:
         reformat_tree_act = written.readlines()
     self.assertListEqual(reformat_tree_exp, reformat_tree_act)
Пример #17
0
    def test_DndParser(self):
        """Newick parsing with and without name unescaping, plus round trip"""
        t_str = "(A_a,(B:1.0,C),'D_e':0.5)E;"
        tree_unesc = TreeNode.from_newick(t_str, unescape_name=True)
        tree_esc = TreeNode.from_newick(t_str, unescape_name=False)

        # Only the first and last child names depend on unescaping.
        parsed = ((tree_unesc, 'A a', 'D_e'),
                  (tree_esc, 'A_a', "'D_e'"))
        for tree, first_name, last_name in parsed:
            self.assertEqual(tree.name, 'E')
            self.assertEqual(tree.children[0].name, first_name)
            self.assertEqual(tree.children[1].children[0].name, 'B')
            self.assertEqual(tree.children[1].children[0].length, 1.0)
            self.assertEqual(tree.children[1].children[1].name, 'C')
            self.assertEqual(tree.children[2].name, last_name)
            self.assertEqual(tree.children[2].length, 0.5)

        # Writing without escaping and re-reading without unescaping
        # should reproduce the same newick output.
        for tree in (tree_esc, tree_unesc):
            reload_test = tree.to_newick(with_distances=True,
                                         escape_name=False)
            obs = TreeNode.from_newick(reload_test, unescape_name=False)
            self.assertEqual(obs.to_newick(with_distances=True),
                             tree.to_newick(with_distances=True))
Пример #18
0
 def test_reformat_treepuzzle(self):
     """ reformat_treepuzzle() should write the expected trees and MSA
     """
     species_tree = TreeNode.read(self.species_tree_fp, format='newick')
     gene_tree_3 = TreeNode.read(self.gene_tree_3_fp, format='newick')
     output_tree_fp = join(self.working_dir, "joined_trees.nwk")
     output_msa_phy_fp = join(self.working_dir, "gene_tree_3.phy")
     reformat_treepuzzle(gene_tree_3, species_tree, self.msa_fa_3_fp,
                         output_tree_fp, output_msa_phy_fp)

     # The tree file is expected to hold exactly these two lines.
     first_line = (
         "(((((((SE001:2.1494877,SE010:1.08661):3.7761166,SE008:"
         "0.86305436):0.21024487,(SE006:0.56704221,SE009:0.5014676):"
         "0.90294223):0.20542323,SE005:3.0992506):0.37145632,SE004:"
         "1.8129133):0.72933621,SE003:1.737411):0.24447835,(SE002:"
         "1.6606127,SE007:0.70000178):1.6331374);\n")
     second_line = (
         "(((((((SE001:2.1494876,SE010:2.1494876):"
         "3.7761166,SE008:5.9256042):0.2102448,(SE006:"
         "5.2329068,SE009:5.2329068):0.9029422):0.2054233,"
         "SE005:6.3412723):0.3714563,SE004:6.7127286):"
         "0.7293362,SE003:7.4420648):0.2444784,SE002:"
         "7.6865432);\n")
     reformat_tree_exp = [first_line, second_line]
     with open(output_tree_fp, 'r') as written:
         reformat_tree_act = written.readlines()
     self.assertListEqual(reformat_tree_exp, reformat_tree_act)

     # The PHYLIP alignment must carry these sequence labels, in order.
     msa_fa = TabularMSA.read(output_msa_phy_fp, constructor=Protein)
     labels_exp = [u'SE001', u'SE002', u'SE003', u'SE004', u'SE005',
                   u'SE006', u'SE008', u'SE009', u'SE010']
     self.assertListEqual(labels_exp, list(msa_fa.index))
Пример #19
0
    def setUp(self):
        """Create count tables, OTU ids and trees used by the tests."""
        self.table1 = np.array([
            [1, 3, 0, 1, 0],
            [0, 2, 0, 4, 4],
            [0, 0, 6, 2, 1],
            [0, 0, 1, 1, 1],
            [5, 3, 5, 0, 0],
            [0, 0, 0, 3, 5],
        ])
        self.sids1 = ['A', 'B', 'C', 'D', 'E', 'F']
        self.oids1 = ['OTU%d' % num for num in range(1, 6)]

        t1_newick = (
            u'(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:'
            u'0.75,OTU5:0.75):1.25):0.0)root;')
        self.t1 = TreeNode.read(StringIO(t1_newick))

        # Same as t1, with OTU6 and OTU7 added next to OTU5.
        t1_extra_newick = (
            u'(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:'
            u'0.75,(OTU5:0.25,(OTU6:0.5,OTU7:0.5):0.5):0.5):1.25):0.0'
            u')root;')
        self.t1_w_extra_tips = TreeNode.read(StringIO(t1_extra_newick))

        t2_newick = (
            u'((OTU1:0.1, OTU2:0.2):0.3, (OTU3:0.5, OTU4:0.7):1.1)'
            u'root;')
        self.t2 = TreeNode.read(StringIO(t2_newick))
        self.oids2 = ['OTU%d' % num for num in range(1, 5)]

        # the following table and tree are derived from the QIIME 1.9.1
        # "tiny-test" data
        tt_dir = 'qiime-191-tt'
        tt_table_fp = get_data_path(
            os.path.join(tt_dir, 'otu-table.tsv'), 'data')
        tt_tree_fp = get_data_path(os.path.join(tt_dir, 'tree.nwk'), 'data')

        self.q_table = pd.read_csv(tt_table_fp, sep='\t', skiprows=1,
                                   index_col=0)
        self.q_tree = TreeNode.read(tt_tree_fp)
Пример #20
0
    def test_index_tree(self):
        """index_tree should produce correct index and node map"""
        # Each case: newick, expected postorder ids, expected child index.
        cases = (
            # first tree: contains singleton outgroup
            (u'(((a,b),c),(d,e));',
             [0, 1, 2, 3, 6, 4, 5, 7, 8],
             [(2, 0, 1), (6, 2, 3), (7, 4, 5), (8, 6, 7)]),
            # second tree: strictly bifurcating
            (u'(((a,b),(c,d)),(e,f));',
             [0, 1, 4, 2, 3, 5, 8, 6, 7, 9, 10],
             [(4, 0, 1), (5, 2, 3), (8, 4, 5), (9, 6, 7), (10, 8, 9)]),
            # third tree: contains trifurcation and single-child parent
            (u'(((a,b,c),(d)),(e,f));',
             [0, 1, 2, 4, 3, 5, 8, 6, 7, 9, 10],
             [(4, 0, 2), (5, 3, 3), (8, 4, 5), (9, 6, 7), (10, 8, 9)]),
        )

        # All trees are parsed before any of them is indexed.
        parsed = [TreeNode.read(StringIO(case[0])) for case in cases]

        for tree, case in zip(parsed, cases):
            expected_ids, expected_children = case[1], case[2]
            _, child_index = tree.index_tree()
            visited = tree.traverse(self_before=False, self_after=True)
            self.assertEqual([node.id for node in visited], expected_ids)
            self.assertEqual(child_index, expected_children)
Пример #21
0
    def test_backfill_names_gap(self):
        """correctly backfill names"""
        consensus_tree = TreeNode.from_newick(
            "(((s1,s2)g1,(s3,s4)g2,(s5,s6)g3)f1)o1;")
        # Rank is keyed on the first letter of each consensus node name.
        rank_lookup = {'s': 6, 'g': 5, 'f': 4, 'o': 3, 'c': 2, 'p': 1, 'k': 0}
        for node in consensus_tree.traverse(include_self=True):
            node.Rank = rank_lookup[node.name[0]]
        input_newick = "((((1)s1,(2)s2),((3)s3,(4)s5)))o1;"
        lookup = {node.name: node
                  for node in consensus_tree.traverse(include_self=True)}
        # Unused expectation kept from the original, not asserted here:
        # exp = "((((1)s1,(2)s2)g1,((3)'g2; s3',(4)'g3; s5')))'o1; f1'"
        t = TreeNode.from_newick(input_newick)
        t.Rank = 3
        # The unnamed nodes carry no rank; the named nodes below get 6.
        t.children[0].Rank = None
        for group in t.children[0].children[:2]:
            group.Rank = None
        for group in t.children[0].children[:2]:
            for named in group.children[:2]:
                named.Rank = 6

        backfill_names_gap(t, lookup)

        self.assertEqual(t.BackFillNames, ['o1'])
        self.assertEqual(t.children[0].BackFillNames, [])
        self.assertEqual(t.children[0].children[0].BackFillNames, [])
        self.assertEqual(t.children[0].children[1].BackFillNames, [])

        # (group index, child index) -> names expected on that node.
        expected_fill = [
            ((0, 0), ['f1', 'g1', 's1']),
            ((0, 1), ['f1', 'g1', 's2']),
            ((1, 0), ['f1', 'g2', 's3']),
            ((1, 1), ['f1', 'g3', 's5']),
        ]
        for (group_idx, child_idx), names in expected_fill:
            target = t.children[0].children[group_idx].children[child_idx]
            self.assertEqual(target.BackFillNames, names)
Пример #22
0
def _main(gene_tree_fp,
          species_tree_fp,
          gene_msa_fa_fp,
          output_tree_fp,
          output_msa_phy_fp,
          method):
    """ Dispatch to the reformatting function for the chosen HGT method

        Species tree can be multifurcating, however will be converted to
        bifurcating trees for software that require them. Leaf labels of
        species tree and gene tree must match, however the label
        SPECIES_GENE is acceptable for multiple genes in the gene
        tree. Leaf labels must also be at most 10 characters long (for
        PHYLIP manipulations)

        A method that matches none of the handled names results in no
        reformatting function being called.

    Parameters
    ----------
    gene_tree_fp: string
        file path to gene tree in Newick format
    species_tree_fp: string
        file path to species tree in Newick format
    gene_msa_fa_fp: string
        file path to gene alignments in FASTA format
        (only passed on for 'tree-puzzle')
    output_tree_fp: string
        file path to output tree file (to be used as an input file to
        the HGT tool)
    output_msa_phy_fp: string
        file path to output MSA in PHYLIP format
        (only passed on for 'tree-puzzle')
    method: string
        the method to be used for HGT detection
    """

    # TODO: add function to check where tree is multifurcating and the
    # labeling is correct
    gene_tree = TreeNode.read(gene_tree_fp, format='newick')
    species_tree = TreeNode.read(species_tree_fp, format='newick')

    # Keyword arguments every reformatting function receives.
    shared_kwargs = {'gene_tree': gene_tree,
                     'species_tree': species_tree,
                     'output_tree_fp': output_tree_fp}

    if method == 'tree-puzzle':
        # The only method that also needs the alignment paths.
        reformat_treepuzzle(gene_msa_fa_fp=gene_msa_fa_fp,
                            output_msa_phy_fp=output_msa_phy_fp,
                            **shared_kwargs)
    elif method == 'ranger-dtl':
        reformat_rangerdtl(**shared_kwargs)
    elif method == 'trex':
        reformat_trex(**shared_kwargs)
    elif method == 'riata-hgt':
        reformat_riatahgt(**shared_kwargs)
    elif method == 'jane4':
        reformat_jane4(**shared_kwargs)
Пример #23
0
    def test_collapse_no_table(self):
        """collapse at level 2 should match the tree parsed from ';'."""
        # Collapse 2 levels
        source_tree = TreeNode.read([u"((a,b)c, d);"])
        exp_tree = TreeNode.read([u";"])

        # Only the tree half of the returned pair is checked.
        collapsed = collapse(source_tree, level=2)
        res_tree, _ = collapsed
        self.assertEqual(exp_tree.ascii_art(), res_tree.ascii_art())
Пример #24
0
def compare_tip_to_tip_distances(tree_fh1, tree_fh2, method="pearson"):
    """Run ``mantel`` on the tip-to-tip distances of two trees.

    Parameters
    ----------
    tree_fh1, tree_fh2
        Inputs handed to ``TreeNode.read`` to load each tree.
    method : str, optional
        Forwarded to ``mantel`` as its ``method`` argument.

    Returns
    -------
    The result of ``mantel`` for the two distance matrices, called with
    ``strict=False``.
    """
    # Load both trees first, then derive the matrices, in that order.
    trees = [TreeNode.read(source) for source in (tree_fh1, tree_fh2)]
    first_dm, second_dm = [tree.tip_tip_distances() for tree in trees]
    return mantel(first_dm, second_dm, strict=False, method=method)
Пример #25
0
    def _setup_linked_list(self, kwargs_list):
        """Build a chain of TreeNodes, one node per kwargs dict.

        Each node is created as ``TreeNode(**kwargs)`` and the node created
        on the previous iteration is appended to it, so the node created
        last sits at the top of the chain.

        Returns the node created last, or None when ``kwargs_list`` is empty.
        """
        head = None
        for node_kwargs in kwargs_list:
            parent = TreeNode(**node_kwargs)
            if head is not None:
                parent.append(head)
            head = parent
        return head
Пример #26
0
    def setUp(self):
        """Create the distance matrices and expected trees used by the tests.

        Sets ``dm1``..``dm4`` plus, for the first three, the expected Newick
        string (``expectedN_str``) and the tree parsed from it
        (``expectedN_TreeNode``).
        """
        data1 = [[0,  5,  9,  9,  8],
                 [5,  0, 10, 10,  9],
                 [9, 10,  0,  8,  7],
                 [9, 10,  8,  0,  3],
                 [8,  9,  7,  3,  0]]
        ids1 = list('abcde')
        self.dm1 = DistanceMatrix(data1, ids1)
        # this newick string was confirmed against http://www.trex.uqam.ca/
        # which generated the following (isomorphic) newick string:
        # (d:2.0000,e:1.0000,(c:4.0000,(a:2.0000,b:3.0000):3.0000):2.0000);
        self.expected1_str = ("(d:2.000000, (c:4.000000, (b:3.000000,"
                              " a:2.000000):3.000000):2.000000, e:1.000000);")
        self.expected1_TreeNode = TreeNode.read(
                io.StringIO(self.expected1_str))

        # this example was pulled from the Phylip manual
        # http://evolution.genetics.washington.edu/phylip/doc/neighbor.html
        data2 = [[0.0000, 1.6866, 1.7198, 1.6606, 1.5243, 1.6043, 1.5905],
                 [1.6866, 0.0000, 1.5232, 1.4841, 1.4465, 1.4389, 1.4629],
                 [1.7198, 1.5232, 0.0000, 0.7115, 0.5958, 0.6179, 0.5583],
                 [1.6606, 1.4841, 0.7115, 0.0000, 0.4631, 0.5061, 0.4710],
                 [1.5243, 1.4465, 0.5958, 0.4631, 0.0000, 0.3484, 0.3083],
                 [1.6043, 1.4389, 0.6179, 0.5061, 0.3484, 0.0000, 0.2692],
                 [1.5905, 1.4629, 0.5583, 0.4710, 0.3083, 0.2692, 0.0000]]
        ids2 = ["Bovine", "Mouse", "Gibbon", "Orang", "Gorilla", "Chimp",
                "Human"]
        self.dm2 = DistanceMatrix(data2, ids2)
        self.expected2_str = ("(Mouse:0.76891, (Gibbon:0.35793, (Orang:0.28469"
                              ", (Gorilla:0.15393, (Chimp:0.15167, Human:0.117"
                              "53):0.03982):0.02696):0.04648):0.42027, Bovine:"
                              "0.91769);")
        self.expected2_TreeNode = TreeNode.read(
                io.StringIO(self.expected2_str))

        data3 = [[0, 5, 4, 7, 6, 8],
                 [5, 0, 7, 10, 9, 11],
                 [4, 7, 0, 7, 6, 8],
                 [7, 10, 7, 0, 5, 8],
                 [6, 9, 6, 5, 0, 8],
                 [8, 11, 8, 8, 8, 0]]
        # Materialise the ids: on Python 3 a bare map() is a one-shot
        # iterator, whereas the other fixtures pass real lists.
        ids3 = [str(i) for i in range(6)]
        self.dm3 = DistanceMatrix(data3, ids3)
        self.expected3_str = ("((((0:1.000000,1:4.000000):1.000000,2:2.000000"
                              "):1.250000,5:4.750000):0.750000,3:2.750000,4:2."
                              "250000);")
        self.expected3_TreeNode = TreeNode.read(
                io.StringIO(self.expected3_str))

        # this dm can yield negative branch lengths
        data4 = [[0,  5,  9,  9,  800],
                 [5,  0, 10, 10,  9],
                 [9, 10,  0,  8,  7],
                 [9, 10,  8,  0,  3],
                 [800,  9,  7,  3,  0]]
        ids4 = list('abcde')
        self.dm4 = DistanceMatrix(data4, ids4)
Пример #27
0
 def test_compare_tip_distances(self):
     """compare_tip_distances should equal (1 - r) / 2 for the shared tips."""
     first = TreeNode.read(io.StringIO('((H:1,G:1):2,(R:0.5,M:0.7):3);'))
     second = TreeNode.read(io.StringIO('(((H:1,G:1,O:1):2,R:3):1,X:4);'))
     observed = first.compare_tip_distances(second)
     # note: common taxa are H, G, R (only)
     first_dists = np.array([[0, 2, 6.5], [2, 0, 6.5], [6.5, 6.5, 0]])
     second_dists = np.array([[0, 2, 6], [2, 0, 6], [6, 6, 0]])
     correlation = pearsonr(first_dists.flat, second_dists.flat)[0]
     self.assertAlmostEqual(observed, (1 - correlation) / 2)
Пример #28
0
    def test_tip_tip_distances_no_length(self):
        """Distances on a tree without lengths match an all-zero-length tree.

        The call is expected to emit a RepresentationWarning and to leave
        every node's ``length`` as None.
        """
        unlabeled = TreeNode.read(io.StringIO("((a,b)c,(d,e)f);"))
        zeroed = TreeNode.read(io.StringIO("((a:0,b:0)c:0,(d:0,e:0)f:0);"))
        expected_dm = zeroed.tip_tip_distances()

        observed_dm = npt.assert_warns(RepresentationWarning,
                                       unlabeled.tip_tip_distances)
        self.assertEqual(observed_dm, expected_dm)

        # The computation must not have filled in any branch length.
        for visited in unlabeled.preorder():
            self.assertIs(visited.length, None)
Пример #29
0
def iter_newick_partitoned(fname):
    """Yield ``(number, tree)`` pairs, one per line of the file ``fname``.

    A line of the form ``[<value>](...;`` yields ``int(float(value))``
    together with the tree parsed from the parenthesised part. Any other
    line is parsed whole as a Newick tree and paired with 1.
    """
    bracketed = re.compile(r'\[(.*)\](\(.*;)')
    with open(fname) as handle:
        for raw in handle:
            found = bracketed.match(raw)
            if found is not None:
                prefix, newick = found.groups()
                yield int(float(prefix)), TreeNode.read([newick])
                continue
            # Assume it's just a normal newick tree
            yield 1, TreeNode.read([raw])
Пример #30
0
 def test_join_trees(self):
     """Check the file written by join_trees for a gene and a species tree."""
     self.output_file = join(self.working_dir, 'output_file.nwk')
     gene = TreeNode.read(self.gene_tree_1_fp, format='newick')
     species = TreeNode.read(self.species_tree_fp, format='newick')
     join_trees(gene, species, self.output_file)
     with open(self.output_file, 'r') as written:
         observed = written.read()
     # NOTE(review): species_gene_tree_1_exp is not defined in this method;
     # presumably a module-level fixture - confirm.
     self.assertEqual(observed, species_gene_tree_1_exp)
Пример #31
0
 def testCountCladesTwoChildren(self):
     """
     A root with two children, one of which itself has two children,
     must produce exactly two clades.
     """
     njtree = NJTree()
     inner = TreeNode(children=[TreeNode(name='a'), TreeNode(name='b')])
     njtree.tree = TreeNode(children=[inner, TreeNode(name='c')])
     expected = {
         frozenset(['a', 'b']): 1,
         frozenset(['a', 'b', 'c']): 1,
     }
     self.assertEqual(expected, njtree.countClades())
Пример #32
0
    def test_linkage_matrix(self):
        """Build a tree from a linkage matrix and compare its Newick text."""
        # Ensure matches: http://www.southampton.ac.uk/~re1u06/teaching/upgma/
        tip_names = list('ABCDEFG')
        rows = [
            [1.0, 5.0, 1.0, 2.0],
            [0.0, 3.0, 8.0, 2.0],
            [6.0, 7.0, 12.5, 3.0],
            [8.0, 9.0, 16.5, 5.0],
            [2.0, 10.0, 29.0, 6.0],
            [4.0, 11.0, 34.0, 7.0],
        ]

        built = TreeNode.from_linkage_matrix(np.asarray(rows), tip_names)
        expected = ("(E:17.0,(C:14.5,((A:4.0,D:4.0):4.25,(G:6.25,(B:0.5,"
                    "F:0.5):5.75):2.0):6.25):2.5);\n")
        self.assertEqual(expected, str(built))
Пример #33
0
def assign_ids(input_tree: skbio.TreeNode) -> skbio.TreeNode:
    """Return a bifurcated copy of ``input_tree`` with renamed internal nodes.

    The input is copied and ``bifurcate()`` is called on the copy. One name
    of the form ``<level-order index>L-<uuid4>`` is generated for every
    non-tip node met in level order (root included), and the list is handed
    to ``rename_internal_nodes``, whose result is returned.
    """
    working = input_tree.copy()
    working.bifurcate()
    new_names = []
    for position, node in enumerate(working.levelorder(include_self=True)):
        if node.is_tip():
            continue
        new_names.append('%sL-%s' % (position, uuid.uuid4()))
    return rename_internal_nodes(working, names=new_names)
Пример #34
0
 def test_match_tips_intersect_tree_immutable(self):
     # Checks that match_tips leaves the tree passed to it unchanged.
     counts = [[0, 0, 1],
               [2, 3, 4],
               [5, 5, 3],
               [0, 0, 1]]
     samples = ['s1', 's2', 's3', 's4']
     table = pd.DataFrame(counts, index=samples, columns=['a', 'b', 'd'])
     tree = TreeNode.read([u"(((a,b)f, c),d)r;"])
     match_tips(table, tree)
     self.assertEqual(str(tree), u"(((a,b)f,c),d)r;\n")
Пример #35
0
 def test_find_cache_bug(self):
     """First implementation did not force the cache to be at the root"""
     root = TreeNode.read(StringIO(u"((a,b)c,(d,e)f,(g,h)f);"))
     expected_tip_keys = {'a', 'b', 'd', 'e', 'g', 'h'}
     expected_internal_keys = {'c', 'f'}
     # Request the caches from a tip; they are expected to land on the root.
     tip = root.children[0].children[0]
     tip.create_caches()
     self.assertEqual(tip._tip_cache, {})
     self.assertEqual(set(root._tip_cache), expected_tip_keys)
     self.assertEqual(set(root._non_tip_cache), expected_internal_keys)
     nodes_named_f = [root.children[1], root.children[2]]
     self.assertEqual(root._non_tip_cache['f'], nodes_named_f)
Пример #36
0
    def test_balance_basis_unbalanced(self):
        """Check balance_basis output (basis and keys) for '((a,b)c, d);'."""
        root = TreeNode.read([u"((a,b)c, d);"])
        # Keys are expected as node objects: the root, then its first child.
        expected_keys = [root, root[0]]
        expected_basis = np.array([[0.18507216, 0.18507216, 0.62985567],
                                   [0.14002925, 0.57597535, 0.28399541]])

        basis, keys = balance_basis(root)

        npt.assert_allclose(expected_basis, basis)
        self.assertListEqual(expected_keys, list(keys))
Пример #37
0
    def setUp(self):
        """Create the two table fixtures plus the tree for the first one."""
        # Fixture 1: table, ids, tree and the oids1 list.
        self.table1 = [[1, 5], [2, 3], [0, 1]]
        self.sids1 = ['A', 'B', 'C']
        newick1 = '((O1:0.25, O2:0.50):0.25, O3:0.75)root;'
        self.tree1 = TreeNode.read(StringIO(newick1))
        self.oids1 = ['O1', 'O2']

        # Fixture 2: table and ids only.
        self.table2 = [
            [23, 64, 14, 0, 0, 3, 1],
            [0, 3, 35, 42, 0, 12, 1],
            [0, 5, 5, 0, 40, 40, 0],
            [44, 35, 9, 0, 1, 0, 0],
            [0, 2, 8, 0, 35, 45, 1],
            [0, 0, 25, 35, 0, 19, 0],
        ]
        self.sids2 = ['A', 'B', 'C', 'D', 'E', 'F']
Пример #38
0
def main():
    """Read a tree and a cluster map, apply append_taxa, print the result.

    ``sys.argv[1]`` is the tree, read with ``TreeNode.read``. ``sys.argv[2]``
    is a tab-separated file whose first column is the key and whose second
    column is a comma-separated list stored under that key. The tree returned
    by ``append_taxa`` is written to stdout. With fewer than two arguments
    the module docstring is printed and the program exits.
    """
    if len(sys.argv) < 3:
        sys.exit(__doc__)
    tree = TreeNode.read(sys.argv[1])
    clusters = {}
    with open(sys.argv[2], 'r') as f:
        for line in f:
            line = line.rstrip('\r\n')
            if not line:
                # A blank line (e.g. a trailing newline at end of file) has
                # no second column and would raise IndexError below.
                continue
            x = line.split('\t')
            clusters[x[0]] = x[1].split(',')
    res = append_taxa(tree, clusters)
    res.write(sys.stdout)
    def test_sparse_balance_basis_unbalanced(self):
        """Check sparse_balance_basis output for '((a,b)c, d)r;'."""
        root = TreeNode.read([u"((a,b)c, d)r;"])
        dense = np.array(
            [[np.sqrt(2. / 3), -np.sqrt(1. / 6), -np.sqrt(1. / 6)],
             [0, np.sqrt(1. / 2), -np.sqrt(1. / 2)]])
        # The expected matrix is the dense one with its columns reversed.
        expected_basis = coo_matrix(dense[:, ::-1])
        expected_keys = [root.name, root[0].name]
        basis, keys = sparse_balance_basis(root)

        assert_coo_allclose(expected_basis, basis)
        self.assertListEqual(expected_keys, keys)
Пример #40
0
    def test_unrooted_deepcopy(self):
        """Do an unrooted_copy"""
        source = TreeNode.read(StringIO(u"((a,(b,c)d)e,(f,g)h)i;"))
        expected = "(b,c,(a,((f,g)h)e)d)root;\n"
        copied = source.find('d').unrooted_deepcopy()
        self.assertEqual(str(copied), expected)

        # No node object may be shared between the source and the copy.
        source_ids = set(map(id, source.traverse()))
        copied_ids = set(map(id, copied.traverse()))

        self.assertEqual(source_ids.intersection(copied_ids), set())
Пример #41
0
    def test_root_above(self):
        """Check root_above against expected trees for three input trees."""
        # test rooted tree
        rooted = TreeNode.read([
            '(((a:1.0,b:0.8)c:2.4,(d:0.8,e:0.6)f:1.2)g:0.4,'
            '(h:0.5,i:0.7)j:1.8)k;'
        ])

        above_c = root_above(rooted.find('c'))
        expected = TreeNode.read([
            '((a:1.0,b:0.8)c:1.2,((d:0.8,e:0.6)f:1.2,(h:0.5,'
            'i:0.7)j:2.2)g:1.2);'
        ])
        self.assertTrue(_exact_compare(expected, above_c))

        above_i = root_above(rooted.find('i'))
        expected = TreeNode.read([
            '(i:0.35,(h:0.5,((a:1.0,b:0.8)c:2.4,(d:0.8,'
            'e:0.6)f:1.2)g:2.2)j:0.35);'
        ])
        self.assertTrue(_exact_compare(expected, above_i))

        # test unrooted tree
        unrooted = TreeNode.read([
            '(((a:0.6,b:0.5)g:0.3,c:0.8)h:0.4,(d:0.4,'
            'e:0.5)i:0.5,f:0.9)j;'
        ])

        above_a = root_above(unrooted.find('a'))
        expected = TreeNode.read([
            '(a:0.3,(b:0.5,(c:0.8,((d:0.4,e:0.5)i:0.5,'
            'f:0.9)j:0.4)h:0.3)g:0.3);'
        ])
        self.assertTrue(_exact_compare(expected, above_a))

        above_g = root_above(unrooted.find('g'))
        expected = TreeNode.read([
            '((a:0.6,b:0.5)g:0.15,(c:0.8,((d:0.4,e:0.5)i:0.5,'
            'f:0.9)j:0.4)h:0.15);'
        ])
        self.assertTrue(_exact_compare(expected, above_g))

        # test unrooted tree with 1 basal node
        single_basal = TreeNode.read(
            ['(((a:0.4,b:0.3)e:0.1,(c:0.4,'
             'd:0.1)f:0.2)g:0.6)h:0.2;'])

        above_a = root_above(single_basal.find('a'))
        expected = TreeNode.read(
            ['(a:0.2,(b:0.3,((c:0.4,d:0.1)f:0.2,'
             'h:0.6)g:0.1)e:0.2);'])
        self.assertTrue(_exact_compare(expected, above_a))
Пример #42
0
    def test_append(self):
        """Append a node to a tree"""
        # Parse with TreeNode.read, as the rest of this file does;
        # TreeNode.from_newick is the old, removed entry point.
        second_tree = TreeNode.read(StringIO(u"(x,y)z;"))
        self.simple_t.append(second_tree)

        self.assertEqual(self.simple_t.children[0].name, 'i1')
        self.assertEqual(self.simple_t.children[1].name, 'i2')
        self.assertEqual(self.simple_t.children[2].name, 'z')
        self.assertEqual(len(self.simple_t.children), 3)
        self.assertEqual(self.simple_t.children[2].children[0].name, 'x')
        self.assertEqual(self.simple_t.children[2].children[1].name, 'y')
        # The parent must be the very same object, not merely an equal one.
        self.assertIs(second_tree.parent, self.simple_t)
Пример #43
0
 def test_lca2(self):
     """lca2 needs assign_taxa first; then check results for several sets."""
     tree = TreeNode.read(['((((a,b)n6,c)n4,(d,e)n5)n2,(f,(g,h)n7)n3,i)n1;'])
     # Before assign_taxa the nodes have no 'taxa' attribute.
     msg = "'TreeNode' object has no attribute 'taxa'"
     with self.assertRaisesRegex(AttributeError, msg):
         lca2(tree, set('ab'))
     assign_taxa(tree)
     cases = (
         ('a', 'a'),
         ('ab', 'n6'),
         ('ac', 'n4'),
         ('ace', 'n2'),
         ('bgi', 'n1'),
     )
     for tips, expected_name in cases:
         self.assertEqual(lca2(tree, set(tips)).name, expected_name)
Пример #44
0
 def test_nonroot_negative_branchlengths(self):
     """Tree.from_tree must raise ValueError for each of these trees."""
     bad_newicks = (
         '((b:-1)a:1)root:1;',
         '((b:100)a:-100)root:0;',
         '(b:1,c:-1)a:2;',
         '((b:-1)a:0)root;',
     )
     expected_message = "must have nonnegative lengths"
     for newick in bad_newicks:
         parsed = TreeNode.read([newick])
         with self.assertRaisesRegex(ValueError, expected_message):
             Tree.from_tree(parsed)
Пример #45
0
    def test_DndParser_list(self):
        """Make sure TreeNode.from_newick can handle list of strings"""
        pieces = ["(A_a,(B:1.0,C)", ",'D_e':0.5)E;"]
        parsed = TreeNode.from_newick(pieces, unescape_name=True)

        self.assertEqual(parsed.name, 'E')
        self.assertEqual(parsed.children[0].name, 'A a')

        # Second child of the root: the (B:1.0,C) clade.
        self.assertEqual(parsed.children[1].children[0].name, 'B')
        self.assertEqual(parsed.children[1].children[0].length, 1.0)
        self.assertEqual(parsed.children[1].children[1].name, 'C')

        # Third child: the quoted name, expected to keep its underscore.
        self.assertEqual(parsed.children[2].name, 'D_e')
        self.assertEqual(parsed.children[2].length, 0.5)
Пример #46
0
    def test_cladistic(self):
        """Check cladistic's 'uni'/'mono'/'poly' results and missing tips."""
        named = TreeNode.read(['((i,j)a,b)c;'])
        named_cases = (
            ('uni', ['i']),
            ('mono', ['i', 'j']),
            ('poly', ['i', 'b']),
        )
        for expected, tips in named_cases:
            self.assertEqual(expected, cladistic(named, tips))
        with self.assertRaisesRegex(MissingNodeError,
                                    'Node x is not in self'):
            cladistic(named, ['x', 'b'])

        unnamed = TreeNode.read(['(((a,b),(c,d,x)),((e,g),h));'])
        unnamed_cases = (
            ('uni', ['a']),
            ('mono', ['a', 'b', 'c', 'd', 'x']),
            ('poly', ['g', 'h']),
        )
        for expected, tips in unnamed_cases:
            self.assertEqual(expected, cladistic(unnamed, tips))
        with self.assertRaisesRegex(MissingNodeError,
                                    'Node y is not in self'):
            cladistic(unnamed, ['y', 'b'])

        # Repeat a few checks on the same tree after assign_taxa has run.
        assign_taxa(unnamed)
        taxa_cases = (
            ('uni', ['a']),
            ('mono', ['a', 'b']),
            ('poly', ['g', 'h']),
        )
        for expected, tips in taxa_cases:
            self.assertEqual(expected, cladistic(unnamed, tips))
Пример #47
0
    def test_is_ordered(self):
        """Test if a tree is ordered"""
        # test tree in increasing order
        increasing = TreeNode.read(['((i,j)a,b)c;'])
        self.assertTrue(is_ordered(increasing))
        self.assertTrue(is_ordered(increasing, True))
        self.assertFalse(is_ordered(increasing, False))

        # test tree in both increasing and decreasing order
        either_way = TreeNode.read(['(a, b);'])
        self.assertTrue(is_ordered(either_way))
        self.assertTrue(is_ordered(either_way, False))

        # test an unordered tree
        unordered = TreeNode.read(['(((a,b),(c,d,x,y,z)),((e,g),h));'])
        for flag in (True, False):
            self.assertFalse(is_ordered(unordered, flag))

        # test tree in decreasing order
        decreasing = TreeNode.read(['((h,(e,g)),((a,b),(c,d,i)j));'])
        self.assertTrue(is_ordered(decreasing, False))
Пример #48
0
    def test_to_array_nan_length_value(self):
        """Check how to_array fills 'length' for nodes without a length."""
        # Root has no length: its entry follows nan_length_value.
        tree = TreeNode.read(StringIO(u"((a:1, b:2)c:3)root;"))
        fill_cases = ((None, np.nan), (0.0, 0.0), (42.0, 42.0))
        for fill, expected_last in fill_cases:
            indexed = tree.to_array(nan_length_value=fill)
            npt.assert_equal(indexed['length'],
                             np.array([1, 2, 3, expected_last], dtype=float))

        # Every node has a length: the fill value is not used.
        tree = TreeNode.read(StringIO(u"((a:1, b:2)c:3)root:4;"))
        indexed = tree.to_array(nan_length_value=42.0)
        npt.assert_equal(indexed['length'],
                         np.array([1, 2, 3, 4], dtype=float))

        # Two nodes lack a length: both entries get the fill value.
        tree = TreeNode.read(StringIO(u"((a:1, b:2)c)root;"))
        indexed = tree.to_array(nan_length_value=42.0)
        npt.assert_equal(indexed['length'],
                         np.array([1, 2, 42.0, 42.0], dtype=float))
Пример #49
0
 def test_to_taxonomy(self):
     """Lineages must survive a from_taxonomy / to_taxonomy round trip."""
     lineages = {
         '1': ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
         '2': ['a', 'b', 'c', None, None, 'x', 'y'],
         '3': ['h', 'i', 'j', 'k', 'l', 'm', 'n'],
         '4': ['h', 'i', 'j', 'k', 'l', 'm', 'q'],
         '5': ['h', 'i', 'j', 'k', 'l', 'm', 'n']
     }
     tree = TreeNode.from_taxonomy(lineages.items())
     expected = sorted(lineages.items())
     observed = []
     for node, lineage in tree.to_taxonomy(allow_empty=True):
         observed.append((node.name, lineage))
     self.assertEqual(sorted(observed), expected)
Пример #50
0
def main():
    """Read a tree, compute min levels and depths, print them as a table.

    The tree is read through ``fileinput.input()``. With no command-line
    argument the module docstring is printed and the program exits. After
    ``calc_bidi_minlevels`` and ``calc_bidi_mindepths`` have run, one
    tab-separated row (name, minlevel, mindepth) is printed per node in
    level order, root included.
    """
    if len(sys.argv) < 2:
        sys.exit(__doc__)
    with fileinput.input() as stream:
        tree = TreeNode.read(stream)
    calc_bidi_minlevels(tree)
    calc_bidi_mindepths(tree)

    # print result
    header = ('name', 'minlevel', 'mindepth')
    print('\t'.join(header))
    for node in tree.levelorder(include_self=True):
        row = (node.name, node.minlevel, node.mindepth)
        print('%s\t%d\t%f' % row)
Пример #51
0
    def test_from_taxonomy(self):
        """from_taxonomy builds the expected tree and keeps the caller's class."""
        lineages = {
            '1': ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
            '2': ['a', 'b', 'c', None, None, 'x', 'y'],
            '3': ['h', 'i', 'j', 'k', 'l', 'm', 'n'],
            '4': ['h', 'i', 'j', 'k', 'l', 'm', 'q'],
            '5': ['h', 'i', 'j', 'k', 'l', 'm', 'n']
        }
        expected_newick = ("((((((((1)g)f)e)d,((((2)y)x)))c)b)a,"
                           "(((((((3,5)n,(4)q)m)l)k)j)i)h);")
        expected = TreeNode.read(io.StringIO(expected_newick))

        built = TreeNode.from_taxonomy(lineages.items())
        self.assertIs(type(built), TreeNode)
        self.assertEqual(built.compare_subsets(expected), 0.0)

        # Called on a subclass, the result must be of that subclass.
        built = TreeNodeSubclass.from_taxonomy(lineages.items())
        self.assertIs(type(built), TreeNodeSubclass)
Пример #52
0
    def test_balance_basis_unbalanced(self):
        """Check _balance_basis output (basis and keys) for '((a,b)c, d);'."""
        tree = u"((a,b)c, d);"
        t = TreeNode.read([tree])

        exp_basis = np.array(
            [[np.sqrt(2. / 3), -np.sqrt(1. / 6), -np.sqrt(1. / 6)],
             [0, np.sqrt(1. / 2), -np.sqrt(1. / 2)]])
        # Keys are expected as node objects: the root and its first child.
        exp_keys = [t, t[0]]
        res_basis, res_keys = _balance_basis(t)

        npt.assert_allclose(exp_basis, res_basis)
        # assertItemsEqual exists only on Python 2; assertCountEqual is the
        # Python 3 name for the same order-insensitive comparison.
        self.assertCountEqual(exp_keys, res_keys)
Пример #53
0
 def test_match_tips(self):
     """match_tips should return this table and tree unchanged."""
     counts = [[0, 0, 1, 1],
               [2, 2, 4, 4],
               [5, 5, 3, 3],
               [0, 0, 0, 1]]
     table = pd.DataFrame(counts,
                          index=['s1', 's2', 's3', 's4'],
                          columns=['a', 'b', 'c', 'd'])
     tree = TreeNode.read([u"(((a,b)f, c),d)r;"])
     # The inputs themselves serve as the expected outputs.
     matched_table, matched_tree = match_tips(table, tree)
     pdt.assert_frame_equal(table, matched_table)
     self.assertEqual(str(tree), str(matched_tree))
Пример #54
0
 def test_random_tree(self):
     """random_linkage(10) with seed 0 must match the stored tree's shape."""
     # Seed numpy's global RNG before generating the tree.
     np.random.seed(0)
     generated = random_linkage(10)
     expected_newick = (
         '((7:0.0359448798595,8:0.0359448798595)y1:0.15902486847,'
         '((9:0.0235897432375,(4:0.00696620596189,6:0.00696620596189)'
         'y5:0.0166235372756)y3:0.0747173561014,(1:0.0648004111784,'
         '((0:0.00196516046521,3:0.00196516046521)y7:0.0367750400883,'
         '(2:0.0215653684975,5:0.0215653684975)y8:0.017174832056)'
         'y6:0.0260602106249)y4:0.0335066881605)y2:0.0966626489905)y0;\n')
     expected = TreeNode.read([expected_newick])
     # Only the ASCII rendering is compared.
     self.assertEqual(generated.ascii_art(), expected.ascii_art())
Пример #55
0
def build_tree(relabeled_fingerprints: pd.DataFrame) -> TreeNode:
    '''
    Build a tree from a fingerprint table.

    Pairwise Jaccard distances between the rows are condensed with
    ``squareform``, clustered with average linkage, and the linkage matrix
    is converted to a tree whose ids are the table's index values.
    '''
    square = pairwise_distances(X=relabeled_fingerprints,
                                Y=None, metric='jaccard')
    condensed = squareform(square, checks=False)
    merges = linkage(condensed, method='average')
    return TreeNode.from_linkage_matrix(
        merges, relabeled_fingerprints.index.tolist())
Пример #56
0
    def test_accumulate_to_ancestor(self):
        """Get the distance from a node to its ancestor"""
        newick = u"((a:0.1,b:0.2)c:0.3,(d:0.4,e)f:0.5)root;"
        root = TreeNode.read(StringIO(newick))
        tip_a = root.find('a')
        tip_b = root.find('b')
        expected = 0.1 + 0.3
        observed = tip_a.accumulate_to_ancestor(root)
        self.assertEqual(observed, expected)

        # b is not an ancestor of a, so NoParentError is expected.
        with self.assertRaises(NoParentError):
            tip_a.accumulate_to_ancestor(tip_b)
Пример #57
0
def _make_foundation_tree(in_name, all_std_error, ghost_tree_fp):
    """Run FastTree on ``in_name`` and load the resulting foundation tree.

    Parameters
    ----------
    in_name : str
        Path passed to ``fasttree -nt -quiet`` as its input.
    all_std_error : str
        Log text accumulated so far; a header and FastTree's stderr are
        appended to it.
    ghost_tree_fp : str
        Directory in which ``nr_foundation_tree_gt.nwk`` is written.

    Returns
    -------
    tuple
        ``(foundation_tree, all_std_error)``: the midpoint-rooted tree read
        back from the written file, and the extended log text.
    """
    tree_fp = ghost_tree_fp + "/nr_foundation_tree_gt.nwk"
    # Pass an argument list and redirect stdout ourselves instead of building
    # a shell command line: paths containing spaces or shell metacharacters
    # are then passed through verbatim. universal_newlines=True makes stderr
    # a str, so it can be concatenated to the log on Python 3.
    with open(tree_fp, "w") as tree_fh:
        process = subprocess.Popen(["fasttree", "-nt", "-quiet", in_name],
                                   stdout=tree_fh,
                                   stderr=subprocess.PIPE,
                                   universal_newlines=True)
        _, std_error = process.communicate()
    all_std_error += "Error log for ghost-tree:\n\n\nSome genera may not contain " + \
                     "any errors, so the genus is listed as a placeholder\n\n"
    all_std_error += "FastTree warnings for the foundation_tree are:\n" + std_error + "\n"
    foundation_tree = TreeNode.read(tree_fp)
    # root_at_midpoint() returns the re-rooted tree rather than modifying the
    # receiver, so the result has to be kept.
    foundation_tree = foundation_tree.root_at_midpoint()
    return foundation_tree, all_std_error
Пример #58
0
    def test_count_matrix_base_case(self):
        """Check the _count_matrix entries for the two-tip tree '(a,b);'."""
        root = TreeNode.read([u"(a,b);"])
        counts, _ = _count_matrix(root)
        root_expected = {'k': 0, 'l': 1, 'r': 1, 't': 0, 'tips': 2}
        self.assertEqual(counts[root], root_expected)

        # Both children are tips and share the same expected entry.
        for child_index in (0, 1):
            tip_expected = {'k': 0, 'l': 0, 'r': 0, 't': 0, 'tips': 1}
            self.assertEqual(counts[root[child_index]], tip_expected)
Пример #59
0
def main():
    """Compare two trees given on the command line and print a report.

    ``sys.argv[1]`` and ``sys.argv[2]`` are read with ``TreeNode.read``.
    Printed: tip counts, shared taxon count, subset distance,
    Robinson-Foulds distance (raw and divided by the total number of
    non-tip nodes), optionally the tip distance when the third argument is
    ``-t``, and - only when the RF distance is zero - whether internal node
    names and branch lengths match. With fewer than two arguments the module
    docstring is printed and the program exits.
    """
    if len(sys.argv) < 3:
        sys.exit(__doc__)

    tree1 = TreeNode.read(sys.argv[1])
    tree2 = TreeNode.read(sys.argv[2])

    # tip counts
    counts = [x.count(tips=True) for x in (tree1, tree2)]
    print('Taxa in tree 1: %d.' % counts[0])
    print('Taxa in tree 2: %d.' % counts[1])

    # shared taxon count
    shared = tree1.subset().intersection(tree2.subset())
    print('Shared taxa: %d.' % len(shared))

    # subsets (sets of tip names under each clade)
    ss = tree1.compare_subsets(tree2, exclude_absent_taxa=True)
    print('Subsets: %f.' % ss)

    # Robinson-Foulds distance
    rfd = tree1.compare_rfd(tree2)
    n_internal = len(list(tree1.non_tips())) + len(list(tree2.non_tips()))
    # When non_tips() yields nothing for either tree (e.g. two star trees)
    # the fraction is undefined; report 0.0 instead of raising
    # ZeroDivisionError.
    rfdf = rfd / n_internal if n_internal else 0.0
    print('RF distance: %d (%f).' % (rfd, rfdf))

    # tip-to-tip distance matrix (slow)
    if len(sys.argv) > 3 and sys.argv[3] == '-t':
        td = tree1.compare_tip_distances(tree2)
        print('Tip distance: %f.' % td)

    if rfd == 0.0:
        # internal node names
        ct = compare_topology(tree1, tree2)
        print('Internal node names are %s.'
              % ('identical' if ct else 'different'))

        # branch lengths
        cbr = compare_branch_lengths(tree1, tree2)
        print('Branch lengths of matching nodes are %s.'
              % ('identical' if cbr else 'different'))
Пример #60
0
    def test__balance_basis_unbalanced(self):
        """Check _balance_basis output (basis and key names) for '((a,b)c, d);'."""
        root = TreeNode.read([u"((a,b)c, d);"])

        first_row = [-np.sqrt(1. / 6), -np.sqrt(1. / 6), np.sqrt(2. / 3)]
        second_row = [-np.sqrt(1. / 2), np.sqrt(1. / 2), 0]
        expected_basis = np.array([first_row, second_row])
        # Keys are expected as node names: the root's, then its first child's.
        expected_keys = [root.name, root[0].name]
        basis, keys = _balance_basis(root)

        npt.assert_allclose(expected_basis, basis)
        self.assertListEqual(expected_keys, keys)