def test_add_second_level(self):
    """Adding "C" to group "{B}" must yield a nested {B C} clade."""
    initial_newick = "({A},{B}){A B};"
    wanted_newick = "({A},({B},{C}){B C}){A B C};"

    grown = PhyTree(loads(initial_newick))
    grown.add_to_group("C", "{B}")

    actual_root = grown.get_newick()[0]
    wanted_root = loads(wanted_newick)[0]
    compare_nodes(self, actual_root, wanted_root)
def test_prune_node():
    """Pruning C, D and E must serialise the same as pruning E alone."""
    newick_text = '(A,(B,(C,D)E)F)G;'

    pruned_all = loads(newick_text)[0]
    pruned_all.prune_by_names(["C", "D", "E"])

    pruned_parent = loads(newick_text)[0]
    pruned_parent.prune_by_names(["E"])

    assert pruned_all.newick == pruned_parent.newick
def test_create_new_tree(self):
    """Adding "A" to the empty group "{}" must produce the single leaf {A}."""
    empty_root_newick = "{};"
    single_leaf_newick = "{A};"

    grown = PhyTree(loads(empty_root_newick))
    grown.add_to_group("A", "{}")

    actual_root = grown.get_newick()[0]
    wanted_root = loads(single_leaf_newick)[0]
    compare_nodes(self, actual_root, wanted_root)
def test_prune_node(self):
    """Pruning C, D and E must serialise the same as pruning E alone."""
    newick_text = '(A,(B,(C,D)E)F)G;'

    pruned_all = loads(newick_text)[0]
    pruned_all.prune_by_names(["C", "D", "E"])

    pruned_parent = loads(newick_text)[0]
    pruned_parent.prune_by_names(["E"])

    self.assertEqual(pruned_all.newick, pruned_parent.newick)
def test_Node(self):
    """Check name validation, traversal orders and ancestor links of Node."""
    # A closing parenthesis in a node name must be rejected.
    with self.assertRaises(ValueError):
        Node(name='A)')

    root = loads('(A,B,(C,D)E)F;')[0]

    default_order = [node.name for node in root.walk()]
    self.assertEqual(default_order, ['F', 'A', 'B', 'E', 'C', 'D'])

    leaves_only = [node.name for node in root.walk() if node.is_leaf]
    self.assertEqual(leaves_only, ['A', 'B', 'C', 'D'])

    postorder = [node.name for node in root.walk(mode='postorder')]
    self.assertEqual(postorder, ['A', 'B', 'C', 'D', 'E', 'F'])

    self.assertEqual(root.ancestor, None)
    first_child = root.descendants[0]
    self.assertEqual(first_child.ancestor, root)

    # For every non-root node, compare the serialisation of its parent.
    root = loads('(((a,b),(c,d)),e);')[0]
    parent_newicks = [
        node.ancestor.newick for node in root.walk() if node.ancestor]
    self.assertEqual(
        parent_newicks,
        [
            '(((a,b),(c,d)),e)',
            '((a,b),(c,d))',
            '(a,b)',
            '(a,b)',
            '((a,b),(c,d))',
            '(c,d)',
            '(c,d)',
            '(((a,b),(c,d)),e)',
        ])
def test_stacked_redundant_node_removal(self):
    """Chains of single-child nodes collapse; lengths are summed by default."""
    # Without length preservation only the topology is compared.
    nested = loads("(((((A,B))),C))")[0]
    nested.remove_redundant_nodes(preserve_lengths=False)
    self.assertEqual(nested.newick, "(C,(A,B))")

    # Default call: the expected result carries the summed length 3.0.
    with_lengths = loads("(((A,B):1):2)")[0]
    with_lengths.remove_redundant_nodes()
    self.assertEqual(with_lengths.newick, '(A,B):3.0')
def test_consensus_two_trees(self):
    """Consensus of two trees at threshold .5 must keep only the {C D} clade."""
    tree_a = PhyTree(loads("({A},({B},({C},{D}){C D}){B C D}){A B C D};"))
    tree_b = PhyTree(loads("(({A},{B}){A B},({C},{D}){C D}){A B C D};"))

    merged = consensus([tree_a, tree_b], .5)

    actual_root = merged.get_newick()[0]
    wanted_root = loads("({A},{B},({C},{D}){C D}){A B C D};")[0]
    compare_nodes(self, actual_root, wanted_root)
def test_stacked_redundant_node_removal():
    """Chains of single-child nodes collapse; lengths are summed by default."""
    # Without length preservation only the topology is compared.
    nested = loads("(((((A,B))),C))")[0]
    nested.remove_redundant_nodes(preserve_lengths=False)
    assert nested.newick == "(C,(A,B))"

    # Default call: the expected result carries the summed length 3.0.
    with_lengths = loads("(((A,B):1):2)")[0]
    with_lengths.remove_redundant_nodes()
    assert with_lengths.newick == '(A,B):3.0'
def test_stacked_redundant_node_removal(self):
    """Chains of single-child nodes collapse; lengths are summed by default."""
    # Without length preservation only the topology is compared.
    nested = loads("(((((A,B))),C))")[0]
    nested.remove_redundant_nodes(preserve_lengths=False)
    self.assertEqual(nested.newick, "(C,(A,B))")

    # Default call: this variant expects the summed length rendered as "3".
    with_lengths = loads("(((A,B):1):2)")[0]
    with_lengths.remove_redundant_nodes()
    self.assertEqual(with_lengths.newick, '(A,B):3')
def test_prune():
    """Check prune_by_names in both its normal and its inverse mode."""
    to_prune = {"A", "C", "E"}

    # Normal mode: the named leaves disappear, the others remain.
    tree = loads('(A,((B,C),(D,E)))')[0]
    all_leaves = set(tree.get_leaf_names())
    tree.prune_by_names(to_prune)
    remaining = set(tree.get_leaf_names())
    assert remaining == all_leaves - to_prune

    # Inverse mode: only the named leaves remain.
    tree = loads('((A,B),((C,D),(E,F)))')[0]
    tree.prune_by_names(to_prune, inverse=True)
    kept = set(tree.get_leaf_names())
    assert kept == to_prune
def test_prune(self):
    """Check prune_by_names in both its normal and its inverse mode."""
    to_prune = {"A", "C", "E"}

    # Normal mode: the named leaves disappear, the others remain.
    tree = loads('(A,((B,C),(D,E)))')[0]
    all_leaves = set(tree.get_leaf_names())
    tree.prune_by_names(to_prune)
    remaining = set(tree.get_leaf_names())
    self.assertEqual(remaining, all_leaves - to_prune)

    # Inverse mode: only the named leaves remain.
    tree = loads('((A,B),((C,D),(E,F)))')[0]
    tree.prune_by_names(to_prune, inverse=True)
    kept = set(tree.get_leaf_names())
    self.assertEqual(kept, to_prune)
def test_loads(self):
    """Parse the examples from https://en.wikipedia.org/wiki/Newick_format.

    In this variant a missing branch length is expected to be ``None``.
    """
    # Malformed inputs must be rejected.
    for malformed in ('(),;', ');'):
        with self.assertRaises(ValueError):
            loads(malformed)

    # No names, no lengths.
    anonymous = loads('(,,(,));')[0]
    self.assertIsNone(anonymous.name)
    self.assertEqual(anonymous.descendants[0].length, None)
    self.assertEqual(len(anonymous.descendants), 3)

    # Named leaves only.
    leaves_named = loads('(A,B,(C,D));')[0]
    self.assertIsNone(leaves_named.name)
    self.assertEqual(len(leaves_named.descendants), 3)

    # All nodes named, including a non-ASCII root name.
    all_named = loads('(A,B,(C,D)E)Fäß;')[0]
    self.assertEqual(all_named.name, 'Fäß')
    self.assertEqual(len(all_named.descendants), 3)

    # Lengths without names.
    lengths_only = loads('(:0.1,:0.2,(:0.3,:0.4):0.5);')[0]
    self.assertIsNone(lengths_only.name)
    self.assertEqual(lengths_only.descendants[0].length, 0.1)
    self.assertEqual(len(lengths_only.descendants), 3)

    # Tree whose root has a single child.
    single_child = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    self.assertEqual(single_child.name, 'A')
    self.assertEqual(single_child.descendants[-1].length, 0.1)
    self.assertEqual(len(single_child.descendants), 1)
def test_loads(self):
    """Parse the examples from https://en.wikipedia.org/wiki/Newick_format.

    In this variant a missing branch length is expected to be ``0.0``.
    """
    # Malformed inputs must be rejected.
    for malformed in ('(),;', ');'):
        with self.assertRaises(ValueError):
            loads(malformed)

    # No names, no lengths.
    anonymous = loads('(,,(,));')[0]
    self.assertIsNone(anonymous.name)
    self.assertEqual(anonymous.descendants[0].length, 0.0)
    self.assertEqual(len(anonymous.descendants), 3)

    # Named leaves only.
    leaves_named = loads('(A,B,(C,D));')[0]
    self.assertIsNone(leaves_named.name)
    self.assertEqual(len(leaves_named.descendants), 3)

    # All nodes named, including a non-ASCII root name.
    all_named = loads('(A,B,(C,D)E)Fäß;')[0]
    self.assertEqual(all_named.name, 'Fäß')
    self.assertEqual(len(all_named.descendants), 3)

    # Lengths without names.
    lengths_only = loads('(:0.1,:0.2,(:0.3,:0.4):0.5);')[0]
    self.assertIsNone(lengths_only.name)
    self.assertEqual(lengths_only.descendants[0].length, 0.1)
    self.assertEqual(len(lengths_only.descendants), 3)

    # Tree whose root has a single child.
    single_child = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    self.assertEqual(single_child.name, 'A')
    self.assertEqual(single_child.descendants[-1].length, 0.1)
    self.assertEqual(len(single_child.descendants), 1)
def test_loads():
    """Parse the examples from https://en.wikipedia.org/wiki/Newick_format.

    A missing branch length is expected to be ``0.0``.
    """
    # Malformed inputs must be rejected.
    for malformed in ('(),;', ');'):
        with pytest.raises(ValueError):
            loads(malformed)

    # No names, no lengths.
    anonymous = loads('(,,(,));')[0]
    assert anonymous.name is None
    assert anonymous.descendants[0].length == 0.0
    assert len(anonymous.descendants) == 3

    # Named leaves only.
    leaves_named = loads('(A,B,(C,D));')[0]
    assert leaves_named.name is None
    assert len(leaves_named.descendants) == 3

    # All nodes named, including a non-ASCII root name.
    all_named = loads('(A,B,(C,D)E)Fäß;')[0]
    assert all_named.name == 'Fäß'
    assert len(all_named.descendants) == 3

    # Lengths without names.
    lengths_only = loads('(:0.1,:0.2,(:0.3,:0.4):0.5);')[0]
    assert lengths_only.name is None
    assert lengths_only.descendants[0].length == 0.1
    assert len(lengths_only.descendants) == 3

    # Tree whose root has a single child.
    single_child = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    assert single_child.name == 'A'
    assert single_child.descendants[-1].length == 0.1
    assert len(single_child.descendants) == 1
def test_polytomy_resolution(self):
    """resolve_polytomies must turn non-binary trees into binary ones.

    This variant expects the inserted branches to be rendered as ``:0``.
    """
    # Polytomies at two levels.
    nested = loads('(A,B,(C,D,(E,F)))')[0]
    self.assertFalse(nested.is_binary)
    nested.resolve_polytomies()
    self.assertEqual(nested.newick, '(A,((C,((E,F),D):0),B):0)')
    self.assertTrue(nested.is_binary)

    # A single six-way polytomy.
    star = loads('(A,B,C,D,E,F)')[0]
    self.assertFalse(star.is_binary)
    star.resolve_polytomies()
    self.assertEqual(star.newick, '(A,(F,(B,(E,(C,D):0):0):0):0)')
    self.assertTrue(star.is_binary)
def test_polytomy_resolution():
    """resolve_polytomies must turn non-binary trees into binary ones.

    The inserted branches are expected to be rendered as ``:0.0``.
    """
    # Polytomies at two levels.
    nested = loads('(A,B,(C,D,(E,F)))')[0]
    assert not nested.is_binary
    nested.resolve_polytomies()
    assert nested.newick == '(A,((C,((E,F),D):0.0),B):0.0)'
    assert nested.is_binary

    # A single six-way polytomy.
    star = loads('(A,B,C,D,E,F)')[0]
    assert not star.is_binary
    star.resolve_polytomies()
    assert star.newick == '(A,(F,(B,(E,(C,D):0.0):0.0):0.0):0.0)'
    assert star.is_binary
def test_polytomy_resolution(self):
    """resolve_polytomies must turn non-binary trees into binary ones.

    This variant expects the inserted branches to be rendered as ``:0.0``.
    """
    # Polytomies at two levels.
    nested = loads('(A,B,(C,D,(E,F)))')[0]
    self.assertFalse(nested.is_binary)
    nested.resolve_polytomies()
    self.assertEqual(nested.newick, '(A,((C,((E,F),D):0.0),B):0.0)')
    self.assertTrue(nested.is_binary)

    # A single six-way polytomy.
    star = loads('(A,B,C,D,E,F)')[0]
    self.assertFalse(star.is_binary)
    star.resolve_polytomies()
    self.assertEqual(star.newick, '(A,(F,(B,(E,(C,D):0.0):0.0):0.0):0.0)')
    self.assertTrue(star.is_binary)
def test_consensus_multiple_trees(self):
    """Consensus of four trees at threshold .6 must keep only the {C D} clade."""
    source_newicks = (
        "({A},({B},({C},{D}){C D}){B C D}){A B C D};",
        "({A},{B},({C},{D}){C D}){A B C D};",
        "({A},{B},{C},{D}){A B C D};",
        "(({A},{B}){A B},({C},{D}){C D}){A B C D};",
    )
    trees = [PhyTree(loads(text)) for text in source_newicks]

    merged = consensus(trees, .6)

    actual_root = merged.get_newick()[0]
    wanted_root = loads("({A},{B},({C},{D}){C D}){A B C D};")[0]
    compare_nodes(self, actual_root, wanted_root)
def test_no_lengths_equiv(self):
    """Newick output without lengths must match full output with lengths cleared.

    For every tree of the three example tree sequences, the newick string is
    produced with and without branch lengths. Both are parsed with the
    ``newick`` library, the lengths of the second are set to ``None``, and
    the two re-serialised strings are compared.
    """
    examples = (
        self.all_nodes_samples_example(),
        self.only_internal_samples_example(),
        self.mixed_node_samples_example(),
    )
    for tree_sequence in examples:
        for tree in tree_sequence.trees():
            bare_text = tree.newick(include_branch_lengths=False)
            bare_root = newick.loads(bare_text)[0]

            full_text = tree.newick()
            full_root = newick.loads(full_text)[0]
            for parsed_node in full_root.walk():
                parsed_node.length = None

            assert newick.dumps(bare_root) == newick.dumps(full_root)
def test_Node_ascii_art(): assert loads('(A,(B,C)D)Ex;')[0].ascii_art(strict=True) == """\ /-A --Ex-| | /-B \-D--| \-C""" assert loads('(A,(B,C)D)Ex;')[0].ascii_art(strict=True, show_internal=False) == """\ /-A ----| | /-B \---| \-C""" assert loads('(A,B,C)D;')[0].ascii_art(strict=True, show_internal=False) == """\
def test_check_group(self):
    """PhyTree._check_group must not raise ValueError for this tree.

    A ValueError is reported as a test failure that includes the original
    error text, rather than as a message-less AssertionError.
    """
    tree = loads("({A},({B},{C}){B C},{D}){A B C D};")
    try:
        PhyTree._check_group(tree[0])
    except ValueError as err:
        self.fail("PhyTree._check_group unexpectedly raised ValueError: %s" % err)
def test_correct_tree(self):
    """Constructing a PhyTree from this tree must not raise ValueError.

    A ValueError is reported as a test failure that includes the original
    error text, rather than as a message-less AssertionError.
    """
    tree = loads("({A},({B},({C},{D}){C D}){B C D},{E}){A B C D E};")
    try:
        PhyTree(tree)
    except ValueError as err:
        self.fail("PhyTree() unexpectedly raised ValueError: %s" % err)
def from_newick_list(X):
    """
    Build one `PhyloTree` per Newick code found in a string.

    :param X: a string representing a list of Newick codes.
    :return: [`PhyloTree`] instance.
    """
    parsed_nodes = newick.loads(X)
    return list(map(newick_node_to_tree, parsed_nodes))
def test_all_node_labels(self):
    """Custom labels for every node must appear in the newick output."""
    tree = msprime.simulate(5, random_seed=2).first()
    labels = {u: f"x_{u}" for u in tree.nodes()}

    newick_text = tree.newick(node_labels=labels)
    parsed_root = newick.loads(newick_text)[0]

    assert parsed_root.name == labels[tree.root]
    parsed_names = sorted([node.name for node in parsed_root.walk()])
    assert parsed_names == sorted(labels.values())
async def processNewickTree(self, label, newickText, log=logToConsole):
    """Parse a newick string, register the resulting tree and log the counts.

    The first tree parsed from ``newickText`` is handed to
    ``self.processNewickNode``; the tree it returns is stored through
    ``self.addTree`` under ``label`` with the type ``'newick'``.

    :param label: label passed to ``processNewickNode`` and ``addTree``.
    :param newickText: newick text to parse.
    :param log: awaitable callable receiving progress messages.
    :return: tuple ``(newR, seenR, newL, seenL)`` from ``processNewickNode``.
    """
    first_root = newick.loads(newickText)[0]
    result = self.processNewickNode(label, first_root)
    tree, newR, seenR, newL, seenL = result
    self.addTree(label, tree, 'newick')

    await log('Finished parsing newick tree')
    primitives_msg = 'New primitives: %d, Observed existing primitives: %d' % (newR, seenR)
    await log(primitives_msg)
    links_msg = 'New links: %d, Observed existing links: %d' % (newL, seenL)
    await log(links_msg)
    return (newR, seenR, newL, seenL)
def verify_newick_topology(self, tree, root=None, node_labels=None, include_branch_lengths=True):
    """Check that ``tree.newick`` output matches the tree's own topology.

    The newick string is parsed with the ``newick`` library. For every leaf
    below ``root`` the path up to ``root`` is followed in both
    representations, comparing branch lengths at each step.

    :param tree: tree whose newick output is verified.
    :param root: node to start from; defaults to ``tree.root``.
    :param node_labels: optional mapping from node to label; when omitted
        leaves are expected to be named ``str(u + 1)``.
    :param include_branch_lengths: whether lengths are written; when False
        every parsed length is expected to be ``None``.
    """
    if root is None:
        root = tree.root
    newick_text = tree.newick(
        precision=16,
        root=root,
        node_labels=node_labels,
        include_branch_lengths=include_branch_lengths,
    )

    leaf_labels = {}
    for leaf in tree.leaves(root):
        leaf_labels[leaf] = str(leaf + 1) if node_labels is None else node_labels[leaf]

    # default newick lib outputs 0.0 if length is None => replace the length_parser
    def keep_missing_length(raw):
        return None if raw is None else float(raw)

    parsed_root = newick.loads(newick_text, length_parser=keep_missing_length)[0]
    parsed_leaf_names = parsed_root.get_leaf_names()
    assert sorted(parsed_leaf_names) == sorted(leaf_labels.values())

    for u in tree.leaves(root):
        parsed_node = parsed_root.get_node(leaf_labels[u])
        # Climb towards the root in both trees in lockstep.
        while u != root:
            if include_branch_lengths:
                expected_length = tree.branch_length(u)
            else:
                expected_length = None
            self.assertAlmostEqual(parsed_node.length, expected_length)
            parsed_node = parsed_node.ancestor
            u = tree.parent(u)
        # After the climb the parsed node must be the parsed root.
        assert parsed_node.ancestor is None
def glottolog_tree():
    """Download and parse the Glottolog language tree.

    The Newick file at ``URLS['glottolog-newick']`` is fetched and each tree
    in it is converted into nested dictionaries.

    :return: list with one dict per top-level tree. Each dict has the keys
        ``name``, ``glottocode``, ``iso_639_3`` (``None`` when absent),
        ``language`` (``True`` when the label carries the ``-l-`` marker)
        and ``children`` (list of dicts of the same form).
    :raises requests.HTTPError: if the download returns an error status.
    :raises ValueError: if a node label does not match the expected format.
    """
    # Verbose pattern (re.X): whitespace is ignored except inside "[ ]".
    # A raw string keeps "\[" from being an invalid string escape.
    node_pattern = re.compile(
        r"""^'?
        (?P<name> .* )
        [ ]
        \[ (?P<glottocode> [a-z0-9]{8} ) \]
        (?: \[ (?P<iso_639_3> [a-z]{3} ) \] ) ?
        (?P<language> -l- ) ?
        '?$""",
        re.X)

    def parse_node(x):
        """Parse each node in the Newick tree."""
        m = node_pattern.match(x) if x is not None else None
        if m is None:
            # Fail with a clear message instead of crashing further down.
            raise ValueError("Unrecognised Glottolog node label: %r" % (x,))
        out = m.groupdict()
        out['language'] = out['language'] is not None
        return out

    def walk_tree(x):
        """Walk the language tree."""
        node = parse_node(x.name)
        node['children'] = [walk_tree(n) for n in x.descendants]
        return node

    url = URLS['glottolog-newick']
    r = requests.get(url)
    # Do not try to parse an HTTP error page as Newick.
    r.raise_for_status()
    tree = newick.loads(r.text)
    return [walk_tree(branch) for branch in tree]
def from_newick_list(nwk):
    """
    Build one `Shape` per Newick code found in a string.

    :param nwk: a string representing a list of Newick codes.
    :return: `list` instance.
    """
    parsed_nodes = _newick.loads(nwk)
    return list(map(newick_node_to_shape, parsed_nodes))
def rescale_newick(trees_str):
    """Rescale every branch length in a string of Newick trees.

    Each length is multiplied by 4411532 and then compressed:

    * a value below 0.1 becomes 0,
    * a value from 0.1 up to and including 1 is kept,
    * a value above 1 is replaced by its square root.

    The previous implementation also scanned all nodes for the minimum and
    maximum length and derived a ``factor`` from them, but never used any of
    these values. That dead traversal has been removed.

    :param trees_str: Newick text containing one or more trees.
    :return: the rescaled trees serialised with ``newick.dumps``.
    """
    import math

    trees = newick.loads(trees_str)
    for tree in trees:
        for n in tree.walk():
            # NOTE(review): the origin of this scale factor is not visible
            # here; it is kept exactly as in the original code.
            n.length = n.length * 4411532
            if n.length < 0.1:
                n.length = 0
            elif n.length > 1:
                n.length = math.sqrt(n.length)
    return newick.dumps(trees)
def from_newick(nwk):
    """
    Build a `Shape` from the first tree of a Newick string.

    :param nwk: a string representing a Newick code.
    :return: `Shape` instance.
    """
    first_root = _newick.loads(nwk)[0]
    return newick_node_to_shape(first_root)
def test_newick_lib_parsing(self):
    """The newick library must see three leaves: "n1", "n2" and an unnamed one."""
    newick_text = self.tree().as_newick()
    parsed_root = newick.loads(newick_text)[0]
    leaf_names = parsed_root.get_leaf_names()

    assert len(leaf_names) == 3
    for expected_name in ("n1", "n2", None):
        assert expected_name in leaf_names
def test_two_layer_tree(self):
    """Check the seven vertex positions computed for a two-layer tree.

    The expected values put the first layer 0.5 apart and the second layer
    0.25 apart, listed with each parent before its children.
    """
    tree = loads("((l1c0c0,l1c0c1)l0c0,(l1c1c0,l1c1c1)l0c1)Root;")
    positions = _position_vertices(tree[0], 1)

    space_first_layer = .5
    space_second_layer = .25
    root_position = 0

    self.assertIsInstance(positions, list)
    self.assertEqual(len(positions), 7)

    expected_positions = [
        (0., root_position),
        (1., root_position + 0 * space_first_layer),
        (2., root_position + 0 * space_first_layer + 0 * space_second_layer),
        (2., root_position + 0 * space_first_layer + 1 * space_second_layer),
        (1., root_position + 1 * space_first_layer),
        (2., root_position + 1 * space_first_layer + 0 * space_second_layer),
        (2., root_position + 1 * space_first_layer + 1 * space_second_layer),
    ]
    for index, expected in enumerate(expected_positions):
        self.assertTupleEqual(positions[index], expected)
def parse_newick(tree, branch_length_multiplier):
    """
    Parse a newick string and annotate each node with ``depth`` and ``time``.

    ``depth`` is the summed branch length from the root. ``time`` is the
    distance from the deepest node, multiplied by
    ``branch_length_multiplier``.

    :param tree: newick text; only the first tree in it is used.
    :param branch_length_multiplier: factor applied to every node time.
    :return: the annotated root node.
    :raises ValueError: if nothing could be parsed or the tree has fewer
        than three nodes.
    """
    # Parse the newick tree string.
    parsed = newick.loads(tree)
    if not parsed:
        raise ValueError(f"Not a valid newick tree: '{tree}'")
    root = parsed[0]

    # Set node depths (distances from root).
    pending = [(root, 0)]
    visited = 0
    deepest = 0
    while pending:
        current, current_depth = pending.pop()
        visited += 1
        deepest = max(deepest, current_depth)
        current.depth = current_depth
        pending.extend(
            (child, current_depth + child.length)
            for child in current.descendants)

    if visited < 3:
        raise ValueError("Newick tree must have at least three nodes")

    # Set node times (distances from present).
    for current in root.walk():
        current.time = (deepest - current.depth) * branch_length_multiplier
    return root
def from_newick(X):
    """
    Build a `PhyloTree` from the first tree of a Newick string.

    :param X: a string representing a Newick code.
    :return: `PhyloTree` instance.
    """
    first_root = newick.loads(X)[0]
    return newick_node_to_tree(first_root)
def test_get_node():
    """get_node must find nodes by name and allow renaming them in place."""
    root = loads('(A,B,(C,D)E)F;')[0]

    leaf_a = root.get_node("A")
    assert leaf_a.name == 'A'

    clade_leaves = root.get_node('E').get_leaves()
    assert len(clade_leaves) == 2

    # rename
    root.get_node('E').name = 'G'
    assert root.newick == '(A,B,(C,D)G)F'
def test_dumps(*trees):
    """Each example newick string must survive a loads/dumps round trip."""
    examples = (
        '(,,(,));',
        '(A,B,(C,D));',
        '(A,B,(C,D)E)F;',
        '(:0.1,:0.2,(:0.3,:0.4):0.5);',
        '((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;',
    )
    for text in examples:
        round_tripped = dumps(loads(text)[0])
        assert text == round_tripped
def test_dumps(self, *trees):
    """Each example newick string must survive a loads/dumps round trip."""
    examples = (
        '(,,(,));',
        '(A,B,(C,D));',
        '(A,B,(C,D)E)F;',
        '(:0.1,:0.2,(:0.3,:0.4):0.5);',
        '((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;',
    )
    for text in examples:
        round_tripped = dumps(loads(text)[0])
        self.assertEqual(text, round_tripped)
def test_Node_custom_length(self):
    """Exercise custom length formatters and a disabled length parser."""
    # A custom formatter controls how the length is written.
    scientific = Node.create(length=100., length_formatter="{:0.1e}".format)
    self.assertEqual(scientific.newick, ':1.0e+02')

    # With length_parser=None the length text must round-trip unchanged.
    weird_numbers_tree = "((a:1.e2,b:3j),(c:0x0BEFD6B0,d:003))"
    unparsed = loads(weird_numbers_tree, length_parser=None)[0]
    self.assertEqual(weird_numbers_tree, unparsed.newick)

    # A formatter that wraps the length in parentheses must raise ValueError.
    with self.assertRaises(ValueError):
        bracketed = Node.create(length=1., length_formatter="({:0.1e})".format)
        bracketed.newick
def test_Node():
    """Check name validation, traversal orders and ancestor links of Node."""
    # A closing parenthesis in a node name must be rejected.
    with pytest.raises(ValueError):
        Node(name='A)')

    root = loads('(A,B,(C,D)E)F;')[0]

    default_order = [node.name for node in root.walk()]
    assert default_order == ['F', 'A', 'B', 'E', 'C', 'D']

    leaves_only = [node.name for node in root.walk() if node.is_leaf]
    assert leaves_only == ['A', 'B', 'C', 'D']

    postorder = [node.name for node in root.walk(mode='postorder')]
    assert postorder == ['A', 'B', 'C', 'D', 'E', 'F']

    assert root.ancestor is None
    first_child = root.descendants[0]
    assert first_child.ancestor == root

    # For every non-root node, compare the serialisation of its parent.
    root = loads('(((a,b),(c,d)),e);')[0]
    parent_newicks = [
        node.ancestor.newick for node in root.walk() if node.ancestor]
    assert parent_newicks == [
        '(((a,b),(c,d)),e)',
        '((a,b),(c,d))',
        '(a,b)',
        '(a,b)',
        '((a,b),(c,d))',
        '(c,d)',
        '(c,d)',
        '(((a,b),(c,d)),e)',
    ]
def handle_starting_tree(self):
    """
    Makes any changes to the user-provided starting tree required to make
    it suitable for passing to BEAST.

    In particular, this method checks that the supplied string or the
    contents of the supplied file:
        * seems to be a valid Newick tree
        * contains no duplicate taxa
        * has taxa which are a superset of the languages in the analysis
        * has no polytomies or unifurcations.

    If ``self.starting_tree`` names an existing file, its stripped contents
    replace the attribute. An empty starting tree is left untouched.
    Otherwise ``self.starting_tree`` is replaced by the pruned and cleaned
    tree serialised with ``newick.dumps``.

    :raises ValueError: if the tree cannot be parsed, contains duplicate
        taxa, or lacks languages present in ``self.languages``.
    """
    from collections import Counter

    if os.path.exists(self.starting_tree):
        with io.open(self.starting_tree, encoding="UTF-8") as fp:
            self.starting_tree = fp.read().strip()
    if not self.starting_tree:
        return

    # Make sure starting tree can be parsed. Catch Exception rather than
    # everything so KeyboardInterrupt/SystemExit are not swallowed.
    try:
        tree = newick.loads(self.starting_tree)[0]
    except Exception:
        raise ValueError("Could not parse starting tree. Is it valid Newick?")

    # Make sure starting tree contains no duplicate taxa
    tree_langs = [n.name for n in tree.walk() if n.is_leaf]
    lang_counts = Counter(tree_langs)
    if len(lang_counts) != len(tree_langs):
        # Report each duplicated taxon once, with its occurrence count.
        dupestring = ",".join(
            "%s (%d)" % (lang, count)
            for lang, count in lang_counts.items() if count > 1)
        raise ValueError("Starting tree contains duplicate taxa: %s" % dupestring)
    tree_langs = set(tree_langs)

    # Normalise to a set so the comparisons below do not depend on the
    # container type of self.languages.
    analysis_langs = set(self.languages)

    # Make sure languges in tree is a superset of languages in the analysis
    if not tree_langs.issuperset(analysis_langs):
        missing_langs = analysis_langs.difference(tree_langs)
        # Sorted so the error message is deterministic.
        miss_string = ",".join(sorted(missing_langs))
        raise ValueError("Some languages in the data are not in the starting tree: %s" % miss_string)

    # If the trees' language set is a proper superset, prune the tree to fit the analysis
    if tree_langs != analysis_langs:
        tree.prune_by_names(self.languages, inverse=True)
        self.messages.append("[INFO] Starting tree includes languages not present in any data set and will be pruned.")

    # Get the tree looking nice
    tree.remove_redundant_nodes()
    tree.resolve_polytomies()

    # Replace the starting_tree from the config with the new one
    self.starting_tree = newick.dumps(tree)
def test_Node_ascii_art_singleton(): assert loads('((A,B)C)Ex;')[0].ascii_art(strict=True) == """\
def test_leaf_functions():
    """get_leaf_names must return exactly the leaves B, C and D."""
    root = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    found = set(root.get_leaf_names())
    expected = {"B", "C", "D"}
    assert found == expected
def test_redundant_node_removal():
    """After remove_redundant_nodes no node may have exactly one descendant."""
    root = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    # The parsed root starts out with a single child.
    assert len(root.descendants) == 1

    root.remove_redundant_nodes()

    child_counts = [len(node.descendants) for node in root.walk()]
    assert 1 not in child_counts
def test_prune_single_node_tree():
    """Pruning the leaves of a one-node tree must leave it serialised as 'A'."""
    lone_node = loads('A')[0]
    its_leaves = lone_node.get_leaves()
    lone_node.prune(its_leaves)
    assert lone_node.newick == 'A'
def test_prune_and_node_removal():
    """Prune one leaf, then collapse the single-child node that is left."""
    root = loads("((A:1,B:1):1,C:1)")[0]

    # Pruning A leaves B as the only child of its parent.
    root.prune_by_names(['A'])
    assert root.newick == '((B:1):1,C:1)'

    # Collapsing that parent is expected to give B the summed length 2.0.
    root.remove_redundant_nodes()
    assert root.newick == '(C:1,B:2.0)'
def test_comments():
    """Bracketed comments are rejected unless strip_comments=True is given."""
    commented = '[&R] (A,B)C [% ] [% ] [% setBetweenBits = selected ];'

    with pytest.raises(ValueError):
        loads(commented)

    root = loads(commented, strip_comments=True)[0]
    node_count = len(list(root.walk()))
    assert node_count == 3
def test_length_removal():
    """remove_lengths must strip every branch length from the output."""
    root = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    root.remove_lengths()
    serialised = dumps(root)
    assert serialised == '((B,(C,D)E)F)A;'
def test_comments(self):
    """Bracketed comments are rejected unless strip_comments=True is given."""
    commented = '[&R] (A,B)C [% ] [% ] [% setBetweenBits = selected ];'

    with self.assertRaises(ValueError):
        loads(commented)

    root = loads(commented, strip_comments=True)[0]
    node_count = len(list(root.walk()))
    self.assertEqual(node_count, 3)
def isNewick(string):
    """Report whether ``string`` can be parsed by ``loads``.

    :param string: candidate Newick text.
    :return: ``False`` if ``loads`` raises ``ValueError``, ``True`` otherwise.
    """
    # "except ValueError, e" is Python-2-only syntax; this form is valid on
    # both Python 2 and Python 3.
    try:
        loads(string)
    except ValueError:
        return False
    # Explicit success value, so the function never returns None.
    return True
def test_leaf_name_removal():
    """remove_leaf_names must drop leaf names but keep internal ones."""
    root = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    root.remove_leaf_names()
    serialised = dumps(root)
    assert serialised == '((:0.2,(:0.3,:0.4)E:0.5)F:0.1)A;'
def test_internal_name_removal():
    """remove_internal_names must drop internal names but keep leaf ones."""
    root = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    root.remove_internal_names()
    serialised = dumps(root)
    assert serialised == '((B:0.2,(C:0.3,D:0.4):0.5):0.1);'
def test_singletons():
    """Removing redundant nodes must reduce the node count from 11 to 9."""
    root = loads('(((((A), B), (C, D))), E);')[0]

    nodes_before = len(list(root.walk()))
    assert nodes_before == 11

    root.remove_redundant_nodes()

    nodes_after = len(list(root.walk()))
    assert nodes_after == 9
def test_all_removal():
    """Removing names and lengths must leave only the bare topology."""
    root = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    root.remove_names()
    root.remove_lengths()
    bare_topology = dumps(root)
    assert bare_topology == '((,(,)));'