Пример #1
0
class TestNodeDescendantsFunctionality(unittest.TestCase):
    """Checks how ``Node`` stores descendants and renders them as Newick text."""

    def setUp(self):
        """Create the root node ``A``, a spare descendant ``D`` and three lengths."""
        self.test_obj = Node("A", "1.0")
        self.test_descendant = Node("D", "2.0")
        self.lengths = ["2.0", "3.0", "4.0"]

    # NOTE(review): ``data`` is presumably ddt's decorator, which normally needs
    # the class to be decorated with ``@ddt`` as well -- not visible here; confirm.
    @data(["D1.1", "D1.2", "D1.3"], ["D", "", ""], ["", "", ""])
    def test_node_representation_with_deeper_descendants(self, test_data):
        """
        :param test_data: names of the three descendants

        Procedure:
        1. Build a small tree: the root gets one descendant, which itself
           holds the two remaining descendants.
        2. Compare the root's Newick representation with the expected string
           assembled from the same names and lengths.

        :return:
        """
        named_lengths = list(zip(test_data, self.lengths))

        # Expected text of every single node, e.g. "D1.1:2.0".
        expected_parts = []
        for name, length in named_lengths:
            expected_parts.append("{0}:{1}".format(name, length))
        proper_result = "(({1},{2}){0})A:1.0".format(*expected_parts)

        parent, first_child, second_child = [
            Node(name, length) for name, length in named_lengths]
        for child in (first_child, second_child):
            parent.add_descendant(child)
        self.test_obj.add_descendant(parent)

        self.assertEqual(proper_result, self.test_obj.newick)

    def test_node_as_descendants_list(self):
        """A node added with ``add_descendant`` is listed in ``descendants``."""
        self.test_obj.add_descendant(self.test_descendant)
        expected = [self.test_descendant]
        self.assertListEqual(expected, self.test_obj.descendants)
Пример #2
0
class TestNodeDescendantsFunctionality(unittest.TestCase):
    """Descendant handling of ``Node``: Newick rendering and the descendants list."""

    def setUp(self):
        """Prepare the root ``A``, a standalone descendant ``D`` and three lengths."""
        self.test_obj, self.test_descendant = Node("A", "1.0"), Node("D", "2.0")
        self.lengths = ["2.0", "3.0", "4.0"]

    # NOTE(review): ``data`` is presumably ddt's decorator; ddt usually also
    # requires ``@ddt`` on the class, which is not visible here -- confirm.
    @data(["D1.1", "D1.2", "D1.3"], ["D", "", ""], ["", "", ""])
    def test_node_representation_with_deeper_descendants(self, test_data):
        """
        :param test_data: names of the three descendants

        Procedure:
        1. Nest two descendants inside a third one and attach that one to
           the root node.
        2. Check that the root's Newick representation equals the string
           built by hand from the same names and lengths.

        :return:
        """
        def describe(pair):
            # Single-node Newick text: "<name>:<length>".
            return "{0}:{1}".format(*pair)

        proper_result = "(({1},{2}){0})A:1.0".format(
            *map(describe, zip(test_data, self.lengths)))

        d1, d2, d3 = map(
            lambda pair: Node(*pair), zip(test_data, self.lengths))
        d1.add_descendant(d2)
        d1.add_descendant(d3)
        self.test_obj.add_descendant(d1)

        actual = self.test_obj.newick
        self.assertEqual(proper_result, actual)

    def test_node_as_descendants_list(self):
        """``descendants`` contains exactly the node that was added."""
        added = self.test_descendant
        self.test_obj.add_descendant(added)
        self.assertListEqual([added], self.test_obj.descendants)
Пример #3
0
 def newick_node(self, nodes=None, template=None):
     """Build a ``Node`` subtree for this object and, recursively, its children.

     :param nodes: optional node map; when given, children are looked up with
         ``children_from_nodemap(nodes)`` instead of ``self.children``.
     :param template: format string rendered with this object bound to ``l``;
         falls back to ``self._newick_default_template`` when falsy.
     :return: the ``Node`` for this object with one descendant per child,
         added in order of the children's ``name``.
     """
     if not template:
         template = self._newick_default_template
     subtree = Node(name=template.format(l=self), length='1')  # noqa: E741

     if nodes is None:
         children = self.children
     else:
         children = self.children_from_nodemap(nodes)

     ordered = sorted(children, key=lambda child: child.name)
     for child in ordered:
         subtree.add_descendant(
             child.newick_node(nodes=nodes, template=template))
     return subtree
Пример #4
0
 def test_Node_custom_length(self):
     """Exercise custom length formatting and unparsed lengths on ``Node``."""
     # A custom formatter decides how the length shows up in the Newick text.
     formatted = Node.create(length=100., length_formatter="{:0.1e}".format)
     self.assertEqual(formatted.newick, ':1.0e+02')

     # With ``length_parser=None`` the unusual length tokens must survive a
     # parse/serialise round trip unchanged.
     weird_numbers_tree = "((a:1.e2,b:3j),(c:0x0BEFD6B0,d:003))"
     parsed = loads(weird_numbers_tree, length_parser=None)[0]
     self.assertEqual(weird_numbers_tree, parsed.newick)

     # A formatter whose output contains parentheses must be rejected.
     with self.assertRaises(ValueError):
         rejected = Node.create(
             length=1., length_formatter="({:0.1e})".format)
         rejected.newick
Пример #5
0
 def newick_node(self, nodes=None):
     """Build a quoted-label ``Node`` subtree for this object and its children.

     The label is ``"<name> [<id>]"`` with ``,``, ``(`` and ``)`` in the name
     replaced by ``/``, ``{`` and ``}``; ``[<iso>]`` is appended when
     ``self.iso`` is truthy and ``-l-`` when the level is ``Level.language``.

     :param nodes: optional node map; when given, children come from
         ``children_from_nodemap(nodes)`` instead of ``self.children``.
     :return: the ``Node`` for this object with its children attached in
         order of their ``name``.
     """
     safe_name = self.name
     for old, new in ((',', '/'), ('(', '{'), (')', '}')):
         safe_name = safe_name.replace(old, new)

     label = '{0} [{1}]'.format(safe_name, self.id)
     if self.iso:
         label += '[%s]' % self.iso
     if self.level == Level.language:
         label += '-l-'

     subtree = Node(name="'{0}'".format(label), length='1')

     if nodes is None:
         children = self.children
     else:
         children = self.children_from_nodemap(nodes)

     for child in sorted(children, key=lambda item: item.name):
         subtree.add_descendant(child.newick_node(nodes=nodes))
     return subtree
Пример #6
0
    def test_Node(self):
        """Check name validation, traversal orders and ancestor links."""
        def names(nodes):
            return [node.name for node in nodes]

        # A reserved character in the name must be rejected.
        with self.assertRaises(ValueError):
            Node(name='A)')

        tree = loads('(A,B,(C,D)E)F;')[0]
        self.assertEqual(
            names(tree.walk()), ['F', 'A', 'B', 'E', 'C', 'D'])
        self.assertEqual(
            names(node for node in tree.walk() if node.is_leaf),
            ['A', 'B', 'C', 'D'])
        self.assertEqual(
            names(tree.walk(mode='postorder')),
            ['A', 'B', 'C', 'D', 'E', 'F'])
        self.assertEqual(tree.ancestor, None)
        first_child = tree.descendants[0]
        self.assertEqual(first_child.ancestor, tree)

        # Every non-root node reports the Newick text of its parent.
        nested = loads('(((a,b),(c,d)),e);')[0]
        expected_parents = [
            '(((a,b),(c,d)),e)',
            '((a,b),(c,d))',
            '(a,b)',
            '(a,b)',
            '((a,b),(c,d))',
            '(c,d)',
            '(c,d)',
            '(((a,b),(c,d)),e)',
        ]
        parents = [
            node.ancestor.newick for node in nested.walk() if node.ancestor]
        self.assertEqual(parents, expected_parents)
Пример #7
0
 def test_node_with_parameters(self, test_set):
     """Construct a ``Node`` from ``test_set`` and check its name and length.

     :param test_set: keyword arguments for ``Node``; must contain ``name``.
         The expected length is 3.0 when ``length`` is present, else 0.0.
     """
     expected_length = 3.0 if "length" in test_set else 0.0
     node = Node(**test_set)
     self.assertEqual(test_set["name"], node.name)
     self.assertEqual(expected_length, node.length)
Пример #8
0
def upgma(distance_matrix, names=None):
    """Cluster the items of a distance matrix into a tree.

    The docstring of the original names this the UPGMA algorithm (Unweighted
    Pair Group Method with Arithmetic Mean). In each round the two closest
    clusters are merged and their rows/columns are replaced by the
    equally-weighted mean of both.

    :param distance_matrix: square matrix of pairwise distances; anything
        ``numpy.array`` can turn into a 2-D float array. The input is copied,
        so the caller's object is never modified.
    :param names: optional iterable of node names, one per row of
        ``distance_matrix``. When ``None`` or empty, the row indices are used.
    :return: the root ``Node`` of the resulting tree.
    :raises IndexError: if ``distance_matrix`` is empty (there is no root).

    The edge lengths in the tree are not useful for the time being.
    """
    # Private float copy: the merge step below writes into the matrix, which
    # must not leak into the caller's data, and an integer matrix would
    # otherwise truncate the averaged distances.
    distances = numpy.array(distance_matrix, dtype=float)

    # ``names or range(...)`` would raise for a numpy array of names, so the
    # emptiness check is done on a plain list instead.
    labels = [] if names is None else list(names)
    if not labels:
        labels = range(len(distances))
    nodes = [Node(name) for name in labels]

    # Merge until a single cluster (the root) is left.
    nc = len(distances)
    while nc > 1:
        # Find the closest pair in the upper triangle. The first pair is the
        # fallback so the indices are always defined, even when no entry
        # compares smaller than infinity (all-inf or NaN matrices).
        minimum_distance = numpy.inf
        closest = (0, 1)
        for i in range(nc - 1):
            for j in range(i + 1, nc):
                dis = distances[i, j]
                if dis < minimum_distance:
                    minimum_distance = dis
                    closest = (i, j)

        # Merge cluster j into cluster i: row and column i become the mean.
        i, j = closest
        cluster = nodes[i], nodes[j]
        distances[i] = 0.5 * distances[i] + 0.5 * distances[j]
        distances[:, i] = 0.5 * distances[:, i] + 0.5 * distances[:, j]
        nodes[i] = Node.create(descendants=cluster)
        for c in cluster:
            c.length = distances[i, i]

        # Drop row and column j; ``numpy.delete`` returns a new array.
        distances = numpy.delete(numpy.delete(distances, j, 0), j, 1)
        del nodes[j]

        nc -= 1
    return nodes[0]
Пример #9
0
def rename_none_node(node_to_rename: newick.Node, counter):
    """
    Give an unnamed node a distinguishable name.

    A node whose ``name`` is ``None`` is renamed to ``"None_<counter>"`` and
    the counter is advanced by one; a named node is returned untouched.

    :param node_to_rename: node to be checked
    :param counter: int; running counter of unnamed nodes
    :return: (Node, int) -- the same node object and the updated counter
    """
    if node_to_rename.name is not None:
        return node_to_rename, counter

    parts = (str(node_to_rename.name), str(counter))
    node_to_rename.name = "_".join(parts)
    counter += 1
    return node_to_rename, counter
Пример #10
0
    def newick_node(self, nodes=None, template=None, maxlevel=None, level=0) -> Node:
        """
        Return a `newick.Node` representing the subtree of the Glottolog classification
        starting at the languoid.

        :param nodes: optional node map; when given, children are looked up with \
        `children_from_nodemap(nodes)` instead of `self.children`.
        :param template: Python format string accepting the `Languoid` instance as single \
        variable named `l`, used to format node labels.
        :param maxlevel: optional cut-off; ignored when falsy. A `config.LanguoidLevel` is \
        compared with each child's `level`, any other value with the recursion depth `level`.
        :param level: current recursion depth, incremented by one for each generation.
        """
        if not template:
            template = self._newick_default_template
        root = Node(name=template.format(l=self), length='1')  # noqa: E741

        if nodes is None:
            candidates = self.children
        else:
            candidates = self.children_from_nodemap(nodes)

        for child in sorted(candidates, key=lambda languoid: languoid.name):
            if maxlevel:
                if isinstance(maxlevel, config.LanguoidLevel):
                    beyond_limit = child.level > maxlevel
                else:
                    beyond_limit = level > maxlevel
                if beyond_limit:
                    continue
            subtree = child.newick_node(
                nodes=nodes, template=template, maxlevel=maxlevel, level=level + 1)
            root.add_descendant(subtree)
        return root
Пример #11
0
def phytree_from_groups(groups: List[set]):
    """Build a ``PhyTree`` from groups by nesting each group under its supersets.

    Every group becomes a node named after its sorted members in braces.
    A group is attached below another group when it is a proper subset of it
    and has not already been claimed by an earlier-processed group.

    :param groups: collections of member names; members that do not start
        with an ASCII letter or digit are dropped.
    :return: ``PhyTree`` built from the nodes that were never attached to a
        parent.
    """
    class TreeBlock:
        """One group together with the rendered names of the groups it contains."""
        name: set
        contains: List[str]

        def __init__(self, name: set):
            self.name = name
            self.contains = []

        def name_to_str(self) -> str:
            # str(sorted(...)) looks like "['a', 'b']"; strip commas and
            # quotes, then swap the brackets for braces -> "{a b}".
            listing = str(sorted(self.name))
            stripped = re.sub(r"[,']", "", listing)
            return stripped.replace("[", "{").replace("]", "}")

        def __str__(self):
            return f"Group: {self.name}, Contains: {self.contains}"

    # Keep only members starting with an alphanumeric character; largest
    # groups first.
    cluster_sets = []
    for group in groups:
        members = set(group)
        cluster_sets.append(
            {m for m in members if re.match(r'[a-zA-Z0-9]+', m)})
    cluster_sets.sort(key=len, reverse=True)

    blocks = [TreeBlock(members) for members in cluster_sets]
    for block in blocks:
        block.contains.extend(
            other.name_to_str() for other in blocks if block.name > other.name)

    # Groups containing the fewest other groups are turned into nodes first,
    # so that they exist by the time their supersets look for them.
    blocks.sort(key=lambda block: len(block.contains))

    nodes: List[Node] = []
    for block in blocks:
        node = Node(block.name_to_str())
        for wanted in block.contains:
            match = next((n for n in nodes if n.name == wanted), None)
            if match is None:
                continue
            node.descendants.append(match)
            node.descendants = sorted(
                node.descendants, key=lambda item: item.name)
            nodes.remove(match)
        nodes.append(node)
    return PhyTree(nodes)
Пример #12
0
    def _convert_to_phyloxml(self,
                             seq_id_to_seq_name: Dict[SequenceID,
                                                      str] = None) -> str:
        """Serialise the tree of ``self.nodes`` with human-readable node labels.

        Nodes holding exactly one sequence are labelled with that sequence's
        name (taken from ``seq_id_to_seq_name`` when given, otherwise the
        sequence id's ``value``); all other nodes are labelled
        ``"Consensus <consensus_id>"``.

        :param seq_id_to_seq_name: optional mapping from sequence id to the
            name used as node label.
        :return: the result of ``dumps`` on the built tree, or ``None`` when
            there are no nodes.

        Side effect: writes the file ``drzewko.xml`` into the current working
        directory (see the note below).
        """
        if not self.nodes:
            return None

        # NOTE(review): this PhyloXML round trip writes 'drzewko.xml' to the
        # working directory and reads it back, but its result is never used
        # for the return value. It is kept because removing it would drop the
        # file side effect and any exception it may raise -- confirm whether
        # it can be deleted.
        newick_str = self._convert_to_newick(seq_id_to_seq_name)
        tree = Phylo.read(StringIO(newick_str), 'newick')
        Phylo.write(tree, 'drzewko.xml', 'phyloxml')
        Phylo.PhyloXMLIO.read("drzewko.xml")

        sorted_nodes = sorted(self.nodes, key=lambda x: x.consensus_id)

        # Stack of (parent label, node); the node with the lowest consensus
        # id is used as the root.
        nodes_to_process = [(None, sorted_nodes[0])]
        newick_tree = None
        while nodes_to_process:
            node_parent_label, node = nodes_to_process.pop()

            if len(node.sequences_ids) == 1:
                only_id = node.sequences_ids[0]
                if seq_id_to_seq_name:
                    label = seq_id_to_seq_name[only_id]
                else:
                    label = only_id.value
            else:
                # Was misspelled "Consenses" when a name mapping was given,
                # which made the labels depend on the argument.
                label = f"Consensus {node.consensus_id}"

            if node.parent_node_id is None:
                length = "1"
            else:
                parent_minComp = sorted_nodes[
                    node.parent_node_id].mincomp.root_value().value
                length = str((1 - parent_minComp) -
                             (1 - node.mincomp.root_value().value))

            newick_node = Node(name=label, length=length)

            if newick_tree is None:
                newick_tree = newick_node
            else:
                # The parent is looked up by label, so labels are assumed to
                # be unique within the tree.
                parent_node = newick_tree.get_node(node_parent_label)
                parent_node.add_descendant(newick_node)

            for child in node.children_nodes_ids:
                nodes_to_process.append((label, sorted_nodes[child]))

        return dumps(newick_tree)
Пример #13
0
    def test_node_representation_with_deeper_descendants(self, test_data):
        """
        :param test_data: names of the three descendants

        Procedure:
        1. Build a small tree in which the root has one descendant that
           itself holds two further descendants.
        2. Verify that the root's Newick representation matches the
           hand-assembled "proper_result".

        :return:
        """
        pairs = list(zip(test_data, self.lengths))

        rendered = ["{0}:{1}".format(*pair) for pair in pairs]
        proper_result = "(({1},{2}){0})A:1.0".format(*rendered)

        inner, left, right = [Node(name, length) for name, length in pairs]
        inner.add_descendant(left)
        inner.add_descendant(right)
        self.test_obj.add_descendant(inner)

        actual = self.test_obj.newick
        self.assertEqual(proper_result, actual)
Пример #14
0
def test_Node():
    """Check name validation, traversal orders and ancestor links of ``Node``."""
    # A reserved character in the name must be rejected.
    with pytest.raises(ValueError):
        Node(name='A)')

    tree = loads('(A,B,(C,D)E)F;')[0]

    default_order = [node.name for node in tree.walk()]
    assert default_order == ['F', 'A', 'B', 'E', 'C', 'D']

    leaves = [node.name for node in tree.walk() if node.is_leaf]
    assert leaves == ['A', 'B', 'C', 'D']

    postorder = [node.name for node in tree.walk(mode='postorder')]
    assert postorder == ['A', 'B', 'C', 'D', 'E', 'F']

    assert tree.ancestor is None
    first_child = tree.descendants[0]
    assert first_child.ancestor == tree

    # Every non-root node reports the Newick text of its parent.
    nested = loads('(((a,b),(c,d)),e);')[0]
    expected_parents = [
        '(((a,b),(c,d)),e)',
        '((a,b),(c,d))',
        '(a,b)',
        '(a,b)',
        '((a,b),(c,d))',
        '(c,d)',
        '(c,d)',
        '(((a,b),(c,d)),e)',
    ]
    parents = [node.ancestor.newick for node in nested.walk() if node.ancestor]
    assert parents == expected_parents
Пример #15
0
 def clone_node(n):
     """Recursively copy ``n`` and its descendants, keeping only the names."""
     duplicate = Node(name=n.name)
     # ``map`` is lazy, so each child is cloned right before it is attached.
     for child_copy in map(clone_node, n.descendants):
         duplicate.add_descendant(child_copy)
     return duplicate
Пример #16
0
 def test_node_length_changeability(self):
     """A length given as a string reads back as a number, also after reassignment."""
     node = Node(length="10")
     self.assertEqual(10, node.length)

     node.length = "12"
     self.assertEqual(12, node.length)
Пример #17
0
 def test_node_parameters_changeability(self):
     """The ``name`` set at construction can be read back and overwritten."""
     node = Node(name="A")
     self.assertEqual("A", node.name)

     node.name = "B"
     self.assertEqual("B", node.name)
Пример #18
0
 def test_node_newick_representation_with_length(self):
     """A named node with a length renders as ``<name>:<length>``."""
     rendered = Node(name="A", length="3").newick
     self.assertEqual("A:3", rendered)
Пример #19
0
 def test_node_parameters_changeability(self):
     """Reading and reassigning ``name`` on an existing node works."""
     subject = Node(name="A")
     self.assertEqual("A", subject.name)

     subject.name = "B"
     self.assertEqual("B", subject.name)
Пример #20
0
 def setUp(self):
     """Create a fresh ``Node`` with default arguments as ``self.test_obj``."""
     self.test_obj = Node()
Пример #21
0
def test_Node_custom_length():
    """Custom length parser and formatter callables are applied by ``Node``."""
    # The parser's return value is what ``length`` reports.
    parsed = Node.create(length='1e2', length_parser=lambda raw: raw + 'i')
    assert parsed.length == '1e2i'

    # The formatter's constant output wins over the assigned value.
    formatted = Node.create(length_formatter=lambda _value: 5)
    formatted.length = 10
    assert formatted.length == pytest.approx(5)
Пример #22
0
def test_repr():
    """``repr`` of a named node is ``Node("<name>")``."""
    rendered = repr(Node(name="A"))
    assert rendered == 'Node("A")'
Пример #23
0
 def test_node_length_changeability(self):
     """``length`` accepts numeric strings at construction and on assignment."""
     subject = Node(length="10")
     self.assertEqual(10, subject.length)

     subject.length = "12"
     self.assertEqual(12, subject.length)
Пример #24
0
 def setUp(self):
     """Create the shared fixtures: two nodes and a list of length strings."""
     # Node "A" with length "1.0".
     self.test_obj = Node("A", "1.0")
     # Separate node "D" with length "2.0".
     self.test_descendant = Node("D", "2.0")
     # Three length strings kept for use by the tests.
     self.lengths = ["2.0", "3.0", "4.0"]
Пример #25
0
    def _convert_to_newick(self,
                           seq_id_to_metadata: Dict[SequenceID,
                                                    str] = None) -> str:
        """Render the tree of ``self.nodes`` as a Newick string with NHX annotations.

        A ``Node`` tree is first built with the consensus ids as names, then
        serialised by the nested ``newick_nhx`` helper, which appends an
        ``[&&NHX:...]`` block to every node that has a length.

        :param seq_id_to_metadata: optional mapping from sequence id to
            metadata. NOTE(review): the annotation says the values are
            ``str``, but the code indexes them with ``["name"]`` and
            ``["group"]`` -- presumably dicts; confirm.
        :return: the serialised tree wrapped in one extra pair of
            parentheses, or ``None`` when ``self.nodes`` is empty.
        """
        def newick_nhx(newick_tree):
            """Return the Newick text of ``newick_tree`` including NHX metadata."""
            label = newick_tree.name or ''
            if newick_tree._length:
                # Find the consensus node this tree node was created from;
                # tree node names are the stringified consensus ids.
                # NOTE(review): if no node matched, ``metadata`` would be
                # unbound at the ``label +=`` line below -- the tree is built
                # from ``self.nodes``, so a match is presumably guaranteed.
                for cn in self.nodes:
                    if str(cn.consensus_id) == newick_tree.name:
                        if seq_id_to_metadata:
                            if len(cn.sequences_ids) == 1:
                                # Single-sequence node: take name and group
                                # from the supplied metadata.
                                name = seq_id_to_metadata[
                                    cn.sequences_ids[0]]["name"]
                                group = seq_id_to_metadata[
                                    cn.sequences_ids[0]]["group"]
                                seqid = cn.sequences_ids[0]
                                metadata = f"[&&NHX:name={name}:group={group}:seqid={seqid}:mincomp={cn.mincomp}]"
                            else:
                                name = f"Consensus {cn.consensus_id}"
                                metadata = f"[&&NHX:name={name}:mincomp={cn.mincomp}]"
                        else:
                            # No metadata given: fall back to the sequence id
                            # itself (or a generic consensus name).
                            if len(cn.sequences_ids) == 1:
                                name = cn.sequences_ids[0]
                            else:
                                name = f"Consensus {cn.consensus_id}"
                            mincomp = cn.mincomp
                            metadata = f"[&&NHX:name={name}:mincomp={mincomp}]"
                label += ':' + newick_tree._length + metadata
            # Children are rendered recursively and wrapped in parentheses.
            descendants = ','.join(
                [newick_nhx(n) for n in newick_tree.descendants])
            if descendants:
                descendants = '(' + descendants + ')'
            return descendants + label

        if not self.nodes:
            return None

        sorted_nodes = sorted(self.nodes, key=lambda x: x.consensus_id)

        # Stack of (parent label, node); the node with the lowest consensus
        # id is used as the root. Nodes are indexed in ``sorted_nodes`` by
        # their parent/child ids.
        nodes_to_process = [(None, sorted_nodes[0])]
        newick_tree = None
        while nodes_to_process:
            n = nodes_to_process.pop()
            node_parent_label = n[0]
            node = n[1]

            label = str(node.consensus_id)
            if node.parent_node_id is None:
                # Root node gets a fixed length.
                length = "1"
            else:
                # Branch length is derived from the parent's and the node's
                # own ``mincomp`` root values.
                parent_minComp = sorted_nodes[
                    node.parent_node_id].mincomp.root_value().value
                length = str((1 - parent_minComp) -
                             (1 - node.mincomp.root_value().value))

            newick_node = Node(name=label, length=length)

            if newick_tree is None:
                newick_tree = newick_node
            else:
                # Attach below the already-created parent, found by label.
                parent_node = newick_tree.get_node(node_parent_label)
                parent_node.add_descendant(newick_node)

            for child in node.children_nodes_ids:
                nodes_to_process.append((label, sorted_nodes[child]))
        return "(" + newick_nhx(newick_tree) + ")"
Пример #26
0
 def test_repr(self):
     """``repr`` of a named node is ``Node("<name>")``."""
     rendered = repr(Node(name="A"))
     self.assertEqual(rendered, 'Node("A")')
Пример #27
0
 def setUp(self):
     """Build the fixtures used by the tests: two nodes and three length strings."""
     # Node named "A" with length "1.0".
     self.test_obj = Node("A", "1.0")
     # Independent node named "D" with length "2.0".
     self.test_descendant = Node("D", "2.0")
     # Length strings stored for the tests to pair with node names.
     self.lengths = ["2.0", "3.0", "4.0"]
Пример #28
0
def node():
    """Return a new ``Node`` created with default arguments."""
    return Node()
Пример #29
0
def test_Node_custom_length():
    """``Node.create`` honours custom ``length_parser`` and ``length_formatter``."""
    with_parser = Node.create(
        length='1e2', length_parser=lambda text: text + 'i')
    assert with_parser.length == '1e2i'

    # The formatter ignores its input, so the assigned 10 reads back as 5.
    with_formatter = Node.create(length_formatter=lambda _ignored: 5)
    with_formatter.length = 10
    assert with_formatter.length == pytest.approx(5)
Пример #30
0
 def clone_node(n):
     """Return a name-only deep copy of ``n`` and all of its descendants."""
     clone = Node(name=n.name)
     for original_child in n.descendants:
         child_clone = clone_node(original_child)
         clone.add_descendant(child_clone)
     return clone
Пример #31
0
    def _add_leaf(node: Node, target: str, leaf: str):
        """Insert ``leaf`` next to ``target`` below ``node`` and rename nodes on the way.

        The function recurses from ``node`` towards the node named ``target``.
        Unless it returns early, the last statement adds ``leaf`` to the name
        of ``node`` itself by replacing the closing ``}`` with ``" <leaf>}"``.

        :param node: node to start from; its ``name`` must be recognised by
            ``PhyTree._is_group`` or ``PhyTree._is_leaf``.
        :param target: name of the group or leaf the new leaf is attached to.
        :param leaf: bare leaf name; the new node is named ``"{<leaf>}"``.
        :raises ValueError: if a visited node is neither group nor leaf, or if
            a leaf other than ``target`` is reached.
        """
        if PhyTree._is_group(node.name):
            if node.name == target:
                # The target group itself: the new leaf becomes a direct child.
                node.add_descendant(Node(f"{{{leaf}}}"))
            else:
                # Work out which leaves the target covers, then descend into
                # the child that contains all of them.
                if PhyTree._is_group(target):
                    expected_leaves = set(PhyTree._get_group_leaves(target))
                else:
                    expected_leaves = {target}
                for child in node.descendants:
                    if PhyTree._is_group(child.name):
                        child_leaves = set(
                            PhyTree._get_group_leaves(child.name))
                        if expected_leaves <= child_leaves:
                            PhyTree._add_leaf(child, target, leaf)
                            break
                    elif PhyTree._is_leaf(child.name):
                        # NOTE(review): unlike the group branch there is no
                        # ``break`` after this recursion -- confirm intended.
                        if child.name in expected_leaves:
                            PhyTree._add_leaf(child, target, leaf)
                    else:
                        raise ValueError(f"Couldn't find {target}")
                # NOTE(review): when no child matches, nothing is attached but
                # this node is still renamed below -- confirm intended.

        elif PhyTree._is_leaf(node.name):
            if node.name == target:
                if node.name != '{}':
                    # Turn the leaf into an inner node holding a copy of
                    # itself plus the new leaf.
                    node.add_descendant(Node(node.name))
                    node.add_descendant(Node(f"{{{leaf}}}"))
                else:
                    # Empty placeholder leaf: just take over the new name and
                    # skip the renaming at the end.
                    node.name = f"{{{leaf}}}"
                    return
            else:
                raise ValueError(f"Unexpected leaf: {node.name}")
        else:
            raise ValueError(f"Couldn't recognize {leaf} as a leaf or a group")
        # Record the new leaf in this node's own name: "{a b}" -> "{a b leaf}".
        node.name = node.name.replace("}", f" {leaf}}}")
Пример #32
0
 def test_Node_custom_length(self):
     """Custom length parser and formatter callables are applied by ``Node``."""
     # The parser's return value is what ``length`` reports.
     parsed = Node.create(length='1e2', length_parser=lambda raw: raw + 'i')
     self.assertEqual(parsed.length, '1e2i')

     # The formatter returns a constant, so the assigned 10 reads back as 5.
     formatted = Node.create(length_formatter=lambda _value: 5)
     formatted.length = 10
     self.assertAlmostEqual(formatted.length, 5)
Пример #33
0
 def test_Node_custom_length(self):
     """``Node.create`` honours custom ``length_parser`` and ``length_formatter``."""
     with_parser = Node.create(
         length='1e2', length_parser=lambda text: text + 'i')
     self.assertEqual(with_parser.length, '1e2i')

     with_formatter = Node.create(length_formatter=lambda _ignored: 5)
     with_formatter.length = 10
     self.assertAlmostEqual(with_formatter.length, 5)