Exemplo n.º 1
0
 def test_get_vtree_consistent_multicolors(self):
     # the getter is expected to refresh the cached multicolors on first use and
     # to hand back copies, so that callers can not alter the tree's own state
     tree = BGTree("(((v1, v2), v3),(v4, v5));")
     self.assertFalse(tree.multicolors_are_up_to_date)
     vtree_consistent_multicolors = tree.get_vtree_consistent_multicolors()
     self.assertTrue(tree.multicolors_are_up_to_date)
     self.assertIsInstance(vtree_consistent_multicolors, list)
     # compare the contents: assertTrue(a, b) would only check that `a` is
     # truthy and silently use `b` as the failure message
     self.assertListEqual(vtree_consistent_multicolors,
                          tree.vtree_consistent_multicolors)
     # equal in value, but neither the list nor its entries may be the stored objects
     self.assertIsNot(vtree_consistent_multicolors,
                      tree.vtree_consistent_multicolors)
     for obtained_mc, stored_mc in zip(vtree_consistent_multicolors,
                                       tree.vtree_consistent_multicolors):
         self.assertIsNot(obtained_mc, stored_mc)
     self.assertSetEqual(
         {
             mc.hashable_representation
             for mc in vtree_consistent_multicolors
         }, tree.vtree_consistent_multicolors_set)
     self.assertEqual(len(vtree_consistent_multicolors), 10)
     # the empty multicolor, the full one, every leaf and every inner subtree
     ref_vtree_consistent_multicolors = [
         Multicolor(),
         Multicolor(self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4,
                    self.bg_v5),
         Multicolor(self.bg_v1),
         Multicolor(self.bg_v2),
         Multicolor(self.bg_v3),
         Multicolor(self.bg_v4),
         Multicolor(self.bg_v5),
         Multicolor(self.bg_v1, self.bg_v2),
         Multicolor(self.bg_v4, self.bg_v5),
         Multicolor(self.bg_v1, self.bg_v2, self.bg_v3),
     ]
     for multicolor in ref_vtree_consistent_multicolors:
         self.assertIn(multicolor, vtree_consistent_multicolors)
Exemplo n.º 2
0
 def test_has_node(self):
     # a tree can be asked whether a node with a given name is present in it
     tree = BGTree("(v1, v2)root;")
     for present_name in (self.v1, self.v2, "root"):
         self.assertTrue(tree.has_node(present_name))
     # v4 was never mentioned in the newick string
     self.assertFalse(tree.has_node(self.v4))
Exemplo n.º 3
0
 def test_get_tree_consistent_multicolors_with_non_default_leaf_wrapper(
         self):
     # with an identity leaf wrapper the multicolors are built from raw leaf names
     tree = BGTree("(v1, v2)root;", leaf_wrapper=lambda name: name)
     obtained = tree.get_tree_consistent_multicolors()
     expected = [
         Multicolor(self.v1),
         Multicolor(self.v2),
         Multicolor(),
         Multicolor(self.v1, self.v2),
     ]
     self.assertEqual(len(obtained), 4)
     for multicolor in obtained:
         self.assertIn(multicolor, expected)
Exemplo n.º 4
0
 def test_edge_length(self):
     # the length given in the newick string is reported as the distance
     # between the two end nodes, regardless of the order they are named in
     tree = BGTree("(v1:5)v2;")
     for first, second in ((self.v1, self.v2), (self.v2, self.v1)):
         self.assertEqual(
             tree.get_distance(node1_name=first, node2_name=second), 5)
     # distance lookup requires both nodes to be present in the tree:
     # a missing node in either position, or in both, is a ValueError
     missing_node_queries = (
         (self.v1, self.v3),
         (self.v3, self.v1),
         (self.v3, self.v4),
     )
     for first, second in missing_node_queries:
         with self.assertRaises(ValueError):
             tree.get_distance(node1_name=first, node2_name=second)
Exemplo n.º 5
0
 def test_add_edge(self):
     # a tree supports an operation to add a new edge (branch) to it;
     # vertices of the new edge that are not in the tree yet are added as well
     tree = BGTree("(v1:5)v2;")
     tree.multicolors_are_up_to_date = True
     tree.add_edge(node1_name=self.v1, node2_name=self.v3)
     # changing the topology must invalidate the cached multicolors
     self.assertFalse(tree.multicolors_are_up_to_date)
     node_count = len(list(tree.nodes()))
     self.assertEqual(node_count, 3)
     edge_count = len(list(tree.edges()))
     self.assertEqual(edge_count, 2)
     # the new edge got a length of 1, as none was specified
     expected_distances = (
         ((self.v1, self.v2), 5),
         ((self.v1, self.v3), 1),
         ((self.v2, self.v3), 6),
     )
     for (first, second), distance in expected_distances:
         self.assertEqual(tree.get_distance(first, second), distance)
     # distance queries leave the flag untouched
     self.assertFalse(tree.multicolors_are_up_to_date)
Exemplo n.º 6
0
 def test_edges_non_binary_tree(self):
     # edges of a tree with multifurcations are reported as 2-tuples of nodes
     tree = BGTree(newick="((a, b, c), (d, e, (f, g, e)));",
                   leaf_wrapper=BGGenome)
     edges = list(tree.edges())
     self.assertEqual(len(edges), 11)
     allowed_endpoint_types = (BGGenome, TreeNode)
     for edge in edges:
         self.assertIsInstance(edge, tuple)
         self.assertEqual(len(edge), 2)
         for endpoint in edge:
             self.assertTrue(isinstance(endpoint, allowed_endpoint_types))
     # an edge is a leaf edge as soon as one of its ends is a wrapped leaf
     leaf_edge_count = sum(
         1 for edge in edges
         if any(isinstance(endpoint, BGGenome) for endpoint in edge))
     self.assertEqual(leaf_edge_count, 8)
Exemplo n.º 7
0
 def test_add_edge_explicit_edge_length(self):
     # the length of a newly added edge can be supplied by the caller
     tree = BGTree("(v1)v2;")
     tree.multicolors_are_up_to_date = True
     tree.add_edge(node1_name=self.v2, node2_name=self.v3, edge_length=5)
     # changing the topology must invalidate the cached multicolors
     self.assertFalse(tree.multicolors_are_up_to_date)
     node_count = len(list(tree.nodes()))
     self.assertEqual(node_count, 3)
     edge_count = len(list(tree.edges()))
     self.assertEqual(edge_count, 2)
     expected_distances = (
         ((self.v1, self.v3), 6),
         ((self.v2, self.v3), 5),
     )
     for (first, second), distance in expected_distances:
         self.assertEqual(tree.get_distance(first, second), distance)
     # distance queries leave the flag untouched
     self.assertFalse(tree.multicolors_are_up_to_date)
Exemplo n.º 8
0
    def run(self, manager):
        """Read all assembly inputs and store them under ``manager.data["gos-asm"]``.

        The following keys are written, in this order: ``bg`` (breakpoint graph
        built from the block orders files), ``phylogenetic_tree``,
        ``target_multicolor`` (all target organisms in one multicolor),
        ``target_multicolors`` (the distinct, non-empty multicolors derived from
        the target one and the tree, longest first) and ``repeats_guidance``.

        :param manager: object providing ``logger``, ``configuration`` and ``data``;
            the input settings are read from ``configuration["gos-asm"]["input"]``
        """
        manager.logger.info("Reading blocks orders data")
        input_config = manager.configuration["gos-asm"]["input"]
        bg = BreakpointGraph()
        for file_path in input_config["block_orders_file_paths"]:
            with open(file_path, "rt") as source:
                bg.update(breakpoint_graph=GRIMMReader.get_breakpoint_graph(stream=source, merge_edges=False),
                          merge_edges=False)
        manager.data["gos-asm"]["bg"] = bg

        manager.logger.info("Reading phylogenetic tree information")
        tree = BGTree(newick=input_config["phylogenetic_tree"])
        manager.data["gos-asm"]["phylogenetic_tree"] = tree

        full_tmc = Multicolor(*[BGGenome(genome_name) for genome_name in input_config["target_organisms"]])
        manager.data["gos-asm"]["target_multicolor"] = full_tmc
        vtree_consistent_target_multicolors = Multicolor.split_colors(full_tmc,
                                                                      guidance=tree.vtree_consistent_multicolors,
                                                                      account_for_color_multiplicity_in_guidance=False)

        # the tree is not modified below, so its multicolors are copied once
        # instead of once per target multicolor
        vtree_consistent_multicolors = deepcopy(tree.vtree_consistent_multicolors)
        # iterate over a snapshot, as the list itself is extended inside the loop
        for target_multicolor in vtree_consistent_target_multicolors[:]:
            for vtree_c_multicolor in vtree_consistent_multicolors:
                if vtree_c_multicolor <= target_multicolor \
                        and vtree_c_multicolor not in vtree_consistent_target_multicolors \
                        and len(vtree_c_multicolor.colors) > 0:
                    vtree_consistent_target_multicolors.append(vtree_c_multicolor)

        # NOTE(review): this ordering only determines the insertion order into the
        # set below; it is kept so that ties in the final sort are resolved exactly
        # as before
        vtree_consistent_target_multicolors = sorted(vtree_consistent_target_multicolors,
                                                     key=lambda mc: len(mc.hashable_representation),
                                                     reverse=True)

        # drop duplicates via the hashable form, then rebuild the multicolors
        hashed_vertex_tree_consistent_multicolors = {mc.hashable_representation
                                                     for mc in vtree_consistent_target_multicolors}
        all_target_multicolors = [Multicolor(*hashed_multicolor) for hashed_multicolor in
                                  hashed_vertex_tree_consistent_multicolors]
        all_target_multicolors = sorted(all_target_multicolors,
                                        key=lambda mc: len(mc.hashable_representation),
                                        reverse=True)
        manager.data["gos-asm"]["target_multicolors"] = all_target_multicolors

        manager.logger.info("Reading repeats-bridges information")
        manager.data["gos-asm"]["repeats_guidance"] = get_repeats_bridges_guidance(
            file_name=input_config["repeats_bridges_file"], data=manager.data)
Exemplo n.º 9
0
 def test_get_tree_consistent_multicolors(self):
     # with no account for wgd root specification is irrelevant
     # the getter is expected to refresh the cached multicolors on first use and
     # to hand back copies, so that callers can not alter the tree's own state
     tree = BGTree("(((v1, v2), v3),(v4, v5));")
     self.assertFalse(tree.multicolors_are_up_to_date)
     tree_consistent_multicolors = tree.get_tree_consistent_multicolors()
     self.assertTrue(tree.multicolors_are_up_to_date)
     self.assertIsInstance(tree_consistent_multicolors, list)
     # compare the contents: assertTrue(a, b) would only check that `a` is
     # truthy and silently use `b` as the failure message
     self.assertListEqual(tree_consistent_multicolors,
                          tree.tree_consistent_multicolors)
     # equal in value, but neither the list nor its entries may be the stored objects
     self.assertIsNot(tree_consistent_multicolors,
                      tree.tree_consistent_multicolors)
     for obtained_mc, stored_mc in zip(tree_consistent_multicolors,
                                       tree.tree_consistent_multicolors):
         self.assertIsNot(obtained_mc, stored_mc)
     self.assertSetEqual(
         {mc.hashable_representation
          for mc in tree_consistent_multicolors},
         tree.tree_consistent_multicolors_set)
     self.assertEqual(len(tree_consistent_multicolors), 16)
     # every expected multicolor is listed next to its complement
     ref_tree_consistent_multicolors = [
         Multicolor(),
         Multicolor(self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4,
                    self.bg_v5),
         Multicolor(self.bg_v1),
         Multicolor(self.bg_v2, self.bg_v3, self.bg_v4, self.bg_v5),
         Multicolor(self.bg_v2),
         Multicolor(self.bg_v1, self.bg_v3, self.bg_v4, self.bg_v5),
         Multicolor(self.bg_v3),
         Multicolor(self.bg_v1, self.bg_v2, self.bg_v4, self.bg_v5),
         Multicolor(self.bg_v4),
         Multicolor(self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v5),
         Multicolor(self.bg_v5),
         Multicolor(self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4),
         Multicolor(self.bg_v1, self.bg_v2),
         Multicolor(self.bg_v3, self.bg_v4, self.bg_v5),
         Multicolor(self.bg_v4, self.bg_v5),
         Multicolor(self.bg_v1, self.bg_v2, self.bg_v3),
     ]
     for multicolor in ref_tree_consistent_multicolors:
         self.assertIn(multicolor, tree_consistent_multicolors)
Exemplo n.º 10
0
 def test_is_multicolor_vtree_consistent_non_binary_tree(self):
     # in a star tree only single leaves and the full leaf set are expected
     # to be vtree consistent; any pair of leaves is not
     tree = BGTree("(v1, v2, v3);")
     consistent_color_sets = (
         (self.bg_v1,),
         (self.bg_v2,),
         (self.bg_v3,),
         (self.bg_v1, self.bg_v2, self.bg_v3),
     )
     for colors in consistent_color_sets:
         self.assertTrue(
             tree.multicolor_is_vtree_consistent(Multicolor(*colors)))
     inconsistent_color_sets = (
         (self.bg_v1, self.bg_v2),
         (self.bg_v1, self.bg_v3),
         (self.bg_v3, self.bg_v2),
     )
     for colors in inconsistent_color_sets:
         self.assertFalse(
             tree.multicolor_is_vtree_consistent(Multicolor(*colors)))
Exemplo n.º 11
0
 def test_is_bgedge_vtree_consistent(self):
     # a single edge object is reused; only its multicolor changes between checks
     v1, v2 = "v1", "v2"
     bgedge = BGEdge(vertex1=v1, vertex2=v2, multicolor=Multicolor())
     # bgedge with an empty multicolor complies with any tree
     bgedge.multicolor = Multicolor()
     self.assertTrue(BGTree("(v1, v2);").bgedge_is_vtree_consistent(bgedge))

     tree = BGTree("(((v1, v2), v3),(v4, v5));")
     g1, g2, g3, g4, g5 = (self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4,
                           self.bg_v5)
     # (colors of the edge multicolor, expected consistency with the tree)
     cases = (
         # simple case: a single leaf
         ((g1,), True),
         # a small v1, v2 subtree, still consistent
         ((g1, g2), True),
         # bigger v1, v2, v3 subtree, still consistent
         ((g1, g2, g3), True),
         # v2, v3 is not a valid subtree
         ((g2, g3), False),
         # genomes that are not present in the tree make a multicolor inconsistent
         ((g1, BGGenome("v6")), False),
         # other cases for a non wgd tree
         ((g1, g2, g3, g4), False),
         ((g1, g2, g3, g5), False),
         ((g1, g2, g4, g5), False),
         ((g1, g3, g4, g5), False),
         ((g2, g3, g4, g5), False),
         ((g1, g2, g3, g4, g5), True),
         ((g5, g4), True),
         ((g3, g4, g5), False),
         ((g3, g5), False),
     )
     for colors, expected in cases:
         bgedge.multicolor = Multicolor(*colors)
         check = self.assertTrue if expected else self.assertFalse
         check(tree.bgedge_is_vtree_consistent(bgedge))
Exemplo n.º 12
0
    def test_has_edge_no_direction(self):
        # without direction an edge must be found whichever end is named first
        tree = BGTree("((v1, v2:5)v3, v4)root;")
        adjacent = (
            (self.v3, self.v1),
            (self.v3, self.v2),
            ("root", self.v3),
            ("root", self.v4),
        )
        for first, second in adjacent:
            for node1, node2 in ((first, second), (second, first)):
                self.assertTrue(
                    tree.has_edge(node1, node2, account_for_direction=False))

        # nodes that are only connected through other nodes share no edge
        non_adjacent = (
            (self.v1, self.v2),
            (self.v3, self.v4),
            (self.v1, self.v4),
        )
        for first, second in non_adjacent:
            for node1, node2 in ((first, second), (second, first)):
                self.assertFalse(
                    tree.has_edge(node1, node2, account_for_direction=False))
Exemplo n.º 13
0
    def test_has_edge_direction(self):
        # by default an edge is only reported in the parent -> child direction
        tree = BGTree("((v1, v2:5)v3, v4)root;")
        parent_child_pairs = (
            (self.v3, self.v1),
            (self.v3, self.v2),
            ("root", self.v3),
            ("root", self.v4),
        )
        for parent, child in parent_child_pairs:
            self.assertTrue(tree.has_edge(parent, child))
            self.assertFalse(tree.has_edge(child, parent))

        # nodes that are only connected through other nodes share no edge
        non_adjacent = (
            (self.v1, self.v2),
            (self.v3, self.v4),
            (self.v1, self.v4),
        )
        for first, second in non_adjacent:
            self.assertFalse(tree.has_edge(first, second))
            self.assertFalse(tree.has_edge(second, first))
Exemplo n.º 14
0
 def test_is_multicolor_tree_consistent(self):
     # tests if supplied multicolor complies with tree topology
     # empty multicolor complies with any tree
     mc = Multicolor()
     self.assertTrue(BGTree().multicolor_is_tree_consistent(mc))

     tree = BGTree("(((v1, v2), v3),(v4, v5));")
     g1, g2, g3, g4, g5 = (self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4,
                           self.bg_v5)
     # (colors of the multicolor, expected consistency with the tree)
     cases = (
         # simple case: a single leaf
         ((g1,), True),
         # a small v1, v2 subtree, still consistent
         ((g1, g2), True),
         # bigger v1, v2, v3 subtree, still consistent
         ((g1, g2, g3), True),
         # v2, v3 is not a valid subtree (its complement is two subtrees, instead of one)
         ((g2, g3), False),
         # genomes that are not present in the tree make a multicolor inconsistent
         ((g1, BGGenome("v6")), False),
         # other cases for a non wgd tree
         ((g1, g2, g3, g4), True),
         ((g1, g2, g3, g5), True),
         ((g1, g2, g4, g5), True),
         ((g1, g3, g4, g5), True),
         ((g2, g3, g4, g5), True),
         ((g1, g2, g3, g4, g5), True),
         ((g5, g4), True),
         ((g3, g4, g5), True),
         ((g3, g5), False),
     )
     for colors, expected in cases:
         check = self.assertTrue if expected else self.assertFalse
         check(tree.multicolor_is_tree_consistent(Multicolor(*colors)))
Exemplo n.º 15
0
 def test_edges_empty_tree(self):
     # a tree created without a newick string reports no edges
     edges = list(BGTree().edges())
     self.assertEqual(len(edges), 0)
Exemplo n.º 16
0
    def test_is_multicolor_vtree_consistent(self):
        # every check below goes through multicolor_is_vtree_consistent; calling
        # multicolor_is_tree_consistent here would leave the vtree variant untested
        mc = Multicolor()
        self.assertTrue(BGTree().multicolor_is_vtree_consistent(mc))

        tree = BGTree("(((v1, v2), v3), (v4, v5));")
        consistent_color_sets = (
            (self.bg_v1,),
            (self.bg_v1, self.bg_v2),
            (self.bg_v4, self.bg_v5),
            (self.bg_v1, self.bg_v2, self.bg_v3),
            (self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4, self.bg_v5),
        )
        for colors in consistent_color_sets:
            self.assertTrue(
                tree.multicolor_is_vtree_consistent(Multicolor(*colors)))

        inconsistent_color_sets = (
            # v2, v3 do not form a subtree
            (self.bg_v2, self.bg_v3),
            # a genome that is not present in the tree
            (self.bg_v1, BGGenome("v6")),
            (self.bg_v3, self.bg_v5),
            # only the complement (v1, v2) of this set is a subtree, which is
            # expected to be insufficient for vtree consistency
            (self.bg_v3, self.bg_v4, self.bg_v5),
        )
        for colors in inconsistent_color_sets:
            self.assertFalse(
                tree.multicolor_is_vtree_consistent(Multicolor(*colors)))
Exemplo n.º 17
0
 def test_append_tree_copy(self):
     # appending with copy=True must leave the appended tree fully independent
     tree1 = BGTree("(v1, v2)root;")
     tree2 = BGTree("(v4, v5)v3;")
     for tree in (tree1, tree2):
         tree.multicolors_are_up_to_date = True
     tree1.append(node_name=self.v1, tree=tree2, copy=True)
     # the receiving tree grew by the appended one and lost its up-to-date flag
     self.assertFalse(tree1.multicolors_are_up_to_date)
     grown_node_count = len(list(tree1.nodes()))
     self.assertEqual(grown_node_count, 6)
     grown_edge_count = len(list(tree1.edges()))
     self.assertEqual(grown_edge_count, 5)
     self.assertTrue(tree1.has_edge(node1_name=self.v1, node2_name=self.v3))
     # the appended tree itself is untouched
     self.assertTrue(tree2.multicolors_are_up_to_date)
     source_node_count = len(list(tree2.nodes()))
     self.assertEqual(source_node_count, 3)
     source_edge_count = len(list(tree2.edges()))
     self.assertEqual(source_edge_count, 2)
     # renaming the copied node in tree1 must not leak into tree2
     copied_v5 = tree1.get_node_by_name("v5")
     copied_v5.name = "new_v5"
     self.assertTrue(tree2.has_node("v5"))