def test_merging_incorrect(self):
    """Merging two edges must raise ValueError when their vertex pairs are not consistent."""
    v1, v2, v3, v4 = (BlockVertex(name) for name in ("v1", "v2", "v3", "v4"))
    base_edge = BGEdge(vertex1=v1, vertex2=v2, multicolor=Multicolor(self.genome3))
    other_multicolor = Multicolor(self.genome2)
    # every pair below differs from (v1, v2) in at least one endpoint
    mismatched_endpoints = [
        (v1, v3),
        (v3, v2),
        (v3, v4),
        (v1, v1),
        (v2, v2),
        (v3, v1),
    ]
    for first, second in mismatched_endpoints:
        candidate = BGEdge(vertex1=first, vertex2=second, multicolor=other_multicolor)
        with self.assertRaises(ValueError):
            BGEdge.merge(base_edge, candidate)
def test_get_vtree_consistent_multicolors(self):
    """The getter must refresh the tree's multicolors and hand back an independent copy of them."""
    tree = BGTree("(((v1, v2), v3),(v4, v5));")
    self.assertFalse(tree.multicolors_are_up_to_date)
    vtree_consistent_multicolors = tree.get_vtree_consistent_multicolors()
    self.assertTrue(tree.multicolors_are_up_to_date)
    self.assertIsInstance(vtree_consistent_multicolors, list)
    # An earlier ``assertTrue(a, b)`` here only checked that ``a`` was truthy
    # (``b`` was taken as the failure message); compare the contents for real.
    # The comparison is order-independent so it does not rely on list ordering.
    self.assertCountEqual(vtree_consistent_multicolors,
                          tree.vtree_consistent_multicolors)
    # the returned list and every multicolor in it must be copies, not the stored objects
    self.assertIsNot(vtree_consistent_multicolors,
                     tree.vtree_consistent_multicolors)
    for obtained_mc, stored_mc in zip(vtree_consistent_multicolors,
                                      tree.vtree_consistent_multicolors):
        self.assertIsNot(obtained_mc, stored_mc)
    self.assertSetEqual(
        {mc.hashable_representation for mc in vtree_consistent_multicolors},
        tree.vtree_consistent_multicolors_set)
    self.assertEqual(len(vtree_consistent_multicolors), 10)
    ref_vtree_consistent_multicolors = [
        Multicolor(),
        Multicolor(self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4, self.bg_v5),
        Multicolor(self.bg_v1),
        Multicolor(self.bg_v2),
        Multicolor(self.bg_v3),
        Multicolor(self.bg_v4),
        Multicolor(self.bg_v5),
        Multicolor(self.bg_v1, self.bg_v2),
        Multicolor(self.bg_v4, self.bg_v5),
        Multicolor(self.bg_v1, self.bg_v2, self.bg_v3),
    ]
    for multicolor in ref_vtree_consistent_multicolors:
        self.assertIn(multicolor, vtree_consistent_multicolors)
def test__gt__and__ge__(self):
    """Pin the outcome of ``>`` / ``>=`` between multicolors and against non-Multicolor operands."""
    # multicolors are compared on the multiplicities of their colors;
    # the table lists (colors of the right operand, expected ``>``, expected ``>=``)
    mc1 = Multicolor(self.genome1, self.genome2, self.genome1)
    expectations = [
        ((self.genome1, self.genome2), True, True),
        ((self.genome1, self.genome1, self.genome2), False, True),
        ((self.genome1, self.genome1, self.genome2, self.genome2), False, False),
    ]
    for colors, strictly_greater, greater_or_equal in expectations:
        mc2 = Multicolor(*colors)
        check_gt = self.assertTrue if strictly_greater else self.assertFalse
        check_ge = self.assertTrue if greater_or_equal else self.assertFalse
        check_gt(mc1 > mc2)
        check_ge(mc1 >= mc2)
    # a Multicolor object is never greater than or equal to a non-Multicolor object
    for foreign_object in (1, (1, ), [1], "1", Mock()):
        self.assertFalse(mc1 >= foreign_object)
        self.assertFalse(mc1 > foreign_object)
def test_hashable_representation(self):
    """Every multicolor exposes a hashable (tuple) representation usable in sets and dicts."""
    genomes = [
        self.genome1, self.genome2, self.genome3, self.genome4,
        self.genome1, self.genome2, self.genome1,
    ]
    mc = Multicolor(*genomes)
    # the representation is expected to be the sorted tuple of all colors, multiplicity included
    expected = tuple(sorted(genomes))
    representation = mc.hashable_representation
    self.assertIsInstance(representation, tuple)
    self.assertTupleEqual(representation, expected)
    # a multicolor rebuilt from its representation equals the source one
    self.assertEqual(mc, Multicolor(*representation))

    # non-equal multicolors shall have different hashable representations
    shorter = Multicolor(*genomes[:-2])
    longer = Multicolor(*genomes[:-1])
    self.assertNotEqual(shorter, longer)
    self.assertNotEqual(shorter.hashable_representation,
                        longer.hashable_representation)

    # hashing the representation must work and agree with the reference tuple
    self.assertEqual(hash(mc.hashable_representation), hash(expected))
def test_equality(self):
    """Edges are equal when they join the same vertex pair, carry the same multicolor and equal data."""
    v1, v2, v3, v4 = (BlockVertex(name) for name in ("v1", "v2", "v3", "v4"))
    color_a = Multicolor(self.genome3)
    color_b = Multicolor(self.genome2)
    reference = BGEdge(vertex1=v1, vertex2=v2, multicolor=color_a)
    other_color = BGEdge(vertex1=v1, vertex2=v2, multicolor=color_b)
    other_end_and_color = BGEdge(vertex1=v1, vertex2=v3, multicolor=color_b)
    disjoint_vertices = BGEdge(vertex1=v3, vertex2=v4, multicolor=color_a)
    different_pairs = [
        (reference, other_color),
        (reference, other_end_and_color),
        (other_color, other_end_and_color),
        (reference, disjoint_vertices),
    ]
    for left, right in different_pairs:
        self.assertNotEqual(left, right)

    # vertex order does not matter for equality
    flipped = BGEdge(vertex1=v2, vertex2=v1, multicolor=color_a)
    duplicate = BGEdge(vertex1=v1, vertex2=v2, multicolor=color_a)
    for left, right in [(reference, flipped), (reference, duplicate), (flipped, duplicate)]:
        self.assertEqual(left, right)
    # comparison with a non-edge object
    self.assertNotEqual(reference, 5)
    self.assertNotEqual(reference,
                        BGEdge(vertex1=v3, vertex2=v1, multicolor=color_a))

    # the data attribute takes part in the comparison as well
    self.assertEqual(reference, flipped)
    flipped.data = {"fragment": {"name": 1}}
    reference.data = {"fragment": {"name": 2}}
    self.assertNotEqual(reference, flipped)
    reference.data = {"fragment": {"name": 1}}
    self.assertEqual(reference, flipped)
def __update_consistent_multicolors(self):
    """Recompute the T-consistent and VT-consistent multicolors for the current tree topology.

    Internal helper: stores, for both families, a set of hashable representations
    and a list of fresh Multicolor objects, then marks the multicolors as up to date.
    """
    vt_multicolors = self.__get_v_tree_consistent_leaf_based_hashable_multicolors()

    vt_hashes = {mc.hashable_representation for mc in vt_multicolors}
    self.vtree_consistent_multicolors_set = vt_hashes
    self.vtree_consistent_multicolors = [Multicolor(*colors) for colors in vt_hashes]

    # T-consistent multicolors can be viewed as VT-consistent multicolors united
    # with all of their complements (taken against the first collected multicolor)
    full_multicolor = vt_multicolors[0]
    tree_hashes = set()
    for mc in vt_multicolors:
        tree_hashes.add(mc.hashable_representation)
        tree_hashes.add((full_multicolor - mc).hashable_representation)
    self.tree_consistent_multicolors_set = tree_hashes
    self.tree_consistent_multicolors = [Multicolor(*colors) for colors in tree_hashes]

    self.multicolors_are_up_to_date = True
def test_is_multicolor_vtree_consistent(self):
    """VT-consistency: only multicolors matching a subtree's leaf set (or the empty one) qualify."""
    self.assertTrue(BGTree().multicolor_is_vtree_consistent(Multicolor()))
    tree = BGTree("(((v1, v2), v3), (v4, v5));")
    # These assertions used to call ``multicolor_is_tree_consistent``, leaving the
    # VT-consistency check untested on a non-trivial tree; exercise the method
    # this test is named after.
    consistent = [
        Multicolor(self.bg_v1),
        Multicolor(self.bg_v1, self.bg_v2),
        Multicolor(self.bg_v4, self.bg_v5),
        Multicolor(self.bg_v1, self.bg_v2, self.bg_v3),
        Multicolor(self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4, self.bg_v5),
    ]
    for multicolor in consistent:
        self.assertTrue(tree.multicolor_is_vtree_consistent(multicolor))
    inconsistent = [
        Multicolor(self.bg_v2, self.bg_v3),
        Multicolor(self.bg_v1, BGGenome("v6")),  # "v6" is not a leaf of the tree
        Multicolor(self.bg_v3, self.bg_v5),
    ]
    for multicolor in inconsistent:
        self.assertFalse(tree.multicolor_is_vtree_consistent(multicolor))
def run(self, manager):
    """Read block orders, the phylogenetic tree, target multicolors and repeat guidance.

    Everything is read according to ``manager.configuration["gos-asm"]["input"]``
    and stored under ``manager.data["gos-asm"]``.
    """
    manager.logger.info("Reading blocks orders data")
    block_order_paths = manager.configuration["gos-asm"]["input"]["block_orders_file_paths"]
    bg = BreakpointGraph()
    for path in block_order_paths:
        with open(path, "rt") as source:
            partial_bg = GRIMMReader.get_breakpoint_graph(stream=source, merge_edges=False)
            bg.update(breakpoint_graph=partial_bg, merge_edges=False)
    manager.data["gos-asm"]["bg"] = bg

    manager.logger.info("Reading phylogenetic tree information")
    tree = BGTree(newick=manager.configuration["gos-asm"]["input"]["phylogenetic_tree"])
    manager.data["gos-asm"]["phylogenetic_tree"] = tree

    target_genomes = [BGGenome(genome_name)
                      for genome_name in manager.configuration["gos-asm"]["input"]["target_organisms"]]
    full_tmc = Multicolor(*target_genomes)
    manager.data["gos-asm"]["target_multicolor"] = full_tmc

    targets = Multicolor.split_colors(full_tmc,
                                      guidance=tree.vtree_consistent_multicolors,
                                      account_for_color_multiplicity_in_guidance=False)
    # extend the split result with every non-empty VT-consistent multicolor that
    # is <= one of the initially obtained targets (iterating over a snapshot,
    # since the list grows inside the loop)
    for target in targets[:]:
        for candidate in deepcopy(tree.vtree_consistent_multicolors):
            if not (candidate <= target):
                continue
            if candidate in targets:
                continue
            if len(candidate.colors) == 0:
                continue
            targets.append(candidate)

    def color_count(mc):
        return len(mc.hashable_representation)

    targets = sorted(targets, key=color_count, reverse=True)
    all_target_multicolors = targets[:]
    # for i in range(2, len(vtree_consistent_target_multicolors) + 1):
    #     for comb in itertools.combinations(vtree_consistent_target_multicolors[:], i):
    #         comb = list(comb)
    #         for mc1, mc2 in itertools.combinations(comb, 2):
    #             if len(mc1.intersect(mc2).colors) > 0:
    #                 break
    #         else:
    #             new_mc = Multicolor()
    #             for mc in comb:
    #                 new_mc += mc
    #             all_target_multicolors.append(new_mc)

    # de-duplicate through hashable representations, then order by decreasing size
    unique_hashes = {mc.hashable_representation for mc in all_target_multicolors}
    all_target_multicolors = sorted((Multicolor(*colors) for colors in unique_hashes),
                                    key=color_count, reverse=True)
    manager.data["gos-asm"]["target_multicolors"] = all_target_multicolors
    # log_bg_stats(bg=bg, logger=manager.logger)

    manager.logger.info("Reading repeats-bridges information")
    repeats_guidance = get_repeats_bridges_guidance(
        file_name=manager.configuration["gos-asm"]["input"]["repeats_bridges_file"],
        data=manager.data)
    manager.data["gos-asm"]["repeats_guidance"] = repeats_guidance
def test_single_initialization(self):
    """A multicolor built from one genome holds exactly that color with multiplicity one."""
    # the same scenario is checked twice, each time on a freshly built instance
    for _ in range(2):
        single = Multicolor(self.genome1)
        self.assertEqual(len(single.colors), 1)
        self.assertEqual(len(single.multicolors), 1)
        self.assertSetEqual({self.genome1}, single.colors)
        self.assertEqual(single.multicolors[self.genome1], 1)
def test_get_tree_consistent_multicolors_with_non_default_leaf_wrapper(self):
    """With an identity leaf wrapper the tree-consistent multicolors are built from raw leaf names."""

    def identity(name):
        return name

    tree = BGTree("(v1, v2)root;", leaf_wrapper=identity)
    obtained = tree.get_tree_consistent_multicolors()
    expected = [
        Multicolor(self.v1),
        Multicolor(self.v2),
        Multicolor(),
        Multicolor(self.v1, self.v2),
    ]
    self.assertEqual(len(obtained), 4)
    for multicolor in obtained:
        self.assertIn(multicolor, expected)
def test_iter_over_colors_json_ids(self):
    """``colors_json_ids`` lists one id per color occurrence, so multiplicity is preserved.

    A color contributes its ``json_id`` when it has one and its ``hash`` otherwise.
    """
    mock1, mock2 = Mock(), Mock()
    mock1.json_id = 5
    mock2.json_id = 6
    scenarios = [
        # colors are BGGenome objects only
        ([self.genome1, self.genome1, self.genome2, self.genome3, self.genome2], 5),
        # colors are plain hashable objects without a json_id attribute
        (["red", "red", "green", "black", "yellow", "green"], 6),
        # mixed: BGGenome objects, plain hashables, non-BGGenome objects with json_id
        ([self.genome1, mock1, self.genome2, "black", mock2, self.genome2], 6),
    ]
    for colors, expected_size in scenarios:
        bgedge = BGEdge(vertex1=BlockVertex("v1"),
                        vertex2=BlockVertex("v2"),
                        multicolor=Multicolor(*colors))
        json_ids = bgedge.colors_json_ids
        self.assertIsInstance(json_ids, list)
        self.assertEqual(len(json_ids), expected_size)
        expected_counts = Counter(
            color.json_id if hasattr(color, "json_id") else hash(color)
            for color in colors)
        self.assertDictEqual(expected_counts, Counter(json_ids))
def test_get_breakpoint_from_file_with_comment_data_string(self):
    """Fragment data given in ``# data ::`` comment lines must end up on the parsed edges."""
    grimm_lines = [
        "", "\t", "#comment1", ">genome_name_1", " #comment1",
        "# data :: fragment : name = chromosome_X", "a b $",
        " #comment1 ", "\t>genome_name_2",
        "#data::fragment:name=scaffold111", "a $", "", "\n\t"
    ]
    stream = io.StringIO("\n".join(grimm_lines))
    result_bg = GRIMMReader.get_breakpoint_graph(stream, merge_edges=False)
    self.assertIsInstance(result_bg, BreakpointGraph)
    self.assertEqual(len(list(result_bg.connected_components_subgraphs())), 3)
    self.assertEqual(len(list(result_bg.edges())), 5)
    self.assertEqual(len(list(result_bg.nodes())), 7)

    genome_1 = BGGenome("genome_name_1")
    genome_2 = BGGenome("genome_name_2")
    plain_multicolors = [Multicolor(genome_1), Multicolor(genome_2)]
    condensed_multicolors = [
        Multicolor(genome_1),
        Multicolor(genome_2),
        Multicolor(genome_1, genome_2),
    ]
    for bgedge in result_bg.edges():
        self.assertIn(bgedge.multicolor, plain_multicolors)
    for bgedge in result_bg.edges():
        condensed = result_bg.get_condensed_edge(vertex1=bgedge.vertex1,
                                                 vertex2=bgedge.vertex2)
        self.assertIn(condensed.multicolor, condensed_multicolors)
    infinity_edge_count = sum(1 for bgedge in result_bg.edges()
                              if bgedge.is_infinity_edge)
    self.assertEqual(infinity_edge_count, 4)

    # every edge carries the fragment name announced in the comment data string
    for bgedge in result_bg.edges():
        edge_data = bgedge.data
        self.assertIn("fragment", edge_data)
        self.assertIsInstance(edge_data["fragment"], dict)
        self.assertIn("name", edge_data["fragment"])
        self.assertIn(edge_data["fragment"]["name"],
                      {"chromosome_X", "scaffold111"})

    # the forward orientation is recorded for a regular and for an infinity edge
    ah = result_bg.get_vertex_by_name("ah")
    bt = result_bg.get_vertex_by_name("bt")
    ahi = result_bg.get_vertex_by_name("ah__infinity")
    for partner in (bt, ahi):
        edge = result_bg.get_edge_by_two_vertices(vertex1=ah, vertex2=partner)
        self.assertTupleEqual(edge.data["fragment"]["forward_orientation"],
                              (ah, partner))
def test_data_update_non_dict_source(self):
    """``update_data`` must reject any source that is not a dict with a ValueError."""
    initial_data = {"fragment": {"name": "scaffold2", "origin": "test"}}
    edge = BGEdge(vertex1=TaggedBlockVertex("v1"),
                  vertex2=TaggedBlockVertex("v2"),
                  multicolor=Multicolor(self.genome1, self.genome2),
                  data=initial_data)
    invalid_sources = (1, "2", Multicolor(), (1, ), [2])
    for invalid_source in invalid_sources:
        with self.assertRaises(ValueError):
            edge.update_data(source=invalid_source)
def test_update(self):
    """``update`` adds colors and/or raises their multiplicity, modifying the multicolor in place."""
    all_three = {self.genome1, self.genome2, self.genome3}
    mc = Multicolor()
    self.assertSetEqual(set(), mc.colors)

    # a single new color
    mc.update(self.genome1)
    self.assertEqual(len(mc.colors), 1)
    self.assertEqual(len(mc.multicolors), 1)
    self.assertSetEqual({self.genome1}, mc.colors)

    # several new colors at once
    mc.update(self.genome2, self.genome3)
    self.assertEqual(len(mc.colors), 3)
    self.assertEqual(len(mc.multicolors), 3)
    self.assertSetEqual(all_three, mc.colors)
    for color in mc.multicolors:
        self.assertEqual(mc.multicolors[color], 1)

    # an already present color only gets its multiplicity increased
    mc.update(self.genome1)
    self.assertEqual(len(mc.colors), 3)
    self.assertEqual(len(mc.multicolors), 3)
    for color in mc.multicolors:
        expected_multiplicity = 2 if color == self.genome1 else 1
        self.assertEqual(mc.multicolors[color], expected_multiplicity)
    self.assertSetEqual(all_three, mc.colors)
def test_merging_correct(self):
    """Merging two edges over the same vertex pair yields an edge carrying both multicolors."""
    # vertex ordering is not a concern, since edges in a BreakpointGraph are not directed
    start, end = BlockVertex("v1"), BlockVertex("v2")
    first_color = Multicolor(self.genome3)
    second_color = Multicolor(self.genome2)
    merged_edge = BGEdge.merge(
        BGEdge(vertex1=start, vertex2=end, multicolor=first_color),
        BGEdge(vertex1=start, vertex2=end, multicolor=second_color))
    self.assertEqual(merged_edge.vertex1, start)
    self.assertEqual(merged_edge.vertex2, end)
    self.assertEqual(merged_edge.multicolor, first_color + second_color)
def test_json_serialization(self):
    """``to_json`` emits vertex ids, the color ids (with multiplicity) and, optionally, schema info."""
    v1, v2 = BlockVertex("v1"), BlockVertex("v2")
    color1 = BGGenome("genome1")

    # simple case of serialization: single color, no multiplicity, no schema info
    edge = BGEdge(vertex1=v1, vertex2=v2, multicolor=Multicolor(color1))
    expected = {
        "vertex1_id": v1.json_id,
        "vertex2_id": v2.json_id,
        "multicolor": [color1.json_id]
    }
    self.assertDictEqual(edge.to_json(schema_info=False), expected)

    # multiple colors, each with multiplicity 1; schema info is included by default
    color2 = BGGenome("genome2")
    edge = BGEdge(vertex1=v1, vertex2=v2, multicolor=Multicolor(color1, color2))
    serialized = edge.to_json()
    self.assertIsInstance(serialized, dict)
    self.assertEqual(serialized["vertex1_id"], v1.json_id)
    self.assertEqual(serialized["vertex2_id"], v2.json_id)
    self.assertSetEqual(set(serialized["multicolor"]),
                        {color1.json_id, color2.json_id})
    self.assertEqual(serialized[BGEdge_JSON_SCHEMA_JSON_KEY],
                     edge.json_schema_name)

    # multiple colors, multiplicity both equal to and greater than 1
    color3 = BGGenome("genome3")
    repeated = Multicolor(color1, color1, color1, color2, color2, color3)
    edge = BGEdge(vertex1=v1, vertex2=v2, multicolor=repeated)
    serialized = edge.to_json(schema_info=False)
    self.assertIsInstance(serialized, dict)
    self.assertEqual(serialized["vertex1_id"], v1.json_id)
    self.assertEqual(serialized["vertex2_id"], v2.json_id)
    self.assertSetEqual(set(serialized["multicolor"]),
                        {color1.json_id, color2.json_id, color3.json_id})
    expected_counts = Counter(color.json_id
                              for color in repeated.multicolors.elements())
    self.assertDictEqual(Counter(serialized["multicolor"]), expected_counts)

    # weird case: a vertex attribute is not a BGVertex instance and has no
    # "json_id" attribute, its hash is expected instead
    edge = BGEdge(vertex1=v1, vertex2=1, multicolor=Multicolor(color1))
    serialized = edge.to_json()
    self.assertIsInstance(serialized, dict)
    self.assertEqual(serialized["vertex1_id"], v1.json_id)
    self.assertEqual(serialized["vertex2_id"], hash(1))
    self.assertListEqual(serialized["multicolor"], [color1.json_id])
    self.assertEqual(serialized[BGEdge_JSON_SCHEMA_JSON_KEY],
                     edge.json_schema_name)
def test__sub__(self):
    """``-`` produces a new Multicolor with the right operand's colors removed; non-Multicolor operands raise TypeError."""
    minuend = Multicolor(self.genome1, self.genome3, self.genome1, self.genome2)
    subtrahend = Multicolor(self.genome3, self.genome2, self.genome5)

    difference = minuend - subtrahend
    self.assertEqual(len(difference.colors), 1)
    self.assertEqual(len(difference.multicolors), 1)
    self.assertEqual(difference.multicolors[self.genome1], 2)
    self.assertSetEqual({self.genome1}, difference.colors)

    # subtracting from an empty multicolor leaves it empty
    self.assertEqual(Multicolor() - subtrahend, Multicolor())

    with self.assertRaises(TypeError):
        minuend - 5
def test__isub__(self):
    """``-=`` removes colors in place and accepts only a Multicolor; anything else raises TypeError."""
    target = Multicolor(self.genome1, self.genome3, self.genome1, self.genome2)
    removed = Multicolor(self.genome3, self.genome2, self.genome5)
    id_before = id(target)

    target -= removed
    self.assertEqual(len(target.colors), 1)
    self.assertEqual(len(target.multicolors), 1)
    self.assertEqual(target.multicolors[self.genome1], 2)
    self.assertSetEqual({self.genome1}, target.colors)
    # the operation must not rebind the name to a different object
    self.assertEqual(id(target), id_before)

    with self.assertRaises(TypeError):
        target -= 5
def test_initialization_non_empty_data_attribute(self):
    """Data supplied to the BGEdge constructor is exposed through the ``data`` attribute."""
    fragment_data = {"fragment": {"name": "scaffold1"}}
    edge = BGEdge(vertex1=TaggedBlockVertex("v1"),
                  vertex2=TaggedBlockVertex("v2"),
                  multicolor=Multicolor(self.genome1),
                  data=fragment_data)
    self.assertDictEqual(edge.data, fragment_data)
def test_get_fragments_grimm_from_breakpoint_graph_single_genome_with_repeat_based_merges(self):
    """After a k-break joins scaffold1 and scaffold2 over their repeat ends, the GRIMM output reflects the merge."""
    grimm_lines = [
        ">Mouse", "# data :: fragment : name = scaffold1",
        "1 ALC__repeat $", "# data :: fragment : name = scaffold2",
        "ALC__repeat 2 $", "# data :: fragment : name = scaffold3",
        "ALC__repeat 3 $"
    ]
    bg = self._populate_bg(data=grimm_lines)
    repeat_end_1 = bg.get_vertex_by_name("1h__repeat:ALCt__infinity")
    repeat_end_2 = bg.get_vertex_by_name("2t__repeat:ALCh__infinity")
    block_end_1 = bg.get_vertex_by_name("1h")
    block_end_2 = bg.get_vertex_by_name("2t")
    merging_kbreak = KBreak(
        start_edges=[(block_end_1, repeat_end_1), (block_end_2, repeat_end_2)],
        result_edges=[(block_end_1, block_end_2), (repeat_end_1, repeat_end_2)],
        multicolor=Multicolor(BGGenome("Mouse")))
    bg.apply_kbreak(kbreak=merging_kbreak)
    grimm_strings = GRIMMWriter.get_fragments_in_grimm_from_breakpoint_graph(bg=bg)
    # each fragment chain may be reported in either direction
    acceptable_outputs = (
        ["scaffold1 scaffold2 $", "-scaffold2 -scaffold1 $"],
        ["scaffold3 $", "-scaffold3 $"],
    )
    for alternatives in acceptable_outputs:
        self.assertTrue(any(entry in grimm_strings for entry in alternatives))
def from_assembly_points_file(cls, separated_values):
    """Build an instance of ``cls`` from one parsed record of an assembly points file.

    :param separated_values: mapping from column name to raw string value. The
        columns ``genome``, ``repeat1 - repeat2``, ``s_edge``, ``repeat_guidance``,
        ``MC``, ``id``, ``v1 - v2`` and ``fragment1 - fragment2`` are required;
        ``cc_id`` is optional.
    :return: a new ``cls`` instance whose ``info`` attribute is a filled
        ``AssemblyPointInfo`` object
    :raises KeyError: if a required column is missing
    :raises ValueError: if a paired column does not split into exactly two parts on " - "
    """
    info = AssemblyPointInfo()
    info.target_color = Multicolor(BGGenome(separated_values["genome"].strip()))

    repeat1, repeat2 = separated_values["repeat1 - repeat2"].strip().split(" - ")
    # the last three characters of each repeat entry are cut off the name and the
    # middle one of them is taken as the direction
    # NOTE(review): presumably entries look like "<name>(<dir>)" -- confirm against the writer
    repeat_info = {
        "repeat_name_1": repeat1[:-3],
        "repeat_dir_1": repeat1[-2],
        "repeat_name_2": repeat2[:-3],
        "repeat_dir_2": repeat2[-2]
    }
    info.support_edge = separated_values["s_edge"]
    repeat_info["repeat_guidance"] = separated_values["repeat_guidance"].strip().split(", ")
    info.repeat_info = repeat_info
    info.target_multicolor = Multicolor(
        *[BGGenome(entry) for entry in separated_values["MC"].strip().split(", ")])

    result = cls()
    result.id = separated_values["id"].strip()
    # "cc_id" is optional: calling .strip() on the missing value used to raise
    # AttributeError, now an absent column simply yields None
    cc_id = separated_values.get("cc_id")
    result.cc_id = cc_id.strip() if cc_id is not None else None

    vertex1, vertex2 = separated_values["v1 - v2"].split(" - ")
    result.vertex1 = vertex1.strip()
    result.vertex2 = vertex2.strip()

    fragment1, fragment2 = separated_values["fragment1 - fragment2"].strip().split(" - ")
    fragment1, fragment2 = fragment1.strip(), fragment2.strip()
    # a leading "-" encodes the sign: record it separately and keep the bare name
    result.fragment1_sign = "-" if fragment1.startswith("-") else "+"
    result.fragment2_sign = "-" if fragment2.startswith("-") else "+"
    result.fragment1 = fragment1[1:] if fragment1.startswith("-") else fragment1
    result.fragment2 = fragment2[1:] if fragment2.startswith("-") else fragment2

    result.info = info
    return result
def __init__(self,
             newick=None,
             newick_format=1,
             dist=DEFAULT_EDGE_LENGTH,
             leaf_wrapper=BGGenome):
    """Create the underlying tree and initialise the consistent-multicolor caches.

    :param newick: passed to ``Tree`` as ``newick``
    :param newick_format: passed to ``Tree`` as ``format``
    :param dist: passed to ``Tree`` as ``dist``
    :param leaf_wrapper: a callable that is called with a leaf name; its result
        is used as a color when Multicolor objects are built
    """
    self.tree = Tree(newick=newick, format=newick_format, dist=dist)
    self.__root = self.tree
    self.__leaf_wrapper = leaf_wrapper
    # caches start with the empty multicolor only and are flagged as stale
    self.multicolors_are_up_to_date = False
    empty_hash = Multicolor().hashable_representation
    self.__tree_consistent_multicolors_set = {empty_hash}
    self.__tree_consistent_multicolors = [Multicolor()]
    self.__vtree_consistent_multicolors_set = {empty_hash}
    self.__vtree_consistent_multicolors = [Multicolor()]
def test__lt__and__le__(self):
    """Pin the outcome of ``<`` / ``<=`` between multicolors and against a non-Multicolor operand."""
    # multicolors are compared on the multiplicities of their colors
    bigger = Multicolor(self.genome1, self.genome2, self.genome1)

    smaller = Multicolor(self.genome1, self.genome2)
    self.assertTrue(smaller < bigger)
    self.assertTrue(smaller <= bigger)
    # comparison against a non-Multicolor object is never satisfied
    self.assertFalse(smaller <= 5)
    self.assertFalse(smaller < 5)

    # (colors of the left operand, expected ``<``, expected ``<=``)
    remaining_cases = [
        ((self.genome1, self.genome1, self.genome2), False, True),
        ((self.genome1, self.genome1, self.genome2, self.genome2), False, False),
    ]
    for colors, strictly_less, less_or_equal in remaining_cases:
        left = Multicolor(*colors)
        check_lt = self.assertTrue if strictly_less else self.assertFalse
        check_le = self.assertTrue if less_or_equal else self.assertFalse
        check_lt(left < bigger)
        check_le(left <= bigger)
def test_initialization(self):
    """A BGEdge exposes the vertices and the multicolor it was constructed with."""
    start, end = BlockVertex("v1"), BlockVertex("v2")
    color = Multicolor(self.genome3)
    edge = BGEdge(vertex1=start, vertex2=end, multicolor=color)
    for actual, expected in ((edge.vertex1, start),
                             (edge.vertex2, end),
                             (edge.multicolor, color)):
        self.assertEqual(actual, expected)
def test__add__(self):
    """``+`` combines colors and multiplicities of two multicolors; non-Multicolor operands raise TypeError."""
    left = Multicolor(self.genome1, self.genome2)
    right = Multicolor(self.genome3, self.genome5, self.genome1)
    total = left + right
    self.assertEqual(len(total.colors), 4)
    self.assertEqual(len(total.multicolors), 4)
    self.assertSetEqual(
        {self.genome1, self.genome2, self.genome3, self.genome5},
        total.colors)
    # genome1 is present in both operands, every other color in just one of them
    for color in total.multicolors:
        expected_multiplicity = 2 if color == self.genome1 else 1
        self.assertEqual(total.multicolors[color], expected_multiplicity)
    with self.assertRaises(TypeError):
        left + 5
def __get_v_tree_consistent_leaf_based_hashable_multicolors(self):
    """Collect one Multicolor per tree node plus the empty Multicolor (internal use).

    Nodes are visited breadth-first starting at the root, so the first element of
    the result corresponds to the root. A leaf yields the multicolor of its own
    wrapped name, an inner node the multicolor of all wrapped leaf names below it.
    """
    collected = []
    pending = deque([self.__root])
    while pending:
        node = pending.popleft()
        pending.extend(node.children)
        if node.is_leaf():
            collected.append(Multicolor(self.__leaf_wrapper(node.name)))
            continue
        wrapped_leaves = [self.__leaf_wrapper(descendant.name)
                          for descendant in node.get_descendants()
                          if descendant.is_leaf()]
        collected.append(Multicolor(*wrapped_leaves))
    collected.append(Multicolor())
    return collected
def get_full_irregular_multicolor(vertex, data, graph=None):
    """Accumulate the multicolors of all irregular edges incident to ``vertex``.

    :param vertex: vertex whose incident edges are inspected
    :param data: used for the ``data["gos-asm"]["bg"]`` lookup when ``graph`` is None
    :param graph: optional graph to use instead of the one stored in ``data``
    :return: a Multicolor holding the sum of the irregular edges' multicolors
    """
    bg = data["gos-asm"]["bg"] if graph is None else graph
    accumulated = Multicolor()
    for edge in bg.get_edges_by_vertex(vertex):
        if not edge.is_irregular_edge:
            continue
        accumulated += edge.multicolor
    return accumulated
def get_full_support_edge(regular_vertex1, regular_vertex2, data):
    """Build a single BGEdge between two vertices from the colors of all edges joining them.

    The graph is taken from ``data["gos-asm"]["bg"]``. Only the ``colors`` of each
    edge's multicolor are read, and they are passed one by one to ``Multicolor``.
    """
    bg = data["gos-asm"]["bg"]
    gathered_colors = []
    for bgedge in bg.edges_between_two_vertices(vertex1=regular_vertex1,
                                                vertex2=regular_vertex2):
        gathered_colors.extend(bgedge.multicolor.colors)
    return BGEdge(vertex1=regular_vertex1,
                  vertex2=regular_vertex2,
                  multicolor=Multicolor(*gathered_colors))
def test__mull__(self):
    """``*`` by an integer scales every color multiplicity; empty stays empty, zero empties anything."""
    # empty multicolor shall be kept as is regardless of multiplier
    empty = Multicolor()
    for multiplier in range(10):
        self.assertEqual(empty * multiplier, Multicolor())

    # multiplying by 0 shall make any multicolor an empty one
    zeroed_inputs = (
        (self.genome1, ),
        (self.genome1, self.genome2, self.genome3),
        (self.genome1, self.genome2, self.genome1),
    )
    for colors in zeroed_inputs:
        self.assertEqual(Multicolor(*colors) * 0, Multicolor())

    # multiplying by an integer shall multiply each color multiplicity respectively;
    # the reference is grown by one more copy of the multicolor per multiplier step
    mc = Multicolor(self.genome1, self.genome2, self.genome3,
                    self.genome1, self.genome2, self.genome1)
    reference = Multicolor()
    for multiplier in range(1, 50):
        reference += mc
        self.assertEqual(mc * multiplier, reference)
def test_multiple_initialization(self):
    """Initialization with several genomes, with multiplicity one and greater than one."""
    distinct_genomes = [self.genome1, self.genome2, self.genome3]
    # positional arguments and an unpacked list must give the same result
    for mc in (Multicolor(self.genome1, self.genome2, self.genome3),
               Multicolor(*distinct_genomes)):
        self.assertEqual(len(mc.colors), 3)
        self.assertEqual(len(mc.multicolors), 3)
        self.assertSetEqual(set(distinct_genomes), mc.colors)
        for color in mc.multicolors:
            self.assertEqual(mc.multicolors[color], 1)

    # a repeated genome counts once as a color and twice in multiplicity
    repeated = Multicolor(self.genome1, self.genome2, self.genome1)
    self.assertEqual(len(repeated.colors), 2)
    self.assertEqual(repeated.multicolors[self.genome2], 1)
    self.assertEqual(repeated.multicolors[self.genome1], 2)