示例#1
0
 def test_iter_over_colors_json_ids(self):
     # when multiedge is serialized into json, the colors in it are referenced by their ids
     # and the multiplicity of colors has to be preserved
     #
     # three kinds of color collections are checked, each paired with the function
     # that yields the id expected for a single color:
     #   1. the ``self.genomeN`` fixtures only -> their ``json_id`` attribute
     #   2. hashable objects without a ``json_id`` attribute -> their ``hash``
     #   3. a mixture: fixtures, plain hashables and non-genome objects exposing ``json_id``
     mock1, mock2 = Mock(), Mock()
     mock1.json_id = 5
     mock2.json_id = 6
     cases = [
         (
             [
                 self.genome1, self.genome1, self.genome2, self.genome3,
                 self.genome2
             ],
             lambda color: color.json_id,
         ),
         (
             ["red", "red", "green", "black", "yellow", "green"],
             hash,
         ),
         (
             [
                 self.genome1, mock1, self.genome2, "black", mock2,
                 self.genome2
             ],
             lambda color: color.json_id
             if hasattr(color, "json_id") else hash(color),
         ),
     ]
     for colors, expected_id in cases:
         v1 = BlockVertex("v1")
         v2 = BlockVertex("v2")
         multicolor = Multicolor(*colors)
         bgedge = BGEdge(vertex1=v1, vertex2=v2, multicolor=multicolor)
         json_ids = bgedge.colors_json_ids
         # assertIsInstance reports the offending type on failure
         self.assertIsInstance(json_ids, list)
         # one id per supplied color occurrence (5, 6 and 6 entries respectively)
         self.assertEqual(len(json_ids), len(colors))
         ref_json_ids = Counter(expected_id(color) for color in colors)
         res_json_ids = Counter(json_ids)
         self.assertDictEqual(ref_json_ids, res_json_ids)
示例#2
0
 def test_data_update_non_dict_source(self):
     # ``update_data`` is expected to raise a ValueError for every source that is not a dict
     initial_data = {"fragment": {"name": "scaffold2", "origin": "test"}}
     edge = BGEdge(vertex1=TaggedBlockVertex("v1"),
                   vertex2=TaggedBlockVertex("v2"),
                   multicolor=Multicolor(self.genome1, self.genome2),
                   data=initial_data)
     # an int, a str, a Multicolor, a tuple and a list
     invalid_sources = (1, "2", Multicolor(), (1, ), [2])
     for invalid_source in invalid_sources:
         self.assertRaises(ValueError,
                           edge.update_data,
                           source=invalid_source)
示例#3
0
 def test_is_irregular_edge(self):
     # an edge is called an irregular edge if at least one of its vertices is an irregular vertex
     block_a = BlockVertex("v1")
     block_b = BlockVertex("v2")
     infinity = InfinityVertex("v3")
     color = Multicolor(self.genome3)
     endpoints = (
         (block_a, block_b),
         (block_a, infinity),
         (infinity, block_a),
         (infinity, infinity),
     )
     both_block, *with_infinity = [
         BGEdge(vertex1=first, vertex2=second, multicolor=color)
         for first, second in endpoints
     ]
     # two block vertices: the edge is not reported as irregular
     self.assertFalse(both_block.is_irregular_edge)
     # an InfinityVertex on either side, or on both sides, makes the edge irregular
     for irregular_edge in with_infinity:
         self.assertTrue(irregular_edge.is_irregular_edge)
示例#4
0
 def test_merging_correct(self):
     # two BGEdges can be merged together into a third, separate BGEdge
     # that carries the colors (with multiplicities) of both supplied BGEdges
     # such a merge is allowed only if both BGEdges connect the same pair of vertices
     # ordering of vertices is not a concern, since edges in BreakpointGraph are not directed
     left_vertex = BlockVertex("v1")
     right_vertex = BlockVertex("v2")
     first_color = Multicolor(self.genome3)
     second_color = Multicolor(self.genome2)
     first_edge = BGEdge(vertex1=left_vertex,
                         vertex2=right_vertex,
                         multicolor=first_color)
     second_edge = BGEdge(vertex1=left_vertex,
                          vertex2=right_vertex,
                          multicolor=second_color)
     merged_edge = BGEdge.merge(first_edge, second_edge)
     # vertices are kept as they were in the merged edges
     self.assertEqual(merged_edge.vertex1, left_vertex)
     self.assertEqual(merged_edge.vertex2, right_vertex)
     # the resulting multicolor is the sum of the two source multicolors
     self.assertEqual(merged_edge.multicolor, first_color + second_color)
示例#5
0
 def test_initialization_non_empty_data_attribute(self):
     # data supplied at construction time has to be available through the ``data`` attribute
     supplied_data = {"fragment": {"name": "scaffold1"}}
     edge = BGEdge(vertex1=TaggedBlockVertex("v1"),
                   vertex2=TaggedBlockVertex("v2"),
                   multicolor=Multicolor(self.genome1),
                   data=supplied_data)
     self.assertDictEqual(edge.data, supplied_data)
示例#6
0
 def test_initialization(self):
     # simple correct initialization of BGEdge instance:
     # every constructor argument has to be readable back from the attribute of the same name
     expected = {
         "vertex1": BlockVertex("v1"),
         "vertex2": BlockVertex("v2"),
         "multicolor": Multicolor(self.genome3),
     }
     edge = BGEdge(**expected)
     for attribute in ("vertex1", "vertex2", "multicolor"):
         self.assertEqual(getattr(edge, attribute), expected[attribute])
示例#7
0
 def test_data_update_empty_source(self):
     # after an update from an empty dict the edge data has to equal the data it was created with
     edge = BGEdge(vertex1=TaggedBlockVertex("v1"),
                   vertex2=TaggedBlockVertex("v2"),
                   multicolor=Multicolor(self.genome1, self.genome2),
                   data={"fragment": {"name": "scaffold2", "origin": "test"}})
     edge.update_data(source={})
     # the expectation is a separate literal, so it can not alias the dict handed to the edge
     expected_data = {"fragment": {"name": "scaffold2", "origin": "test"}}
     data_is_unchanged = dicts_are_equal(edge.data, expected_data)
     self.assertTrue(data_is_unchanged)
示例#8
0
 def test_data_update(self):
     # an update overrides the values present in the source ("name")
     # and keeps the values the source does not mention ("origin")
     new_values = {"fragment": {"name": "scaffold11"}}
     edge = BGEdge(vertex1=TaggedBlockVertex("v1"),
                   vertex2=TaggedBlockVertex("v2"),
                   multicolor=Multicolor(self.genome1, self.genome2),
                   data={"fragment": {"name": "scaffold2", "origin": "test"}})
     edge.update_data(source=new_values)
     # structure of the data: a dict with a nested "fragment" dict
     self.assertIsInstance(edge.data, dict)
     self.assertIn("fragment", edge.data)
     self.assertIsInstance(edge.data["fragment"], dict)
     for key in ("name", "origin"):
         self.assertIn(key, edge.data["fragment"])
     # content of the nested dict
     expected_fragment = (("name", "scaffold11"), ("origin", "test"))
     for key, expected_value in expected_fragment:
         self.assertEqual(edge.data["fragment"][key], expected_value)
示例#9
0
def get_full_support_edge(regular_vertex1, regular_vertex2, data):
    """ Builds a BGEdge between two vertices that carries every color found between them

    Colors are gathered from ``multicolor.colors`` of each edge that the graph stored in
    ``data["gos-asm"]["bg"]`` reports between the two supplied vertices.

    :param regular_vertex1: first vertex of the resulting edge
    :param regular_vertex2: second vertex of the resulting edge
    :param data: container with the graph available as ``data["gos-asm"]["bg"]``
    :return: a new BGEdge with the collected colors as its multicolor
    """
    breakpoint_graph = data["gos-asm"]["bg"]
    parallel_edges = breakpoint_graph.edges_between_two_vertices(
        vertex1=regular_vertex1, vertex2=regular_vertex2)
    supporting_colors = []
    for parallel_edge in parallel_edges:
        supporting_colors.extend(parallel_edge.multicolor.colors)
    return BGEdge(vertex1=regular_vertex1,
                  vertex2=regular_vertex2,
                  multicolor=Multicolor(*supporting_colors))
示例#10
0
    def test_json_deserialization_supplied_schema(self):
        # when a schema class is supplied it shall be used for deserialization
        class BGEdgeJSONSchemeDefaultVertex1(BGEdge.BGEdgeJSONSchema):
            # "vertex1_id" is declared optional here, with 1 as the fallback value
            vertex1_id = fields.Int(attribute="vertex1_json_id",
                                    required=False,
                                    default=1,
                                    missing=1)

        accepted_json_objects = [
            # correct case: no information in json object about schema
            {
                "vertex1_id": 1,
                "vertex2_id": 2,
                "multicolor": [1, 2, 3, 4]
            },
            # correct case: information about json object schema is present
            # such information shall be ignored at the BGEdge deserialization level
            {
                BGEdge_JSON_SCHEMA_JSON_KEY: "dummy_string",
                "vertex1_id": 1,
                "vertex2_id": 2,
                "multicolor": [1, 2, 3, 4]
            },
            # "vertex1_id" is not present in json object: with the supplied schema this is
            # still a correct case, and vertex1 is expected to take the fallback value 1
            {
                "vertex2_id": 2,
                "multicolor": [1, 2, 3, 4]
            },
        ]
        for json_object in accepted_json_objects:
            result = BGEdge.from_json(
                data=json_object,
                json_schema_class=BGEdgeJSONSchemeDefaultVertex1)
            self.assertIsInstance(result, BGEdge)
            self.assertEqual(result.vertex1, 1)
            self.assertEqual(result.vertex2, 2)
            self.assertListEqual(result.multicolor, [1, 2, 3, 4])

        # NOTE(review): the failing cases below call ``from_json`` without ``json_schema_class``,
        # i.e. they do not go through the schema defined above; confirm whether
        # passing the supplied schema was intended here
        rejected_json_objects = [
            # incorrect case: "vertex2_id" is not present in json object
            {
                "vertex1_id": 2,
                "multicolor": [1, 2, 3, 4]
            },
            # incorrect case: no multicolor present in json object
            {
                "vertex2_id": 2,
                "vertex1_id": 1,
            },
        ]
        for json_object in rejected_json_objects:
            with self.assertRaises(ValidationError):
                BGEdge.from_json(data=json_object)
示例#11
0
 def test_json_deserialization_default_schema(self):
     # when no schema is supplied, the default schema is applied
     # (the original comment named BGVertex here; presumably the BGEdge default schema is meant)
     # deserialization for vertices and multicolor shall be performed as is, but then it will be resupplied from
     # the overviewing BreakpointGraph
     accepted_json_objects = [
         # correct case: no information in json object about schema
         {
             "vertex1_id": 1,
             "vertex2_id": 2,
             "multicolor": [1, 2, 3, 4]
         },
         # correct case: information about json object schema is present
         # such information shall be ignored at the BGEdge deserialization level
         {
             BGEdge_JSON_SCHEMA_JSON_KEY: "dummy_string",
             "vertex1_id": 1,
             "vertex2_id": 2,
             "multicolor": [1, 2, 3, 4]
         },
     ]
     for json_object in accepted_json_objects:
         result = BGEdge.from_json(data=json_object)
         # ids are kept as is: vertices and multicolor hold the raw json values
         self.assertIsInstance(result, BGEdge)
         self.assertEqual(result.vertex1, 1)
         self.assertEqual(result.vertex2, 2)
         self.assertListEqual(result.multicolor, [1, 2, 3, 4])

     rejected_json_objects = [
         # incorrect cases: at least one vertex id is not present in json object
         {
             "vertex2_id": 2,
             "multicolor": [1, 2, 3, 4]
         },
         {
             "vertex1_id": 2,
             "multicolor": [1, 2, 3, 4]
         },
         # incorrect case: no multicolor present in json object
         {
             "vertex2_id": 2,
             "vertex1_id": 1,
         },
     ]
     for json_object in rejected_json_objects:
         with self.assertRaises(ValidationError):
             BGEdge.from_json(data=json_object)
示例#12
0
    def get_breakpoint_graph(stream, merge_edges=True):
        """ Taking a file-like object transforms supplied gene order data into the language of breakpoint graphs

        Every line of the stream is stripped and then handled according to its kind:

        * empty lines are omitted
        * a genome declaration switches the current genome and resets the collected fragment data
        * a comment that carries data whose path starts with ``"fragment"`` is stored in the fragment data;
          all other comments are skipped
        * any other line is parsed as gene order data of the current genome; such lines are omitted as long
          as no genome has been declared

        :param stream: any iterable object where each iteration produces a ``str`` object
        :type stream: ``iterable`` over ``str``
        :param merge_edges: a flag that indicates if parallel edges in produced breakpoint graph shall be merged or not
        :type merge_edges: ``bool``
        :return: an instance of a BreakpointGraph that contains information about adjacencies in genome specified in GRIMM formatted input
        :rtype: :class:`bg.breakpoint_graph.BreakpointGraph`
        """
        result = BreakpointGraph()
        current_genome = None
        fragment_data = {}
        for line in stream:
            line = line.strip()
            if not line:
                # empty lines are omitted
                continue
            if GRIMMReader.is_genome_declaration_string(data_string=line):
                # if we have a genome declaration, we must update current genome
                # all following gene order data (before EOF or next genome declaration)
                # will be attributed to current genome
                current_genome = GRIMMReader.parse_genome_declaration_string(
                    data_string=line)
                # fragment data collected so far is dropped when a new genome starts
                fragment_data = {}
            elif GRIMMReader.is_comment_string(data_string=line):
                # only data comments addressed to "fragment" are recorded, other comments are skipped
                if GRIMMReader.is_comment_data_string(string=line):
                    path, (key, value) = GRIMMReader.parse_comment_data_string(
                        comment_data_string=line)
                    if len(path) > 0 and path[0] == "fragment":
                        add_to_dict_with_path(destination_dict=fragment_data,
                                              key=key,
                                              value=value,
                                              path=path)
            elif current_genome is not None:
                # gene order information that is specified before the first genome is specified
                # can not be attributed to anything and thus is omitted
                parsed_data = GRIMMReader.parse_data_string(data_string=line)
                edges = GRIMMReader.get_edges_from_parsed_data(
                    parsed_data=parsed_data)
                for v1, v2 in edges:
                    edge_specific_data = {
                        "fragment": {
                            "forward_orientation": (v1, v2)
                        }
                    }
                    # every edge gets its own copy of the fragment data,
                    # which is then extended with the edge specific entries
                    edge = BGEdge(vertex1=v1,
                                  vertex2=v2,
                                  multicolor=Multicolor(current_genome),
                                  data=deepcopy(fragment_data))
                    edge.update_data(source=edge_specific_data)
                    result.add_bgedge(bgedge=edge, merge=merge_edges)
        return result
示例#13
0
 def test_is_bgedge_vtree_consistent(self):
     v1, v2 = "v1", "v2"
     bgedge = BGEdge(vertex1=v1, vertex2=v2, multicolor=Multicolor())
     # bgedge with an empty multicolor complies with any tree
     bgedge.multicolor = Multicolor()
     self.assertTrue(BGTree("(v1, v2);").bgedge_is_vtree_consistent(bgedge))

     # all remaining checks are performed against a single non wgd tree;
     # each entry pairs the colors put on the edge with the expected consistency
     tree = BGTree("(((v1, v2), v3),(v4, v5));")
     expectations = [
         # simple cases
         ((self.bg_v1, ), True),
         # a small v1, v2 subtree, still consistent
         ((self.bg_v1, self.bg_v2), True),
         # bigger v1, v2, v3 subtree, still consistent
         ((self.bg_v1, self.bg_v2, self.bg_v3), True),
         # v2, v3 is not a valid subtree (its complement is two subtrees, instead of one)
         ((self.bg_v2, self.bg_v3), False),
         # if some genomes in multicolor are not present in tree,
         # then multicolor will not be consistent with the tree
         ((self.bg_v1, BGGenome("v6")), False),
         # other cases for a non wgd tree
         ((self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4), False),
         ((self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v5), False),
         ((self.bg_v1, self.bg_v2, self.bg_v4, self.bg_v5), False),
         ((self.bg_v1, self.bg_v3, self.bg_v4, self.bg_v5), False),
         ((self.bg_v2, self.bg_v3, self.bg_v4, self.bg_v5), False),
         ((self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4, self.bg_v5),
          True),
         ((self.bg_v5, self.bg_v4), True),
         ((self.bg_v3, self.bg_v4, self.bg_v5), False),
         ((self.bg_v3, self.bg_v5), False),
     ]
     for colors, is_consistent in expectations:
         bgedge.multicolor = Multicolor(*colors)
         verdict = tree.bgedge_is_vtree_consistent(bgedge)
         if is_consistent:
             self.assertTrue(verdict)
         else:
             self.assertFalse(verdict)
示例#14
0
    def test_equality(self):
        # edges are called equal if they connect same pairs of vertices and have same multicolor assigned to them
        vertex_a, vertex_b, vertex_c, vertex_d = (
            BlockVertex(name) for name in ("v1", "v2", "v3", "v4"))
        color_a = Multicolor(self.genome3)
        color_b = Multicolor(self.genome2)
        base_edge = BGEdge(vertex1=vertex_a,
                           vertex2=vertex_b,
                           multicolor=color_a)
        recolored_edge = BGEdge(vertex1=vertex_a,
                                vertex2=vertex_b,
                                multicolor=color_b)
        shifted_edge = BGEdge(vertex1=vertex_a,
                              vertex2=vertex_c,
                              multicolor=color_b)
        disjoint_edge = BGEdge(vertex1=vertex_c,
                               vertex2=vertex_d,
                               multicolor=color_a)
        # a difference in the multicolor and/or in the vertices makes edges unequal
        unequal_pairs = (
            (base_edge, recolored_edge),
            (base_edge, shifted_edge),
            (recolored_edge, shifted_edge),
            (base_edge, disjoint_edge),
        )
        for left, right in unequal_pairs:
            self.assertNotEqual(left, right)

        # same vertices (in any order) together with the same multicolor make edges equal
        reversed_edge = BGEdge(vertex1=vertex_b,
                               vertex2=vertex_a,
                               multicolor=color_a)
        twin_edge = BGEdge(vertex1=vertex_a,
                           vertex2=vertex_b,
                           multicolor=color_a)
        equal_pairs = (
            (base_edge, reversed_edge),
            (base_edge, twin_edge),
            (reversed_edge, twin_edge),
        )
        for left, right in equal_pairs:
            self.assertEqual(left, right)

        # an edge is never equal to an object that is not an edge
        self.assertNotEqual(base_edge, 5)
        # sharing just a single vertex is not enough
        one_shared_vertex_edge = BGEdge(vertex1=vertex_c,
                                        vertex2=vertex_a,
                                        multicolor=color_a)
        self.assertNotEqual(base_edge, one_shared_vertex_edge)

        # the data attribute takes part in the comparison as well
        self.assertEqual(base_edge, reversed_edge)
        reversed_edge.data = {"fragment": {"name": 1}}
        base_edge.data = {"fragment": {"name": 2}}
        self.assertNotEqual(base_edge, reversed_edge)
        base_edge.data = {"fragment": {"name": 1}}
        self.assertEqual(base_edge, reversed_edge)
示例#15
0
 def test_merging_incorrect(self):
     # cases when vertices in the two edges supplied for merging are not consistent
     v1, v2, v3, v4 = (BlockVertex(name)
                       for name in ("v1", "v2", "v3", "v4"))
     reference_color = Multicolor(self.genome3)
     other_color = Multicolor(self.genome2)
     reference_edge = BGEdge(vertex1=v1,
                             vertex2=v2,
                             multicolor=reference_color)
     # vertex pairs for which a merge with the (v1, v2) edge is expected to be refused
     inconsistent_vertex_pairs = (
         (v1, v3),
         (v3, v2),
         (v3, v4),
         (v1, v1),
         (v2, v2),
         (v3, v1),
     )
     for first_vertex, second_vertex in inconsistent_vertex_pairs:
         candidate_edge = BGEdge(vertex1=first_vertex,
                                 vertex2=second_vertex,
                                 multicolor=other_color)
         self.assertRaises(ValueError, BGEdge.merge, reference_edge,
                           candidate_edge)
示例#16
0
 def test_json_serialization(self):
     # simple case of serialization, single color, no multiplicity
     v1, v2 = BlockVertex("v1"), BlockVertex("v2")
     color1 = BGGenome("genome1")
     multicolor = Multicolor(color1)
     edge = BGEdge(vertex1=v1, vertex2=v2, multicolor=multicolor)
     ref_result = {
         "vertex1_id": v1.json_id,
         "vertex2_id": v2.json_id,
         "multicolor": [color1.json_id]
     }
     # with schema_info=False the result holds the vertex ids and the colors only
     self.assertDictEqual(edge.to_json(schema_info=False), ref_result)
     # case where multiple colors are present, multiplicity is 1 for every of them
     color2 = BGGenome("genome2")
     multicolor = Multicolor(color1, color2)
     edge = BGEdge(vertex1=v1, vertex2=v2, multicolor=multicolor)
     result = edge.to_json()
     self.assertIsInstance(result, dict)
     self.assertEqual(result["vertex1_id"], v1.json_id)
     self.assertEqual(result["vertex2_id"], v2.json_id)
     self.assertSetEqual(set(result["multicolor"]),
                         {color1.json_id, color2.json_id})
     # a plain ``to_json()`` call also stores the schema name under the schema key
     self.assertEqual(result[BGEdge_JSON_SCHEMA_JSON_KEY],
                      edge.json_schema_name)
     # case where multiple colors are present, multiplicity is both 1 and greater than 1
     color3 = BGGenome("genome3")
     multicolor = Multicolor(color1, color1, color1, color2, color2, color3)
     edge = BGEdge(vertex1=v1, vertex2=v2, multicolor=multicolor)
     result = edge.to_json(schema_info=False)
     self.assertIsInstance(result, dict)
     self.assertEqual(result["vertex1_id"], v1.json_id)
     self.assertEqual(result["vertex2_id"], v2.json_id)
     self.assertSetEqual(set(result["multicolor"]),
                         {color1.json_id, color2.json_id, color3.json_id})
     # multiplicities have to be preserved: every id shows up once per color occurrence
     self.assertDictEqual(
         Counter(result["multicolor"]),
         Counter(color.json_id
                 for color in multicolor.multicolors.elements()))
     # weird case when a vertex1/vertex2 attribute in edge is not an instance of BGVertex
     # and moreover it does not have "json_id" attribute: its hash is expected as the id
     edge = BGEdge(vertex1=v1, vertex2=1, multicolor=Multicolor(color1))
     result = edge.to_json()
     self.assertIsInstance(result, dict)
     self.assertEqual(result["vertex1_id"], v1.json_id)
     self.assertEqual(result["vertex2_id"], hash(1))
     self.assertListEqual(result["multicolor"], [color1.json_id])
     self.assertEqual(result[BGEdge_JSON_SCHEMA_JSON_KEY],
                      edge.json_schema_name)
示例#17
0
 def test_empty_initialization_incorrect(self):
     # a BGEdge wrapper is meant to wrap something, but not nothing:
     # calling the constructor with no arguments has to end with a TypeError
     self.assertRaises(TypeError, BGEdge)
示例#18
0
 def test_initialization_empty_data_attribute(self):
     # an edge created without the ``data`` argument has to carry the default data dict
     edge = BGEdge(vertex1=TaggedBlockVertex("v1"),
                   vertex2=TaggedBlockVertex("v2"),
                   multicolor=Multicolor(self.genome1))
     actual_data = edge.data
     expected_data = BGEdge.create_default_data_dict()
     self.assertDictEqual(actual_data, expected_data)