Пример #1
0
    def test_graph_equality(self):
        """Exercise the ``==`` operator between GFA graphs.

        For the GFA2 sample, the graph must equal a second parse of the
        same text and a graph rebuilt from its own ``dump(2)`` output,
        while copies with an altered node sequence or a re-keyed edge
        must compare unequal. The parse/dump comparison is then repeated
        for the GFA1 sample.
        """
        graph = self.graph

        # --- GFA2 sample: parse twice, then round-trip through dump(2) ---
        graph.clear()
        graph.from_string(sample_gfa2)

        reparsed = gfa.GFA()
        reparsed.from_string(sample_gfa2)
        self.assertTrue(graph == reparsed)

        round_tripped = gfa.GFA()
        round_tripped.from_string(graph.dump(2))
        self.assertTrue(round_tripped == graph)

        # A copy whose node "3" has a longer sequence must not be equal.
        altered_node = copy.deepcopy(round_tripped)
        node_3_data = altered_node.node("3")
        node_3_data['sequence'] += "ACGT"
        self.assertFalse(graph == altered_node)

        # Blank the sequences of both end nodes and re-insert the edge
        # under the "*" key, to check that the comparison of virtual
        # elements works.
        altered_edge = copy.deepcopy(round_tripped)
        for end_node in ("1", "2"):
            altered_edge.node(end_node)["sequence"] = "*"
        edge_data = altered_edge.edge("1_to_2")
        altered_edge.remove_edge("1_to_2")
        altered_edge._graph.add_edge("1", "2", key="*", **edge_data)
        self.assertFalse(graph == altered_edge)

        # --- GFA1 sample: parse twice, then round-trip through dump(1) ---
        graph.clear()
        graph.from_string(sample_gfa1)

        reparsed = gfa.GFA()
        reparsed.from_string(sample_gfa1)
        self.assertTrue(graph == reparsed)

        round_tripped = gfa.GFA()
        round_tripped.from_string(graph.dump(1))
        self.assertTrue(round_tripped == graph)
Пример #2
0
    def test_serialize_gfa2_graph(self):
        """Round-trip a small GFA2 graph through ``gs2.serialize_gfa``.

        The graph is serialized and parsed back twice. After each pass
        the nodes, edges and subgraphs of the reparsed graph must equal
        those of the graph it was compared against.
        """
        self.graph.clear()
        # Adjacent string literals are concatenated at compile time, so
        # neither an explicit join nor continuation backslashes are needed.
        mini_graph = (
            "S\t11\t42\t*\txx:i:11\n"
            "S\t21\t13\t*\n"
            "E\t15\t11+\t13+\t21\t42\t42\t21\t120M\n"
            "O\t33\t11+ 13+\n"
        )
        self.graph.from_string(mini_graph)

        # First pass: original graph -> text -> same_graph.
        same_graph_repr = gs2.serialize_gfa(self.graph)
        same_graph = gfa.GFA()
        same_graph.from_string(same_graph_repr)

        # assertEqual reports both operands when the comparison fails.
        self.assertEqual(
            self.graph.nodes(data=True), same_graph.nodes(data=True))
        self.assertEqual(
            self.graph.edges(adj_dict=True), same_graph.edges(adj_dict=True))
        self.assertEqual(self.graph.subgraphs(), same_graph.subgraphs())

        # Second pass: same_graph -> text -> another_equal_graph.
        another_equal_graph_repr = gs2.serialize_gfa(same_graph)
        another_equal_graph = gfa.GFA()
        another_equal_graph.from_string(another_equal_graph_repr)

        self.assertEqual(
            another_equal_graph.nodes(data=True),
            same_graph.nodes(data=True))
        self.assertEqual(
            another_equal_graph.edges(adj_dict=True),
            same_graph.edges(adj_dict=True))
        self.assertEqual(
            another_equal_graph.subgraphs(), same_graph.subgraphs())
Пример #3
0
    def test_subgraph(self):
        """Test the GFA ``subgraph`` interface to the networkx method.

        Checks the type and size of the subgraph induced by three nodes
        of the GFA2 sample, that the default result is independent of the
        source graph, that it can seed a new GFA object, and that with
        ``copy=False`` a change to a node's data is visible through the
        source graph as well.
        """
        self.graph.clear()
        self.graph.from_string(sample_gfa2)
        subgraph_ = self.graph.subgraph(["1", "3", "11"])
        self.assertIsNotNone(subgraph_)
        self.assertIsInstance(subgraph_, nx.MultiGraph)
        self.assertEqual(len(subgraph_.nodes()), 3)
        self.assertEqual(len(subgraph_.edges()), 2)
        self.assertIsNotNone(subgraph_.edge["1"]["11"]["1_to_11"])
        self.assertIsNotNone(subgraph_.edge["1"]["3"]["1_to_3"])

        # The default subgraph is expected to be a copy: changing its
        # node data must leave the source graph untouched.
        subgraph_.node["3"]["nid"] = 42
        self.assertNotEqual(subgraph_.node["3"], self.graph.node("3"))

        # Create a GFA graph using the subgraph as base graph.
        gfa_ = gfa.GFA(subgraph_)
        self.assertIsNotNone(gfa_.edge("1_to_3"))
        self.assertEqual(subgraph_.edge["1"]["3"]["1_to_3"],
                         gfa_.edge("1_to_3"))

        # With copy=False the change must show up in the source graph too.
        subgraph_ = self.graph.subgraph(["1", "3", "11"], copy=False)
        subgraph_.node["3"]["nid"] = 42
        self.assertEqual(subgraph_.node["3"], self.graph.node("3"))
Пример #4
0
    def test_serialize_gfa1_graph(self):
        """Round-trip a small GFA1 graph through ``gs1.serialize_gfa``.

        The graph parsed back from the serialized text must expose the
        same nodes, edges and subgraphs as the original one.
        """
        self.graph.clear()

        # Adjacent string literals are concatenated at compile time, so
        # neither an explicit join nor continuation backslashes are needed.
        mini_graph = (
            "S\t11\t*\txx:i:11\n"
            "S\t13\t*\n"
            "L\t11\t+\t13\t+\t120M\n"
            "P\t15\t11+,13+\t120M\n"
        )
        self.graph.from_string(mini_graph)
        same_graph_repr = gs1.serialize_gfa(self.graph)
        same_graph = gfa.GFA()
        same_graph.from_string(same_graph_repr)

        # assertEqual reports both operands when the comparison fails.
        self.assertEqual(self.graph.node(), same_graph.node())
        self.assertEqual(self.graph.edge(), same_graph.edge())
        self.assertEqual(self.graph.subgraphs(), same_graph.subgraphs())
Пример #5
0
    def test_dovetails_subgraph(self):
        """Use the ``dovetails_subgraph`` method on the GFA1 sample.

        The resulting subgraph is expected to hold 9 nodes and the 4
        listed dovetail-overlap edges. The test also checks that the
        subgraph is independent of the source graph, that it can seed a
        new GFA object, and that ``subgraph(..., copy=False)`` shares
        node data with the source graph.
        """
        self.graph.clear()
        self.graph.from_string(sample_gfa1)
        subgraph_ = self.graph.dovetails_subgraph()
        self.assertIsNotNone(subgraph_)
        self.assertIsInstance(subgraph_, nx.MultiGraph)
        self.assertEqual(len(subgraph_.nodes()), 9)
        self.assertEqual(len(subgraph_.edges()), 4)
        self.assertIsNotNone(
            subgraph_.get_edge_data("1", "2", "1_to_2"))
        self.assertIsNotNone(
            subgraph_.get_edge_data("1", "3", "1_to_3"))
        self.assertIsNotNone(
            subgraph_.get_edge_data("11", "13", "11_to_13"))
        self.assertIsNotNone(
            subgraph_.get_edge_data("11", "12", "11_to_12"))

        # NOTE(review): negative checks for the edges "2_to_6" and
        # "1_to_5" used to live here, wrapped in assertRaises(KeyError).
        # They were disabled; networkx's get_edge_data returns its default
        # (None) for a missing edge instead of raising, so that form cannot
        # pass. TODO: confirm and re-enable them as plain "is None" checks.

        # The subgraph is expected to be a copy: changing its node data
        # must leave the source graph untouched.
        subgraph_.nodes["1"]["nid"] = 42
        self.assertNotEqual(
            subgraph_.nodes["1"], self.graph.nodes(identifier="1"))

        # Create a GFA graph using the subgraph as base graph.
        gfa_ = gfa.GFA(subgraph_)
        self.assertIsNotNone(gfa_.edges(identifier="1_to_3"))
        self.assertEqual(subgraph_.get_edge_data("1", "3", "1_to_3"),
                         gfa_.edges(identifier="1_to_3"))

        # With copy=False the change must show up in the source graph too.
        subgraph_ = self.graph.subgraph(["1", "3", "11"], copy=False)
        subgraph_.nodes["3"]["nid"] = 42
        self.assertEqual(subgraph_.nodes["3"],
                         self.graph.nodes(identifier="3"))
Пример #6
0
    def test_GFA_graph(self):
        """Test the GFA constructor and the node/edge accessor methods.

        Covers an empty graph, rejection of base graphs that are not
        multigraphs, construction from a prepared ``nx.MultiGraph``,
        lookups by id and by tuple, invalid search parameters, and
        in-place modification of the returned attribute dictionaries.
        """
        tmp = gfa.GFA()

        self.assertEqual(tmp._get_virtual_id(increment=False), 0)
        self.assertEqual(tmp.nodes(), [])
        self.assertEqual(tmp.edges(), [])
        self.assertEqual(tmp.subgraphs(), {})

        # Graph and DiGraph are rejected as base graphs.
        tmp_nx = nx.Graph()
        with self.assertRaises(gfa.GFAError):
            gfa.GFA(tmp_nx)

        tmp_nx = nx.DiGraph()
        with self.assertRaises(gfa.GFAError):
            gfa.GFA(tmp_nx)

        tmp_nx = nx.MultiGraph()
        tmp_nx.add_node("2", nid="2", sequence="acgt", slen="4")
        tmp_nx.add_node("4", nid="4", sequence="*", slen="25")
        tmp_nx.add_edge("4", "2", key="virtual_42", eid="*",
                        from_node="4", to_node="2",
                        from_orn="+", to_orn="-",
                        from_positions=None, to_positions=None,
                        distance=None, variance="*")

        tmp = gfa.GFA(tmp_nx)
        self.assertEqual(len(tmp.node()), 2)
        # edge() without arguments returns the edges starting from each
        # node; there are 2 nodes, so the dictionary has 2 entries.
        self.assertEqual(len(tmp.edge()), 2)
        self.assertEqual(tmp._find_max_virtual_id(), 42)
        self.assertEqual(tmp.node("2")["nid"], "2")
        self.assertEqual(tmp.node("2")["sequence"], "acgt")
        self.assertEqual(tmp.node("4")["nid"], "4")
        self.assertIsNone(tmp.node("not_exists"))

        # There is exactly one edge between nodes "4" and "2".
        self.assertEqual(len(tmp.edge(("4", "2"))), 1)
        self.assertEqual(tmp.edge(("4", "2", "virtual_42"))["eid"], "*")
        self.assertIsNone(tmp.edge("None_Key"))
        self.assertIsNone(tmp.edge(("4", "None_Node")))

        # ("4") without a trailing comma is just the string "4", not a
        # tuple, so the one-element case needs the explicit comma.
        with self.assertRaises(gfa.InvalidSearchParameters):
            tmp.edge(("4", ))  # single-element tuple

        with self.assertRaises(gfa.InvalidSearchParameters):
            tmp.edge(())  # empty tuple

        # The accessors return the live attribute dictionaries, so writes
        # through them must be visible on the next lookup.
        tmp.edge("virtual_42")['alignment'] = "20M2I4D"
        self.assertEqual(tmp.edge("virtual_42")['alignment'], "20M2I4D")

        tmp.node("4")["new_attribute"] = 42
        self.assertEqual(tmp.node("4")["new_attribute"], 42)
Пример #7
0
class TestLine (unittest.TestCase):

    graph = gfa.GFA ()

    def test_GFA_graph(self):
        """Test the GFA constructor and the node/edge accessor methods.

        Covers an empty graph, rejection of base graphs that are not
        multigraphs, construction from a prepared ``nx.MultiGraph``,
        lookups by id and by tuple, invalid search parameters, and
        in-place modification of the returned attribute dictionaries.
        """
        tmp = gfa.GFA()

        self.assertEqual(tmp._get_virtual_id(increment=False), 0)
        self.assertEqual(tmp.nodes(), [])
        self.assertEqual(tmp.edges(), [])
        self.assertEqual(tmp.subgraphs(), {})

        # Graph and DiGraph are rejected as base graphs.
        tmp_nx = nx.Graph()
        with self.assertRaises(gfa.GFAError):
            gfa.GFA(tmp_nx)

        tmp_nx = nx.DiGraph()
        with self.assertRaises(gfa.GFAError):
            gfa.GFA(tmp_nx)

        tmp_nx = nx.MultiGraph()
        tmp_nx.add_node("2", nid="2", sequence="acgt", slen="4")
        tmp_nx.add_node("4", nid="4", sequence="*", slen="25")
        tmp_nx.add_edge("4", "2", key="virtual_42", eid="*",
                        from_node="4", to_node="2",
                        from_orn="+", to_orn="-",
                        from_positions=None, to_positions=None,
                        distance=None, variance="*")

        tmp = gfa.GFA(tmp_nx)
        self.assertEqual(len(tmp.node()), 2)
        # edge() without arguments returns the edges starting from each
        # node; there are 2 nodes, so the dictionary has 2 entries.
        self.assertEqual(len(tmp.edge()), 2)
        self.assertEqual(tmp._find_max_virtual_id(), 42)
        self.assertEqual(tmp.node("2")["nid"], "2")
        self.assertEqual(tmp.node("2")["sequence"], "acgt")
        self.assertEqual(tmp.node("4")["nid"], "4")
        self.assertIsNone(tmp.node("not_exists"))

        # There is exactly one edge between nodes "4" and "2".
        self.assertEqual(len(tmp.edge(("4", "2"))), 1)
        self.assertEqual(tmp.edge(("4", "2", "virtual_42"))["eid"], "*")
        self.assertIsNone(tmp.edge("None_Key"))
        self.assertIsNone(tmp.edge(("4", "None_Node")))

        # ("4") without a trailing comma is just the string "4", not a
        # tuple, so the one-element case needs the explicit comma.
        with self.assertRaises(gfa.InvalidSearchParameters):
            tmp.edge(("4", ))  # single-element tuple

        with self.assertRaises(gfa.InvalidSearchParameters):
            tmp.edge(())  # empty tuple

        # The accessors return the live attribute dictionaries, so writes
        # through them must be visible on the next lookup.
        tmp.edge("virtual_42")['alignment'] = "20M2I4D"
        self.assertEqual(tmp.edge("virtual_42")['alignment'], "20M2I4D")

        tmp.node("4")["new_attribute"] = 42
        self.assertEqual(tmp.node("4")["new_attribute"], 42)


    def test_add_node(self):
        """Test the add_node and remove_node methods.

        Nodes are added from Node objects and from raw GFA1/GFA2 segment
        lines. Removing a node must also drop its edges. Duplicate ids
        in safe mode, unsupported argument forms and the removal of an
        unknown node must raise.
        """
        gfa1_segment = \
            "S\t3\tTGCAACGTATAGACTTGTCAC\tRC:i:4\tui:Z:test\tab:Z:another_test"
        gfa2_segment = \
            "S\t3\t21\tTGCAACGTATAGACTTGTCAC\tRC:i:4\tui:Z:test\tab:Z:another_test"

        def check_node_3_fields():
            # Fields every variant of node "3" in this test must expose.
            node_3 = self.graph.node('3')
            self.assertEqual(node_3['sequence'], "TGCAACGTATAGACTTGTCAC")
            self.assertEqual(node_3['nid'], "3")
            self.assertEqual(node_3['ui'].value, "test")
            self.assertEqual(node_3['ui'].type, "Z")

        self.graph.clear()
        seg = segment.SegmentV1.from_string(gfa1_segment)
        node_ = node.Node.from_line(seg)
        self.graph.add_node(node_)
        # Reuse the parsed segment under a different name for a second node.
        seg.fields['name']._value = "node4"
        node_ = node.Node.from_line(seg)
        self.graph.add_node(node_)

        self.graph._graph.add_edge("3", "node4")  # create an edge (indirectly)
        self.assertEqual(len(self.graph.edges()), 1)

        self.assertEqual(len(self.graph.nodes()), 2)
        check_node_3_fields()

        self.graph.remove_node("3")
        self.assertIsNone(self.graph.node("3"))
        # The edge between "3" and "node4" has been deleted automatically.
        self.assertEqual(len(self.graph.edges()), 0)

        # Add the node from a raw GFA1 segment line.
        self.graph.add_node(gfa1_segment)
        check_node_3_fields()
        self.graph.remove_node("3")

        # Add the node from a raw GFA2 segment line.
        self.graph.add_node(gfa2_segment)
        check_node_3_fields()
        self.assertEqual(self.graph.node('3')['slen'], 21)

        # In safe mode an already used id is rejected.
        with self.assertRaises(gfa.GFAError):
            self.graph.add_node(gfa2_segment, safe=True)

        with self.assertRaises(TypeError):
            self.graph.add_node("21", nid="21", slen="4", sequence="acgt")
        with self.assertRaises(node.InvalidNodeError):
            self.graph.add_node({'nid':"21", 'slen':"4", 'sequence':"acgt"})

        with self.assertRaises(node.InvalidNodeError):
            self.graph.remove_node("42")


    def test_add_edge(self):
        """Test the add_edge and remove_edge methods.

        Edges are added from Edge objects and from raw lines, looked up
        by node pair and by id, and removed by id, by node pair and by
        full triple. Duplicate ids and unknown end nodes in safe mode,
        an Edge without id and an invalid line must raise.
        """
        fragment_line = "F\t3\t4-\t0\t140$\t0\t140\t11M"

        self.graph.clear()

        self.graph.add_node(
            "S\t3\tTGCAACGTATAGACTTGTCAC\tRC:i:4\tui:Z:test\tab:Z:another_test")
        self.graph.add_node(
            "S\t4\tTGCAACGTATAGACTTGTCAC\tRC:i:4\tui:Z:test\tab:Z:another_test")

        line = fragment.Fragment.from_string(fragment_line)
        edg = ge.Edge.from_line(line)
        self.graph.add_edge(edg)

        line = link.Link.from_string(
            "L\t3\t+\t4\t-\t47M\tui:Z:test\tab:Z:another_test")
        edg = ge.Edge.from_line(line)
        self.graph.add_edge(edg)

        # The F line is added first, so it gets the id 'virtual_0'.
        # This first check fetches all the edges between nodes 3 and 4
        # and then picks the one labelled 'virtual_0'.
        self.assertEqual(
            self.graph.edge(('3', '4'))['virtual_0']['from_node'], "3")
        # This check fetches the edge labelled 'virtual_0' directly; the
        # label is unique in the graph.
        self.assertEqual(self.graph.edge('virtual_0')['from_node'], "3")
        self.assertEqual(len(self.graph.edge(('3', '4'))), 2)

        self.graph.remove_edge("virtual_0")
        self.assertIsNone(self.graph.edge('virtual_0'))
        with self.assertRaises(ge.InvalidEdgeError):
            self.graph.remove_edge("virtual_0")

        # Remember that the virtual id keeps incrementing.
        self.graph.add_edge(fragment_line)
        self.assertEqual(
            self.graph.edge(('3', '4'))['virtual_1']['from_node'], "3")
        self.graph.remove_edge(('3', '4'))  # remove all the edges between 3 and 4
        self.assertEqual(len(self.graph.edges()), 0)

        # Missing end nodes are created automatically.
        self.graph.add_edge(
            "L\t3\t+\t65\t-\t47M\tui:Z:test\tab:Z:another_test")
        self.graph.add_edge(
            "C\ta\t+\tb\t-\t10\t*\tui:Z:test\tab:Z:another_test")
        self.graph.add_edge(
            "E\t*\t23-\t16+\t0\t11\t0\t11\t11M\tui:Z:test\tab:Z:another_test")
        self.graph.add_edge(
            "G\tg\tA+\tB-\t1000\t*\tui:Z:test\tab:Z:another_test")
        self.assertEqual(len(self.graph.edges()), 4)

        self.graph.remove_edge(("A", "B", "g"))  # remove the gap
        self.assertEqual(len(self.graph.edges()), 3)

        # In safe mode a duplicated id or an unknown end node is rejected.
        self.graph.add_edge("L\t3\t+\t65\t-\t47M\tui:Z:test\tID:Z:42")
        with self.assertRaises(gfa.GFAError):
            self.graph.add_edge(
                "L\t3\t+\t65\t-\t47M\tui:Z:test\tID:Z:42",
                safe=True)
        with self.assertRaises(gfa.GFAError):
            self.graph.add_edge(
                "L\t3\t+\tnon_exists\t-\t47M\tui:Z:test\tID:Z:47",
                safe=True)

        # An Edge object stripped of its id is not a valid edge.
        line = fragment.Fragment.from_string(fragment_line)
        edg = ge.Edge.from_line(line)
        del edg._eid
        with self.assertRaises(ge.InvalidEdgeError):
            self.graph.add_edge(edg)
        with self.assertRaises(ge.InvalidEdgeError):
            self.graph.add_edge("Z\t3\t4-\t0\t140$\t0\t140\t11M")  # invalid line


    def test_add_subgraphs(self):
        """Test the add_subgraph and remove_subgraph methods.

        Subgraphs are added from a Subgraph object and from raw P/O/U
        lines, one of them with the ``*`` placeholder id, which is
        looked up afterwards as 'virtual_0'. A duplicate id in safe mode,
        a Subgraph without id, an invalid line and the removal of an
        unknown subgraph must raise.
        """
        self.graph.clear()

        line = path.Path.from_string(
            "P\t14\t11+,12+\t122M\tui:Z:test\tab:Z:another_test")
        sb = sg.Subgraph.from_line(line)
        self.graph.add_subgraph(sb)

        with self.assertRaises(gfa.GFAError):
            self.graph.add_subgraph(sb, safe=True)

        self.graph.add_subgraph("O\t15\t11+ 11_to_13+ 13+\txx:i:-1")
        self.graph.add_subgraph("U\t16sub\t2 3\txx:i:-1")
        self.graph.add_subgraph(
            "P\t14_2\t11_2+,12+\t122M\tui:Z:test\tab:Z:another_test")
        self.graph.add_subgraph("O\t*\t11+ 11_to_13+ 13+\txx:i:-1")
        self.assertEqual(len(self.graph.subgraphs()), 5)

        self.assertIsNotNone(self.graph.subgraphs('virtual_0'))
        self.graph.remove_subgraph("virtual_0")
        self.assertIsNone(self.graph.subgraphs('virtual_0'))

        # Strip the id outside the assertRaises body so that only
        # add_subgraph itself can satisfy the expectation.
        del sb._sub_id
        with self.assertRaises(sg.InvalidSubgraphError):
            self.graph.add_subgraph(sb)
        with self.assertRaises(sg.InvalidSubgraphError):
            self.graph.add_subgraph(
                "Z\t14_2\t11_2+,12+\t122M\tui:Z:test\tab:Z:another_test")
        with self.assertRaises(sg.InvalidSubgraphError):
            self.graph.remove_subgraph("42")


    def test_as_graph_element(self):
        """Test that as_graph_element returns the element that was added.

        Nodes, edges and subgraphs are built from every supported line
        type, added to the graph one at a time, and immediately looked
        up by id. The lookup result must equal the object that was
        added. An unknown id must raise, and nodes inserted directly
        into the underlying graph are expected to yield None.
        """
        self.graph.clear()

        # Each case is (line class, raw line, id expected in the graph).
        # The order matters: virtual ids are assigned incrementally.
        node_cases = (
            (segment.SegmentV1,
             "S\t3\tTGCAACGTATAGACTTGTCAC\tRC:i:4\tui:Z:test\tab:Z:another_test",
             '3'),
            (segment.SegmentV1,
             "S\t4\tTGCAACGTATAGACTTGTCAC\tRC:i:4\tui:Z:test\tab:Z:another_test",
             '4'),
            (segment.SegmentV2,
             "S\t2\t120\t*\txx:Z:sometag",
             '2'),
        )
        for line_type, raw_line, element_id in node_cases:
            node_ = node.Node.from_line(line_type.from_string(raw_line))
            self.graph.add_node(node_)
            self.assertEqual(self.graph.as_graph_element(element_id), node_)

        edge_cases = (
            (fragment.Fragment,
             "F\t3\t4-\t0\t140$\t0\t140\t11M",
             'virtual_0'),
            (gap.Gap,
             "G\tg\t3+\t4-\t1000\t*\tui:Z:test\tab:Z:another_test",
             'g'),
            (edge.Edge,
             "E\t*\t23-\t16+\t0\t11\t0\t11\t11M\tui:Z:test\tab:Z:another_test",
             'virtual_1'),
            (containment.Containment,
             "C\ta\t+\tb\t-\t10\t*\tui:Z:test\tab:Z:another_test",
             'virtual_2'),
            (link.Link,
             "L\t3\t+\t65\t-\t47M\tui:Z:test\tab:Z:another_test",
             'virtual_3'),
        )
        for line_type, raw_line, element_id in edge_cases:
            edge_ = ge.Edge.from_line(line_type.from_string(raw_line))
            self.graph.add_edge(edge_)
            self.assertEqual(self.graph.as_graph_element(element_id), edge_)

        subgraph_cases = (
            (path.Path,
             "P\t14\t11+,12+\t122M\tui:Z:test\tab:Z:another_test",
             '14'),
            (group.OGroup,
             "O\t15\t11+ 11_to_13+ 13+\txx:i:-1",
             '15'),
            (group.UGroup,
             "U\t16sub\t2 3\txx:i:-1",
             '16sub'),
        )
        for line_type, raw_line, element_id in subgraph_cases:
            subgraph_ = sg.Subgraph.from_line(line_type.from_string(raw_line))
            self.graph.add_subgraph(subgraph_)
            self.assertEqual(
                self.graph.as_graph_element(element_id), subgraph_)

        with self.assertRaises(gfa.InvalidElementError):
            self.graph.as_graph_element('None_id')

        # Force nodes into the underlying graph, bypassing add_node; with
        # these attribute sets as_graph_element is expected to return None.
        self.graph._graph.add_node("id42", some_other_field="42")
        self.assertIsNone(self.graph.as_graph_element("id42"))

        self.graph._graph.add_node("id42", nid="id42", some_other_field="42")
        self.assertIsNone(self.graph.as_graph_element("id42"))



    def test_from_string(self):
        """Parse both sample files and check the resulting element counts."""
        self.graph.clear()
        self.graph.from_string(sample_gfa2)
        # 9 effective nodes plus 2 nodes for the external fields in
        # the fragments.
        self.assertEqual(len(self.graph.nodes()), 11)
        self.assertEqual(len(self.graph.edges()), 10)
        self.assertEqual(len(self.graph.subgraphs()), 4)

        self.graph.clear()
        self.graph.from_string(sample_gfa1)
        self.assertEqual(len(self.graph.nodes()), 9)
        self.assertEqual(len(self.graph.edges()), 6)
        self.assertEqual(len(self.graph.subgraphs()), 2)


    def test_get_subgraph(self):
        """Get the subgraph labelled 15 from the GFA2 sample.

        The returned graph must hold 2 nodes and 1 edge whose alignment
        matches the source graph, and it must be independent of the
        source: changing the alignment in the subgraph must not change
        it in the original graph. An unknown subgraph id must raise.
        """
        self.graph.clear()
        self.graph.from_string(sample_gfa2)

        subgraph_15 = self.graph.get_subgraph("15")
        self.assertIsNotNone(subgraph_15)
        self.assertEqual(len(subgraph_15.nodes()), 2)
        self.assertEqual(len(subgraph_15.edges()), 1)
        self.assertEqual(subgraph_15.edge("11_to_13")['alignment'], "120M")
        self.assertEqual(subgraph_15.edge("11_to_13")['alignment'],
                         self.graph.edge("11_to_13")['alignment'])

        # A write to the subgraph must not leak into the source graph.
        subgraph_15.edge("11_to_13")['alignment'] = "42M"
        self.assertNotEqual(subgraph_15.edge("11_to_13")['alignment'],
                            self.graph.edge("11_to_13")['alignment'])

        with self.assertRaises(sg.InvalidSubgraphError):
            self.graph.get_subgraph("id42")


    def test_subgraph(self):
        """Test the GFA ``subgraph`` interface to the networkx method.

        Checks the type and size of the subgraph induced by three nodes
        of the GFA2 sample, that the default result is independent of the
        source graph, that it can seed a new GFA object, and that with
        ``copy=False`` a change to a node's data is visible through the
        source graph as well.
        """
        self.graph.clear()
        self.graph.from_string(sample_gfa2)
        subgraph_ = self.graph.subgraph(["1", "3", "11"])
        self.assertIsNotNone(subgraph_)
        self.assertIsInstance(subgraph_, nx.MultiGraph)
        self.assertEqual(len(subgraph_.nodes()), 3)
        self.assertEqual(len(subgraph_.edges()), 2)
        self.assertIsNotNone(subgraph_.edge["1"]["11"]["1_to_11"])
        self.assertIsNotNone(subgraph_.edge["1"]["3"]["1_to_3"])

        # The default subgraph is expected to be a copy: changing its
        # node data must leave the source graph untouched.
        subgraph_.node["3"]["nid"] = 42
        self.assertNotEqual(subgraph_.node["3"], self.graph.node("3"))

        # Create a GFA graph using the subgraph as base graph.
        gfa_ = gfa.GFA(subgraph_)
        self.assertIsNotNone(gfa_.edge("1_to_3"))
        self.assertEqual(subgraph_.edge["1"]["3"]["1_to_3"],
                         gfa_.edge("1_to_3"))

        # With copy=False the change must show up in the source graph too.
        subgraph_ = self.graph.subgraph(["1", "3", "11"], copy=False)
        subgraph_.node["3"]["nid"] = 42
        self.assertEqual(subgraph_.node["3"], self.graph.node("3"))

    def test_dovetails_subgraph(self):
        """Use the ``dovetails_subgraph`` method on the GFA1 sample.

        The resulting subgraph is expected to hold 9 nodes and only the
        4 dovetail-overlap edges; looking up the edges "2_to_6" and
        "1_to_5" must fail. The test also checks that the subgraph is
        independent of the source graph, that it can seed a new GFA
        object, and that ``subgraph(..., copy=False)`` shares node data
        with the source graph.
        """
        self.graph.clear()
        self.graph.from_string(sample_gfa1)
        subgraph_ = self.graph.dovetails_subgraph()
        self.assertIsNotNone(subgraph_)
        self.assertIsInstance(subgraph_, nx.MultiGraph)
        self.assertEqual(len(subgraph_.nodes()), 9)
        self.assertEqual(len(subgraph_.edges()), 4)
        self.assertIsNotNone(subgraph_.edge["1"]["2"]["1_to_2"])
        self.assertIsNotNone(subgraph_.edge["1"]["3"]["1_to_3"])
        self.assertIsNotNone(subgraph_.edge["11"]["13"]["11_to_13"])
        self.assertIsNotNone(subgraph_.edge["11"]["12"]["11_to_12"])

        # The lookup itself must raise; wrapping it in another assertion
        # would add a check that can never be the one that passes.
        with self.assertRaises(KeyError):
            subgraph_.edge["2"]["6"]["2_to_6"]
        with self.assertRaises(KeyError):
            subgraph_.edge["1"]["5"]["1_to_5"]

        # The subgraph is expected to be a copy: changing its node data
        # must leave the source graph untouched.
        subgraph_.node["1"]["nid"] = 42
        self.assertNotEqual(subgraph_.node["1"], self.graph.node("1"))

        # Create a GFA graph using the subgraph as base graph.
        gfa_ = gfa.GFA(subgraph_)
        self.assertIsNotNone(gfa_.edge("1_to_3"))
        self.assertEqual(subgraph_.edge["1"]["3"]["1_to_3"],
                         gfa_.edge("1_to_3"))

        # With copy=False the change must show up in the source graph too.
        subgraph_ = self.graph.subgraph(["1", "3", "11"], copy=False)
        subgraph_.node["3"]["nid"] = 42
        self.assertEqual(subgraph_.node["3"], self.graph.node("3"))


    def test_search(self):
        """Perform some query operations on the graph with custom
        comparators and check the results.

        1. Obtain all the edges where `from_node` is 1.
        2. Obtain all the elements that have an xx optfield,
            ignoring its value.
        3. Perform the same operation as point 2, but limit the search
            to subgraphs.
        4. Obtain all the nodes with 'slen' greater than
            or equal to 140.
        """
        self.graph.clear()
        self.graph.from_string(sample_gfa2)

        # 1. Edges leaving node "1".
        result = self.graph.search(
            lambda element: element["from_node"] == "1",
            limit_type=gfa.Element.EDGE)
        self.assertIn("1_to_3", result)
        self.assertIn("1_to_11", result)
        self.assertIn("1_to_5", result)
        self.assertIn("1_to_2", result)
        self.assertEqual(len(result), 4)

        # 2. Any element carrying an xx field.
        result = self.graph.search(lambda element: "xx" in element)
        self.assertIn("11", result)
        self.assertIn("15", result)
        self.assertIn("2", result)
        self.assertEqual(len(result), 3)
        # A custom line also has xx, but it hasn't been added to the
        # graph.

        # 3. Same query, restricted to subgraphs.
        result = self.graph.search(
            lambda element: "xx" in element,
            limit_type=gfa.Element.SUBGRAPH)
        self.assertIn("15", result)
        self.assertEqual(len(result), 1)

        # 4. Nodes at least 140 long.
        def slen_at_least_140(element):
            return int(element['slen']) >= 140

        result = self.graph.search(slen_at_least_140,
                                   limit_type=gfa.Element.NODE)
        self.assertIn("13", result)
        self.assertIn("11", result)
        self.assertIn("12", result)
        self.assertIn("6", result)
        self.assertEqual(len(result), 4)


    def test_graph_equality(self):
        """Exercise the ``==`` operator between GFA graphs.

        For the GFA2 sample, the graph must equal a second parse of the
        same text and a graph rebuilt from its own ``dump(2)`` output,
        while copies with an altered node sequence or a re-keyed edge
        must compare unequal. The parse/dump comparison is then repeated
        for the GFA1 sample.
        """
        graph = self.graph

        # --- GFA2 sample: parse twice, then round-trip through dump(2) ---
        graph.clear()
        graph.from_string(sample_gfa2)

        reparsed = gfa.GFA()
        reparsed.from_string(sample_gfa2)
        self.assertTrue(graph == reparsed)

        round_tripped = gfa.GFA()
        round_tripped.from_string(graph.dump(2))
        self.assertTrue(round_tripped == graph)

        # A copy whose node "3" has a longer sequence must not be equal.
        altered_node = copy.deepcopy(round_tripped)
        node_3_data = altered_node.node("3")
        node_3_data['sequence'] += "ACGT"
        self.assertFalse(graph == altered_node)

        # Blank the sequences of both end nodes and re-insert the edge
        # under the "*" key, to check that the comparison of virtual
        # elements works.
        altered_edge = copy.deepcopy(round_tripped)
        for end_node in ("1", "2"):
            altered_edge.node(end_node)["sequence"] = "*"
        edge_data = altered_edge.edge("1_to_2")
        altered_edge.remove_edge("1_to_2")
        altered_edge._graph.add_edge("1", "2", key="*", **edge_data)
        self.assertFalse(graph == altered_edge)

        # --- GFA1 sample: parse twice, then round-trip through dump(1) ---
        graph.clear()
        graph.from_string(sample_gfa1)

        reparsed = gfa.GFA()
        reparsed.from_string(sample_gfa1)
        self.assertTrue(graph == reparsed)

        round_tripped = gfa.GFA()
        round_tripped.from_string(graph.dump(1))
        self.assertTrue(round_tripped == graph)


    def test_neighborhood_operation(self):
        """Check ``neighbors`` on the GFA1 sample.

        Nodes "6" and "1" must be reported as neighbors of node "2",
        node "5" must not, and asking for the neighbors of an unknown
        node must raise ``GFAError``.
        """
        self.graph.clear()
        self.graph.from_string(sample_gfa1)

        neighbors_ = self.graph.neighbors("2")
        # assertIn/assertNotIn show the container when a check fails.
        self.assertIn("6", neighbors_)
        self.assertIn("1", neighbors_)
        self.assertNotIn("5", neighbors_)

        with self.assertRaises(gfa.GFAError):
            self.graph.neighbors("42")
Пример #8
0
class TestLine(unittest.TestCase):
    """The main approach is the same, to test the serialization either
    with dictionaries and with objects.

    The dictionaries are taken directly after adding the element to the
    GFA graph, this way it's possible to test the effective goal of the
    serializer.

    For dictionaries, it's tricky to test the opt_fields they
    could contain, because their order is not
    guaranteed. So, at least in the case of dictionaries, a limit
    of 1 opt_field is used.

    :Note:
        Especially with the serializer, 100% coverage cannot be expected
        due to the large number of possible configurations. General
        coverage from both dictionaries and objects is the
        main goal.
    """
    graph = gfa.GFA()

    def test_serialize_gfa1_node(self):
        """Serialize nodes to GFA1 ``S`` lines, from objects and graph data.

        Each case is checked twice where possible: once on the ``Node``
        object and once on the data the graph returns for the same
        identifier. A node whose sequence is deleted or set to ``None``
        is expected to serialize to the empty string.
        """
        full_line = "S\t1\tACGT\tLN:i:42\txx:Z:test"

        self.graph.clear()
        node_ = node.Node.from_line(segment.SegmentV1.from_string(full_line))
        self.graph.add_node(node_)
        self.assertEqual(gs1.serialize_node(node_), full_line)
        self.assertEqual(
            gs1.serialize_node(self.graph.nodes(identifier="1")),
            full_line)

        self.graph.clear()
        node_ = node.Node.from_line(
            segment.SegmentV1.from_string("S\t1\tACGT"))
        self.graph.add_node(node_)
        node2 = node.Node.from_line(
            segment.SegmentV1.from_string("S\t2\t*"))
        self.graph.add_node(node2)

        # no LN field was given for node 1: the expected line carries
        # LN:i:4, while the "*" sequence of node 2 gets no LN field
        self.assertEqual(gs1.serialize_node(node_), "S\t1\tACGT\tLN:i:4")
        self.assertEqual(
            gs1.serialize_node(self.graph.nodes(identifier="1")),
            "S\t1\tACGT\tLN:i:4")
        self.assertEqual(
            gs1.serialize_node(self.graph.nodes(identifier="2")),
            "S\t2\t*")

        # dictionary without a sequence
        del self.graph.nodes(identifier="1")['sequence']
        self.assertEqual(
            gs1.serialize_node(self.graph.nodes(identifier="1")), "")

        # object whose sequence is None
        invalid_node = copy.deepcopy(node_)
        invalid_node._sequence = None
        self.assertEqual(gs1.serialize_node(invalid_node), "")

        # object without the sequence attribute at all
        invalid_node = copy.deepcopy(node_)
        del invalid_node._sequence
        self.assertEqual(gs1.serialize_node(invalid_node), "")

    def test_serialize_gfa1_edge(self):
        """Serialize edges to GFA1 lines, from Edge objects and graph data.

        Expected results pinned below: links and containments keep their
        GFA1 form, an alignment set to "42,42" comes out as ``*``, the
        GFA2 ``E`` line used for ``edge_link`` comes out as an ``L`` line,
        and fragments and gaps serialize to the empty string.
        """
        link_line = "L\t1\t+\t3\t+\t12M\tID:Z:1_to_3\txx:Z:test"
        link_line_no_id = "L\t1\t+\t3\t+\t12M\txx:Z:test"
        containment_line = "C\t1\t+\t5\t+\t12\t120M\tID:Z:1_to_5\txx:Z:test"
        containment_line_no_id = "C\t1\t+\t5\t+\t12\t120M\txx:Z:test"
        # expected containment line once its alignment is set to "42,42"
        containment_line_star = "C\t1\t+\t5\t+\t12\t*\tID:Z:1_to_5\txx:Z:test"
        fragment_line = \
            "F\t2\tread1+\t0\t42\t12\t55\t*\tid:Z:read1_in_2\txx:Z:test"
        edge_line = "E\t2_to_6\t2+\t6+\t0\t122$\t0\t132\t42,42,42\txx:Z:test"
        # expected GFA1 rendering of edge_line
        edge_as_link_line = "L\t2\t+\t6\t+\t*\tID:Z:2_to_6\txx:Z:test"
        gap_line = "G\t2_to_12\t2-\t12+\t500\t50\txx:Z:test"

        self.graph.clear()
        link_ = ge.Edge.from_line(link.Link.from_string(link_line))
        link_without_id = ge.Edge.from_line(
            link.Link.from_string(link_line_no_id))
        containment_ = ge.Edge.from_line(
            containment.Containment.from_string(containment_line))
        containment_with_trace = ge.Edge.from_line(
            containment.Containment.from_string(containment_line))
        containment_with_trace._alignment = "42,42"
        containment_without_id = ge.Edge.from_line(
            containment.Containment.from_string(containment_line_no_id))
        fragment_ = ge.Edge.from_line(
            fragment.Fragment.from_string(fragment_line))
        edge_link = ge.Edge.from_line(edge.Edge.from_string(edge_line))
        # TODO: RECOGNIZE THIS CONTAINMENT
        # (not asserted on yet; building it still exercises the parser)
        edge_containment = ge.Edge.from_line(
            edge.Edge.from_string(
                "E\t2_to_6\t2+\t6+\t0\t122$\t10\t132\t42,42,42\txx:Z:test"))
        gap_ = ge.Edge.from_line(gap.Gap.from_string(gap_line))

        # --- objects ---
        self.assertEqual(gs1.serialize_edge(link_), link_line)
        self.assertEqual(gs1.serialize_edge(link_without_id), link_line_no_id)
        self.assertEqual(gs1.serialize_edge(containment_), containment_line)
        self.assertEqual(
            gs1.serialize_edge(containment_with_trace),
            containment_line_star)
        self.assertEqual(
            gs1.serialize_edge(containment_without_id),
            containment_line_no_id)
        self.assertEqual(gs1.serialize_edge(fragment_), "")
        # the edge alignment is a trace, which is not valid in GFA1,
        # so a * is placed.
        self.assertEqual(gs1.serialize_edge(edge_link), edge_as_link_line)
        self.assertEqual(gs1.serialize_edge(gap_), "")

        # --- dictionaries ---
        # The insertion order matters: the virtual_N identifiers used
        # below are assumed to be handed out in this order.
        self.graph.add_edge(link_line)
        self.graph.add_edge(containment_line)
        # virtual_0 here
        self.graph.add_edge(fragment_line)
        self.graph.add_edge(edge_line)
        self.graph.add_edge(gap_line)
        # virtual_1 here
        self.graph.add_edge(containment_line_no_id)
        # virtual_2 here
        self.graph.add_edge(link_line_no_id)

        self.assertEqual(
            gs1.serialize_edge(
                self.graph.edges(identifier="1_to_3"), "1_to_3"),
            link_line)
        self.assertEqual(
            gs1.serialize_edge(
                self.graph.edges(identifier="1_to_5"), "1_to_5"),
            containment_line)

        self.graph.edges(identifier="1_to_5")['alignment'] = "42,42"
        self.assertEqual(
            gs1.serialize_edge(
                self.graph.edges(identifier="1_to_5"), "1_to_5"),
            containment_line_star)

        self.assertEqual(
            gs1.serialize_edge(
                self.graph.edges(identifier="virtual_0"), "virtual_0"),
            "")
        self.assertEqual(
            gs1.serialize_edge(
                self.graph.edges(identifier="2_to_6"), "2_to_6"),
            edge_as_link_line)
        self.assertEqual(
            gs1.serialize_edge(
                self.graph.edges(identifier="2_to_12"), "2_to_12"),
            "")
        self.assertEqual(
            gs1.serialize_edge(
                self.graph.edges(identifier="virtual_1"), "virtual_1"),
            containment_line_no_id)
        self.assertEqual(
            gs1.serialize_edge(
                self.graph.edges(identifier="virtual_2"), "virtual_2"),
            link_line_no_id)

    def test_serialize_gfa1_subgraph(self):
        """Serialize subgraphs to GFA1 ``P`` lines.

        Paths and ordered groups are expected to come out as ``P`` lines,
        while the unordered group is expected to serialize to the empty
        string. Every case is run on ``Subgraph`` objects first and then
        on the dictionaries obtained from the graph.
        """
        path_line = "P\t15\t11+,13+\t120M"
        ogroup_line = "O\t15\t11+ 11_to_13+ 13+\txx:i:-1"
        ugroup_line = "U\t16\t11 13 11_to_13 16sub"
        # overlaps are not defined, so a * is placed instead
        ogroup_as_path = "P\t15\t11+,11_to_13+,13+\t*\txx:i:-1"
        # elements that don't point to node are removed if a gfa is
        # provided
        ogroup_as_path_nodes_only = "P\t15\t11+,13+\t*\txx:i:-1"

        self.graph.clear()
        self.graph.add_node("S\t11\t140\t*\txx:i:11")
        self.graph.add_node("S\t13\t150\t*")
        self.graph.add_edge("E\t11_to_13\t11+\t13+\t20\t140$\t0\t120\t120M")

        path_ = sg.Subgraph.from_line(path.Path.from_string(path_line))
        ogroup = sg.Subgraph.from_line(group.OGroup.from_string(ogroup_line))
        ugroup = sg.Subgraph.from_line(group.UGroup.from_string(ugroup_line))

        # --- objects ---
        self.assertEqual(
            gs1.serialize_subgraph(path_, "path id:15"), path_line)
        self.assertEqual(
            gs1.serialize_subgraph(ogroup, "ogroup: 15"), ogroup_as_path)
        self.assertEqual(
            gs1.serialize_subgraph(ogroup, "ogroup: 15", self.graph),
            ogroup_as_path_nodes_only)
        self.assertEqual(
            gs1.serialize_subgraph(ugroup, "ugroup: 16", self.graph), "")

        # --- test with dictionaries ---
        self.graph.add_subgraph(path_line)
        self.assertEqual(
            gs1.serialize_subgraph(
                self.graph.subgraphs("15").as_dict(),
                "graph -> path id:15"),
            path_line)
        self.graph.remove_subgraph("15")

        self.graph.add_subgraph(ogroup_line)
        self.assertEqual(
            gs1.serialize_subgraph(
                self.graph.subgraphs("15").as_dict(),
                "graph -> ogroup id:15"),
            ogroup_as_path)
        self.graph.remove_subgraph("15")

        self.graph.add_subgraph(ogroup_line)
        self.assertEqual(
            gs1.serialize_subgraph(
                self.graph.subgraphs("15").as_dict(),
                "graph -> ogroup id:15",
                self.graph),
            ogroup_as_path_nodes_only)
        self.graph.remove_subgraph("15")

        self.graph.add_subgraph(ugroup_line)
        self.assertEqual(
            gs1.serialize_subgraph(
                self.graph.subgraphs("16").as_dict(),
                "graph -> ugroup id:16"),
            "")
        self.graph.remove_subgraph("16")

    def test_serialize_gfa1_graph(self):
        """Round-trip a small GFA1 graph through ``gs1.serialize_gfa``.

        The graph rebuilt from the serialized text must expose the same
        nodes, edges and subgraphs as the graph it was serialized from.
        """
        self.graph.clear()

        gfa1_lines = [
            "S\t11\t*\txx:i:11\n",
            "S\t13\t*\n",
            "L\t11\t+\t13\t+\t120M\n",
            "P\t15\t11+,13+\t120M\n",
        ]
        self.graph.from_string("".join(gfa1_lines))

        rebuilt = gfa.GFA()
        rebuilt.from_string(gs1.serialize_gfa(self.graph))

        same_nodes = self.graph.nodes(data=True) == rebuilt.nodes(data=True)
        self.assertTrue(same_nodes)

        same_edges = \
            self.graph.edges(adj_dict=True) == rebuilt.edges(adj_dict=True)
        self.assertTrue(same_edges)

        same_subgraphs = self.graph.subgraphs() == rebuilt.subgraphs()
        self.assertTrue(same_subgraphs)


################################################################################

    def test_serialize_gfa2_node(self):
        """Serialize nodes to GFA2 ``S`` lines, from objects and graph data.

        Nodes built from GFA1 segments are expected to come out with the
        length as third field (taken from ``LN`` when present, 4 for the
        bare "ACGT" segment). A node whose sequence is deleted or set to
        ``None`` is expected to serialize to the empty string.
        """
        v1_line = "S\t1\tACGT\tLN:i:42\txx:Z:test"
        v1_as_gfa2 = "S\t1\t42\tACGT\txx:Z:test"
        v2_line = "S\t1\t122\t*\txx:Z:test"

        self.graph.clear()
        node_ = node.Node.from_line(segment.SegmentV1.from_string(v1_line))
        node_v2 = node.Node.from_line(segment.SegmentV2.from_string(v2_line))

        # --- objects ---
        self.assertEqual(gs2.serialize_node(node_), v1_as_gfa2)
        self.assertEqual(gs2.serialize_node(node_v2), v2_line)

        # --- dictionaries; both nodes share id "1", so they are added
        # and removed one at a time ---
        self.graph.add_node(node_)
        self.assertEqual(
            gs2.serialize_node(
                self.graph.nodes(identifier="1"),
                "gfa2_node 1 from SegmentV1"),
            v1_as_gfa2)
        self.graph.remove_node("1")

        self.graph.add_node(node_v2)
        self.assertEqual(
            gs2.serialize_node(
                self.graph.nodes(identifier="1"),
                "gfa2_node 1 from SegmentV2"),
            v2_line)
        self.graph.remove_node("1")

        self.graph.clear()
        node_ = node.Node.from_line(
            segment.SegmentV1.from_string("S\t1\tACGT"))
        self.graph.add_node(node_)
        self.assertEqual(
            gs2.serialize_node(node_, "gfa2 node 1 without length"),
            "S\t1\t4\tACGT")

        node_ = node.Node.from_line(
            segment.SegmentV1.from_string("S\t2\tACGT\tLN:i:42\txx:Z:test"))
        self.graph.add_node(node_)
        self.assertEqual(
            gs2.serialize_node(
                self.graph.nodes(identifier="2"),
                "gfa2 node 2 with length"),
            "S\t2\t42\tACGT\txx:Z:test")

        # dictionary without a sequence
        del self.graph.nodes(identifier="1")['sequence']
        self.assertEqual(
            gs2.serialize_node(self.graph.nodes(identifier="1")), "")

        # object whose sequence is None
        invalid_node = copy.deepcopy(node_)
        invalid_node._sequence = None
        self.assertEqual(gs2.serialize_node(invalid_node), "")

        # object without the sequence attribute at all
        invalid_node = copy.deepcopy(node_)
        del invalid_node._sequence
        self.assertEqual(gs2.serialize_node(invalid_node), "")

    def test_serialize_gfa2_edge(self):
        """Serialize edges to GFA2 lines, from Edge objects and graph data.

        Expected results pinned below: edges built from GFA1 link and
        containment lines serialize to the empty string, while fragments,
        GFA2 edges and gaps are expected back unchanged.
        """
        link_line = "L\t1\t+\t3\t+\t12M\tID:Z:1_to_3\txx:Z:test"
        link_line_no_id = "L\t1\t+\t3\t+\t12M\txx:Z:test"
        containment_line = "C\t1\t+\t5\t+\t12\t120M\tID:Z:1_to_5\txx:Z:test"
        containment_line_no_id = "C\t1\t+\t5\t+\t12\t120M\txx:Z:test"
        fragment_line = "F\t2\tread1+\t0\t42\t12\t55\t*\tid:Z:read1_in_2"
        edge_line = "E\t2_to_6\t2+\t6+\t0\t122$\t10\t132\t42,42,42\txx:Z:test"
        gap_line = "G\t2_to_12\t2-\t12+\t500\t50\txx:Z:test"

        self.graph.clear()
        link_ = ge.Edge.from_line(link.Link.from_string(link_line))
        link_without_id = ge.Edge.from_line(
            link.Link.from_string(link_line_no_id))
        containment_ = ge.Edge.from_line(
            containment.Containment.from_string(containment_line))
        containment_with_trace = ge.Edge.from_line(
            containment.Containment.from_string(containment_line))
        containment_with_trace._alignment = "42,42"
        containment_without_id = ge.Edge.from_line(
            containment.Containment.from_string(containment_line_no_id))
        fragment_ = ge.Edge.from_line(
            fragment.Fragment.from_string(fragment_line))
        edge_ = ge.Edge.from_line(edge.Edge.from_string(edge_line))
        gap_ = ge.Edge.from_line(gap.Gap.from_string(gap_line))

        # --- objects ---
        self.assertEqual(gs2.serialize_edge(link_, "gfa2 link"), "")
        self.assertEqual(
            gs2.serialize_edge(link_without_id, "gfa2 link2"), "")
        self.assertEqual(
            gs2.serialize_edge(containment_, "gfa2 containment"), "")
        self.assertEqual(
            gs2.serialize_edge(containment_with_trace, "gfa2 containment"),
            "")
        self.assertEqual(
            gs2.serialize_edge(containment_without_id, "gfa2 containment"),
            "")

        self.assertEqual(
            gs2.serialize_edge(fragment_, "gfa2 fragment"), fragment_line)

        # unlike in the GFA1 serializer test, the trace alignment
        # (42,42,42) is expected to be kept as is, not replaced by a *
        self.assertEqual(gs2.serialize_edge(edge_, "gfa2 edge"), edge_line)

        self.assertEqual(gs2.serialize_edge(gap_, "gfa2 gap"), gap_line)

        # --- test dictionaries ---
        # The insertion order matters: the virtual_N identifiers used
        # below are assumed to be handed out in this order.
        self.graph.add_edge(link_line)
        self.graph.add_edge(containment_line)
        # virtual_0 here
        self.graph.add_edge(fragment_line)
        self.graph.add_edge(edge_line)
        self.graph.add_edge(gap_line)
        # virtual_1 here
        self.graph.add_edge(containment_line_no_id)
        # virtual_2 here
        self.graph.add_edge(link_line_no_id)

        self.assertEqual(
            gs2.serialize_edge(
                self.graph.edges(identifier="1_to_3"),
                "gfa2 link 1_to_3"),
            "")
        self.assertEqual(
            gs2.serialize_edge(
                self.graph.edges(identifier="1_to_5"),
                "gfa2 containment 1_to_5"),
            "")

        self.graph.edges(identifier="1_to_5")['alignment'] = "42,42"
        self.assertEqual(
            gs2.serialize_edge(
                self.graph.edges(identifier="1_to_5"),
                "gfa2 containment: 1_to_5"),
            "")

        self.assertEqual(
            gs2.serialize_edge(
                self.graph.edges(identifier="virtual_0"),
                "gfa2 fragment: virtual_0"),
            fragment_line)
        self.assertEqual(
            gs2.serialize_edge(
                self.graph.edges(identifier="2_to_6"),
                "gfa2 edge: 2_to_6"),
            edge_line)
        self.assertEqual(
            gs2.serialize_edge(
                self.graph.edges(identifier="2_to_12"),
                "gfa2 gap: 2_to_12"),
            gap_line)
        self.assertEqual(
            gs2.serialize_edge(
                self.graph.edges(identifier="virtual_1"),
                "gfa2 containment without id: virtual_1"),
            "")
        self.assertEqual(
            gs2.serialize_edge(
                self.graph.edges(identifier="virtual_2"),
                "gfa2 link without id: virtual_2"),
            "")

    def test_serialize_gfa2_subgraph(self):
        """Serialize subgraphs to GFA2 ``O``/``U`` lines.

        The GFA1 path is expected to come out as an ``O`` line without its
        overlap field; ordered and unordered groups are expected back
        unchanged, with or without the graph passed to the serializer.
        Every case is run on ``Subgraph`` objects first and then on the
        dictionaries obtained from the graph.
        """
        path_line = "P\t15\t11+,13+\t120M"
        path_as_ogroup = "O\t15\t11+ 13+"
        ogroup_line = "O\t15\t11+ 11_to_13+ 13+\txx:i:-1"
        ugroup_line = "U\t16\t11 13 11_to_13 16sub"

        self.graph.clear()
        self.graph.add_node("S\t11\t140\t*\txx:i:11")
        self.graph.add_node("S\t13\t150\t*")
        self.graph.add_edge("E\t11_to_13\t11+\t13+\t20\t140$\t0\t120\t120M")

        path_ = sg.Subgraph.from_line(path.Path.from_string(path_line))
        ogroup = sg.Subgraph.from_line(group.OGroup.from_string(ogroup_line))
        ugroup = sg.Subgraph.from_line(group.UGroup.from_string(ugroup_line))

        # --- objects ---
        self.assertEqual(
            gs2.serialize_subgraph(path_, "gfa2 path id:15"),
            path_as_ogroup)
        self.assertEqual(
            gs2.serialize_subgraph(ogroup, "gfa2 ogroup: 15"),
            ogroup_line)
        self.assertEqual(
            gs2.serialize_subgraph(ogroup, "gfa2 ogroup: 15", self.graph),
            ogroup_line)
        self.assertEqual(
            gs2.serialize_subgraph(ugroup, "ugroup: 16", self.graph),
            ugroup_line)

        # --- test with dictionaries ---
        self.graph.add_subgraph(path_line)
        self.assertEqual(
            gs2.serialize_subgraph(
                self.graph.subgraphs("15").as_dict(),
                "gfa2 graph -> path id:15"),
            path_as_ogroup)
        self.graph.remove_subgraph("15")

        self.graph.add_subgraph(ogroup_line)
        self.assertEqual(
            gs2.serialize_subgraph(
                self.graph.subgraphs("15").as_dict(),
                "graph -> ogroup id:15"),
            ogroup_line)
        self.graph.remove_subgraph("15")

        self.graph.add_subgraph(ogroup_line)
        self.assertEqual(
            gs2.serialize_subgraph(
                self.graph.subgraphs("15").as_dict(),
                "graph -> ogroup id:15",
                self.graph),
            ogroup_line)
        self.graph.remove_subgraph("15")

        self.graph.add_subgraph(ugroup_line)
        self.assertEqual(
            gs2.serialize_subgraph(
                self.graph.subgraphs("16").as_dict(),
                "graph -> ugroup id:16"),
            ugroup_line)
        self.graph.remove_subgraph("16")

    def test_serialize_gfa2_graph(self):
        """Round-trip a small GFA2 graph through ``gs2.serialize_gfa`` twice.

        The graph rebuilt from the serialized text must expose the same
        nodes, edges and subgraphs as the original, and serializing the
        rebuilt graph once more must again give an equivalent graph.
        """
        self.graph.clear()

        gfa2_lines = [
            "S\t11\t42\t*\txx:i:11\n",
            "S\t21\t13\t*\n",
            "E\t15\t11+\t13+\t21\t42\t42\t21\t120M\n",
            "O\t33\t11+ 13+\n",
        ]
        self.graph.from_string("".join(gfa2_lines))

        # first round trip: self.graph -> text -> first_copy
        first_copy = gfa.GFA()
        first_copy.from_string(gs2.serialize_gfa(self.graph))

        same_nodes = \
            self.graph.nodes(data=True) == first_copy.nodes(data=True)
        self.assertTrue(same_nodes)
        same_edges = \
            self.graph.edges(adj_dict=True) == first_copy.edges(adj_dict=True)
        self.assertTrue(same_edges)
        same_subgraphs = self.graph.subgraphs() == first_copy.subgraphs()
        self.assertTrue(same_subgraphs)

        # second round trip: first_copy -> text -> second_copy
        second_copy = gfa.GFA()
        second_copy.from_string(gs2.serialize_gfa(first_copy))

        same_nodes = \
            second_copy.nodes(data=True) == first_copy.nodes(data=True)
        self.assertTrue(same_nodes)
        same_edges = \
            second_copy.edges(adj_dict=True) == first_copy.edges(adj_dict=True)
        self.assertTrue(same_edges)
        same_subgraphs = second_copy.subgraphs() == first_copy.subgraphs()
        self.assertTrue(same_subgraphs)