def test_consistent_ids(self) -> None:
    """
    Verify that `to_canonical_graph` creates consistent identifiers for
    blank nodes even when the graph changes.

    Two triple sets, `g0_ts` and `g1_ts`, are created, each describing one
    person through its own blank node. A canonical graph is built from only
    the first triple set (`cg0`) and another from both triple sets
    (`cg1`); the triples of `cg0` must then be a subset of those of `cg1`.

    This fails if `to_canonical_graph` does not generate consistent
    identifiers for blank nodes when the graph changes. That property is
    essential for `to_canonical_graph` to be useful for diffing graphs.
    """
    bnode0 = BNode()
    g0_ts: _TripleSet = {
        (bnode0, FOAF.name, Literal("Golan Trevize")),
        (bnode0, RDF.type, FOAF.Person),
    }
    bnode1 = BNode()
    g1_ts: _TripleSet = {
        (bnode1, FOAF.name, Literal("Janov Pelorat")),
        (bnode1, RDF.type, FOAF.Person),
    }

    # Canonical form of the graph holding only the first triple set.
    g0 = Graph()
    g0 += g0_ts
    cg0 = to_canonical_graph(g0)
    cg0_ts = GraphHelper.triple_set(cg0)

    # Canonical form of the graph holding BOTH triple sets. Adding only
    # `g1_ts` here would make the subset check below impossible to satisfy,
    # because the two sets carry different name literals.
    g1 = Graph()
    g1 += g0_ts | g1_ts
    cg1 = to_canonical_graph(g1)
    cg1_ts = GraphHelper.triple_set(cg1)

    assert cg0_ts.issubset(
        cg1_ts
    ), "canonical triple set cg0_ts should be a subset of canonical triple set cg1_ts"
def test_issue494_collapsing_bnodes():
    """Test for https://github.com/RDFLib/rdflib/issues/494 collapsing BNodes"""
    # NOTE(review): a function with this same name is defined again later in
    # this file; if both live at module level the later one shadows this
    # one -- confirm whether this version is still meant to run.
    rdf_object = RDF['object']
    rdf_predicate = RDF['predicate']
    rdf_subject = RDF['subject']
    rdf_type = RDF['type']
    rdf_statement = RDF['Statement']

    source = URIRef(u'source')
    target = URIRef(u'target')

    stmt_a = BNode('Na1a8fbcf755f41c1b5728f326be50994')
    stmt_b = BNode('Na713b02f320d409c806ff0190db324f4')
    stmt_c = BNode('Ndb804ba690a64b3dbb9063c68d5e3550')
    stmt_d = BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636')
    stmt_e = BNode('Nec6864ef180843838aa9805bac835c98')

    g = Graph()
    g += [
        (stmt_a, rdf_object, source),
        (stmt_a, rdf_predicate, BNode('vcb3')),
        (stmt_a, rdf_subject, BNode('vcb2')),
        (stmt_a, rdf_type, rdf_statement),
        (stmt_b, rdf_object, target),
        (stmt_b, rdf_predicate, BNode('vcb0')),
        (stmt_b, rdf_subject, source),
        (stmt_b, rdf_type, rdf_statement),
        (stmt_c, rdf_object, BNode('vr0KcS4')),
        (stmt_c, rdf_predicate, BNode('vrby3JV')),
        (stmt_c, rdf_subject, source),
        (stmt_c, rdf_type, rdf_statement),
        (stmt_d, rdf_object, source),
        (stmt_d, rdf_predicate, BNode('vcb5')),
        (stmt_d, rdf_subject, target),
        (stmt_d, rdf_type, rdf_statement),
        (stmt_e, rdf_object, source),
        (stmt_e, rdf_predicate, BNode('vcb4')),
        (stmt_e, rdf_subject, source),
        (stmt_e, rdf_type, rdf_statement),
    ]

    def report_and_collect_degrees(graph):
        # Print the diagnostic summary for `graph` and return the
        # descending list of per-node subject-position triple counts.
        print('graph length: %d, nodes: %d' % (len(graph), len(graph.all_nodes())))
        print('triple_bnode degrees:')
        for statement_node in graph.subjects(rdf_type, rdf_statement):
            print(len(list(graph.triples([statement_node, None, None]))))
        print('all node degrees:')
        degrees = [
            len(list(graph.triples([n, None, None])))
            for n in graph.all_nodes()
        ]
        degrees.sort(reverse=True)
        print(degrees)
        return degrees

    g_node_degs = report_and_collect_degrees(g)
    cg = to_canonical_graph(g)
    cg_node_degs = report_and_collect_degrees(cg)

    assert len(g) == len(cg), \
        'canonicalization changed number of triples in graph'
    assert len(g.all_nodes()) == len(cg.all_nodes()), \
        'canonicalization changed number of nodes in graph'
    statements_before = list(g.subjects(rdf_type, rdf_statement))
    statements_after = list(cg.subjects(rdf_type, rdf_statement))
    assert len(statements_before) == len(statements_after), \
        'canonicalization changed number of statements'
    assert g_node_degs == cg_node_degs, \
        'canonicalization changed node degrees'
def test_issue725_collapsing_bnodes_2():
    """Check that canonicalization keeps the node structure of a graph.

    The graph holds five blank nodes typed `rdf:Statement`, all with the
    same `rdf:subject`; several of them share the blank nodes used as
    `rdf:predicate` (`v0`) and `rdf:object` (`v1`). After
    `to_canonical_graph` the number of triples, nodes and statements must
    be unchanged, and so must the multiset of how often each node occurs in
    subject, predicate and object position.
    """
    target = URIRef("urn:gp_learner:fixed_var:target")
    source = URIRef("urn:gp_learner:fixed_var:source")

    stmt_a = BNode("N0a76d42406b84fe4b8029d0a7fa04244")
    stmt_b = BNode("N2f62af5936b94a8eb4b1e4bfa8e11d95")
    stmt_c = BNode("N5ae541f93e1d4e5880450b1bdceb6404")
    stmt_d = BNode("N86ac7ca781f546ae939b8963895f672e")
    stmt_e = BNode("Nac82b883ca3849b5ab6820b7ac15e490")

    g = Graph()
    g += [
        (stmt_a, RDF["object"], BNode("v2")),
        (stmt_a, RDF["predicate"], BNode("v0")),
        (stmt_a, RDF["subject"], target),
        (stmt_a, RDF["type"], RDF["Statement"]),
        (stmt_b, RDF["object"], BNode("v1")),
        (stmt_b, RDF["predicate"], BNode("v0")),
        (stmt_b, RDF["subject"], target),
        (stmt_b, RDF["type"], RDF["Statement"]),
        (stmt_c, RDF["object"], BNode("v5")),
        (stmt_c, RDF["predicate"], BNode("v4")),
        (stmt_c, RDF["subject"], target),
        (stmt_c, RDF["type"], RDF["Statement"]),
        (stmt_d, RDF["object"], source),
        (stmt_d, RDF["predicate"], BNode("v0")),
        (stmt_d, RDF["subject"], target),
        (stmt_d, RDF["type"], RDF["Statement"]),
        (stmt_e, RDF["object"], BNode("v1")),
        (stmt_e, RDF["predicate"], BNode("v3")),
        (stmt_e, RDF["subject"], target),
        (stmt_e, RDF["type"], RDF["Statement"]),
    ]

    def position_signature(graph):
        # For each triple position (subject, predicate, object) count how
        # often every node occurs there, then keep only the sorted counts so
        # the result does not depend on the node identifiers.
        counts = Counter(), Counter(), Counter()
        for triple in graph:
            for position, node in enumerate(triple):
                # Key by the node, not by the triple: triples are unique in
                # a graph, so keying by triple would make every count 1 and
                # the comparison meaningless.
                counts[position][node] += 1
        return [sorted(counter.values()) for counter in counts]

    # First pass: canonicalize while collecting statistics.
    stats: dict = {}
    cg = rdflib.compare.to_canonical_graph(g, stats=stats)
    assert len(g.all_nodes()) == len(cg.all_nodes())

    # Second pass: canonicalize without the stats argument.
    cg = to_canonical_graph(g)
    assert len(g) == len(cg), \
        'canonicalization changed number of triples in graph'
    assert len(g.all_nodes()) == len(cg.all_nodes()), \
        'canonicalization changed number of nodes in graph'
    assert len(list(g.subjects(RDF['type'], RDF['Statement']))) == \
        len(list(cg.subjects(RDF['type'], RDF['Statement']))), \
        'canonicalization changed number of statements'

    assert position_signature(g) == position_signature(cg), \
        'canonicalization changed node position counts'
def test_issue494_collapsing_bnodes():
    """Test for https://github.com/RDFLib/rdflib/issues/494 collapsing BNodes.

    Builds a graph of five blank nodes typed `rdf:Statement` and checks that
    `to_canonical_graph` preserves the number of triples, nodes and
    statements, the per-node count of triples with that node as subject, and
    the multiset of how often each node occurs in subject, predicate and
    object position.
    """
    source = URIRef("source")
    target = URIRef("target")

    stmt_a = BNode("Na1a8fbcf755f41c1b5728f326be50994")
    stmt_b = BNode("Na713b02f320d409c806ff0190db324f4")
    stmt_c = BNode("Ndb804ba690a64b3dbb9063c68d5e3550")
    stmt_d = BNode("Ndfc47fb1cd2d4382bcb8d5eb7835a636")
    stmt_e = BNode("Nec6864ef180843838aa9805bac835c98")

    g = Graph()
    g += [
        (stmt_a, RDF["object"], source),
        (stmt_a, RDF["predicate"], BNode("vcb3")),
        (stmt_a, RDF["subject"], BNode("vcb2")),
        (stmt_a, RDF["type"], RDF["Statement"]),
        (stmt_b, RDF["object"], target),
        (stmt_b, RDF["predicate"], BNode("vcb0")),
        (stmt_b, RDF["subject"], source),
        (stmt_b, RDF["type"], RDF["Statement"]),
        (stmt_c, RDF["object"], BNode("vr0KcS4")),
        (stmt_c, RDF["predicate"], BNode("vrby3JV")),
        (stmt_c, RDF["subject"], source),
        (stmt_c, RDF["type"], RDF["Statement"]),
        (stmt_d, RDF["object"], source),
        (stmt_d, RDF["predicate"], BNode("vcb5")),
        (stmt_d, RDF["subject"], target),
        (stmt_d, RDF["type"], RDF["Statement"]),
        (stmt_e, RDF["object"], source),
        (stmt_e, RDF["predicate"], BNode("vcb4")),
        (stmt_e, RDF["subject"], source),
        (stmt_e, RDF["type"], RDF["Statement"]),
    ]

    def subject_degrees(graph):
        # Number of triples each node appears in as subject, largest first.
        return sorted(
            (
                sum(1 for _ in graph.triples((node, None, None)))
                for node in graph.all_nodes()
            ),
            reverse=True,
        )

    def position_signature(graph):
        # For each triple position (subject, predicate, object) count how
        # often every node occurs there, then keep only the sorted counts so
        # the result does not depend on the node identifiers.
        counts = Counter(), Counter(), Counter()
        for triple in graph:
            for position, node in enumerate(triple):
                # Key by the node, not by the triple: triples are unique in
                # a graph, so keying by triple would make every count 1 and
                # the comparison meaningless.
                counts[position][node] += 1
        return [sorted(counter.values()) for counter in counts]

    # Measure the input graph before canonicalizing it.
    g_node_degs = subject_degrees(g)
    g_count_signature = position_signature(g)

    cg = to_canonical_graph(g)

    cg_node_degs = subject_degrees(cg)
    cg_count_signature = position_signature(cg)

    assert len(g) == len(cg), "canonicalization changed number of triples in graph"
    assert len(g.all_nodes()) == len(
        cg.all_nodes()
    ), "canonicalization changed number of nodes in graph"
    assert len(list(g.subjects(RDF["type"], RDF["Statement"]))) == len(
        list(cg.subjects(RDF["type"], RDF["Statement"]))
    ), "canonicalization changed number of statements"
    assert g_node_degs == cg_node_degs, "canonicalization changed node degrees"
    assert (
        g_count_signature == cg_count_signature
    ), "canonicalization changed node position counts"