def check_graph(graph, type):
    """Check whether this is an update or a create by comparison with the live graph."""
    if not graph:
        return False
    result = graph.query("""SELECT DISTINCT ?subject WHERE {?subject ?b ?c}""")
    # For every subject of the changeset graph, try to find other triples in
    # DBpedia Live to distinguish between add/update/delete.
    events = []
    for subject in result:
        resource = subject[0]
        if resource.find(DBpedia.DBPEDIAURL) == 0:
            # apply only for resources on the server with the DBpedia URL
            live_resource = DBpedia.liveize(resource)
            # the online version of DBpedia Live has different URIs than the changeset URIs
            onl_graph = rdflib.Graph()
            try:
                onl_graph.parse(live_resource)
                onl_iso = to_isomorphic(onl_graph)
                loc_iso = to_isomorphic(graph)
                in_both, in_onl, in_loc = graph_diff(onl_iso, loc_iso)
                event_type = "notupdated"
                event = None
                for res_of_diff, b, c in in_onl:
                    # if the live graph has more triples about the resource, it is an update
                    if str(live_resource) == str(res_of_diff):
                        event_type = "update"
                        break
                if event_type == "notupdated" and type == "added":
                    event = ResourceChange(uri=str(live_resource), changetype="CREATE")
                elif event_type == "update" and type == "added":
                    event = ResourceChange(uri=str(live_resource), changetype="UPDATE")
                else:
                    event = ResourceChange(uri=str(live_resource), changetype="DELETE")
                events.append(event)
            except Exception as e:
                print("Error parsing %s: %s" % (live_resource, e))
            # self.notify_observers(event)
    return events

def rdf_comparator(self, old_data: str, new_data: str) -> bool:
    old_graph = Graph()
    new_graph = Graph()
    old_graph.parse(data=old_data, format="turtle")
    new_graph.parse(data=new_data, format="turtle")

    old_iso = to_isomorphic(old_graph)
    # Remove the metadata-specific triples
    for t in list(old_iso.triples((None, MMNS.generation_date, None))):
        old_iso.remove(t)
    new_iso = to_isomorphic(new_graph)
    for t in list(new_iso.triples((None, MMNS.generation_date, None))):
        new_iso.remove(t)

    # Graph compare takes a looong time
    in_both, in_old, in_new = graph_diff(old_iso, new_iso)
    # if old_iso != new_iso:
    #     in_both, in_old, in_new = graph_diff(old_iso, new_iso)
    old_len = len(list(in_old))
    new_len = len(list(in_new))
    if old_len or new_len:
        if old_len:
            print("----- Old graph only -----")
            self._print_triples(in_old)
        if new_len:
            print("----- New graph only -----")
            self._print_triples(in_new)
        self.assertTrue(False, "RDF file mismatch")
        return False
    return True

def graphsAreTheSame(g1, g2, preprocessupdate=None):
    g1Copy = clone_graph(g1)
    g2Copy = clone_graph(g2)
    if preprocessupdate:
        g1Copy.update(preprocessupdate)
        g2Copy.update(preprocessupdate)
    return compare.to_isomorphic(g1Copy) == compare.to_isomorphic(g2Copy)

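# clone_graph is not shown in the snippet above; a minimal sketch of such a
# helper, assuming it only needs to produce an independent copy so that the
# SPARQL preprocessing updates do not mutate the input graphs:
from rdflib import Graph

def clone_graph(g):
    copy = Graph()
    copy += g  # Graph.__iadd__ copies every triple of g into the new graph
    return copy
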
def run_test(t_identifier, expected_output, source_type):
    expected_output_graph = ConjunctiveGraph()
    if os.path.isfile(config["properties"]["output_results"]):
        os.system("rm " + config["properties"]["output_results"])
    if expected_output:
        expected_output_graph.parse("./output.nq", format="nquads")
    os.system("mkdir results/" + t_identifier)
    os.system(config["properties"]["engine_command"] +
              " > results/" + t_identifier + "/" + source_type + ".log")
    # if there is an output file
    if os.path.isfile(config["properties"]["output_results"]):
        extension = config["properties"]["output_results"].split(".")[-1]
        os.system("cp " + config["properties"]["output_results"] +
                  " results/" + t_identifier + "/output-" + source_type + "." + extension)
        # and expected output is true
        if expected_output:
            output_graph = ConjunctiveGraph()
            iso_expected = compare.to_isomorphic(expected_output_graph)
            # try to parse the output (it may not be valid RDF)
            try:
                output_graph.parse(config["properties"]["output_results"],
                                   format=config["properties"]["output_format"])
                iso_output = compare.to_isomorphic(output_graph)
                # and the graphs are equal
                if iso_expected == iso_output:
                    result = passed
                # and the graphs are distinct
                else:
                    print("Output RDF does not match the expected RDF")
                    result = failed
            # output is not valid RDF
            except Exception:
                print("Output RDF is invalid")
                result = failed
        # and expected output is false
        else:
            print("Output RDF found but none was expected")
            result = failed
    # if there is no output file
    else:
        # and expected output is true
        if expected_output:
            print("No RDF output found while output was expected")
            result = failed
        # expected output is false
        else:
            result = passed
    results.append([config["tester"]["tester_name"], config["engine"]["engine_name"],
                    source_type, t_identifier, result])
    print(t_identifier + "," + result)

def test_issue682_signing_named_graphs():
    ns = Namespace("http://love.com#")

    mary = BNode()
    john = URIRef("http://love.com/lovers/john#")

    cmary = URIRef("http://love.com/lovers/mary#")
    cjohn = URIRef("http://love.com/lovers/john#")

    store = IOMemory()

    g = ConjunctiveGraph(store=store)
    g.bind("love", ns)

    gmary = Graph(store=store, identifier=cmary)
    gmary.add((mary, ns['hasName'], Literal("Mary")))
    gmary.add((mary, ns['loves'], john))

    gjohn = Graph(store=store, identifier=cjohn)
    gjohn.add((john, ns['hasName'], Literal("John")))

    ig = to_isomorphic(g)
    igmary = to_isomorphic(gmary)

    assert len(igmary) == len(gmary)
    assert len(ig) == len(g)
    assert len(igmary) < len(ig)
    assert ig.graph_digest() != igmary.graph_digest()

def test_issue655(self):
    # make sure that inf and nan are serialized correctly
    dt = XSD['double'].n3()
    self.assertEqual(
        Literal(float("inf"))._literal_n3(True), '"INF"^^%s' % dt)
    self.assertEqual(
        Literal(float("-inf"))._literal_n3(True), '"-INF"^^%s' % dt)
    self.assertEqual(
        Literal(float("nan"))._literal_n3(True), '"NaN"^^%s' % dt)

    dt = XSD['decimal'].n3()
    self.assertEqual(
        Literal(Decimal("inf"))._literal_n3(True), '"INF"^^%s' % dt)
    self.assertEqual(
        Literal(Decimal("-inf"))._literal_n3(True), '"-INF"^^%s' % dt)
    self.assertEqual(
        Literal(Decimal("nan"))._literal_n3(True), '"NaN"^^%s' % dt)
    self.assertEqual(
        Literal("inf", datatype=XSD['decimal'])._literal_n3(True), '"INF"^^%s' % dt)

    # assert that non-numerical literals aren't changed
    self.assertEqual(Literal('inf')._literal_n3(True), '"inf"')
    self.assertEqual(Literal('nan')._literal_n3(True), '"nan"')

    PROV = Namespace('http://www.w3.org/ns/prov#')
    bob = URIRef("http://example.org/object/Bob")

    # g1 is a simple graph with an inf and a nan value
    g1 = Graph()
    g1.add((bob, PROV.value, Literal(float("inf"))))
    g1.add((bob, PROV.value, Literal(float("nan"))))

    # Build g2 out of the deserialisation of g1's serialisation
    g2 = Graph()
    g2.parse(data=g1.serialize(format='turtle'), format='turtle')

    self.assertTrue(to_isomorphic(g1) == to_isomorphic(g2))

def run_test(self, tested_file=None, result_file=None, metadata_url=None,
             mode=CONST_STANDARD_MODE):
    result_graph_url = self._tests_location + result_file
    metadata_url = self._tests_location + metadata_url if metadata_url is not None else None
    csv_url = self._tests_location + tested_file if tested_file is not None else None
    converted = CSVWConverter.to_rdf(csv_url, metadata_url, mode)
    expected = Graph()
    expected.parse(result_graph_url)
    self.change_urls_in_result(expected)
    self.assertEqual(to_isomorphic(converted), to_isomorphic(expected))

def test_graph_diff(g1, g2):
    in_both, only_in_first, only_in_second = graph_diff(to_isomorphic(g1),
                                                        to_isomorphic(g2))
    only_in_first.namespace_manager = g1.namespace_manager
    only_in_second.namespace_manager = g2.namespace_manager
    ok_(len(only_in_second) == 0, f"""
<<<
{only_in_first.serialize(format='n3').decode('utf-8')}
===
{only_in_second.serialize(format='n3').decode('utf-8')}
>>>
""")

def test_compiler_v1_4():
    compiler = RDFCompiler()
    with open("tests/data/metadata_v14.ttl", "r") as _input_file:
        expected_graph = Graph()
        expected_graph.parse(data=_input_file.read(), format="ttl")
    _ = compiler.visit(metadata_v_1_4)
    expected = to_isomorphic(expected_graph)
    got = to_isomorphic(compiler.graph)
    for (t1, t2) in _squashed_graphs_triples(expected, got):
        assert t1 == t2
    assert isomorphic(expected, got)

def test_creates_entity_with_type(factory):
    ross = factory("rf_me")
    ross.rdf_type.add(factory('foaf_Person'))

    expected = Graph()
    expected.add((
        URIRef('http://rossfenning.co.uk/#me'),
        URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
        URIRef('http://xmlns.com/foaf/0.1/Person')))

    assert to_isomorphic(factory.store) == to_isomorphic(expected)

def test_load_single(self, mock_sparql_delete, mock_sparql_insert):
    with Store(self.data_path) as store:
        store.add("0000-0003-1527-0030")
        (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \
            store["0000-0003-1527-0030"]
        self.assertIsNone(last_update)

    graph1, add_graph1, delete_graph1 = load_single(
        "0000-0003-1527-0030", None, None, None, self.data_path,
        "http://vivo.mydomain.edu/sparql", "*****@*****.**", "password")
    self.assertEqual(319, len(add_graph1))
    self.assertEqual(0, len(delete_graph1))
    self.assertEqual(to_isomorphic(graph1), to_isomorphic(add_graph1))

    with Store(self.data_path) as store:
        # Last update now set
        (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \
            store["0000-0003-1527-0030"]
        self.assertIsNotNone(last_update)

    # Make sure turtle file created
    self.assertTrue(
        os.path.exists(
            os.path.join(self.data_path, "0000-0003-1527-0030.ttl")))

    # Now change a fact and run again. Changed fact is provided by vcr recording.
    # Changed year of Amherst degree.
    # Had to rig the Accept-Encoding to create the vcr recording with:
    # r = requests.get('https://pub.orcid.org/v2.0/%s' % orcid,
    #                  headers={"Accept": "application/json", "Accept-Encoding": "identity"})
    graph2, add_graph2, delete_graph2 = load_single(
        "0000-0003-1527-0030", None, None, None, self.data_path,
        "http://vivo.mydomain.edu/sparql", "*****@*****.**", "password")
    self.assertEqual(319, len(graph2))
    self.assertEqual(17, len(add_graph2))
    self.assertEqual(17, len(delete_graph2))

    mock_sparql_insert.assert_has_calls([
        call(add_graph1, "http://vivo.mydomain.edu/sparql", "*****@*****.**", "password"),
        call(add_graph2, "http://vivo.mydomain.edu/sparql", "*****@*****.**", "password")
    ])
    mock_sparql_delete.assert_has_calls([
        call(delete_graph1, "http://vivo.mydomain.edu/sparql", "*****@*****.**", "password"),
        call(delete_graph2, "http://vivo.mydomain.edu/sparql", "*****@*****.**", "password")
    ])

def do_algorithm(self, source_content: str, target_content: str) -> List[SyncOperation]:
    source_g = Graph().parse(format='turtle', data=source_content)
    target_g = Graph().parse(format='turtle', data=target_content)
    source_g_iso = to_isomorphic(source_g)
    target_g_iso = to_isomorphic(target_g)
    _, removals_graph, additions_graph = graph_diff(source_g_iso, target_g_iso)
    additions_ops = self._create_add_ops_from(additions_graph)
    removals_ops = self._create_remove_ops_from(removals_graph)
    return removals_ops + additions_ops

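# A self-contained sketch (made-up data) of the graph_diff ordering the method
# above relies on: triples only in the first argument (the source) come back
# second and become removals; triples only in the second argument (the target)
# come back third and become additions.
from rdflib import Graph
from rdflib.compare import graph_diff, to_isomorphic

_src = Graph().parse(data="<http://ex/s> <http://ex/p> <http://ex/old> .", format="turtle")
_tgt = Graph().parse(data="<http://ex/s> <http://ex/p> <http://ex/new> .", format="turtle")
_, _only_src, _only_tgt = graph_diff(to_isomorphic(_src), to_isomorphic(_tgt))
assert len(_only_src) == 1 and len(_only_tgt) == 1
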
def test_uses_alias(factory):
    factory.addAlias('favourite_cheese', 'http://rossfenning.co.uk/#favourite-cheese')
    ross = factory("rf_me")
    ross.favourite_cheese.add('Stinking Bishop')

    expected = Graph()
    expected.add((
        URIRef('http://rossfenning.co.uk/#me'),
        URIRef('http://rossfenning.co.uk/#favourite-cheese'),
        Literal('Stinking Bishop')))

    assert to_isomorphic(factory.store) == to_isomorphic(expected)

def compare_graphs(actual, expected):
    actual_iso = to_isomorphic(actual)
    expected_iso = to_isomorphic(expected)

    if actual_iso != expected_iso:
        _, in_first, in_second = graph_diff(actual_iso, expected_iso)
        print("The actual and expected graphs differ")
        print("----- Contents of actual graph not in expected graph -----")
        dump_ttl_sorted(in_first)
        print("----- Contents of expected graph not in actual graph -----")
        dump_ttl_sorted(in_second)

    assert actual_iso == expected_iso

def test_adds_props_during_construction(store):
    factory = ThingFactory(store)
    # We must use a list for the value, as foaf:name is not a functional
    # property (i.e. it is not restricted to a single value).
    _ = factory("rf_me", foaf_name=['Ross Fenning'])

    expected = Graph()
    expected.add((
        URIRef('http://rossfenning.co.uk/#me'),
        URIRef('http://xmlns.com/foaf/0.1/name'),
        Literal('Ross Fenning')))

    assert to_isomorphic(store) == to_isomorphic(expected)

def main():
    values = ap.parse_args()
    format1 = guess_format(values.file1)
    format2 = guess_format(values.file2)
    g1: Graph = Graph().parse(values.file1, format=format1)
    g2: Graph = Graph().parse(values.file2, format=format2)
    iso1: IsomorphicGraph = to_isomorphic(g1)
    iso2: IsomorphicGraph = to_isomorphic(g2)
    _in_both, in_first, in_second = graph_diff(iso1, iso2)
    print(f"Only in {values.file1}")
    dump_nt_sorted(in_first)
    print(f"Only in {values.file2}")
    dump_nt_sorted(in_second)

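# dump_nt_sorted is not defined in this snippet; a minimal sketch, assuming it
# just prints the diff graphs as sorted N-Triples lines for stable output
# (rdflib >= 6, where serialize() returns str rather than bytes):
def dump_nt_sorted(g):
    for line in sorted(g.serialize(format="nt").splitlines()):
        if line.strip():
            print(line)
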
def process(self, _, f):
    local_file = download(f, "btc-2014", "archives/")
    graph = ConjunctiveGraph('Sleepycat')
    # first time, create the store:
    graph.open(local_file + ".triplestore", create=True)
    for line in gzip.GzipFile(fileobj=open(local_file, 'rb')):
        try:
            graph.parse(data=line, format="nquads")
        except Exception as e:
            sys.stderr.write("ERROR: %s %s\n" % (line, e))
    results = {}
    for context in graph.contexts():
        # hoist uri/stats out of the try block so the except clause can use them
        uri = context.identifier
        stats = collections.defaultdict(str)
        stats["id"] = uri
        try:
            g = ConjunctiveGraph()
            g += context
            sys.stderr.write("Processing %s with %s triples...\n" % (uri, len(g)))
            sys.stderr.flush()
            ig = to_isomorphic(g)
            graph_digest = ig.graph_digest(stats)
            sys.stderr.write("Processed %s with %s triples in %s sec.\n"
                             % (uri, len(g), stats['to_hash_runtime']))
        except Exception as e:
            sys.stderr.write("ERROR: %s %s\n" % (uri, e))
            stats['error'] = str(e)
            sys.stderr.flush()
        stats_line = [str(stats[c]) for c in stat_cols]
        results[uri] = stats_line
    sys.stderr.write("Digested %s into %s graphs.\n" % (f, len(results)))
    results_string = '\n'.join([','.join(row) for row in results.values()])
    yield "benchmark", results_string

def digest_graph(self, uri, nquads):
    nquads = '\n'.join(list(nquads))
    sys.stderr.write("Processing %s (%d)...\n" % (uri, len(nquads)))
    stats = collections.defaultdict(str)
    stats["id"] = uri
    try:
        g = ConjunctiveGraph()
        g.parse(data=nquads, format="nquads")
        stats['ontology'] = g.value(predicate=RDF.type, object=OWL.Class) is not None
        sys.stderr.flush()
        stats['lines'] = len(nquads.split('\n'))
        ig = to_isomorphic(g)
        graph_digest = ig.graph_digest(stats)
        # stats['graph_digest'] = graph_digest
    except Exception as e:
        sys.stderr.write("ERROR: %s %s\n" % (stats['id'], e))
        stats['error'] = str(e)
    # if stats['to_hash_runtime'] > 0.1:
    sys.stderr.write("Processed %s with %s triples in %s sec.\n"
                     % (uri, len(g), stats['to_hash_runtime']))
    stats_line = [str(stats[c]) for c in stat_cols]
    result_string = io.StringIO()
    result_writer = csv.writer(result_string)
    result_writer.writerow(stats_line)
    yield "benchmark", result_string.getvalue()

def digest_multigraph(self, _, nquads):
    try:
        nquads = '\n'.join(list(nquads))
        g = ConjunctiveGraph()
        g.parse(data=nquads, format="nquads")
    except Exception:
        return
    result = None
    for graph in g.contexts():
        sys.stderr.write("Processing %s...\n" % graph.identifier)
        stats = collections.defaultdict(str)
        stats["id"] = graph.identifier
        try:
            ig = to_isomorphic(graph)
            graph_digest = ig.graph_digest(stats)
            # stats['graph_digest'] = graph_digest
        except Exception as e:
            sys.stderr.write("ERROR: %s %s\n" % (stats['id'], e))
            stats['error'] = str(e)
        # if stats['to_hash_runtime'] > 0.1:
        sys.stderr.write("Processed %s with %s triples in %s sec.\n"
                         % (graph.identifier, len(graph), stats['to_hash_runtime']))
        stats_line = [str(stats[c]) for c in stat_cols]
        result_string = io.StringIO()
        result_writer = csv.writer(result_string)
        result_writer.writerow(stats_line)
        if result is None:
            result = result_string.getvalue()
        else:
            result = result + "\n" + result_string.getvalue()
    yield "benchmark", result

def process_one(uri):
    stats = collections.defaultdict(str)
    stats["id"] = uri
    stats['debug'] = True
    try:
        g = ConjunctiveGraph()
        g.parse(data=requests.get(uri, headers={"Accept": "application/rdf+xml"}).text,
                format="xml")
        # print(g.serialize(format="turtle"))
        stats['lines'] = len(g)
        sys.stderr.write("Processing %s (%d)...\n" % (uri, stats['lines']))
        sys.stderr.flush()
        stats['ontology'] = g.value(predicate=RDF.type, object=OWL.Class) is not None
        ig = to_isomorphic(g)
        graph_digest = ig.graph_digest(stats)
        sys.stderr.write("Processed %s with %s triples in %s sec.\n"
                         % (uri, len(g), stats['to_hash_runtime']))
    except Exception as e:
        sys.stderr.write("ERROR: %s %s\n" % (uri, e))
        stats['error'] = str(e)
    sys.stderr.flush()
    print('\n'.join(['%s:\t%s' % (key, str(value)) for key, value in stats.items()]))
    return [str(stats[c]) for c in stat_cols]

def write(self):
    doc_node = self.create_doc()

    # Add creation info
    creation_info_node = self.create_creation_info()
    ci_triple = (doc_node, self.spdx_namespace.creationInfo, creation_info_node)
    self.graph.add(ci_triple)

    # Add review info
    review_nodes = self.reviews()
    for review in review_nodes:
        self.graph.add((doc_node, self.spdx_namespace.reviewed, review))

    # Add extracted licenses
    licenses = map(self.create_extracted_license, self.document.extracted_licenses)
    for lic in licenses:
        self.graph.add((doc_node, self.spdx_namespace.hasExtractedLicensingInfo, lic))

    # Add files
    files = self.files()
    for file_node in files:
        self.graph.add((doc_node, self.spdx_namespace.referencesFile, file_node))
    self.add_file_dependencies()

    # Add package
    package_node = self.packages()
    package_triple = (doc_node, self.spdx_namespace.describesPackage, package_node)
    self.graph.add(package_triple)

    # normalize the graph to ensure that the sort order is stable
    self.graph = to_isomorphic(self.graph)

    # Write file
    self.graph.serialize(self.out, 'pretty-xml', encoding='utf-8')

def digest_graphs(self, _, line):
    if ".nq" not in line:
        return
    # sys.stderr.write("Downloading %s\n" % line)
    # local_file = download(line, "btc-2014", "archives/")
    # sys.stderr.write("Downloaded %s\n" % line)
    # f = gzip.GzipFile(fileobj=open(local_file, 'rb'))
    # allGraphs = ConjunctiveGraph(store='Sleepycat')
    # allGraphs.open("local_store", create=True)
    # for line in f:
    #     try:
    #         allGraphs.parse(data=line, format="nquads")
    #     except:
    #         sys.stderr.write("BAD LINE: %s" % line)
    # sys.stderr.write("Parsed %s" % line)
    # yield 'benchmark', ','.join(stat_cols)
    results = {}
    graphs = collections.defaultdict(str)
    i = 0
    for uri, lines in self.segment_graphs(_, line):
        # if uri is None:
        #     continue
        i += 1
        graphs[uri] += '\n' + lines
        stats = collections.defaultdict(str)
        stats["id"] = uri
        try:
            g = ConjunctiveGraph()
            g.parse(data=graphs[uri], format="nquads")
            sys.stderr.flush()
            if len(g) == 0:
                sys.stderr.write("%s (%d)" % (graphs[uri], len(g)))
            stats['lines'] = len(graphs[uri].split('\n'))
            ig = to_isomorphic(g)
            graph_digest = ig.graph_digest(stats)
        except Exception as e:
            sys.stderr.write("ERROR: %s %s\n" % (stats['id'], e))
            stats['error'] = str(e)
        sys.stderr.write("Processed %s with %s triples in %s sec.\n"
                         % (uri, len(g), stats['to_hash_runtime']))
        stats_line = [str(stats[c]) for c in stat_cols]
        results[uri] = stats_line
    try:
        shutil.rmtree(store_dir)
    except Exception:
        pass
    sys.stderr.write("Digested %s into %s graphs.\n" % (line, len(graphs)))
    results_string = io.StringIO()
    results_writer = csv.writer(results_string)
    for result in results.values():
        results_writer.writerow(result)
    yield "benchmark", results_string.getvalue()

def main():
    from rdflib import Graph
    from rdflib.compare import to_isomorphic, graph_diff

    with open(sys.argv[1]) as f:
        d1 = f.read()
    with open(sys.argv[2]) as f:
        d2 = f.read()

    print('Loading graph 1 from ' + sys.argv[1])
    g1 = Graph().parse(format='n3', data=d1)

    print('Loading graph 2 from ' + sys.argv[2])
    g2 = Graph().parse(format='n3', data=d2)

    iso1 = to_isomorphic(g1)
    iso2 = to_isomorphic(g2)

def get_digest_value(rdf, mimetype):
    graph = Graph()
    graph.load(StringIO(rdf), format=mimetype)
    stats = {}
    ig = to_isomorphic(graph)
    result = ig.graph_digest(stats)
    print(stats)
    return result

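# Usage sketch: two Turtle documents that differ only in blank-node labels
# should yield the same canonical digest. (Assumes an rdflib version that
# still provides Graph.load, which the function above depends on.)
doc_a = '_:x <http://example.org/knows> _:y .'
doc_b = '_:a <http://example.org/knows> _:b .'
assert get_digest_value(doc_a, "turtle") == get_digest_value(doc_b, "turtle")
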
def graphdiff(first, second):
    """Diff between graph instances, should be replaced/included in quit diff."""
    from rdflib.compare import to_isomorphic, graph_diff

    diffs = OrderedDict()
    iris = set()

    if first is not None and isinstance(first, InMemoryAggregatedGraph):
        first_identifiers = list(g.identifier for g in first.graphs())
        iris = iris.union(first_identifiers)
    if second is not None and isinstance(second, InMemoryAggregatedGraph):
        second_identifiers = list(g.identifier for g in second.graphs())
        iris = iris.union(second_identifiers)

    for iri in sorted(list(iris)):
        changes = diffs.get(iri, [])

        if (first is not None and iri in first_identifiers) and (
                second is not None and iri in second_identifiers):
            g1 = first.get_context(iri)
            g2 = second.get_context(iri)
            in_both, in_first, in_second = graph_diff(to_isomorphic(g1), to_isomorphic(g2))
            if len(in_second) > 0:
                changes.append(('additions', ((s, p, o) for s, p, o in in_second)))
            if len(in_first) > 0:
                changes.append(('removals', ((s, p, o) for s, p, o in in_first)))
        elif first is not None and iri in first_identifiers:
            changes.append(('removals', ((s, p, o) for s, p, o in first.get_context(iri))))
        elif second is not None and iri in second_identifiers:
            changes.append(('additions', ((s, p, o) for s, p, o in second.get_context(iri))))
        else:
            continue
        diffs[iri] = changes
    return diffs

def rem_metadata(g: Graph) -> IsomorphicGraph:
    g_iso = to_isomorphic(g)
    for t in list(g_iso.triples((None, METAMODEL_NAMESPACE.generation_date, None))):
        g_iso.remove(t)
    for t in list(g_iso.triples((None, METAMODEL_NAMESPACE.source_file_date, None))):
        g_iso.remove(t)
    for t in list(g_iso.triples((None, METAMODEL_NAMESPACE.source_file_size, None))):
        g_iso.remove(t)
    return g_iso

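# Usage sketch (hypothetical file names): strip the volatile generation
# metadata from both graphs before comparing; IsomorphicGraph equality then
# compares canonical hashes rather than raw triples.
expected = Graph().parse("expected.ttl", format="turtle")
actual = Graph().parse("actual.ttl", format="turtle")
assert rem_metadata(expected) == rem_metadata(actual)
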
def rdf_comparator(self, old_data: str, new_data: str, new_file: str) -> None:
    """
    RDF comparator. Compare two graphs and, if they don't match, save a
    turtle image of new_data in new_file and raise an error.

    :param old_data: Turtle representation of expected RDF
    :param new_data: Turtle representation of actual RDF
    :param new_file: Save actual RDF here if mismatch
    :return:
    """
    old_graph = Graph()
    new_graph = Graph()
    old_graph.parse(data=old_data, format="turtle")
    new_graph.parse(data=new_data, format="turtle")

    old_iso = to_isomorphic(old_graph)
    # Remove the metadata-specific triples
    for t in list(old_iso.triples((None, METAMODEL_NAMESPACE.generation_date, None))):
        old_iso.remove(t)
    new_iso = to_isomorphic(new_graph)
    for t in list(new_iso.triples((None, METAMODEL_NAMESPACE.generation_date, None))):
        new_iso.remove(t)

    # Graph compare takes a looong time
    in_both, in_old, in_new = graph_diff(old_iso, new_iso)
    # if old_iso != new_iso:
    #     in_both, in_old, in_new = graph_diff(old_iso, new_iso)
    old_len = len(list(in_old))
    new_len = len(list(in_new))
    if old_len or new_len:
        if old_len:
            print("----- Old graph only -----")
            self._print_triples(in_old)
        if new_len:
            print("----- New graph only -----")
            self._print_triples(in_new)
        with open(new_file, 'w') as newf:
            newf.write(new_data)
        self.assertTrue(False, "RDF file mismatch")

def rem_metadata(g: Graph) -> IsomorphicGraph:
    # Remove list declarations from target
    for s in list(g.subjects(RDF.type, RDF.List)):
        g.remove((s, RDF.type, RDF.List))
    # materialize the iterator with list() so we don't mutate g while iterating
    for t in list(g):
        if t[1] in (LINKML.generation_date, LINKML.source_file_date,
                    LINKML.source_file_size, TYPE.generation_date,
                    TYPE.source_file_date, TYPE.source_file_size):
            g.remove(t)
    g_iso = to_isomorphic(g)
    return g_iso

def test_load_single(self, mock_sparql_delete, mock_sparql_insert):
    with Store(self.data_path) as store:
        store.add("0000-0003-1527-0030")
        (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \
            store["0000-0003-1527-0030"]
        self.assertIsNone(last_update)

    graph1, add_graph1, delete_graph1 = load_single(
        "0000-0003-1527-0030", None, None, None, self.data_path,
        "http://vivo.mydomain.edu/sparql", "*****@*****.**", "password")
    self.assertEqual(232, len(add_graph1))
    self.assertEqual(0, len(delete_graph1))
    self.assertEqual(to_isomorphic(graph1), to_isomorphic(add_graph1))

    with Store(self.data_path) as store:
        # Last update now set
        (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \
            store["0000-0003-1527-0030"]
        self.assertIsNotNone(last_update)

    # Make sure turtle file created
    self.assertTrue(os.path.exists(os.path.join(self.data_path, "0000-0003-1527-0030.ttl")))

    # Now change a fact and run again. Changed fact is provided by vcr recording.
    # Changed year of Amherst degree.
    graph2, add_graph2, delete_graph2 = load_single(
        "0000-0003-1527-0030", None, None, None, self.data_path,
        "http://vivo.mydomain.edu/sparql", "*****@*****.**", "password")
    self.assertEqual(232, len(graph2))
    self.assertEqual(17, len(add_graph2))
    self.assertEqual(17, len(delete_graph2))

    mock_sparql_insert.assert_has_calls([
        call(add_graph1, "http://vivo.mydomain.edu/sparql", "*****@*****.**", "password"),
        call(add_graph2, "http://vivo.mydomain.edu/sparql", "*****@*****.**", "password")])
    mock_sparql_delete.assert_has_calls([
        call(delete_graph1, "http://vivo.mydomain.edu/sparql", "*****@*****.**", "password"),
        call(delete_graph2, "http://vivo.mydomain.edu/sparql", "*****@*****.**", "password")])

def test_graphs_equal(self):
    self.origin = reset_store_and_setup_origin()
    # TODO: make this file callable and with parameter: check URIs
    self.origin.GET(only_follow_uris=[])
    g1 = self.origin._graph
    g2 = self.origin.get_graph()

    # normal rdflib.compare does not work correctly with ConjunctiveGraph,
    # unless there is only one graph within that
    self.assertEqual(len(list(g1.contexts())), 1)
    self.assertEqual(len(list(g2.contexts())), 1)
    self.assertEqual(len(g1), len(g2))
    self.assertEqual(compare.to_isomorphic(g1), compare.to_isomorphic(g2))

    nsbindungs_orig = dict(g1.namespace_manager.namespaces())
    nsbindungs_new = dict(g2.namespace_manager.namespaces())
    self.assertEqual(nsbindungs_orig, nsbindungs_new)

def _are_added_triples(self, cur_subj):
    subj = cur_subj
    cur_subj_g = cur_subj.g
    prev_subj_g = Graph()
    query = "CONSTRUCT {<%s> ?p ?o} WHERE {<%s> ?p ?o}" % (subj, subj)
    result = self.ts.query(query)

    if result:
        for s, p, o in result:
            prev_subj_g.add((s, p, o))

        iso1 = to_isomorphic(prev_subj_g)
        iso2 = to_isomorphic(cur_subj_g)
        if iso1 == iso2:
            # the graphs are the same
            return None
        else:
            in_both, in_first, in_second = graph_diff(iso1, iso2)
            query_string = u"INSERT DATA { GRAPH <%s> { " % cur_subj_g.identifier
            query_string += in_second.serialize(format="nt11", encoding="utf-8").decode("utf-8")
            return query_string.replace('\n\n', '') + "} }"

def canonicalize(resource):
    digest = to_isomorphic(resource.graph).graph_digest()
    # canonical = list(iso[:RDFS.label:resource.label()])[0]
    # print(str(canonical), '\n', iso.serialize(format='turtle'))
    skolemized = _prefix[str(digest)]
    # materialize the triples with list() so removal during iteration is safe
    for s, p, o in list(resource.graph.triples((None, None, resource.identifier))):
        resource.graph.remove((s, p, o))
        resource.graph.add((s, p, skolemized))
    for s, p, o in list(resource.graph.triples((resource.identifier, None, None))):
        resource.graph.remove((s, p, o))
        resource.graph.add((skolemized, p, o))
    return resource.graph.resource(skolemized)

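# _prefix above is assumed to be an rdflib Namespace used to mint stable,
# digest-based skolem IRIs; a plausible definition (hypothetical base URI):
from rdflib import Namespace

_prefix = Namespace("http://example.org/.well-known/genid/")
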
def write(self):
    doc_node = self.create_doc()

    # Add creation info
    creation_info_node = self.create_creation_info()
    ci_triple = (doc_node, self.spdx_namespace.creationInfo, creation_info_node)
    self.graph.add(ci_triple)

    # Add review info
    review_nodes = self.reviews()
    for review in review_nodes:
        self.graph.add((doc_node, self.spdx_namespace.reviewed, review))

    # Add external document references info
    ext_doc_ref_nodes = self.ext_doc_refs()
    for ext_doc_ref in ext_doc_ref_nodes:
        ext_doc_ref_triple = (doc_node, self.spdx_namespace.externalDocumentRef, ext_doc_ref)
        self.graph.add(ext_doc_ref_triple)

    # Add extracted licenses
    licenses = map(self.create_extracted_license, self.document.extracted_licenses)
    for lic in licenses:
        self.graph.add((doc_node, self.spdx_namespace.hasExtractedLicensingInfo, lic))

    # Add files
    files = self.files()
    for file_node in files:
        self.graph.add((doc_node, self.spdx_namespace.referencesFile, file_node))
    self.add_file_dependencies()

    # Add packages
    for package_node in self.packages():
        package_triple = (doc_node, self.spdx_namespace.describesPackage, package_node)
        self.graph.add(package_triple)

    # Add relationship (currently disabled)
    # relate_node = self.relationships()
    # relate_triple = (doc_node, self.spdx_namespace.relationship, relate_node)
    # self.graph.add(relate_triple)

    # Add snippets
    snippet_nodes = self.snippets()
    for snippet in snippet_nodes:
        self.graph.add((doc_node, self.spdx_namespace.Snippet, snippet))

    # normalize the graph to ensure that the sort order is stable
    self.graph = to_isomorphic(self.graph)

    # Write file
    self.graph.serialize(self.out, "pretty-xml", encoding="utf-8")

def readIsomorphicGraph(self, file):
    graph = ConjunctiveGraph(identifier='')
    # check whether we handle a directory or a separate file
    if isdir(file):
        # for better readability, rename the variable
        dir = file
        for path, dirs, files in walk(dir):
            for file in files:
                absfile = join(path, file)
                format = rdflib.util.guess_format(absfile)
                if format is not None:
                    graph.parse(absfile, format=format, publicID=self.nsQuitDiff)
    elif isfile(file):
        format = rdflib.util.guess_format(file)
        if format is not None:
            graph.parse(file, format=format, publicID=self.nsQuitDiff)

    contextDict = {}
    contextDict[self.nsQuitDiff] = Graph()
    for subgraph in graph.contexts():
        # TODO: we have to copy all the triples to a new ConjunctiveGraph because
        # https://rdflib.readthedocs.io/en/stable/_modules/rdflib/compare.html
        # takes the complete store and thus doesn't support quads
        triples = subgraph.triples((None, None, None))
        if isinstance(subgraph.identifier, BNode) or str(subgraph.identifier) == self.nsQuitDiff:
            subgraphConjunctive = contextDict[self.nsQuitDiff]
        else:
            try:
                subgraphConjunctive = contextDict[subgraph.identifier]
            except KeyError:
                contextDict[subgraph.identifier] = ConjunctiveGraph()
                subgraphConjunctive = contextDict[subgraph.identifier]
        for triple in triples:
            subgraphConjunctive.add(triple)
    # end TODO hack

    graphDict = {}
    for identifier, graph in contextDict.items():
        graphDict[identifier] = compare.to_isomorphic(graph)
    return graphDict

def worker(q, finished_tasks, dl_lock):
    try:
        while True:
            stats = q.get()
            og = Graph()
            try:
                og.load(stats["download_url"])
                print(stats["ontology"], stats["id"])
                ig = to_isomorphic(og)
                graph_digest = ig.graph_digest(stats)
                finished_tasks.put(stats)
            except Exception as e:
                print("ERROR", stats["id"], e)
                stats["error"] = str(e)
                finished_tasks.put(stats)
    except queue.Empty:
        pass

def digest_graph(uri, turtle):
    stats = collections.defaultdict(str)
    stats["id"] = uri
    try:
        g = ConjunctiveGraph()
        g.parse(data=turtle, format="turtle")
        sys.stderr.write('.')
        sys.stderr.flush()
        stats['ontology'] = g.value(predicate=RDF.type, object=OWL.Class) is not None
        ig = to_isomorphic(g)
        graph_digest = ig.graph_digest(stats)
        # sys.stderr.write("Processed %s with %s triples in %s sec.\n"
        #                  % (uri, len(g), stats['to_hash_runtime']))
    except Exception as e:
        sys.stderr.write("ERROR: %s %s\n" % (uri, e))
        sys.stderr.flush()
        stats['error'] = str(e)
    return [str(stats[c]).encode("ascii", "ignore").decode("ascii") for c in stat_cols]

def worker(q, finished_tasks, dl_lock):
    try:
        while True:
            stats = q.get()
            og = Graph()
            try:
                try:
                    dl_lock.acquire()
                    og.load(stats['download_url'] + "?apikey=%s" % apikey)
                finally:
                    dl_lock.release()
                print(stats['ontology'], stats['id'])
                ig = to_isomorphic(og)
                graph_digest = ig.graph_digest(stats)
                finished_tasks.put(stats)
            except Exception as e:
                print('ERROR', stats['id'], e)
                stats['error'] = str(e)
                finished_tasks.put(stats)
    except Empty:
        pass

def GET(self, GRAPH_SIZE_LIMIT=30000, only_follow_uris=None,
        handle_owl_imports=False, raise_errors=True, skip_urls=None,
        httphandler=None):

    if not self.uri:
        raise Exception("Please provide URI first")

    if skip_urls is not None and self.uri.encode("utf8") in skip_urls:
        self.add_error("Skipped")
        self.processed = True
        return

    logger.info(u"GET %s..." % self.uri)

    if self.has_unsaved_changes():
        if self.processed:
            raise Exception("Please save all changes before querying "
                            "again. Merging not supported yet")
        else:
            logger.warning("There were Resource objects created before "
                           "processing the resource's origin.")

    now = datetime.datetime.now()
    # self.timedelta = datetime.timedelta(minutes=1)
    if hasattr(self, "timedelta") and hasattr(self, 'last_processed'):
        time_since_last_processed = now - self.last_processed
        if time_since_last_processed < self.timedelta:
            logger.info("Not processing %s again because it was processed only %s ago"
                        % (self.uri, time_since_last_processed))
            return
    self.last_processed = now

    try:
        data = self.backend.GET(self.uri, httphandler=httphandler)
    except urllib2.HTTPError as e:
        if e.code in [
                401,
                403,
                503,  # Service Temporarily Unavailable
                404,  # Not Found
                ]:
            self.add_error(e.code)
        if raise_errors:
            raise e
        else:
            return
    except urllib2.URLError as e:
        self.add_error("timeout")
        if raise_errors:
            raise e
        else:
            return
    except ContentNegotiationError as e:
        logger.error(e.message)
        if raise_errors:
            raise e
        else:
            return

    graph = rdflib.graph.ConjunctiveGraph(identifier=self.uri)

    try:
        if data:
            # Important: Do not pass data=data without publicID=uri because
            # relative URIs (#deri) won't be an absolute uri in that case!
            publicID = self.uri
            reference_time = datetime.datetime.now()
            graph.parse(data=data, publicID=publicID, format=self.backend.format)
            now = datetime.datetime.now()
            self.graph_parse_time = now - reference_time
            # normal rdflib.compare does not work correctly with
            # ConjunctiveGraph, unless there is only one graph within that
    except SAXParseException as e:
        self.add_error("SAXParseException")
        logger.error("SAXParseException: %s" % self)
        if raise_errors:
            raise e
        else:
            return
    except rdflib.exceptions.ParserError as e:
        self.add_error("ParserError")
        logger.error("ParserError: %s" % self)
        if raise_errors:
            raise e
        else:
            return
    except IOError as e:
        self.add_error("IOError")
        logger.error("IOError: %s" % self)
        if raise_errors:
            raise e
        else:
            return

    self.processed = True

    if hasattr(self, "errors"):
        delattr(self, "errors")

    g_length = len(graph)

    if g_length > 0:
        if len(list(graph.contexts())) > 1:
            # detect problems with graph contexts: rdflib can only compare
            # graphs with one context. If a graph has more contexts, this
            # might result in wrong comparisons of graphs.
            # Still ignored here as ldtools is more robust by doing so.
            logger.error("The graph has more than one context. This "
                         "might cause problems comparing the graphs!")

        if g_length > GRAPH_SIZE_LIMIT:
            logger.error("Maximum graph size exceeded. The graph is %s "
                         "triples big. Limit is set to %s. The acquired "
                         "graph exceeds that! Pass GRAPH_SIZE_LIMIT to set it "
                         "differently." % (g_length, GRAPH_SIZE_LIMIT))
            return

    if hasattr(self, "_graph"):
        # we already assured that there are no unsaved_changes
        # --> get_graph() == _graph
        logger.info(u"Already crawled: %s. Comparing graphs..." % self.uri)

        if compare.to_isomorphic(self._graph) == compare.to_isomorphic(graph):
            return
        else:
            logging.warning("GET retrieved updates for %s!" % self.uri)
            my_graph_diff(self._graph, graph)

            for resource in self.get_resources():
                resource.delete()
            delattr(self, "handled")

    if hasattr(self, "handled"):
        return

    self._graph = graph

    graph_handler = GraphHandler(
        only_follow_uris=only_follow_uris,
        handle_owl_imports=handle_owl_imports,
        origin=self)
    graph_handler.populate_resources(graph=graph)

    self.handled = True

def my_graph_diff(graph1, graph2):
    """Compares graph2 to graph1 and highlights everything that changed.
    Colored if pygments is available."""

    # quick fix for wrong type
    if not type(graph1) == type(graph2) == rdflib.Graph:
        if type(graph1) == rdflib.ConjunctiveGraph:
            g1contexts = list(graph1.contexts())
            assert len(g1contexts) == 1
            graph1 = g1contexts[0]
        if type(graph2) == rdflib.ConjunctiveGraph:
            g2contexts = list(graph2.contexts())
            assert len(g2contexts) == 1
            graph2 = g2contexts[0]

    # Return if both graphs are isomorphic
    iso1 = compare.to_isomorphic(graph1)
    iso2 = compare.to_isomorphic(graph2)

    if graph1.identifier == graph2.identifier:
        str_bit = u"The 2 '%s' Graphs" % graph1.identifier
    else:
        str_bit = (u"Graphs '%s' and '%s'"
                   % (graph1.identifier, graph2.identifier))

    if iso1 == iso2:
        logger.debug(u"%s are isomorphic" % str_bit)
        return

    print(u"Differences between %s." % str_bit)

    in_both, in_first, in_second = compare.graph_diff(iso1, iso2)

    def dump_nt_sorted(g):
        return sorted(g.serialize(format='nt').splitlines())

    sorted_first = dump_nt_sorted(in_first)
    sorted_second = dump_nt_sorted(in_second)

    import difflib
    diff = difflib.unified_diff(
        sorted_first,
        sorted_second,
        u'Original',
        u'Current',
        lineterm=''
    )

    try:
        from pygments import highlight
        from pygments.formatters import terminal
        from pygments.lexers import web
        lexer = web.XmlLexer()
        formatter = terminal.TerminalFormatter()
        print(highlight(u'\n'.join(diff), lexer, formatter))
    except ImportError:
        logger.info("Install pygments for colored diffs")
        print(u'\n'.join(diff))
    except UnicodeDecodeError:
        print(u"Only in first", str(sorted_first))
        print(u"Only in second", str(sorted_second))

def isograph(doc):
    g = ConjunctiveGraph().parse(data=etree.tostring(doc))
    return to_isomorphic(g)

from rdflib import Graph
from rdflib.compare import to_isomorphic, graph_diff
import sys

if len(sys.argv) >= 3:
    F1 = sys.argv[1]
    F2 = sys.argv[2]
else:
    F1 = "/home/barry/Downloads/instance.ttl"
    F2 = "/home/barry/Downloads/t2.ttl"

g1 = Graph()
g1.parse(F1, format="turtle")
g2 = Graph()
g2.parse(F2, format="turtle")

iso1 = to_isomorphic(g1)
iso2 = to_isomorphic(g2)
in_both, in_first, in_second = graph_diff(iso1, iso2)

if len(sys.argv) == 4:
    print(in_first.serialize(format="n3").decode('utf-8'))
else:
    print(in_second.serialize(format="n3").decode('utf-8'))
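
# Usage sketch (the script name is a placeholder):
#   python rdfdiff.py old.ttl new.ttl        prints triples only in new.ttl
#   python rdfdiff.py old.ttl new.ttl flip   any third argument prints the
#                                            triples only in old.ttl instead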