def assure_results(self, graph):
    """Assert the expected state after processing the two-triple FOAF fixture.

    Checks the store counts, the graph size, the presence of the two
    expected triples, and that the retrieved graph matches the origin's
    stored graph (``my_graph_diff`` returns a falsy value on no diff).
    """
    self.assertEqual(count(), (2, 2))
    self.assertEqual(len(graph), 2)

    # Materialize once; the original built a fresh generator per assertIn.
    triples = list(graph.triples((None, None, None)))
    self.assertIn(
        (rdflib.term.URIRef('http://example.com/foaf#me'),
         rdflib.term.URIRef('http://xmlns.com/foaf/0.1/name'),
         rdflib.term.Literal(u'Max Mustermann')),
        triples)
    self.assertIn(
        (rdflib.term.URIRef('http://example.com/foaf#me'),
         rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
         rdflib.term.URIRef('http://xmlns.com/foaf/0.1/Person')),
        triples)

    # assertTrue replaces the deprecated assert_ alias (removed in Py3.12).
    self.assertTrue(not my_graph_diff(graph, self.origin.get_graph()))
def GET(
        self,
        GRAPH_SIZE_LIMIT=30000,
        only_follow_uris=None,
        handle_owl_imports=False,
        raise_errors=True,
        skip_urls=None,
        httphandler=None,
        ):
    """Fetch ``self.uri`` via the backend, parse it into a graph and
    hand the result to a :class:`GraphHandler`.

    :param GRAPH_SIZE_LIMIT: abort if the parsed graph has more triples.
    :param only_follow_uris: passed through to the GraphHandler.
    :param handle_owl_imports: passed through to the GraphHandler.
    :param raise_errors: if False, network/parse errors are recorded via
        ``add_error`` and the method returns silently instead of raising.
    :param skip_urls: optional collection of (utf-8 encoded) URLs to skip.
    :param httphandler: optional handler forwarded to the backend GET.
    """
    if not self.uri:
        raise Exception("Please provide URI first")

    if skip_urls is not None and self.uri.encode("utf8") in skip_urls:
        self.add_error("Skipped")
        self.processed = True
        return

    logger.info(u"GET %s..." % self.uri)

    if self.has_unsaved_changes():
        if self.processed:
            raise Exception("Please save all changes before querying "
                            "again. Merging not supported yet")
        else:
            logger.warning("There were Resource objects created before "
                           "processing the resource's origin.")

    now = datetime.datetime.now()
    # self.timedelta = datetime.timedelta(minutes=1)
    # Throttle: skip re-processing if the last run was within self.timedelta.
    if hasattr(self, "timedelta") and hasattr(self, 'last_processed'):
        time_since_last_processed = now - self.last_processed
        if (time_since_last_processed < self.timedelta):
            logger.info(
                "Not processing %s again because was processed only %s ago"
                % (self.uri, time_since_last_processed))
            return

    self.last_processed = now

    try:
        data = self.backend.GET(self.uri, httphandler=httphandler)
    except urllib2.HTTPError as e:
        if e.code in [
            401,
            403,
            503,  # Service Temporarily Unavailable
            404,  # Not Found
        ]:
            self.add_error(e.code)
        if raise_errors:
            raise e
        else:
            return
    except urllib2.URLError as e:
        self.add_error("timeout")
        if raise_errors:
            raise e
        else:
            return
    except ContentNegotiationError as e:
        logger.error(e.message)
        if raise_errors:
            raise e
        else:
            return

    graph = rdflib.graph.ConjunctiveGraph(identifier=self.uri)

    try:
        if data:
            # Important: Do not pass data=data without publicID=uri because
            # relative URIs (#deri) won't be an absolute uri in that case!
            publicID = self.uri

            reference_time = datetime.datetime.now()

            graph.parse(data=data, publicID=publicID,
                        format=self.backend.format)

            now = datetime.datetime.now()
            self.graph_parse_time = now - reference_time

            # normal rdflib.compare does not work correctly with
            # ConjunctiveGraph, unless there is only one graph within that
    except SAXParseException as e:
        self.add_error("SAXParseException")
        logger.error("SAXParseException: %s" % self)
        if raise_errors:
            raise e
        else:
            return
    except rdflib.exceptions.ParserError as e:
        self.add_error("ParserError")
        logger.error("ParserError: %s" % self)
        if raise_errors:
            raise e
        else:
            return
    except IOError as e:
        self.add_error("IOError")
        logger.error("IOError: %s" % self)
        if raise_errors:
            raise e
        else:
            return

    self.processed = True

    # A successful round clears any errors recorded on earlier attempts.
    if hasattr(self, "errors"):
        delattr(self, "errors")

    g_length = len(graph)

    if g_length > 0:
        if len(list(graph.contexts())) > 1:
            # detect problems with graph contexts: rdflib can only
            # compare graphs with one context. If a graph has more
            # contexts this might result in wrong comparisons of graphs
            # Still ignored here as ldtools is more robust by doing so.
            # BUGFIX: the two literals previously concatenated without a
            # space ("Thismight").
            logger.error("The graph has more than one context. This "
                         "might cause problems comparing the graphs!")

    if g_length > GRAPH_SIZE_LIMIT:
        # BUGFIX: corrected typos "Thr" -> "The", "aquired" -> "acquired".
        logger.error("Maximum graph size exceeded. The graph is %s "
                     "triples big. Limit is set to %s. The acquired "
                     "graph exceeds that! Pass GRAPH_SIZE_LIMIT to set it "
                     "differently." % (g_length, GRAPH_SIZE_LIMIT))
        return

    if hasattr(self, "_graph"):
        # we already assured that there are no unsaved_changes
        # --> get_graph() == _graph
        logger.info(u"Already crawled: %s. Comparing graphs..." % self.uri)
        if compare.to_isomorphic(self._graph) ==\
           compare.to_isomorphic(graph):
            return
        else:
            # BUGFIX: use the module-level `logger` like every other call
            # here, instead of the root `logging` module.
            logger.warning("GET retrieved updates for %s!"
                           % self.uri)
            my_graph_diff(self._graph, graph)

            # The remote content changed: drop the stale resources and
            # force re-handling below.
            for resource in self.get_resources():
                resource.delete()
            delattr(self, "handled")

    if hasattr(self, "handled"):
        return

    self._graph = graph

    graph_handler = GraphHandler(
        only_follow_uris=only_follow_uris,
        handle_owl_imports=handle_owl_imports,
        origin=self)
    graph_handler.populate_resources(graph=graph)

    self.handled = True
def assure_results(self, graph):
    """Assert the expected state for this fixture: store counts (3, 3),
    a two-triple graph, and no diff against the origin's stored graph.
    """
    self.assertEqual(count(), (3, 3))
    self.assertEqual(len(graph), 2)

    # assertTrue replaces the deprecated assert_ alias (removed in Py3.12).
    self.assertTrue(not my_graph_diff(graph, self.origin.get_graph()))
def assure_results(self, graph):
    """Assert the expected state for this fixture: store counts (5, 8),
    a five-triple graph, no diff against the origin's stored graph, and
    that the second origin was processed as well.
    """
    self.assertEqual(count(), (5, 8))
    self.assertEqual(len(graph), 5)

    # assert graph == get_graph()
    # assertTrue replaces the deprecated assert_ alias (removed in Py3.12).
    self.assertTrue(not my_graph_diff(graph, self.origin.get_graph()))
    self.assertTrue(self.origin2.processed)