def Query(self, request: SageQuery, context: grpc.ServicerContext) -> SageResponse:
    graph: Graph = None
    try:
        query = request.query
        graph_name = request.default_graph_uri
        next_link = request.next_link if len(request.next_link) > 0 else None
        if not self._dataset.has_graph(graph_name):
            context.abort(code=404, details=f"RDF Graph {graph_name} not found on the server.")
        graph = self._dataset.get_graph(graph_name)

        # decode next_link or build query execution plan
        cardinalities = dict()
        start = time()
        if next_link is not None:
            if self._dataset.is_stateless:
                saved_plan = next_link
            else:
                saved_plan = self._dataset.statefull_manager.get_plan(next_link)
            plan = load(decode_saved_plan(saved_plan), self._dataset)
        else:
            plan, cardinalities = parse_query(query, self._dataset, graph_name)
        loading_time = (time() - start) * 1000

        # execute query
        engine = SageEngine()
        quota = graph.quota / 1000
        max_results = graph.max_results
        bindings, saved_plan, is_done, abort_reason = run(engine.execute(plan, quota, max_results))

        # commit or abort (if necessary)
        if abort_reason is not None:
            graph.abort()
            context.abort(code=500, details=f"The SPARQL query has been aborted for the following reason: '{abort_reason}'")
        else:
            graph.commit()

        # encode saved plan if query execution is not done yet and there was no abort
        start = time()
        next_page = None
        if (not is_done) and abort_reason is None:
            next_page = encode_saved_plan(saved_plan)
            if not self._dataset.is_stateless:
                # generate the plan ID if this is the first time we execute this plan
                plan_id = next_link if next_link is not None else str(uuid4())
                self._dataset.statefull_manager.save_plan(plan_id, next_page)
                next_page = plan_id
        elif is_done and (not self._dataset.is_stateless) and next_link is not None:
            # delete the saved plan, as it will not be reloaded anymore
            self._dataset.statefull_manager.delete_plan(next_link)
        exportTime = (time() - start) * 1000

        # create response
        response = SageResponse(is_done=is_done, next_link=next_page)
        for binding in create_bindings(bindings):
            response.bindings.append(binding)
        return response
    except Exception as err:
        if graph is not None:
            graph.abort()
        context.abort(code=500, details=f"A server-side error has occurred: {str(err)}")
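# The statefull_manager above is only exercised through three calls:
# get_plan(id), save_plan(id, plan) and delete_plan(id). A minimal in-memory
# sketch of such a manager is shown below; it is purely illustrative and not
# the implementation shipped with the server.
class InMemoryStatefulManager:
    """Hypothetical saved-plan store keeping encoded plans in a dict."""

    def __init__(self):
        self._plans = dict()

    def get_plan(self, plan_id: str) -> str:
        # fetch the encoded plan saved under this ID
        return self._plans[plan_id]

    def save_plan(self, plan_id: str, encoded_plan: str) -> None:
        # overwrite any previously saved state for this query
        self._plans[plan_id] = encoded_plan

    def delete_plan(self, plan_id: str) -> None:
        # the query completed, so its saved state can be discarded
        self._plans.pop(plan_id, None)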
def execute_query(query, default_graph_uri, next_link, dataset, mimetype, url):
    """Execute a query using the SageEngine and return the appropriate HTTP response.

    Any failure results in a rollback/abort of the current query execution.
    """
    graph = None
    try:
        graph_name = format_graph_uri(default_graph_uri, url)
        if not dataset.has_graph(graph_name):
            logging.error("No RDF graph matching the default URI provided was found.")
            return sage_http_error("No RDF graph matching the default URI provided was found.")
        graph = dataset.get_graph(graph_name)

        # decode next_link or build query execution plan
        cardinalities = dict()
        start = time()
        if next_link is not None:
            plan = load(decode_saved_plan(next_link), dataset)
        else:
            plan, cardinalities = parse_query(query, dataset, graph_name, url)
        loading_time = (time() - start) * 1000

        # execute query
        engine = SageEngine()
        quota = graph.quota / 1000
        max_results = graph.max_results
        bindings, saved_plan, is_done = engine.execute(plan, quota, max_results)

        # commit (if necessary)
        graph.commit()

        # compute controls for the next page
        start = time()
        next_page = None
        if not is_done:
            next_page = encode_saved_plan(saved_plan)
        exportTime = (time() - start) * 1000
        stats = {"cardinalities": cardinalities, "import": loading_time, "export": exportTime}

        # send the response in the requested format
        if mimetype == "application/sparql-results+json":
            return Response(responses.w3c_json_streaming(bindings, next_page, stats, url), content_type='application/json')
        if mimetype in ("application/xml", "application/sparql-results+xml"):
            return Response(responses.w3c_xml(bindings, next_page, stats), content_type="application/xml")
        if mimetype == "application/json":
            return Response(responses.raw_json_streaming(bindings, next_page, stats, url), content_type='application/json')
        # otherwise, return the HTML version
        return render_template("sage_page.html", query=query, default_graph_uri=default_graph_uri, bindings=bindings, next_page=next_page, stats=stats)
    except Exception as err:
        # abort all ongoing transactions (if required),
        # then forward the exception to the main loop
        logging.error(f"sage execute_query error: {err}")
        if graph is not None:
            graph.abort()
        raise err
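# How a client might consume the next_page links produced above: re-submit the
# query with the last "next" token until the server stops returning one. This
# is a sketch only; the endpoint URL, payload keys and response fields
# ("bindings", "next") are assumptions made for illustration, not the
# authoritative SaGe protocol.
import requests

def fetch_all(endpoint: str, query: str, default_graph_uri: str):
    results, next_link = [], None
    while True:
        payload = {"query": query, "defaultGraph": default_graph_uri}
        if next_link is not None:
            payload["next"] = next_link
        page = requests.post(
            endpoint,
            json=payload,
            headers={"Accept": "application/sparql-results+json"},
        ).json()
        results.extend(page.get("bindings", []))
        next_link = page.get("next")
        if next_link is None:
            # no saved plan returned: query execution is complete
            return results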
def sage_query_debug(config_file, default_graph_uri, query, file, limit):
    """Debug a SPARQL query on an embedded Sage server.

    Example usage: sage-query config.yaml http://example.org/swdf-postgres -f queries/spo.sparql
    """
    # assert that we have a query to evaluate
    if query is None and file is None:
        print("Error: you must specify a query to execute, either with --query or --file. See sage-query --help for more information.")
        exit(1)

    # set the log level to DEBUG at application startup
    # (use logging.WARNING instead to silence the asyncio logger)
    # logging.basicConfig(level=logging.WARNING)
    logging.basicConfig(level=logging.DEBUG)

    if limit is None:
        limit = inf

    # load query from file if required
    if file is not None:
        with open(file) as query_file:
            query = query_file.read()

    dataset = load_config(config_file)
    if dataset is None:
        print(f"config file {config_file} not found")
        exit(1)

    graph = dataset.get_graph(default_graph_uri)
    if graph is None:
        print(f"RDF Graph not found: {default_graph_uri}")
        exit(1)

    engine = SageEngine()
    cards = list()
    context = dict()
    context['quantum'] = 1000000
    context['max_results'] = 1000000

    from time import time
    context['start_timestamp'] = time()

    iterator, cards = parse_query(query, dataset, default_graph_uri, context)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(execute(engine, iterator, limit))
    loop.close()
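# A plausible shape for the `execute` coroutine awaited by the debug CLI above.
# The (plan, context) signature of SageEngine.execute is assumed from the async
# server code further below; treat this as a sketch, not the shipped helper.
from time import time

async def execute(engine, iterator, limit):
    context = {
        'quantum': 1000000,
        'max_results': 1000000,
        'start_timestamp': time()
    }
    bindings, saved_plan, is_done, abort_reason = await engine.execute(iterator, context)
    # print up to `limit` solution mappings
    for count, solution in enumerate(bindings):
        if count >= limit:
            break
        print(solution)
    print(f"done: {is_done}, abort reason: {abort_reason}")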
async def execute_query(query: str, default_graph_uri: str, next_link: Optional[str], dataset: Dataset) -> Tuple[List[Dict[str, str]], Optional[str], Dict[str, str]]:
    """Execute a query using the SageEngine and return the results needed to build the HTTP response.

    Any failure results in a rollback/abort of the current query execution.

    Args:
      * query: SPARQL query to execute.
      * default_graph_uri: URI of the default RDF graph to use.
      * next_link: URI of a saved plan. Can be `None` if query execution should start from the beginning.
      * dataset: RDF dataset on which the query is executed.

    Returns:
      A tuple (`bindings`, `next_page`, `stats`) where:
      * `bindings` is a list of query results.
      * `next_page` is a link to the saved query execution state. Set to `None` if query execution completed during the time quantum.
      * `stats` are statistics about query execution.

    Throws:
      Any exception that occurred during query execution.
    """
    graph = None
    try:
        if not dataset.has_graph(default_graph_uri):
            raise HTTPException(status_code=404, detail=f"RDF Graph {default_graph_uri} not found on the server.")
        graph = dataset.get_graph(default_graph_uri)

        context = dict()
        context['quantum'] = graph.quota
        context['max_results'] = graph.max_results

        # decode next_link or build query execution plan
        cardinalities = dict()
        start = time()
        if next_link is not None:
            if dataset.is_stateless:
                saved_plan = next_link
            else:
                saved_plan = dataset.statefull_manager.get_plan(next_link)
            plan = load(decode_saved_plan(saved_plan), dataset, context)
        else:
            plan, cardinalities = parse_query(query, dataset, default_graph_uri, context)
        logging.info(f'loading time: {(time() - start) * 1000}ms')
        loading_time = (time() - start) * 1000

        # execute query
        engine = SageEngine()
        bindings, saved_plan, is_done, abort_reason = await engine.execute(plan, context)

        # commit or abort (if necessary)
        if abort_reason is not None:
            graph.abort()
            raise HTTPException(status_code=500, detail=f"The SPARQL query has been aborted for the following reason: '{abort_reason}'")
        else:
            graph.commit()

        # encode saved plan if query execution is not done yet and there was no abort
        start = time()
        next_page = None
        if (not is_done) and abort_reason is None:
            next_page = encode_saved_plan(saved_plan)
            if not dataset.is_stateless:
                # generate the plan ID if this is the first time we execute this plan
                plan_id = next_link if next_link is not None else str(uuid4())
                dataset.statefull_manager.save_plan(plan_id, next_page)
                next_page = plan_id
        elif is_done and (not dataset.is_stateless) and next_link is not None:
            # delete the saved plan, as it will not be reloaded anymore
            dataset.statefull_manager.delete_plan(next_link)
        logging.info(f'export time: {(time() - start) * 1000}ms')
        exportTime = (time() - start) * 1000

        stats = {"cardinalities": cardinalities, "import": loading_time, "export": exportTime}
        return (bindings, next_page, stats)
    except Exception as err:
        # abort all ongoing transactions, then forward the exception to the main loop
        logging.error(traceback.format_exc())
        if graph is not None:
            graph.abort()
        raise err
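# A minimal sketch of how the coroutine above could be exposed as a FastAPI
# route. The application object, the /sparql path, the response keys and the
# module-level `dataset` are assumptions; only HTTPException and execute_query
# come from the code above.
from typing import Optional
from fastapi import FastAPI

app = FastAPI()

@app.get("/sparql")
async def sparql_get(query: str, default_graph_uri: str, next_link: Optional[str] = None):
    bindings, next_page, stats = await execute_query(query, default_graph_uri, next_link, dataset)
    return {
        "bindings": bindings,
        "next": next_page,
        "hasNext": next_page is not None,
        "stats": stats
    }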
async def test_parse_rowid(query, cardinality):
    iterator, cards = parse_query(query, dataset, 'context')
    print("pipeline:")
    print(iterator)
    assert len(cards) >= 0
    assert iterator is not None
async def test_query_parser(self, query, cardinality):
    iterator, cards = parse_query(query, dataset, 'watdiv100')
    assert len(cards) > 0
    assert iterator is not None
async def test_query_parser(self, query, cardinality):
    context = {
        'quantum': 10e7,
        'max_results': 10e7,
        'start_timestamp': 0
    }
    iterator, cards = parse_query(query, dataset, 'watdiv100', context)
    assert len(cards) > 0
    assert iterator is not None
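# The bare coroutines above are typically driven by pytest with parametrization
# and the pytest-asyncio plugin. A sketch of such a harness follows; the query
# fixtures and the module-level `dataset` are assumptions made for illustration.
import pytest

queries = [
    # hypothetical (query, expected cardinality) pair
    ("SELECT ?s ?p ?o WHERE { ?s ?p ?o }", 1),
]

@pytest.mark.asyncio
@pytest.mark.parametrize("query,cardinality", queries)
async def test_query_parser_standalone(query, cardinality):
    context = {'quantum': 10e7, 'max_results': 10e7, 'start_timestamp': 0}
    iterator, cards = parse_query(query, dataset, 'watdiv100', context)
    assert len(cards) > 0
    assert iterator is not None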
def explain(query, file, config_file, graph_uri, indentnb, update, parse):
    coloredlogs.install(level='INFO', fmt='%(asctime)s - %(levelname)s %(message)s')
    logger = logging.getLogger(__name__)

    if query is None and file is None:
        print("Error: you must specify a query to execute, either with --query or --file. See sage-query --help for more information.")
        exit(1)

    # load query from file if required
    if file is not None:
        with open(file) as query_file:
            query = query_file.read()

    dataset = load_config(config_file)
    if dataset is None:
        print(f"config file {config_file} not found")
        exit(1)

    graph = dataset.get_graph(graph_uri)
    if graph is None:
        print(f"RDF Graph not found: {graph_uri}")
        exit(1)

    engine = SageEngine()
    pp = pprint.PrettyPrinter(indent=indentnb)
    if query is None:
        exit(1)

    print("------------")
    print("Query")
    print("------------")
    print(query)

    if update:
        pq = parseUpdate(query)
    else:
        pq = parseQuery(query)
    if pq is None:
        exit(1)

    if parse:
        print("------------")
        print("Parsed Query")
        print("------------")
        pp.pprint(pq)
        print(prettify_parsetree(pq))

    if update:
        tq = translateUpdate(pq)
    else:
        tq = translateQuery(pq)
    print("------------")
    print("Algebra")
    print("------------")
    pprintAlgebra(tq)
    # logical_plan = tq.algebra

    cards = list()
    iterator, cards = parse_query(query, dataset, graph_uri)
    print("-----------------")
    print("Iterator pipeline")
    print("-----------------")
    print(iterator)
    print("-----------------")
    print("Cardinalities")
    print("-----------------")
    pp.pprint(cards)
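# A standalone illustration of the rdflib calls that `explain` relies on to
# display the parse tree and the SPARQL algebra. Only the example query string
# is an assumption; parseQuery, translateQuery and pprintAlgebra are the same
# rdflib helpers used above.
from rdflib.plugins.sparql.parser import parseQuery
from rdflib.plugins.sparql.algebra import translateQuery, pprintAlgebra

example_query = "SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10"
parse_tree = parseQuery(example_query)   # raw parse tree
algebra = translateQuery(parse_tree)     # SPARQL algebra expression
pprintAlgebra(algebra)                   # prints the algebra tree to stdout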