def execute_query(query, default_graph_uri, next_link, dataset, mimetype, url):
    """Execute a query using the SageEngine and return the appropriate HTTP response"""
    graph_name = format_graph_uri(default_graph_uri, url)
    if not dataset.has_graph(graph_name):
        return sage_http_error("No RDF graph matching the default URI provided was found.")
    graph = dataset.get_graph(graph_name)
    # decode next_link or build the query execution plan
    cardinalities = dict()
    start = time()
    if next_link is not None:
        plan = load(decode_saved_plan(next_link), dataset)
    else:
        plan, cardinalities = parse_query(query, dataset, graph_name, url)
    loading_time = (time() - start) * 1000
    # execute the query
    engine = SageEngine()
    quota = graph.quota / 1000
    max_results = graph.max_results
    bindings, saved_plan, is_done = engine.execute(plan, quota, max_results)
    # compute controls for the next page
    start = time()
    next_page = None
    if not is_done:
        next_page = encode_saved_plan(saved_plan)
    export_time = (time() - start) * 1000
    stats = {
        "cardinalities": cardinalities,
        "import": loading_time,
        "export": export_time
    }
    # send the response in the requested format
    if mimetype == "application/sparql-results+json":
        return Response(responses.w3c_json_streaming(bindings, next_page, stats, url),
                        content_type='application/json')
    if mimetype in ("application/xml", "application/sparql-results+xml"):
        return Response(responses.w3c_xml(bindings, next_page, stats),
                        content_type="application/xml")
    if mimetype == "application/json":
        return Response(responses.raw_json_streaming(bindings, next_page, stats, url),
                        content_type='application/json')
    # otherwise, return the HTML version
    return render_template("sage_page.html", query=query,
                           default_graph_uri=default_graph_uri, bindings=bindings,
                           next_page=next_page, stats=stats)
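
# A minimal sketch of how "next" link helpers like encode_saved_plan /
# decode_saved_plan could work, assuming a saved plan only needs to round-trip
# through a URL-safe string. This is NOT the actual SaGe serialization format;
# the *_sketch names are hypothetical stand-ins for illustration.
import pickle
import zlib
from base64 import urlsafe_b64decode, urlsafe_b64encode


def encode_saved_plan_sketch(plan):
    """Compress a pickled plan and encode it as a URL-safe 'next' token."""
    return urlsafe_b64encode(zlib.compress(pickle.dumps(plan))).decode('utf-8')


def decode_saved_plan_sketch(token):
    """Inverse of encode_saved_plan_sketch: rebuild a saved plan from a token."""
    return pickle.loads(zlib.decompress(urlsafe_b64decode(token)))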
# scan_test.py
# Author: Thomas MINIER - MIT License 2017-2018
from query_engine.sage_engine import SageEngine
from query_engine.iterators.scan import ScanIterator
from database.hdt_file_connector import HDTFileConnector

hdtDoc = HDTFileConnector('tests/data/test.hdt')
engine = SageEngine()
triple = {
    'subject': '?s',
    'predicate': '?p',
    'object': '?o',
    'graph': 'watdiv100'
}


def test_scan_read():
    iterator, card = hdtDoc.search_triples(triple['subject'], triple['predicate'], triple['object'])
    scan = ScanIterator(iterator, triple, card)
    (results, saved, done) = engine.execute(scan, 10e7)
    assert len(results) == card
    assert done


def test_scan_save_nointerrupt():
    iterator, card = hdtDoc.search_triples(triple['subject'], triple['predicate'], triple['object'])
    scan = ScanIterator(iterator, triple, card)
def lookup_entity(graph_name, entity):
    """Evaluate a DESCRIBE query over an RDF dataset"""
    logger.debug('[IP: {}] [/lookup/] Querying {}'.format(
        request.environ['REMOTE_ADDR'], graph_name))
    graph = dataset.get_graph(graph_name)
    if graph is None:
        abort(404)
    url = secure_url(request.url)
    try:
        engine = SageEngine()
        # get the entity URI and a possible "next" link
        entity_uri = secure_url(request.base_url)
        next_link = request.args.get("next", default=None)
        post_query = build_describe_query(entity_uri)
        logger.debug('[IP: {}] [/lookup/] Entity={}'.format(
            request.environ['REMOTE_ADDR'], entity_uri))
        quota = graph.quota / 1000
        max_results = graph.max_results
        # load the next link, if present
        if next_link is not None:
            logger.debug('[/lookup/{}] Saved plan found, decoding "next" link'.format(graph_name))
            next_link = decode_saved_plan(next_link)
        else:
            logger.debug('[/lookup/{}] Query to evaluate: {}'.format(graph_name, post_query))
        # build the physical query plan, then execute it with the given quota
        logger.debug('[/lookup/{}] Starting query evaluation...'.format(graph_name))
        # start = time()
        plan, cardinalities = build_query_plan(post_query, dataset, graph_name, next_link)
        # loading_time = (time() - start) * 1000
        bindings, saved_plan, is_done = engine.execute(plan, quota, max_results)
        logger.debug('[/lookup/{}] Query evaluation completed'.format(graph_name))
        # compute controls for the next page
        # start = time()
        next_page = None
        if is_done:
            logger.debug('[/lookup/{}] Query completed under the time quota'.format(graph_name))
        else:
            logger.debug('[/lookup/{}] The query was not completed under the time quota...'.format(graph_name))
            logger.debug('[/lookup/{}] Saving the execution plan to generate a "next" link'.format(graph_name))
            next_page = encode_saved_plan(saved_plan)
            logger.debug('[/lookup/{}] "next" link successfully generated'.format(graph_name))
        # exportTime = (time() - start) * 1000
        # stats = {"import": loading_time, "export": exportTime}
        triples = bindings_to_triple(entity_uri, bindings, url)
        headers = dict()
        if next_page is not None:
            headers["X-Sage-Next"] = "{}?next={}".format(entity_uri, next_page)
            headers["Link"] = '<{}?next={}>; rel="next"; title="Next page"'.format(entity_uri, next_page)
        return Response(responses.ntriples_streaming(triples),
                        content_type="application/n-triples", headers=headers)
    except Exception as e:
        logger.error(e)
        abort(500)
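
# Hedged client-side sketch (the function name and the use of the requests
# library are assumptions, not part of this module): follow the X-Sage-Next
# header produced by lookup_entity above to download a full entity
# description, page by page.
import requests


def fetch_entity_description_sketch(entity_url):
    """Fetch all N-Triples pages describing an entity, following pagination."""
    triples = []
    url = entity_url
    while url is not None:
        response = requests.get(url, headers={'Accept': 'application/n-triples'})
        response.raise_for_status()
        triples.extend(response.text.splitlines())
        # the server advertises the next page, if any, in X-Sage-Next
        url = response.headers.get('X-Sage-Next')
    return triples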
def sparql_query(graph_name):
    """WARNING: old API, deprecated"""
    graph = dataset.get_graph(graph_name)
    if graph is None:
        abort(404)
    logger.debug('[/sparql/] Corresponding dataset found')
    mimetype = request.accept_mimetypes.best_match([
        "application/json", "application/xml",
        "application/sparql-results+json", "application/sparql-results+xml"
    ])
    url = secure_url(request.url)
    try:
        # a GET request always returns the homepage of the dataset
        if request.method == "GET" or (not request.is_json):
            dinfo = graph.describe(url)
            dinfo['@id'] = url
            void_desc = {
                "nt": VoidDescriptor(url, graph).describe("ntriples"),
                "ttl": VoidDescriptor(url, graph).describe("turtle"),
                "xml": VoidDescriptor(url, graph).describe("xml")
            }
            return render_template("website/sage_dataset.html",
                                   dataset_info=dinfo, void_desc=void_desc)
        engine = SageEngine()
        post_query, err = QueryRequest().load(request.get_json())
        if err is not None and len(err) > 0:
            return Response(format_marshmallow_errors(err), status=400)
        quota = graph.quota / 1000
        max_results = graph.max_results
        # load the next link, if present
        next_link = None
        if 'next' in post_query:
            next_link = decode_saved_plan(post_query["next"])
        # build the physical query plan, then execute it with the given quota
        start = time()
        plan, cardinalities = build_query_plan(post_query["query"], dataset, graph_name, next_link)
        loading_time = (time() - start) * 1000  # convert to milliseconds
        bindings, saved_plan, is_done = engine.execute(plan, quota, max_results)
        # compute controls for the next page
        start = time()
        next_page = None
        if not is_done:
            next_page = encode_saved_plan(saved_plan)
        export_time = (time() - start) * 1000  # convert to milliseconds
        stats = {
            "cardinalities": cardinalities,
            "import": loading_time,
            "export": export_time
        }
        if mimetype == "application/sparql-results+json":
            res = Response(responses.w3c_json_streaming(bindings, next_page, stats, url),
                           content_type='application/json')
        elif mimetype in ("application/xml", "application/sparql-results+xml"):
            res = Response(responses.w3c_xml(bindings, next_page, stats),
                           content_type="application/xml")
        else:
            res = Response(responses.raw_json_streaming(bindings, next_page, stats, url),
                           content_type='application/json')
        # set a deprecation warning in the response headers
        res.headers.add(
            "Warning",
            "199 SaGe/2.0 \"You are using a deprecated API. Consider upgrading to the SaGe SPARQL query API. See http://sage.univ-nantes.fr/documentation for more details.\"")
        return res
    except Exception as e:
        logger.error(e)
        abort(500)
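
# Hedged sketch of how a client would drive this deprecated API. The endpoint
# URL and the response keys 'bindings' and 'next' are assumptions inferred
# from the handler above, not a documented contract: POST the query, then
# re-POST with the returned "next" token until the server stops issuing one.
import requests


def run_deprecated_sparql_sketch(endpoint_url, query):
    """Collect all solution bindings for a query, page by page."""
    bindings, next_token = [], None
    while True:
        payload = {'query': query}
        if next_token is not None:
            payload['next'] = next_token
        page = requests.post(endpoint_url, json=payload).json()
        bindings.extend(page.get('bindings', []))
        next_token = page.get('next')
        if next_token is None:
            return bindings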
def sparql_query(dataset_name):
    logger.info('[IP: {}] [/sparql/] Querying {}'.format(
        request.environ['REMOTE_ADDR'], dataset_name))
    dataset = datasets.get_dataset(dataset_name)
    if dataset is None:
        abort(404)
    logger.debug('[/sparql/] Corresponding dataset found')
    mimetype = request.accept_mimetypes.best_match([
        "application/json", "application/xml",
        "application/sparql-results+json", "application/sparql-results+xml"
    ])
    url = secure_url(request.url)
    try:
        # a GET request always returns the homepage of the dataset
        if request.method == "GET" or (not request.is_json):
            dinfo = dataset.describe(url)
            dinfo['@id'] = url
            void_desc = {
                "nt": VoidDescriptor(url, dataset).describe("ntriples"),
                "ttl": VoidDescriptor(url, dataset).describe("turtle"),
                "xml": VoidDescriptor(url, dataset).describe("xml")
            }
            return render_template("sage.html", dataset_info=dinfo, void_desc=void_desc)
        engine = SageEngine()
        post_query, err = QueryRequest().load(request.get_json())
        if err is not None and len(err) > 0:
            return Response(format_marshmallow_errors(err), status=400)
        logger.info('[IP: {}] [/sparql/] Query={}'.format(
            request.environ['REMOTE_ADDR'], post_query))
        quota = dataset.quota / 1000
        max_results = dataset.max_results
        # load the next link, if present
        next_link = None
        if 'next' in post_query:
            logger.debug('[/sparql/{}] Saved plan found, decoding "next" link'.format(dataset_name))
            next_link = decode_saved_plan(post_query["next"])
        else:
            logger.debug('[/sparql/{}] Query to evaluate: {}'.format(dataset_name, post_query))
        # build the physical query plan, then execute it with the given quota
        logger.debug('[/sparql/{}] Starting query evaluation...'.format(dataset_name))
        start = time()
        plan, cardinalities = build_query_plan(post_query["query"], dataset, next_link)
        loading_time = (time() - start) * 1000
        bindings, saved_plan, is_done = engine.execute(plan, quota, max_results)
        logger.debug('[/sparql/{}] Query evaluation completed'.format(dataset_name))
        # compute controls for the next page
        start = time()
        next_page = None
        if is_done:
            logger.debug('[/sparql/{}] Query completed under the time quota'.format(dataset_name))
        else:
            logger.debug('[/sparql/{}] The query was not completed under the time quota...'.format(dataset_name))
            logger.debug('[/sparql/{}] Saving the execution plan to generate a "next" link'.format(dataset_name))
            next_page = encode_saved_plan(saved_plan)
            logger.debug('[/sparql/{}] "next" link successfully generated'.format(dataset_name))
        export_time = (time() - start) * 1000
        stats = {
            "cardinalities": cardinalities,
            "import": loading_time,
            "export": export_time
        }
        if mimetype == "application/sparql-results+json":
            return json.jsonify(responses.json(bindings, next_page, stats))
        if mimetype in ("application/xml", "application/sparql-results+xml"):
            return Response(responses.xml(bindings, next_page, stats),
                            content_type="application/xml")
        return json.jsonify(responses.raw_json(bindings, next_page, stats))
    except Exception:
        abort(500)
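
# Hedged sketch of the preemptive evaluation loop that engine.execute drives
# in the handlers above. This is NOT the actual SageEngine implementation; it
# assumes the plan behaves like a Python iterator of solution bindings, and
# shows how a time quota (in seconds) and a per-page result limit yield the
# (bindings, saved_plan, is_done) triple the handlers consume.
from time import time


def execute_sketch(plan, quota, max_results):
    bindings = []
    start = time()
    while (time() - start) < quota and len(bindings) < max_results:
        binding = next(plan, None)
        if binding is None:
            # the plan is fully evaluated: nothing to save, the query is done
            return bindings, None, True
        bindings.append(binding)
    # preempted: return the partial results and the plan to resume later
    return bindings, plan, False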