def load(saved_plan: SavedProtobufPlan, dataset: Dataset, context: dict) -> PreemptableIterator:
    """Load a preemptable physical query execution plan from a saved state.

    Args:
      * saved_plan: Saved query execution plan.
      * dataset: RDF dataset used to execute the plan.
      * context: Information about the query execution.

    Returns:
      The pipeline of iterators used to continue query execution.
    """
    # unpack the plan from the serialized protobuf message
    if isinstance(saved_plan, bytes):
        root = RootTree()
        root.ParseFromString(saved_plan)
        sourceField = root.WhichOneof('source')
        saved_plan = getattr(root, sourceField)
    # load the plan based on the current node
    if type(saved_plan) is SavedFilterIterator:
        return load_filter(saved_plan, dataset, context)
    elif type(saved_plan) is SavedProjectionIterator:
        return load_projection(saved_plan, dataset, context)
    elif type(saved_plan) is SavedScanIterator:
        return load_scan(saved_plan, dataset, context)
    elif type(saved_plan) is SavedIndexJoinIterator:
        return load_nlj(saved_plan, dataset, context)
    elif type(saved_plan) is SavedBagUnionIterator:
        return load_union(saved_plan, dataset, context)
    else:
        raise Exception(f"Unknown iterator type '{type(saved_plan)}' when loading controls")
def load(protoMsg, dataset):
    """Load a preemptable physical query execution plan from a saved state."""
    saved_plan = protoMsg
    # unpack the plan from the serialized protobuf message
    if isinstance(protoMsg, bytes):
        root = RootTree()
        root.ParseFromString(protoMsg)
        sourceField = root.WhichOneof('source')
        saved_plan = getattr(root, sourceField)
    # load the plan based on the current node
    if type(saved_plan) is SavedFilterIterator:
        return load_filter(saved_plan, dataset)
    elif type(saved_plan) is SavedProjectionIterator:
        return load_projection(saved_plan, dataset)
    elif type(saved_plan) is SavedScanIterator:
        return load_scan(saved_plan, dataset)
    elif type(saved_plan) is SavedIndexJoinIterator:
        return load_nlj(saved_plan, dataset)
    elif type(saved_plan) is SavedBagUnionIterator:
        return load_union(saved_plan, dataset)
    elif type(saved_plan) is SavedGroupByAgg:
        return load_groupby(saved_plan, dataset)
    elif type(saved_plan) is SavedAggregatesProjectionIterator:
        return load_aggregates_projection(saved_plan, dataset)
    else:
        raise Exception('Unknown iterator type "%s" when loading controls' % type(saved_plan))
def load(saved_plan: SavedProtobufPlan, dataset: Dataset) -> PreemptableIterator:
    """Load a preemptable physical query execution plan from a saved state.

    Args:
      * saved_plan: Saved query execution plan.
      * dataset: RDF dataset used to execute the plan.

    Returns:
      The pipeline of iterators used to continue query execution.
    """
    try:
        # unpack the plan from the serialized protobuf message
        if isinstance(saved_plan, bytes):
            root = RootTree()
            root.ParseFromString(saved_plan)
            sourceField = root.WhichOneof('source')
            saved_plan = getattr(root, sourceField)
        # load the plan based on the current node
        if type(saved_plan) is SavedFilterIterator:
            return load_filter(saved_plan, dataset)
        elif type(saved_plan) is SavedProjectionIterator:
            return load_projection(saved_plan, dataset)
        elif type(saved_plan) is SavedReducedIterator:
            return load_reduced(saved_plan, dataset)
        elif type(saved_plan) is SavedScanIterator:
            return load_scan(saved_plan, dataset)
        elif type(saved_plan) is SavedIndexJoinIterator:
            return load_nlj(saved_plan, dataset)
        elif type(saved_plan) is SavedBagUnionIterator:
            return load_union(saved_plan, dataset)
        elif type(saved_plan) is SavedBindIterator:
            return load_bind(saved_plan, dataset)
        elif type(saved_plan) is SavedConstructIterator:
            return load_construct(saved_plan, dataset)
        else:
            raise Exception(f"Unknown iterator type '{type(saved_plan)}' when loading controls")
    except Exception:
        # log a short traceback for debugging, then re-raise the error
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_tb(exc_traceback, limit=1, file=sys.stdout)
        logging.error(f"load_plan: {exc_type}")
        raise
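For context, this is roughly how a resumed request flows through load: the client sends back the base64-encoded saved plan (the next link), it is decoded into raw protobuf bytes, and load rebuilds the iterator pipeline. A minimal sketch, assuming the decode_saved_plan helper used by sage_query further below and a dataset object loaded elsewhere:

# Sketch only: resume a query from a saved plan sent back by a client.
# `next_link` and `dataset` are assumed to come from the HTTP request and
# the server configuration, respectively.
raw_plan = decode_saved_plan(next_link)   # base64 string -> protobuf bytes
plan = load(raw_plan, dataset)            # bytes -> PreemptableIterator pipeline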
def encode_saved_plan(savedPlan: RootTree) -> str:
    """Encode a Protobuf-based saved plan into string format.

    Argument: A saved plan, encoded as a Protobuf message.

    Returns: The saved plan, encoded as a base64 string.
    """
    if savedPlan is None:
        return None
    raw_bytes = savedPlan.SerializeToString()
    return b64encode(raw_bytes).decode('utf-8')
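The inverse helper, decode_saved_plan, is used by sage_query below but not shown in this section. A minimal sketch of what it could look like, assuming it simply reverses the base64 encoding (the actual implementation may differ):

from base64 import b64decode

def decode_saved_plan(saved_plan: str) -> bytes:
    """Decode a base64-encoded saved plan back into raw protobuf bytes (sketch)."""
    if saved_plan is None:
        return None
    return b64decode(saved_plan)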
async def execute(self, plan: PreemptableIterator, quantum: int, limit=inf) -> ExecutionResults:
    """Execute a preemptable physical query execution plan under a time quantum.

    Args:
      * plan: Root of the pipeline of iterators.
      * quantum: Time quantum used to execute the query.
      * limit: Maximum number of solution mappings to produce (default: no limit).

    Returns:
      A tuple (``results``, ``saved_plan``, ``is_done``, ``abort_reason``) where:
      * ``results`` is a list of solution mappings found during query execution
      * ``saved_plan`` is the state of the plan saved using protocol-buffers
      * ``is_done`` is True when the plan has completed query evaluation, False otherwise
      * ``abort_reason`` is not None if the query was aborted due to a concurrency control issue

    Throws: Any exception raised during query execution.
    """
    results: List[Dict[str, str]] = list()
    queue = Queue()
    loop = get_event_loop()
    query_done = False
    root = None
    abort_reason = None
    try:
        await wait_for(executor(plan, queue, limit), timeout=quantum)
        query_done = True
    except StopAsyncIteration:
        pass
    except asyncTimeoutError:
        pass
    except TooManyResults:
        pass
    except DeleteInsertConflict as err:
        abort_reason = str(err)
    finally:
        if plan.serialized_name() == 'construct':
            # a CONSTRUCT query produces RDF triples instead of solution mappings
            for s, p, o in plan.graph():
                results.append({'s': s.n3(), 'p': p.n3(), 'o': o.n3()})
        elif plan.serialized_name() == 'reduc':
            # fetch the solutions buffered by the REDUCED iterator
            for b in plan.results():
                results.append(b)
        else:
            # collect solution mappings produced by the pipeline
            while not queue.empty():
                results.append(queue.get_nowait())
    # save the plan if query execution is not done yet and no abort has occurred
    if (not query_done) and abort_reason is None:
        root = RootTree()
        source_field = plan.serialized_name() + '_source'
        getattr(root, source_field).CopyFrom(plan.save())
    return (results, root, query_done, abort_reason)
def execute(self, plan, quota, limit=inf):
    """Execute a preemptable physical query execution plan under a time quota.

    Args:
      - plan :class:`.PreemptableIterator` - The root of the plan
      - quota ``float`` - The time quota used for query execution

    Returns:
      A tuple (``results``, ``saved_plan``, ``is_done``) where:
      - ``results`` is a list of solution mappings found during query execution
      - ``saved_plan`` is the state of the plan saved using protocol-buffers
      - ``is_done`` is True when the plan has completed query evaluation, False otherwise
    """
    results = list()
    queue = Queue()
    query_done = False
    try:
        task = wait_for(executor(plan, queue, limit), timeout=quota)
        self._loop.run_until_complete(task)
        query_done = True
    except asyncTimeoutError:
        pass
    except GroupByOutOfMemoryException:
        pass
    except TooManyResults:
        pass
    finally:
        # close the event loop, otherwise we get a "Too many open files" OSError
        self._loop.close()
    # fetch partial aggregates if the query is an aggregation query
    if plan.is_aggregator():
        results += plan.generate_results()
    # collect results from a classic query
    while not queue.empty():
        results.append(queue.get_nowait())
    # save the current state of the plan using protocol-buffers
    saved_plan = plan.save()
    root = RootTree()
    source_field = plan.serialized_name() + '_source'
    getattr(root, source_field).CopyFrom(saved_plan)
    return results, root, query_done
async def execute(self, plan: PreemptableIterator, context: dict) -> ExecutionResults:
    """Execute a preemptable physical query execution plan under a time quantum.

    Args:
      * plan: Root of the pipeline of iterators.
      * context: Information about the query execution.

    Returns:
      A tuple (``results``, ``saved_plan``, ``is_done``, ``abort_reason``) where:
      * ``results`` is a list of solution mappings found during query execution
      * ``saved_plan`` is the state of the plan saved using protocol-buffers
      * ``is_done`` is True when the plan has completed query evaluation, False otherwise
      * ``abort_reason`` is not None if the query was aborted due to a concurrency control issue

    Throws: Any exception raised during query execution.
    """
    results: List[Dict[str, str]] = list()
    query_done = False
    root = None
    abort_reason = None
    try:
        context['start_timestamp'] = time()
        await executor(plan, results, context)
        query_done = True
    except QuantumExhausted:
        pass
    except TooManyResults:
        pass
    except DeleteInsertConflict as err:
        abort_reason = str(err)
    # save the plan if query execution is not done yet and no abort has occurred
    if not query_done and abort_reason is None:
        root = RootTree()
        source_field = plan.serialized_name() + '_source'
        getattr(root, source_field).CopyFrom(plan.save())
    return (results, root, query_done, abort_reason)
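To show how the pieces above fit together, here is a hedged sketch of a caller that runs one quantum and, if the plan is not finished, serializes the saved state for the client; the engine, plan and context objects are assumed to be built elsewhere (e.g. by the query parser), so their names are illustrative only:

# Sketch of one server-side quantum, inside an async request handler.
results, saved_plan, is_done, abort_reason = await engine.execute(plan, context)
if abort_reason is not None:
    # the query was aborted by concurrency control; report the reason
    raise RuntimeError(abort_reason)
next_link = None if is_done else encode_saved_plan(saved_plan)
# `results` and `next_link` are then returned to the client, which sends
# `next_link` back later to resume the query where it stopped.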
def sage_query(config_file, default_graph_uri, query, file, limit):
    """Execute a SPARQL query on an embedded Sage Server.

    Example usage: sage-query config.yaml http://example.org/swdf-postgres -f queries/spo.sparql
    """
    # assert that we have a query to evaluate
    if query is None and file is None:
        print("Error: you must specify a query to execute, either with --query or --file. See sage-query --help for more information.")
        exit(1)
    if limit is None:
        limit = inf
    # load the query from a file if required
    if file is not None:
        with open(file) as query_file:
            query = query_file.read()
    # run the server in-process and query it over HTTP
    client = TestClient(run_app(config_file))
    nbResults = 0
    nbCalls = 0
    hasNext = True
    next_link = None
    count = 0
    start = time()
    while hasNext:
        response = post_sparql(client, query, next_link, default_graph_uri)
        response = response.json()
        nbResults += len(response['bindings'])
        hasNext = response['hasNext']
        next_link = response['next']
        nbCalls += 1
        for bindings in response['bindings']:
            print(bindings)
        if next_link is not None:
            # decode the saved plan to report the progression of the query
            saved_plan = next_link
            plan = decode_saved_plan(saved_plan)
            root = RootTree()
            root.ParseFromString(plan)
            prog, card = progress(root)
            logger.info(f"progression {prog}/{card}: {prog/card*100}%")
        count += 1
        if count >= limit:
            break
    end = time()
    logger.info("finished in {}s".format(end - start))
    logger.info("made {} calls".format(nbCalls))
    logger.info("got {} mappings".format(nbResults))
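The command-line wiring for sage_query is not shown in this section. A minimal sketch of how it could be exposed with click, with option names inferred from the error message and docstring above (assumptions; the actual sage-engine CLI may be wired differently):

import click

@click.command()
@click.argument("config_file")
@click.argument("default_graph_uri")
@click.option("-q", "--query", default=None, help="SPARQL query to execute")
@click.option("-f", "--file", default=None, help="File containing the SPARQL query to execute")
@click.option("-l", "--limit", type=int, default=None, help="Maximum number of HTTP calls to perform")
def sage_query_cli(config_file, default_graph_uri, query, file, limit):
    """Hypothetical wrapper: delegate to the sage_query function above."""
    sage_query(config_file, default_graph_uri, query, file, limit)

if __name__ == "__main__":
    sage_query_cli()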