Example #1
0
def load(saved_plan: SavedProtobufPlan, dataset: Dataset, context: dict) -> PreemptableIterator:
    """Load a preemptable physical query execution plan from a saved state.

    Args:
      * saved_plan: Saved query execution plan, possibly serialized as bytes.
      * dataset: RDF dataset used to execute the plan.
      * context: Information about the query execution.

    Returns:
      The pipeline of iterators used to continue query execution.

    Raises:
      Exception: If the type of the saved iterator is unknown.
    """
    # unpack the plan from the serialized protobuf message
    if isinstance(saved_plan, bytes):
        root = RootTree()
        root.ParseFromString(saved_plan)
        sourceField = root.WhichOneof('source')
        saved_plan = getattr(root, sourceField)
    # dispatch to the loader matching the exact type of the current node
    # (a dict avoids the inconsistent if/if/elif chain of the original)
    loaders = {
        SavedFilterIterator: load_filter,
        SavedProjectionIterator: load_projection,
        SavedScanIterator: load_scan,
        SavedIndexJoinIterator: load_nlj,
        SavedBagUnionIterator: load_union,
    }
    loader = loaders.get(type(saved_plan))
    if loader is None:
        raise Exception(f"Unknown iterator type '{type(saved_plan)}' when loading controls")
    return loader(saved_plan, dataset, context)
Example #2
0
def load(protoMsg, dataset):
    """Load a preemptable physical query execution plan from a saved state.

    Args:
      * protoMsg: Saved query execution plan, possibly serialized as bytes.
      * dataset: RDF dataset used to execute the plan.

    Returns:
      The pipeline of iterators used to continue query execution.

    Raises:
      Exception: If the type of the saved iterator is unknown.
    """
    saved_plan = protoMsg
    # unpack the plan from the serialized protobuf message
    if isinstance(protoMsg, bytes):
        root = RootTree()
        root.ParseFromString(protoMsg)
        sourceField = root.WhichOneof('source')
        saved_plan = getattr(root, sourceField)
    # dispatch to the loader matching the exact type of the current node
    # (a dict avoids the inconsistent if/if/elif chain of the original)
    loaders = {
        SavedFilterIterator: load_filter,
        SavedProjectionIterator: load_projection,
        SavedScanIterator: load_scan,
        SavedIndexJoinIterator: load_nlj,
        SavedBagUnionIterator: load_union,
        SavedGroupByAgg: load_groupby,
        SavedAggregatesProjectionIterator: load_aggregates_projection,
    }
    loader = loaders.get(type(saved_plan))
    if loader is None:
        raise Exception('Unknown iterator type "%s" when loading controls' %
                        type(saved_plan))
    return loader(saved_plan, dataset)
Example #3
0
def load(saved_plan: SavedProtobufPlan, dataset: Dataset) -> PreemptableIterator:
    """Load a preemptable physical query execution plan from a saved state.

    Args:
      * saved_plan: Saved query execution plan, possibly serialized as bytes.
      * dataset: RDF dataset used to execute the plan.

    Returns:
      The pipeline of iterators used to continue query execution.

    Raises:
      Exception: If the type of the saved iterator is unknown. Any other
      exception raised while loading is logged, then re-raised.
    """
    try:
        # unpack the plan from the serialized protobuf message
        if isinstance(saved_plan, bytes):
            root = RootTree()
            root.ParseFromString(saved_plan)
            sourceField = root.WhichOneof('source')
            saved_plan = getattr(root, sourceField)
        # dispatch to the loader matching the exact type of the current node
        loaders = {
            SavedFilterIterator: load_filter,
            SavedProjectionIterator: load_projection,
            SavedReducedIterator: load_reduced,
            SavedScanIterator: load_scan,
            SavedIndexJoinIterator: load_nlj,
            SavedBagUnionIterator: load_union,
            SavedBindIterator: load_bind,
            SavedConstructIterator: load_construct,
        }
        loader = loaders.get(type(saved_plan))
        if loader is None:
            raise Exception(f"Unknown iterator type '{type(saved_plan)}' when loading controls")
        return loader(saved_plan, dataset)
    # narrowed from a bare `except:` so SystemExit/KeyboardInterrupt bypass logging
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_tb(exc_traceback, limit=1, file=sys.stdout)
        logging.error(f"load_plan:{sys.exc_info()[0]}")
        raise
Example #4
0
def encode_saved_plan(savedPlan: RootTree) -> str:
    """Encode a Protobuf-based saved plan into string format.

    Argument: A saved plan, encoded as a Protobuf message.

    Returns: The saved plan, encoded as a string of bytes.
    """
    if savedPlan is None:
        return None
    bytes = savedPlan.SerializeToString()
    return b64encode(bytes).decode('utf-8')
Example #5
0
    async def execute(self, plan: PreemptableIterator, quantum: int, limit=inf) -> ExecutionResults:
        """Execute a preemptable physical query execution plan under a time quantum.

        Args:
          * plan: Root of the pipeline of iterators.
          * quantum: Time quantum used to execute the query.
          * limit: Maximum number of results to produce (default: no limit).

        Returns: A tuple (``results``, ``saved_plan``, ``is_done``, ``abort_reason``) where:
          * ``results`` is a list of solution mappings found during query execution
          * ``saved_plan`` is the state of the plan saved using protocol-buffers
          * ``is_done`` is True when the plan has completed query evaluation, False otherwise
          * ``abort_reason`` is the error message if the query was aborted due to a concurrency control issue, None otherwise

        Throws: Any exception raised during query execution.
        """
        results: List[Dict[str, str]] = list()
        queue = Queue()
        query_done = False
        root = None
        abort_reason = None
        try:
            # run the plan until the quantum expires, the limit is reached
            # or execution completes
            await wait_for(executor(plan, queue, limit), timeout=quantum)
            query_done = True
        except StopAsyncIteration:
            pass
        except asyncTimeoutError:
            # time quantum exhausted: the plan is saved below
            pass
        except TooManyResults:
            pass
        except DeleteInsertConflict as err:
            abort_reason = str(err)
        finally:
            # collect the solutions produced during this quantum
            if plan.serialized_name() == 'construct':
                # CONSTRUCT query: export the triples of the built RDF graph
                for s, p, o in plan.graph():
                    results.append({'s': s.n3(), 'p': p.n3(), 'o': o.n3()})
            elif plan.serialized_name() == 'reduc':
                # REDUCED modifier: fetch the deduplicated solutions
                for b in plan.results():
                    results.append(b)
            else:
                while not queue.empty():
                    results.append(queue.get_nowait())
        # save the plan if query execution is not done yet and no abort has occurred
        if (not query_done) and abort_reason is None:
            root = RootTree()
            source_field = plan.serialized_name() + '_source'
            getattr(root, source_field).CopyFrom(plan.save())
        return (results, root, query_done, abort_reason)
    def execute(self, plan, quota, limit=inf):
        """Execute a preemptable physical query execution plan under a time quota.

        Args:
            - plan :class:`.PreemptableIterator` - The root of the plan
            - quota ``float`` - The time quota used for query execution
            - limit - Maximum number of results to produce (default: no limit)

        Returns:
            A tuple (``results``, ``saved_plan``, ``is_done``) where:
            - ``results`` is a list of solution mappings found during query execution
            - ``saved_plan`` is the state of the plan saved using protocol-buffers
            - ``is_done`` is True when the plan has completed query evaluation, False otherwise
        """
        results = list()
        queue = Queue()
        query_done = False
        try:
            task = wait_for(executor(plan, queue, limit), timeout=quota)
            self._loop.run_until_complete(task)
            query_done = True
        except asyncTimeoutError:
            # time quota exhausted: the plan is saved below so execution can resume
            pass
        except GroupByOutOfMemoryException:
            pass
        except TooManyResults:
            pass
        finally:
            # don't forget to close the event loop or we get a "Too many open files" OSError
            self._loop.close()
            # fetch the partial aggregates if the query is an aggregation query
            if plan.is_aggregator():
                results += plan.generate_results()
            # collect results from a classic query
            while not queue.empty():
                results.append(queue.get_nowait())
        # always save the plan, even when execution completed
        saved_plan = plan.save()
        root = RootTree()
        source_field = plan.serialized_name() + '_source'
        getattr(root, source_field).CopyFrom(saved_plan)
        return results, root, query_done
Example #7
0
    async def execute(self, plan: PreemptableIterator,
                      context: dict) -> ExecutionResults:
        """Execute a preemptable physical query execution plan under a time quantum.

        Args:
          * plan: Root of the pipeline of iterators.
          * context: Information about the query execution.

        Returns: A tuple (``results``, ``saved_plan``, ``is_done``, ``abort_reason``) where:
          * ``results`` is a list of solution mappings found during query execution
          * ``saved_plan`` is the state of the plan saved using protocol-buffers
          * ``is_done`` is True when the plan has completed query evaluation, False otherwise
          * ``abort_reason`` is the reason message if the query was aborted due to a concurrency control issue, None otherwise

        Throws: Any exception raised during query execution.
        """
        solutions: List[Dict[str, str]] = []
        is_done = False
        saved_root = None
        abort_reason = None
        try:
            context['start_timestamp'] = time()
            await executor(plan, solutions, context)
            is_done = True
        except (QuantumExhausted, TooManyResults):
            # quantum expired or result limit reached: resume later from the saved plan
            pass
        except DeleteInsertConflict as err:
            abort_reason = str(err)
        # save the plan if query execution is not done yet and no abort has occurred
        if abort_reason is None and not is_done:
            saved_root = RootTree()
            field_name = plan.serialized_name() + '_source'
            getattr(saved_root, field_name).CopyFrom(plan.save())
        return (solutions, saved_root, is_done, abort_reason)
Example #8
0
def sage_query(config_file, default_graph_uri, query, file, limit):
    """Execute a SPARQL query on an embedded Sage Server.

    Example usage: sage-query config.yaml http://example.org/swdf-postgres -f queries/spo.sparql
    """
    # assert that we have a query to evaluate
    if query is None and file is None:
        print("Error: you must specify a query to execute, either with --query or --file. See sage-query --help for more information.")
        exit(1)

    if limit is None:
        limit = inf

    # load the query from a file if required
    if file is not None:
        with open(file) as query_file:
            query = query_file.read()

    # run the query against an embedded test server
    client = TestClient(run_app(config_file))

    nbResults = 0
    nbCalls = 0
    hasNext = True
    next_link = None
    count = 0
    start = time()

    # follow the "next" links until the server reports the query as complete
    while hasNext:
        response = post_sparql(client, query, next_link, default_graph_uri)
        response = response.json()
        nbResults += len(response['bindings'])
        hasNext = response['hasNext']
        next_link = response['next']
        nbCalls += 1

        for bindings in response['bindings']:
            print(bindings)

        # log the progression of the query using the saved plan
        if next_link is not None:
            plan = decode_saved_plan(next_link)
            root = RootTree()
            root.ParseFromString(plan)
            prog, card = progress(root)
            # guard against a zero cardinality estimate (avoids ZeroDivisionError)
            if card > 0:
                logger.info(f"progression {prog}/{card}:{prog / card * 100}%")

        count += 1
        if count >= limit:
            break

    end = time()
    logger.info("finished in {}s".format(end - start))
    logger.info("made {} calls".format(nbCalls))
    logger.info("got {} mappings".format(nbResults))