Exemplo n.º 1
0
def test_out_of_order_line(insertion_order):
    """
    Insert a straight line of nodes in the given order, then prune it from the root.

    Every node ``n`` is added with parent ``n - 1``. The checks below cover
    nodes 0 through 9, so ``insertion_order`` is presumably some ordering of
    those ten nodes (confirm against the fixture/strategy that supplies it).
    """
    tracker = RootTracker()
    for child in insertion_order:
        tracker.add(child, child - 1)

    tip = 9
    for current_root in range(tip):
        # every node from the current root up to the tip must resolve to that
        # root, at a depth equal to its distance from it
        for member in range(current_root, tip + 1):
            found_root, found_depth = tracker.get_root(member)
            assert found_depth == member - current_root
            assert found_root == current_root

        # always prune from the end
        pruned_id, _ = tracker.get_root(tip)
        tracker.prune(pruned_id)

        # a pruned root must no longer be retrievable
        with pytest.raises(ValidationError):
            tracker.get_root(pruned_id)
Exemplo n.º 2
0
    def __init__(self,
                 prerequisites: Type[TPrerequisite],
                 id_extractor: Callable[[TTask], TTaskID],
                 dependency_extractor: Callable[[TTask], TTaskID],
                 accept_dangling_tasks: bool = False,
                 max_depth: int = None) -> None:
        """
        Set up empty tracking state for tasks, their dependencies and their roots.

        :param prerequisites: enum type handed to ``BaseTaskPrerequisites.from_enum``
            to build the per-task prerequisite tracker
        :param id_extractor: callable that returns the ID of a task
        :param dependency_extractor: callable that returns the ID of the task
            that a given task depends on
        :param accept_dangling_tasks: stored as-is on the instance; presumably
            permits registering a task before its dependency (confirm against
            the registration code)
        :param max_depth: how long to wait before pruning; ``None`` selects
            ``self._default_max_depth``
        :raises ValidationError: if ``max_depth`` is negative
        """
        self._prereq_tracker = BaseTaskPrerequisites.from_enum(prerequisites)
        self._id_of = id_extractor
        self._dependency_of = dependency_extractor
        self._accept_dangling_tasks = accept_dangling_tasks

        # reject a nonsensical depth up front, then fall back to the default
        # only when the caller did not supply one
        if max_depth is not None and max_depth < 0:
            raise ValidationError(
                f"The maximum depth must be at least 0, not {max_depth}")
        self._max_depth = self._default_max_depth if max_depth is None else max_depth

        # prerequisite tracker of every task that has not been pruned, by task ID
        self._tasks: Dict[TTaskID, BaseTaskPrerequisites[TTask,
                                                         TPrerequisite]] = {}

        # key: a task ID; value: IDs of the tasks depending on it. When the key
        # becomes ready, the IDs in the value set *might* become ready too
        # (only if their own prerequisites are complete)
        self._dependencies: Dict[TTaskID, Set[TTaskID]] = defaultdict(set)

        # IDs of tasks that are held back because either:
        # - one of their prerequisites is incomplete OR
        # - the task they depend on is not ready
        self._unready: Set[TTaskID] = set()

        # Tasks that have become ready, in order; they sit in this queue until
        # ready_tasks() hands them out.
        self._ready_tasks: 'Queue[TTask]' = Queue()

        # IDs declared finished with set_finished_dependency()
        self._declared_finished: Set[TTaskID] = set()

        self._roots = RootTracker()
        self._last_yielded_tasks: Tuple[TTask, ...] = ()
Exemplo n.º 3
0
def test_full_branching(insertion_order):
    """
    Build a binary tree in the given order, then prune down one branch.

    Each node is added with ``binary_parent(node)`` as its parent. The root
    above leaf ``(3, 0)`` is pruned three times; after every pruning the leaf
    ``(3, 7)`` must still resolve to root ``(1, 1)`` at depth 2.
    """
    tracker = RootTracker()
    for tree_node in insertion_order:
        tracker.add(tree_node, binary_parent(tree_node))

    pruned_leaf = (3, 0)
    untouched_leaf = (3, 7)

    # prune all the way to the leaf of (3, 0)
    prunings_done = 0
    while prunings_done < 3:
        current_root, current_depth = tracker.get_root(pruned_leaf)
        # the first element of the root ID is expected to grow by one per pruning
        assert current_root[0] == prunings_done
        assert current_depth == 3 - prunings_done
        tracker.prune(current_root)
        assert tracker.get_root(untouched_leaf) == ((1, 1), 2)
        prunings_done += 1
Exemplo n.º 4
0
def test_sparse_branching(test_data):
    """
    Check roots and depths in a partially-built binary tree, before and after pruning.

    ``test_data`` unpacks into the nodes to insert and a sequence of indexes
    into those nodes; each usable index selects a node whose current root is
    pruned. After every pruning, all remaining nodes are re-verified against
    an independently computed expectation.
    """
    nodes_to_insert, prune_order = test_data

    def get_expected_root(node, present_nodes):
        # climb towards the top for as long as the next parent is present
        root, depth = node, 0
        while True:
            candidate = binary_parent(root)
            if candidate not in present_nodes:
                return root, depth
            root = candidate
            depth += 1

    tracker = RootTracker()

    def assert_roots_match(nodes_to_check, present_nodes):
        # compare the tracker's answer with the expected root for every node
        for checked in nodes_to_check:
            actual_root, actual_depth = tracker.get_root(checked)
            expected_root, expected_depth = get_expected_root(
                checked, present_nodes)
            assert actual_root == expected_root
            assert actual_depth == expected_depth

    for new_node in nodes_to_insert:
        tracker.add(new_node, binary_parent(new_node))

    # verify parent and depth of partially-built tree
    assert_roots_match(nodes_to_insert, nodes_to_insert)

    # prune
    remaining_nodes = set(nodes_to_insert)
    usable_indexes = [idx for idx in prune_order if idx < len(nodes_to_insert)]
    for prune_idx in usable_indexes:
        start_node = nodes_to_insert[prune_idx]
        if start_node in remaining_nodes:
            pruned_root, _ = tracker.get_root(start_node)
            tracker.prune(pruned_root)
            remaining_nodes.remove(pruned_root)

            assert_roots_match(remaining_nodes, remaining_nodes)
Exemplo n.º 5
0
def test_prune_reinsert_root_tracking_linear(element_flipping):
    """
    Toggle nodes of a linear chain in and out of the tracker, validating roots each time.

    For every entry in ``element_flipping``: if the node is absent it is added
    with parent ``node - 1``; if it is present, the root it resolves to is
    pruned instead. After each step all present nodes are checked.
    """
    tracker = RootTracker()
    present = set()

    for flipped in element_flipping:
        if flipped not in present:
            tracker.add(flipped, flipped - 1)
            present.add(flipped)
        else:
            pruned_root, _ = tracker.get_root(flipped)
            tracker.prune(pruned_root)
            present.remove(pruned_root)

        # validate all the present nodes have valid roots
        for member in present:
            found_root, found_depth = tracker.get_root(member)

            # a root's parent must *not* be present
            assert found_root - 1 not in present

            # depth is the distance along the line from the root
            assert found_depth == member - found_root
Exemplo n.º 6
0
def test_prune_reinsert_root_tracking_binary_tree(element_flipping):
    """
    Toggle binary-tree nodes in and out of the tracker, validating roots each time.

    Each entry of ``element_flipping`` indexes ``FULL_BINARY_TREE``. An absent
    node is added with ``binary_parent(node)`` as its parent; for a present
    node, the root it resolves to is pruned instead. After each step all
    present nodes are checked.
    """
    tracker = RootTracker()
    present = set()

    for tree_index in element_flipping:
        flipped = FULL_BINARY_TREE[tree_index]
        if flipped not in present:
            tracker.add(flipped, binary_parent(flipped))
            present.add(flipped)
        else:
            pruned_root, _ = tracker.get_root(flipped)
            tracker.prune(pruned_root)
            present.remove(pruned_root)

        # validate all the present nodes have valid roots
        for member in present:
            found_root, found_depth = tracker.get_root(member)

            # a root's parent must *not* be present
            assert binary_parent(found_root) not in present

            # depth is the difference between the first elements of the two IDs
            assert found_depth == member[0] - found_root[0]
Exemplo n.º 7
0
class OrderedTaskPreparation(BaseOrderedTaskPreparation[TTask, TTaskID],
                             Generic[TTask, TTaskID, TPrerequisite]):
    """
    This class is useful when a series of tasks with prerequisites must be run
    sequentially. The prerequisites may be finished in any order, but the
    tasks may only be run when all prerequisites are complete, and the
    dependent task is also complete. Tasks may only depend on one other task.

    For example, you might want to download block bodies and receipts at
    random, but need to import them sequentially. Importing blocks is the ``task``,
    downloading the parts is the ``prerequisite``, and a block's parent is its
    ``dependency``.

    Below is a sketch of how to do that:

        # The complete list of prerequisites to complete
        class BlockDownloads(Enum):
            receipts = auto()
            bodies = auto()

        block_import_tasks = OrderedTaskPreparation(
            BlockDownloads,

            # we use this method to extract an ID from the header:
            lambda header: header.hash,

            # we use this method to extract the ID of the dependency,
            # so that we can guarantee that the parent block gets imported first
            lambda header: header.parent_hash,
        )

        # We mark the genesis block as already imported, so header1 is ready
        # as soon as its prerequisites are complete.
        block_import_tasks.set_finished_dependency(header0)

        # We register the tasks before completing any prerequisites
        block_import_tasks.register_tasks((header1, header2, header3))

        # Start download of bodies & receipts...

        # They complete in random order

        # we notify this class which prerequisites are complete:
        block_import_tasks.finish_prereq(BlockDownloads.receipts, (header2, header3))
        block_import_tasks.finish_prereq(BlockDownloads.bodies, (header1, header2))

        # this await would hang, waiting on the receipt from header1:
        # await block_import_tasks.ready_tasks()

        block_import_tasks.finish_prereq(BlockDownloads.receipts, (header1, ))

        # now we have all the necessary info to import blocks 1 and 2
        headers_ready_to_import = await block_import_tasks.ready_tasks()

        # these will always return in sequential order:
        assert headers_ready_to_import == (header1, header2)

    In a real implementation, you would have a loop waiting on
    :meth:`ready_tasks` and import them, rather than interleaving them like
    the above example.

    Note that this class does *not* track when the main tasks are
    complete. It is assumed that the caller will complete the tasks in the
    order they are returned by ready_tasks().

    The memory needs of this class would naively be unbounded. Any newly
    registered task might depend on any other task in history. To prevent
    unbounded memory usage, old tasks are pruned after a configurable depth.
    Pruning is triggered when `ready_tasks()` is called, starting from the
    tail of the *previous* ready_tasks() result.

    Vocab:

    - prerequisites: all these must be completed for a task to be ready
        (a necessary but not sufficient condition)
    - ready: a task is ready after all its prereqs are completed, and the task it depends on is
        also ready. The initial ready task is set with :meth:`set_finished_dependency`
    """
    # methods to extract the id and dependency IDs out of a task
    _id_of: StaticMethod[Callable[[TTask], TTaskID]]
    _dependency_of: StaticMethod[Callable[[TTask], TTaskID]]

    # by default, how long should the integrator wait before pruning?
    _default_max_depth = 10  # not sure how to pick a good default here

    # factory that wraps a task in its prerequisite tracker
    _prereq_tracker: Type[BaseTaskPrerequisites[TTask, TPrerequisite]]

    # track roots
    _roots: RootTracker[TTaskID]

    NoPrerequisites = NoPrerequisites
    """
    This is a helper to identify that no prerequisites are required at all, only ordering of tasks
    It can be used like so: `OrderedTaskPreparation(OrderedTaskPreparation.NoPrerequisites, ...)`
    """
    def __init__(self,
                 prerequisites: Type[TPrerequisite],
                 id_extractor: Callable[[TTask], TTaskID],
                 dependency_extractor: Callable[[TTask], TTaskID],
                 accept_dangling_tasks: bool = False,
                 max_depth: int = None) -> None:
        """
        Set up empty tracking state.

        :param prerequisites: enum type listing every prerequisite of a task
        :param id_extractor: callable that returns the ID of a task
        :param dependency_extractor: callable that returns the ID of the task
            that a given task depends on
        :param accept_dangling_tasks: if True, :meth:`register_tasks` accepts a
            task whose dependency has not been registered yet
        :param max_depth: how long to wait before pruning; ``None`` selects
            ``_default_max_depth``
        :raises ValidationError: if ``max_depth`` is negative
        """
        self._prereq_tracker = BaseTaskPrerequisites.from_enum(prerequisites)
        self._id_of = id_extractor
        self._dependency_of = dependency_extractor
        self._accept_dangling_tasks = accept_dangling_tasks

        # how long to wait before pruning
        if max_depth is None:
            self._max_depth = self._default_max_depth
        elif max_depth < 0:
            raise ValidationError(
                f"The maximum depth must be at least 0, not {max_depth}")
        else:
            self._max_depth = max_depth

        # all of the tasks that have been registered or declared finished, and not pruned
        self._tasks: Dict[TTaskID, BaseTaskPrerequisites[TTask,
                                                         TPrerequisite]] = {}

        # In self._dependencies, when the key becomes ready, the task ids in the
        # value set *might* also become ready
        # (they only become ready if their prerequisites are complete)
        self._dependencies: Dict[TTaskID, Set[TTaskID]] = defaultdict(set)

        # task ids are in this set if either:
        # - one of their prerequisites is incomplete OR
        # - their dependent task is not ready
        self._unready: Set[TTaskID] = set()

        # This is a queue of tasks that have become ready, in order.
        # They wait in this Queue until being returned by ready_tasks().
        self._ready_tasks: 'Queue[TTask]' = Queue()

        # Declared finished with set_finished_dependency()
        self._declared_finished: Set[TTaskID] = set()

        self._roots = RootTracker()
        self._last_yielded_tasks: Tuple[TTask, ...] = tuple()

    def set_finished_dependency(self, finished_task: TTask) -> None:
        """
        Mark this task as already finished. This is a bootstrapping method. In general,
        tasks are marked as finished by :meth:`finish_prereq`. But how do we know which task is
        first, and that its dependency is complete? We call `set_finished_dependency`.

        Since a task can only become ready when its dependent
        task is ready, the first result from ready_tasks will be dependent on
        finished_task set in this method. (More precisely, it will be dependent on *one of*
        the ``finished_task`` objects set with this method, since the method may be called
        multiple times)

        :param finished_task: the task to treat as already finished
        :raises DuplicateTasks: if a task with the same ID is already tracked
        """
        completed = self._prereq_tracker(finished_task)
        completed.set_complete()
        task_id = self._id_of(finished_task)
        if task_id in self._tasks:
            raise DuplicateTasks(
                f"Can't set a new finished dependency {finished_task} id:{task_id}, "
                "it's already registered",
                (finished_task, ),
            )
        self._tasks[task_id] = completed
        self._declared_finished.add(task_id)

        dependency_id = self._dependency_of(finished_task)
        self._roots.add(task_id, dependency_id)
        if dependency_id in self._tasks:
            # set a finished dependency that has a parent already entered. Mark this as a dependency
            self._dependencies[dependency_id].add(task_id)

        # note that this task is intentionally *not* added to self._unready

    def register_tasks(self,
                       tasks: Tuple[TTask, ...],
                       ignore_duplicates: bool = False) -> None:
        """
        Initiate a task into tracking. By default, each task must be registered
        *after* its dependency has been registered.

        If you want to be able to register non-contiguous tasks, you can
        initialize this instance with: ``accept_dangling_tasks=True``.

        :param tasks: the tasks to register, in iteration order
        :param ignore_duplicates: any tasks that have already been registered will be ignored,
            whether ready or not
        :raises DuplicateTasks: if any task is already tracked and
            ``ignore_duplicates`` is False
        :raises MissingDependency: if a task's dependency is not tracked and
            dangling tasks are not accepted
        """
        identified_tasks = tuple((self._id_of(task), task) for task in tasks)
        duplicates = tuple(task for task_id, task in identified_tasks
                           if task_id in self._tasks)

        if duplicates and not ignore_duplicates:
            raise DuplicateTasks(
                f"Cannot re-register tasks: {duplicates!r} for completion",
                duplicates,
            )

        task_meta_info = tuple(
            (self._prereq_tracker(task), task_id, self._dependency_of(task))
            for task_id, task in identified_tasks
            # when ignoring duplicates, must not try to re-add them
            if task_id not in self._tasks)

        for prereq_tracker, task_id, dependency_id in task_meta_info:
            if not self._accept_dangling_tasks and dependency_id not in self._tasks:
                raise MissingDependency(
                    f"Cannot prepare task {prereq_tracker!r} with id {task_id} and "
                    f"dependency {dependency_id} before preparing its dependency "
                    f"among tasks {task_meta_info!r}, from the original registration: "
                    f"{tasks!r}.")

            self._tasks[task_id] = prereq_tracker
            self._unready.add(task_id)
            self._dependencies[dependency_id].add(task_id)
            self._roots.add(task_id, dependency_id)

            if prereq_tracker.is_complete and self._is_ready(
                    prereq_tracker.task):
                # this is possible for tasks with 0 prerequisites (useful for pure ordering)
                self._mark_complete(task_id)

    def finish_prereq(self, prereq: TPrerequisite, tasks: Tuple[TTask,
                                                                ...]) -> None:
        """
        For every task in tasks, mark the given prerequisite as completed

        :param prereq: the prerequisite that has been completed
        :param tasks: the tasks for which it has been completed
        :raises ValidationError: if nothing is tracked yet, if a task is not
            tracked, or if a task is no longer waiting to become ready
        """
        if not self._tasks:
            # the first tracked task normally comes from set_finished_dependency()
            raise ValidationError(
                "Cannot finish a task until set_finished_dependency() is called")

        for task in tasks:
            task_id = self._id_of(task)
            if task_id not in self._tasks:
                raise ValidationError(
                    f"Cannot finish task {task_id!r} before preparing it")
            elif task_id not in self._unready:
                raise ValidationError(
                    f"Cannot finish prereq {prereq} of task {task} id:{task_id!r} that is complete"
                )

            task_completion = self._tasks[task_id]
            task_completion.finish(prereq)
            if task_completion.is_complete and self._is_ready(task):
                self._mark_complete(task_id)

    async def ready_tasks(self, max_results: int = None) -> Tuple[TTask, ...]:
        """
        Return the next batch of tasks that are ready to process. If none are ready,
        hang until at least one task becomes ready.

        Before waiting, pruning is attempted from every task returned by the
        previous call.

        :param max_results: passed through to ``queue_get_batch``
        :return: the batch of ready tasks
        """
        for completed_task in self._last_yielded_tasks:
            task_id = self._id_of(completed_task)
            # Attempt pruning at least twice (to eventually catch up after forks)
            # re-running is okay, because pruning limits the prune depth
            self._prune_finished(task_id)
            self._prune_finished(task_id)

        self._last_yielded_tasks = await queue_get_batch(
            self._ready_tasks, max_results)
        return self._last_yielded_tasks

    def has_ready_tasks(self) -> bool:
        """Return True if at least one ready task is waiting in the queue."""
        return not self._ready_tasks.empty()

    def _is_ready(self, task: TTask) -> bool:
        """
        Return True if the dependency of ``task`` is finished, either by
        declaration or because it is tracked and no longer unready.
        """
        dependency = self._dependency_of(task)
        if dependency in self._declared_finished:
            # Ready by declaration
            return True

        # Ready by insertion and tracked completion
        return dependency in self._tasks and dependency not in self._unready

    def _mark_complete(self, task_id: TTaskID) -> None:
        """
        Mark ``task_id`` as ready, then keep marking every dependent task that
        becomes ready as a consequence, one generation at a time.
        """
        qualified_tasks: Tuple[TTaskID, ...] = (task_id, )
        while qualified_tasks:
            qualified_tasks = tuple(
                mapcat(
                    self._mark_one_task_complete,
                    qualified_tasks,
                ))

    @to_tuple
    def _mark_one_task_complete(self, task_id: TTaskID) -> Iterable[TTaskID]:
        """
        Called when this task is completed and its dependency is complete, for the first time

        :return: any task IDs that can now also be marked as complete
        """
        task_completion = self._tasks[task_id]

        # put this task in the completed queue
        self._ready_tasks.put_nowait(task_completion.task)

        # note that this task has been made ready
        self._unready.remove(task_id)

        # resolve tasks that depend on this task
        for depending_task_id in self._dependencies[task_id]:
            # we already know that this task is ready, so we only need to check completion
            if self._tasks[depending_task_id].is_complete:
                yield depending_task_id

    def _prune_finished(self, task_id: TTaskID) -> None:
        """
        This prunes the oldest data, if it starts more than _max_depth in history.
        It is called when the task has been consumed by the caller via
        `ready_tasks()` and completed. At most one task (the root of
        ``task_id``) is pruned per call. The workflow looks something like:

        ::

            otp = OrderedTaskPreparation(...)
            otp.register_tasks(range(3))
            otp.finish_prereq(OnlyPrereq, (0, 1))
            assert await otp.ready_tasks() == (0, 1)

            # Do some processing on ready tasks (0, 1) ...
            # Complete processing on ready tasks (0, 1)

            await otp.ready_tasks()
            # ^ when this is called, pruning is triggered from
            # the tip of task 1 (whether or not task 2 is ready)
        """
        root_task_id, depth = self._roots.get_root(task_id)
        if depth > self._max_depth:
            self._prune(root_task_id)

    def _prune(self, prune_task_id: TTaskID) -> None:
        """Forget everything that is tracked about ``prune_task_id``."""
        # _roots.prune() has validation in it, so if there is a problem, we should skip the rest
        self._roots.prune(prune_task_id)

        # self._dependencies is a defaultdict, so an entry only exists once
        # something has indexed it. A pruned task may never have had one, and
        # failing here would leave the task half-removed.
        self._dependencies.pop(prune_task_id, None)
        del self._tasks[prune_task_id]

        # a pruned task may still be waiting; don't let its ID linger
        self._unready.discard(prune_task_id)
        self._declared_finished.discard(prune_task_id)