Exemplo n.º 1
0
    def wait(
        self,
        num_returns: int = 1,
        timeout: Optional[float] = None
    ) -> Tuple[List[Workflow], List[Workflow]]:
        """Split the stored workflow outputs into ready and remaining ones.

        The object refs of ``self.workflow_outputs`` are handed to
        ``ray.wait``; every ref it returns is mapped back to its output and
        wrapped with ``Workflow.from_ref``. See `api.wait()` for details.

        Args:
            num_returns (int): The number of workflows that should be returned.
            timeout (float): The maximum amount of time in seconds to wait
                before returning.

        Returns:
            A pair ``(ready, remaining)``: the workflows whose refs were
            reported ready by ``ray.wait`` and the workflows for all other
            refs.

        Raises:
            ValueError: If ``self.workflow_refs`` is non-empty, because
                waiting on dynamic workflow refs is not supported.
        """
        if self.workflow_refs:
            message = ("Currently, we do not support wait operations "
                       "on dynamic workflow refs. They are typically "
                       "generated by virtual actors.")
            raise ValueError(message)

        # Index every output by its object ref so that the refs returned by
        # ``ray.wait`` can be translated back into outputs.
        output_by_ref = {}
        for output in self.workflow_outputs:
            output_by_ref[output.ref] = output

        done_refs, pending_refs = ray.wait(
            list(output_by_ref),
            num_returns=num_returns,
            timeout=timeout,
        )

        def to_workflows(refs):
            # Wrap the output behind each ref into a workflow.
            return [Workflow.from_ref(output_by_ref[ref]) for ref in refs]

        return to_workflows(done_refs), to_workflows(pending_refs)
Exemplo n.º 2
0
    def step(self, *args, **kwargs):
        """Create a workflow that invokes this actor method.

        The call arguments are flattened against ``self._signature``. For a
        non-readonly method two extra positional entries are prepended: a
        dummy type marker and a reference to the actor state (``None`` when
        the method is ``__init__``, otherwise a ``WorkflowRef`` built from the
        storage's entrypoint step id).

        Args:
            *args: Positional arguments for the actor method.
            **kwargs: Keyword arguments for the actor method.

        Returns:
            A ``Workflow`` wrapping the actor method call.
        """
        call_args = signature.flatten_args(self._signature, args, kwargs)
        actor_id = workflow_context.get_current_workflow_id()
        if not self.readonly:
            # "__init__" gets no state reference; every other method gets a
            # reference to the entrypoint step recorded in storage.
            state_ref = None
            if self._method_name != "__init__":
                storage = WorkflowStorage(actor_id, get_global_storage())
                state_ref = WorkflowRef(storage.get_entrypoint_step_id())
            # This is a hack to insert a positional argument.
            call_args = [signature.DUMMY_TYPE, state_ref] + call_args
        inputs = serialization_context.make_workflow_inputs(call_args)

        if not self.readonly:
            method_body = _wrap_actor_method(self._original_class,
                                             self._method_name)
        else:
            method_body = _wrap_readonly_actor_method(
                actor_id, self._original_class, self._method_name)

        return Workflow(
            WorkflowData(
                func_body=method_body,
                inputs=inputs,
                name=self._name,
                step_options=self._options,
                user_metadata=self._user_metadata,
            ))
Exemplo n.º 3
0
        def _build_workflow(*args, **kwargs) -> Workflow:
            flattened_args = signature.flatten_args(self._func_signature, args, kwargs)

            def prepare_inputs():
                ensure_ray_initialized()
                return serialization_context.make_workflow_inputs(flattened_args)

            nonlocal step_options
            if step_options is None:
                step_options = WorkflowStepRuntimeOptions.make(
                    step_type=StepType.FUNCTION
                )
            # We could have "checkpoint=None" when we use @workflow.step
            # with arguments. Avoid this by updating it here.
            step_options.checkpoint = _inherit_checkpoint_option(
                step_options.checkpoint
            )

            workflow_data = WorkflowData(
                func_body=self._func,
                inputs=None,
                step_options=step_options,
                name=self._name,
                user_metadata=self._user_metadata,
            )
            return Workflow(workflow_data, prepare_inputs)
Exemplo n.º 4
0
def _reconstruct_wait_step(
    reader: workflow_storage.WorkflowStorage,
    result: workflow_storage.StepInspectResult,
    input_map: Dict[StepID, Any],
):
    """Rebuild a ``workflow.wait`` step from its inspection result.

    Every input step listed in ``result.workflows`` is resolved, either from
    ``input_map`` or by constructing its resume workflow, and the resulting
    workflows are passed to ``workflow.wait`` together with the stored
    ``"wait_options"``.

    Args:
        reader: Storage used to construct resume workflows and to load
            checkpointed step outputs.
        result: Inspection result of the wait step; its ``workflows`` and
            ``step_options.ray_options`` are read.
        input_map: Cache of already resolved steps, keyed by step id. It is
            updated in place with every step resolved here.

    Returns:
        The workflow returned by ``workflow.wait`` for the resolved inputs.
    """
    input_workflows = []
    step_options = result.step_options
    wait_options = step_options.ray_options.get("wait_options", {})
    for _step_id in result.workflows:
        # Check whether the step has been loaded or not to avoid
        # duplication
        if _step_id in input_map:
            r = input_map[_step_id]
        else:
            r = _construct_resume_workflow_from_step(reader, _step_id, input_map)
            input_map[_step_id] = r
        if isinstance(r, Workflow):
            input_workflows.append(r)
        else:
            # Anything that is not a workflow is expected to be the id of a
            # step whose output can be loaded from storage.
            assert isinstance(r, StepID)
            # TODO (Alex): We should consider caching these outputs too.
            output = reader.load_step_output(r)
            # Simulate a workflow with a workflow reference so it could be
            # used directly by 'workflow.wait'.
            static_ref = WorkflowStaticRef(step_id=r, ref=ray.put(output))
            wf = Workflow.from_ref(static_ref)
            input_workflows.append(wf)

    # Imported inside the function rather than at module level, as in the
    # original code.
    from ray import workflow

    return workflow.wait(input_workflows, **wait_options)
Exemplo n.º 5
0
            def step(method_name, method, *args, **kwargs):
                """Create a workflow that invokes an actor method.

                A method counts as readonly when it carries a truthy
                ``__virtual_actor_readonly__`` attribute. For other methods
                two positional entries are prepended to the flattened
                arguments: a dummy type marker and a state reference
                (``None`` for ``__init__``, otherwise a ``WorkflowRef`` built
                from the storage's entrypoint step id).

                Args:
                    method_name: Name of the actor method.
                    method: The method object; only inspected for the
                        readonly marker.
                    *args: Positional arguments for the method.
                    **kwargs: Keyword arguments for the method.

                Returns:
                    A ``Workflow`` wrapping the actor method call.
                """
                is_readonly = getattr(method, "__virtual_actor_readonly__",
                                      False)
                call_args = self.flatten_args(method_name, args, kwargs)
                actor_id = workflow_context.get_current_workflow_id()
                if not is_readonly:
                    state_ref = None
                    if method_name != "__init__":
                        storage = WorkflowStorage(actor_id,
                                                  get_global_storage())
                        state_ref = WorkflowRef(
                            storage.get_entrypoint_step_id())
                    # This is a hack to insert a positional argument.
                    call_args = [signature.DUMMY_TYPE, state_ref] + call_args
                inputs = serialization_context.make_workflow_inputs(call_args)

                if not is_readonly:
                    method_body = _wrap_actor_method(self.cls, method_name)
                    kind = StepType.ACTOR_METHOD
                else:
                    method_body = _wrap_readonly_actor_method(
                        actor_id, self.cls, method_name)
                    kind = StepType.READONLY_ACTOR_METHOD

                # TODO(suquark): Support actor options.
                return Workflow(
                    WorkflowData(
                        func_body=method_body,
                        step_type=kind,
                        inputs=inputs,
                        max_retries=1,
                        catch_exceptions=False,
                        ray_options={},
                        name=None,
                    ))
Exemplo n.º 6
0
        def _build_workflow(*args, **kwargs) -> Workflow:
            """Build a workflow step for the wrapped function.

            The arguments are flattened right away; converting them into
            workflow inputs is deferred to the ``prepare_inputs`` callback
            passed to ``Workflow``.

            Args:
                *args: Positional arguments for the wrapped function.
                **kwargs: Keyword arguments for the wrapped function.

            Returns:
                A ``Workflow`` whose inputs are prepared lazily.
            """
            call_args = signature.flatten_args(self._func_signature, args, kwargs)

            def prepare_inputs():
                # Runs only when the workflow asks for its inputs.
                ensure_ray_initialized()
                return serialization_context.make_workflow_inputs(call_args)

            return Workflow(
                WorkflowData(
                    func_body=self._func,
                    inputs=None,
                    step_options=step_options,
                    name=self._name,
                    user_metadata=self._user_metadata,
                ),
                prepare_inputs,
            )
Exemplo n.º 7
0
        def _build_workflow(*args, **kwargs) -> Workflow:
            """Build a function-type workflow step for the wrapped function.

            The arguments are flattened right away; converting them into
            workflow inputs is deferred to the ``prepare_inputs`` callback
            passed to ``Workflow``.

            Args:
                *args: Positional arguments for the wrapped function.
                **kwargs: Keyword arguments for the wrapped function.

            Returns:
                A ``Workflow`` whose inputs are prepared lazily.
            """
            call_args = signature.flatten_args(
                self._func_signature, args, kwargs)

            def prepare_inputs():
                # Runs only when the workflow asks for its inputs.
                ensure_ray_initialized()
                return serialization_context.make_workflow_inputs(call_args)

            return Workflow(
                WorkflowData(
                    func_body=self._func,
                    step_type=StepType.FUNCTION,
                    inputs=None,
                    max_retries=self._max_retries,
                    catch_exceptions=self._catch_exceptions,
                    ray_options=self._ray_options,
                    name=self._name,
                ),
                prepare_inputs,
            )
Exemplo n.º 8
0
def _construct_resume_workflow_from_step(
        workflow_id: str, step_id: StepID) -> Union[Workflow, Any]:
    """Try to construct a workflow (step) that recovers the workflow step.

    If the workflow step already has a valid output checkpoint, the
    checkpointed output is loaded and returned instead of a workflow.

    Args:
        workflow_id: The ID of the workflow.
        step_id: The ID of the step we want to recover.

    Returns:
        A workflow that recovers the step, or the output of the step
        if it has been checkpointed.

    Raises:
        WorkflowStepNotRecoverableError: If a step visited while walking
            the dependencies reports that it is not recoverable.
    """
    reader = workflow_storage.WorkflowStorage(workflow_id)

    # Step 1: construct dependency of the DAG (BFS)
    inspect_results = {}
    # Maps a step to the steps that depend on it.
    dependency_map = defaultdict(list)
    # Number of not-yet-resolved dependencies of each visited step.
    num_in_edges = {}

    dag_visit_queue = deque([step_id])
    while dag_visit_queue:
        s: StepID = dag_visit_queue.popleft()
        if s in inspect_results:
            # Already visited through another path.
            continue
        r = reader.inspect_step(s)
        inspect_results[s] = r
        if not r.is_recoverable():
            raise WorkflowStepNotRecoverableError(s)
        if r.output_object_valid:
            # The output is checkpointed; nothing upstream is needed.
            deps = []
        elif isinstance(r.output_step_id, str):
            # The step's output is the output of another step.
            deps = [r.output_step_id]
        else:
            deps = r.workflows
        for w in deps:
            dependency_map[w].append(s)
        num_in_edges[s] = len(deps)
        dag_visit_queue.extend(deps)

    # Step 2: topological sort to determine the execution order (Kahn's algorithm)
    execution_queue: List[StepID] = []

    start_nodes = deque(k for k, v in num_in_edges.items() if v == 0)
    while start_nodes:
        n = start_nodes.popleft()
        execution_queue.append(n)
        for m in dependency_map[n]:
            num_in_edges[m] -= 1
            assert num_in_edges[m] >= 0, (m, n)
            if num_in_edges[m] == 0:
                start_nodes.append(m)

    # Step 3: recover the workflow by the order of the execution queue
    with serialization.objectref_cache():
        # "input_map" is a context storing the inputs which have been loaded.
        # This context is important for deduplicating step inputs.
        input_map: Dict[StepID, Any] = {}

        for _step_id in execution_queue:
            result = inspect_results[_step_id]
            if result.output_object_valid:
                input_map[_step_id] = reader.load_step_output(_step_id)
                continue
            if isinstance(result.output_step_id, str):
                # Reuse whatever was recovered for the step this one
                # forwards to.
                input_map[_step_id] = input_map[result.output_step_id]
                continue

            # Process the wait step as a special case.
            if result.step_options.step_type == StepType.WAIT:
                wait_input_workflows = []
                for w in result.workflows:
                    output = input_map[w]
                    if isinstance(output, Workflow):
                        wait_input_workflows.append(output)
                    else:
                        # Simulate a workflow with a workflow reference so it could be
                        # used directly by 'workflow.wait'.
                        static_ref = WorkflowStaticRef(step_id=w,
                                                       ref=ray.put(output))
                        wait_input_workflows.append(
                            Workflow.from_ref(static_ref))
                recovery_workflow = ray.workflow.wait(
                    wait_input_workflows,
                    **result.step_options.ray_options.get("wait_options", {}),
                )
            else:
                args, kwargs = reader.load_step_args(
                    _step_id,
                    workflows=[input_map[w] for w in result.workflows],
                    workflow_refs=list(map(WorkflowRef, result.workflow_refs)),
                )
                func: Callable = reader.load_step_func_body(_step_id)
                # TODO(suquark): Use an alternative function when "workflow.step"
                # is fully deprecated.
                recovery_workflow = ray.workflow.step(func).step(
                    *args, **kwargs)

            # override step_options
            recovery_workflow._step_id = _step_id
            recovery_workflow.data.step_options = result.step_options

            input_map[_step_id] = recovery_workflow

    # Step 4: return the output of the requested step
    return input_map[step_id]
Exemplo n.º 9
0
def wait(workflows: List[Workflow],
         num_returns: int = 1,
         timeout: Optional[float] = None) -> Workflow[WaitResult]:
    """Create a workflow step that waits on a list of workflows.

    Running the returned step yields two lists: the results of the workflows
    that are ready, and the workflows that are still pending.

    Examples:
        >>> tasks = [task.step() for _ in range(3)]
        >>> wait_step = workflow.wait(tasks, num_returns=1)
        >>> print(wait_step.run())
        ([result_1], [<Workflow object>, <Workflow object>])

        >>> tasks = [task.step() for _ in range(2)] + [forever.step()]
        >>> wait_step = workflow.wait(tasks, num_returns=3, timeout=10)
        >>> print(wait_step.run())
        ([result_1, result_2], [<Workflow object>])

    When a timeout is given, the step finishes as soon as either the
    requested number of workflows is ready or the timeout expires, whichever
    comes first. Without a timeout it waits until exactly that many workflows
    are ready.

    The first list of the result corresponds to workflows that are ready;
    the second list holds the rest (which may or may not be ready).

    The order of the input list is kept: if A comes before B in the input and
    both end up in the same result list, A comes before B in that list too.

    NOTE(review): earlier documentation said a warning is issued when running
    inside an async context; nothing in this function does so itself.
    Confirm against the code that executes the wait step.

    Args:
        workflows (List[Workflow]): List of workflows that may
            or may not be ready. Note that these workflows must be unique.
        num_returns (int): The number of workflows that should be returned.
        timeout (float): The maximum amount of time in seconds to wait before
            returning.

    Returns:
        A workflow step named "workflow.wait" whose result is a list of
        ready workflow results and a list of the remaining workflows.

    Raises:
        TypeError: If any element of ``workflows`` is not a ``Workflow``.
    """
    from ray.workflow import serialization_context
    from ray.workflow.common import WorkflowData

    if not all(isinstance(candidate, Workflow) for candidate in workflows):
        raise TypeError("The input of workflow.wait should be a list "
                        "of workflows.")

    wait_inputs = serialization_context.make_workflow_inputs(workflows)

    # Pass the options through Ray options. "num_returns" conflicts with
    # the "num_returns" for Ray remote functions, so we need to wrap it
    # under "wait_options".
    wrapped_options = {
        "wait_options": {
            "num_returns": num_returns,
            "timeout": timeout,
        }
    }
    step_options = WorkflowStepRuntimeOptions.make(
        step_type=StepType.WAIT,
        ray_options=wrapped_options,
    )

    return Workflow(
        WorkflowData(
            func_body=None,
            inputs=wait_inputs,
            step_options=step_options,
            name="workflow.wait",
            user_metadata={},
        ))