Пример #1
0
 def _run_step(self, args, kwargs):
     """Submit this step's remote function and return its wrapped outputs.

     A fresh UUID is generated as the task ID and is sent to the remote
     function together with the current workflow step context and the
     call arguments.

     Args:
         args: Positional arguments forwarded to the remote function.
         kwargs: Keyword arguments forwarded to the remote function.

     Returns:
         The first element of the pair returned by ``wrap_step_outputs``.
     """
     new_task_id = uuid.uuid4()
     step_context = workflow_context.get_workflow_step_context()
     remote_refs = self._remote_function.remote(step_context, new_task_id,
                                                args, kwargs)
     # The second element of the pair is not needed by callers of this
     # method, so it is discarded.
     step_outputs, _ = wrap_step_outputs(remote_refs)
     return step_outputs
Пример #2
0
def execute_virtual_actor_step(step_id: "StepID",
                               workflow_data: "WorkflowData",
                               readonly: bool) -> "WorkflowOutputType":
    """Submit a virtual actor method to the remote step executor.

    For non-readonly calls the step status is recorded as RUNNING first.
    Every input workflow is executed, the results are bundled with the
    step arguments and object refs, and the bundle is handed to the
    executor. The step is always submitted with an empty outer-most step
    ID and ``last_step_of_workflow=True``.

    Args:
        step_id: The ID of the step to execute.
        workflow_data: Carries the function body, inputs, Ray options,
            exception-catching flag and retry count of the step.
        readonly: Whether the actor method is a readonly method.

    Returns:
        For readonly methods, only the element at index 1 of the
        executor's return value (the output, skipping the state);
        otherwise the executor's return value unchanged.
    """
    from ray.experimental.workflow.common import WorkflowStatus

    if not readonly:
        _record_step_status(step_id, WorkflowStatus.RUNNING)

    inputs = workflow_data.inputs
    upstream_outputs = [dep.execute() for dep in inputs.workflows]
    bundled_inputs = (inputs.args, upstream_outputs, inputs.object_refs)

    method_type = (StepType.READONLY_ACTOR_METHOD
                   if readonly else StepType.ACTOR_METHOD)
    executor = _workflow_step_executor.options(**workflow_data.ray_options)
    executor_result = executor.remote(
        method_type,
        workflow_data.func_body,
        workflow_context.get_workflow_step_context(),
        step_id,
        bundled_inputs,
        "",  # outer most step ID: always empty for virtual actor steps
        workflow_data.catch_exceptions,
        workflow_data.max_retries,
        last_step_of_workflow=True)

    # Readonly callers only get the output; the state is skipped.
    return executor_result[1] if readonly else executor_result
Пример #3
0
def execute_workflow_step(
        step_func: Callable, step_id: "StepID",
        step_inputs: "WorkflowInputTuple", catch_exceptions: bool,
        step_max_retries: int, ray_options: Dict[str, Any],
        outer_most_step_id: "StepID") -> "WorkflowOutputType":
    """Submit a workflow step to the remote step executor.

    Args:
        step_func: The function implementing the step.
        step_id: The ID of the step.
        step_inputs: The inputs forwarded to the step.
        catch_exceptions: Flag forwarded to the executor.
        step_max_retries: Retry count forwarded to the executor.
        ray_options: Keyword options applied to the executor via
            ``.options(**ray_options)`` before submission.
        outer_most_step_id: The ID of the outer most step.

    Returns:
        Whatever the executor's ``.remote`` call returns.
    """
    configured_executor = _workflow_step_executor.options(**ray_options)
    step_context = workflow_context.get_workflow_step_context()
    return configured_executor.remote(
        step_func,
        step_context,
        step_id,
        step_inputs,
        outer_most_step_id,
        catch_exceptions,
        step_max_retries,
    )
Пример #4
0
 def _run_step(
         self,
         step_id: StepID,
         step_inputs: WorkflowInputTuple,
         outer_most_step_id: Optional[StepID] = None) -> WorkflowOutputType:
     """Submit this object's function to the remote step executor.

     Args:
         step_id: The ID of the step being run.
         step_inputs: The inputs forwarded to the step.
         outer_most_step_id: The ID of the outer most step, or None.

     Returns:
         Whatever ``workflow_step_executor.remote`` returns.
     """
     step_context = workflow_context.get_workflow_step_context()
     return workflow_step_executor.remote(
         self._func,
         step_context,
         step_id,
         step_inputs,
         outer_most_step_id,
     )
Пример #5
0
 def __init__(self,
              workflow_id: Optional[str] = None,
              store: Optional[storage.Storage] = None):
     """Bind this object to a workflow ID and a storage backend.

     Args:
         workflow_id: The ID of the workflow. When None, the ID is read
             from the current workflow step context.
         store: The storage to use. When None, the global storage is
             used.
     """
     if workflow_id is None:
         step_context = workflow_context.get_workflow_step_context()
         workflow_id = step_context.workflow_id
     self._storage = storage.get_global_storage() if store is None else store
     self._workflow_id = workflow_id
Пример #6
0
def execute_workflow_step(
        step_func: Callable, step_id: "StepID",
        step_inputs: "WorkflowInputTuple", catch_exceptions: bool,
        step_max_retries: int, ray_options: Dict[str, Any],
        outer_most_step_id: "StepID") -> "WorkflowOutputType":
    """Record the step as RUNNING, then submit it to the step executor.

    Args:
        step_func: The function implementing the step.
        step_id: The ID of the step.
        step_inputs: The inputs forwarded to the step.
        catch_exceptions: Flag forwarded to the executor.
        step_max_retries: Retry count forwarded to the executor.
        ray_options: Keyword options applied to the executor via
            ``.options(**ray_options)`` before submission.
        outer_most_step_id: The ID of the outer most step.

    Returns:
        Whatever the executor's ``.remote`` call returns.
    """
    from ray.experimental.workflow.common import WorkflowStatus

    # The status is recorded before the step is submitted.
    _record_step_status(step_id, WorkflowStatus.RUNNING)

    configured_executor = _workflow_step_executor.options(**ray_options)
    step_context = workflow_context.get_workflow_step_context()
    return configured_executor.remote(
        step_func,
        step_context,
        step_id,
        step_inputs,
        outer_most_step_id,
        catch_exceptions,
        step_max_retries,
    )
Пример #7
0
def execute_workflow(
        workflow: "Workflow",
        outer_most_step_id: Optional[str] = None,
        last_step_of_workflow: bool = False) -> "WorkflowExecutionResult":
    """Execute a workflow, reusing its result if it was already executed.

    Some notation used to describe step relationships:

    * A dependency between steps, "A.step(B.step())", is written "A - B".
    * A nested step, "def A(): return B.step()", is written "A / B".

    In a chain/DAG of step dependencies, the "output step" is the step
    that comes last in topological order. For example, in "A - B - C",
    C is the output step.

    In a chain of nested workflow steps, the initial "output step" is
    called the "outer most step" for the other "output steps". For
    example, in "A / B / C / D", "A" is the outer most step for "B",
    "C" and "D"; in the hybrid workflow
    "((A - B) / C / D) - (E / (F - G) / H)", "B" is the outer most step
    for "C" and "D", and "E" is the outer most step for "G" and "H".

    Args:
        workflow: The workflow to be executed.
        outer_most_step_id: The ID of the outer most workflow step, or
            None if there is none.
        last_step_of_workflow: Whether this step generates the output of
            the workflow (including nested steps).

    Returns:
        A ``WorkflowExecutionResult`` holding the persisted and volatile
        outputs of the step.

    Raises:
        TypeError: If the persisted output returned by the executor is
            not a ``WorkflowOutputType``.
    """
    # A workflow is only ever submitted once; later calls reuse the result.
    if workflow.executed:
        return workflow.result

    data = workflow.data

    # Readonly actor methods do not get a RUNNING status record.
    if data.step_type != StepType.READONLY_ACTOR_METHOD:
        _record_step_status(workflow.id, WorkflowStatus.RUNNING)

    step_inputs = _BakedWorkflowInputs.from_workflow_inputs(data.inputs)
    executor = _workflow_step_executor.options(**data.ray_options)
    persisted_output, volatile_output = executor.remote(
        data.step_type,
        data.func_body,
        workflow_context.get_workflow_step_context(),
        workflow.id,
        step_inputs,
        outer_most_step_id,
        data.catch_exceptions,
        data.max_retries,
        last_step_of_workflow,
    )

    if not isinstance(persisted_output, WorkflowOutputType):
        raise TypeError("Unexpected return type of the workflow.")

    execution_result = WorkflowExecutionResult(persisted_output,
                                               volatile_output)
    # Cache the result on the workflow so the early return above can use it.
    workflow._result = execution_result
    workflow._executed = True
    return execution_result
Пример #8
0
def get_workflow_storage(workflow_id: Optional[str] = None) -> WorkflowStorage:
    """Build a workflow storage on top of the global storage.

    Args:
        workflow_id: The ID of the workflow. When None, the ID is read
            from the current workflow step context.

    Returns:
        A ``WorkflowStorage`` for the given workflow ID.
    """
    global_store = storage.get_global_storage()
    if workflow_id is not None:
        return WorkflowStorage(workflow_id, global_store)
    step_context = workflow_context.get_workflow_step_context()
    return WorkflowStorage(step_context.workflow_id, global_store)
Пример #9
0
def execute_workflow_step(step_id: "StepID", workflow_data: "WorkflowData",
                          outer_most_step_id: "StepID",
                          last_step_of_workflow: bool) -> "WorkflowOutputType":
    """Record the step as RUNNING and submit it as a function step.

    Every input workflow is executed first; the results are bundled with
    the step arguments and object refs and handed to the remote executor.

    Args:
        step_id: The ID of the step to execute.
        workflow_data: Carries the function body, inputs, Ray options,
            exception-catching flag and retry count of the step.
        outer_most_step_id: The ID of the outer most step.
        last_step_of_workflow: Flag forwarded to the executor.

    Returns:
        The element at index 0 of the executor's return value.
    """
    _record_step_status(step_id, WorkflowStatus.RUNNING)

    inputs = workflow_data.inputs
    upstream_outputs = [dep.execute() for dep in inputs.workflows]
    # NOTE: The input placeholder can only be deserialized under a proper
    # serialization context; deserializing it without one raises an
    # exception. Passed to _step_execution_function as a direct argument,
    # it would be deserialized by Ray without that context, so it is
    # wrapped in a tuple instead.
    bundled_inputs = (inputs.args, upstream_outputs, inputs.object_refs)

    executor = _workflow_step_executor.options(**workflow_data.ray_options)
    executor_result = executor.remote(
        StepType.FUNCTION,
        workflow_data.func_body,
        workflow_context.get_workflow_step_context(),
        step_id,
        bundled_inputs,
        outer_most_step_id,
        workflow_data.catch_exceptions,
        workflow_data.max_retries,
        last_step_of_workflow,
    )
    return executor_result[0]
Пример #10
0
def _resolve_dynamic_workflow_refs(workflow_refs: "List[WorkflowRef]"):
    """Resolve workflow refs to step outputs by step ID at runtime.

    Each ref is resolved by trying, in order:
    1. The cached step output held by the workflow manager: fetch the
       physical output object.
    2. If that fails, the checkpoint of the step output in storage.
    3. If the checkpoint cannot be loaded, resuming the step and
       resolving its persisted output.

    Args:
        workflow_refs: The workflow refs to resolve.

    Returns:
        A list with one resolved output per ref, in the same order as
        ``workflow_refs``.
    """
    manager = get_or_create_management_actor()
    step_context = workflow_context.get_workflow_step_context()
    workflow_id = step_context.workflow_id
    storage_url = step_context.storage_url

    resolved_outputs = []
    for workflow_ref in workflow_refs:
        # 1. Try the output cached by the workflow manager.
        cached_ref = ray.get(
            manager.get_cached_step_output.remote(workflow_id,
                                                  workflow_ref.step_id))
        if cached_ref is not None:
            try:
                output, _ = _resolve_object_ref(cached_ref)
            except Exception:
                # Any failure here just means the cache is unusable;
                # fall through to the storage lookup below.
                pass
            else:
                resolved_outputs.append(output)
                continue

        # 2. Try the checkpoint in storage.
        store = workflow_storage.get_workflow_storage()
        try:
            output = store.load_step_output(workflow_ref.step_id)
        except DataLoadError:
            # 3. Last resort: resume the step and resolve its output.
            current_step_id = workflow_context.get_current_step_id()
            logger.warning("Failed to get the output of step "
                           f"{workflow_ref.step_id}. Trying to resume it. "
                           f"Current step: '{current_step_id}'")
            resumed_ref = recovery.resume_workflow_step(
                workflow_id, workflow_ref.step_id,
                storage_url).persisted_output
            output, _ = _resolve_object_ref(resumed_ref)
        resolved_outputs.append(output)
    return resolved_outputs
Пример #11
0
def execute_workflow_step(step_func: Callable, step_id: StepID,
                          step_inputs: WorkflowInputTuple,
                          outer_most_step_id: StepID) -> WorkflowOutputType:
    """Submit a workflow step to the remote step executor.

    Args:
        step_func: The function implementing the step.
        step_id: The ID of the step.
        step_inputs: The inputs forwarded to the step.
        outer_most_step_id: The ID of the outer most step.

    Returns:
        Whatever ``_workflow_step_executor.remote`` returns.
    """
    step_context = workflow_context.get_workflow_step_context()
    return _workflow_step_executor.remote(
        step_func,
        step_context,
        step_id,
        step_inputs,
        outer_most_step_id,
    )
Пример #12
0
 def _run_step(self, step_id: StepID,
               step_inputs: WorkflowInputTuple) -> WorkflowOutputType:
     """Submit this object's remote function for the given step.

     Args:
         step_id: The ID of the step being run.
         step_inputs: The inputs forwarded to the remote function.

     Returns:
         Whatever ``self._remote_function.remote`` returns.
     """
     step_context = workflow_context.get_workflow_step_context()
     return self._remote_function.remote(step_context, step_id, step_inputs)