Пример #1
0
def execute_workflow(workflow: Workflow) -> "WorkflowExecutionResult":
    """Execute workflow.

    This function also performs tail-recursion optimization for inplace
    workflow steps: while the persisted output of an execution is an
    ``InplaceReturnedWorkflow``, the workflow it carries is executed next in
    a loop (under the step context it specifies) instead of through nested
    Python calls.

    Args:
        workflow: The workflow to be executed.
    Returns:
        The execution result. Any of its ``persisted_output`` /
        ``volatile_output`` that is not already a ``WorkflowOutputType`` has
        been wrapped with ``ray.put``.
    """
    # Tail recursion optimization.
    context = {}
    while True:
        with workflow_context.fork_workflow_step_context(**context):
            result = _execute_workflow(workflow)
        if not isinstance(result.persisted_output, InplaceReturnedWorkflow):
            break
        workflow = result.persisted_output.workflow
        context = result.persisted_output.context

    # Convert the outputs into ObjectRefs. Each output is tested on its own:
    # re-testing ``persisted_output`` for the volatile branch would never
    # fire, because the line above it has already converted that field.
    if not isinstance(result.persisted_output, WorkflowOutputType):
        result.persisted_output = ray.put(result.persisted_output)
    if not isinstance(result.volatile_output, WorkflowOutputType):
        result.volatile_output = ray.put(result.volatile_output)
    return result
Пример #2
0
 def from_workflow_inputs(cls, inputs: "WorkflowInputs"):
     """Build an instance of ``cls`` from ``inputs``.

     Each workflow in ``inputs.workflows`` is executed, in order, inside a
     forked step context (``outer_most_step_id=None``,
     ``last_step_of_workflow=False``) and its persisted output is collected.

     Args:
         inputs: Provides ``args``, ``workflows`` and ``workflow_refs``.
     Returns:
         ``cls(inputs.args, <persisted outputs>, inputs.workflow_refs)``.
     """
     persisted_outputs = []
     with workflow_context.fork_workflow_step_context(
             outer_most_step_id=None, last_step_of_workflow=False):
         for upstream in inputs.workflows:
             upstream_result = execute_workflow(upstream)
             persisted_outputs.append(upstream_result.persisted_output)
     return cls(inputs.args, persisted_outputs, inputs.workflow_refs)
Пример #3
0
def execute_workflow(job_id, workflow: Workflow) -> "WorkflowExecutionResult":
    """Run ``workflow`` and return its execution result.

    Inplace workflow steps are handled with tail-recursion optimization:
    whenever an execution's output is an ``InplaceReturnedWorkflow``, the
    workflow it carries is executed next, in a loop, under the step context
    it specifies.

    Args:
        job_id: Forwarded unchanged to ``_execute_workflow``.
        workflow: The workflow to be executed.
    Returns:
        The execution result, with ``output`` converted through
        ``WorkflowStaticRef.from_output``.
    """
    # First execution runs under a fork with no context overrides.
    with workflow_context.fork_workflow_step_context():
        result = _execute_workflow(job_id, workflow)

    # Tail recursion optimization: keep unwrapping inplace-returned workflows.
    while isinstance(result.output, InplaceReturnedWorkflow):
        returned = result.output
        workflow = returned.workflow
        with workflow_context.fork_workflow_step_context(**returned.context):
            result = _execute_workflow(job_id, workflow)

    # Convert the output into a WorkflowStaticRef. ``workflow`` is the last
    # workflow that was executed by the loop above.
    final_step_id = workflow.step_id
    result.output = WorkflowStaticRef.from_output(final_step_id, result.output)
    return result
Пример #4
0
def execute_workflow(workflow: "Workflow") -> "WorkflowExecutionResult":
    """Execute workflow.

    The work is done in four stages: execute (or reuse) the input workflows,
    pick an executor based on the step options, run the step, then
    post-process the outputs.

    Args:
        workflow: The workflow to be executed.

    Returns:
        A ``WorkflowExecutionResult`` holding the persisted and volatile
        outputs. The result is also stored on ``workflow._result``; when
        ``workflow.executed`` is already true, ``workflow.result`` is
        returned without executing anything.
    """
    if workflow.executed:
        return workflow.result

    # Stage 1: prepare inputs
    workflow_data = workflow.data
    inputs = workflow_data.inputs
    workflow_outputs = []
    with workflow_context.fork_workflow_step_context(
            outer_most_step_id=None, last_step_of_workflow=False):
        for w in inputs.workflows:
            static_ref = w.ref
            if static_ref is None:
                # The input workflow is not a reference to an executed
                # workflow, so execute it now.
                output = execute_workflow(w).persisted_output
                static_ref = WorkflowStaticRef(step_id=w.step_id, ref=output)
            workflow_outputs.append(static_ref)

    baked_inputs = _BakedWorkflowInputs(
        args=inputs.args,
        workflow_outputs=workflow_outputs,
        workflow_refs=inputs.workflow_refs,
    )

    # Stage 2: match executors
    step_options = workflow_data.step_options
    if step_options.allow_inplace:
        # TODO(suquark): For inplace execution, it is impossible
        # to get the ObjectRef of the output before execution.
        # Here we use a dummy ObjectRef, because _record_step_status does not
        # even use it (?!).
        _record_step_status(workflow.step_id, WorkflowStatus.RUNNING,
                            [ray.put(None)])
        # Note: we need to be careful about workflow context when
        # calling the executor directly.
        # TODO(suquark): We still have recursive Python calls.
        # This would cause stack overflow if we have a really
        # deep recursive call. We should fix it later.
        if step_options.step_type == StepType.WAIT:
            executor = _workflow_wait_executor
        else:
            executor = _workflow_step_executor
    else:
        if step_options.step_type == StepType.WAIT:
            # This is very important to set "num_cpus=0" to
            # ensure "workflow.wait" is not blocked by other
            # tasks.
            executor = _workflow_wait_executor_remote.options(
                num_cpus=0).remote
        else:
            executor = _workflow_step_executor_remote.options(
                **step_options.ray_options).remote

    # Stage 3: execution
    persisted_output, volatile_output = executor(
        workflow_data.func_body,
        workflow_context.get_workflow_step_context(),
        workflow.step_id,
        baked_inputs,
        workflow_data.step_options,
    )

    # Stage 4: post processing outputs
    # Each output is tested on its own. Re-testing ``persisted_output`` for
    # the volatile branch would never fire, since the line above it has
    # already converted that variable.
    if not isinstance(persisted_output, WorkflowOutputType):
        persisted_output = ray.put(persisted_output)
    if not isinstance(volatile_output, WorkflowOutputType):
        volatile_output = ray.put(volatile_output)

    if step_options.step_type != StepType.READONLY_ACTOR_METHOD:
        if not step_options.allow_inplace:
            # TODO: [Possible flaky bug] Here the RUNNING state may
            # be recorded earlier than SUCCESSFUL. This caused some
            # confusion during development.
            _record_step_status(workflow.step_id, WorkflowStatus.RUNNING,
                                [volatile_output])

    result = WorkflowExecutionResult(persisted_output, volatile_output)
    # Remember the result on the workflow object itself.
    workflow._result = result
    workflow._executed = True
    return result
Пример #5
0
def _workflow_step_executor(
    func: Callable,
    context: "WorkflowStepContext",
    step_id: "StepID",
    baked_inputs: "_BakedWorkflowInputs",
    runtime_options: "WorkflowStepRuntimeOptions",
) -> Tuple[Any, Any]:
    """Run one workflow step and return its outputs.

    Args:
        func: The workflow step function.
        context: Workflow step context to install before running the step.
        step_id: ID of the step.
        baked_inputs: The processed inputs for the step; resolved into
            positional and keyword arguments for ``func``.
        runtime_options: Parameters for workflow step execution.

    Returns:
        A ``(persisted_output, volatile_output)`` pair.

    Raises:
        TypeError: If a readonly actor method produces a ``Workflow`` as its
            volatile output.
        Exception: Anything raised while running the step is committed via
            ``commit_step`` and then re-raised.
    """
    # Part 1: install the step context, then read it back.
    workflow_context.update_workflow_step_context(context, step_id)
    context = workflow_context.get_workflow_step_context()
    step_type = runtime_options.step_type

    # Part 2: turn the baked inputs into call arguments.
    args, kwargs = baked_inputs.resolve()

    # Part 3: run the step, bracketed by pre-/post-run metadata.
    store = workflow_storage.get_workflow_storage()
    try:
        store.save_step_prerun_metadata(step_id, {"start_time": time.time()})
        persisted_output, volatile_output = _wrap_run(
            func, runtime_options, *args, **kwargs)
        store.save_step_postrun_metadata(step_id, {"end_time": time.time()})
    except Exception as e:
        # Record the failure before propagating it.
        commit_step(store, step_id, None, exception=e)
        raise e

    # Part 4: persist outputs (skipped for readonly actor methods).
    if step_type != StepType.READONLY_ACTOR_METHOD:
        store = workflow_storage.get_workflow_storage()
        commit_step(store, step_id, persisted_output, exception=None)
        if isinstance(persisted_output, Workflow):
            outer_most_step_id = context.outer_most_step_id
            if step_type == StepType.FUNCTION and not outer_most_step_id:
                # This step returns a nested workflow and has no outer step
                # itself, so it becomes the outer most step that all inner
                # nested steps will refer to.
                outer_most_step_id = workflow_context.get_current_step_id()
            assert volatile_output is None
            # Run the returned sub-workflow with "outer_most_step_id"
            # handed down.
            with workflow_context.fork_workflow_step_context(
                    outer_most_step_id=outer_most_step_id):
                nested_result = execute_workflow(persisted_output)
            # When a virtual actor method returns a workflow, both outputs
            # come from the nested execution.
            persisted_output = nested_result.persisted_output
            volatile_output = nested_result.volatile_output
        elif context.last_step_of_workflow:
            # Advance the progress of the workflow.
            store.advance_progress(step_id)
        _record_step_status(step_id, WorkflowStatus.SUCCESSFUL)
    else:
        if isinstance(volatile_output, Workflow):
            raise TypeError(
                "Returning a Workflow from a readonly virtual actor "
                "is not allowed.")
        assert not isinstance(persisted_output, Workflow)

    logger.info(get_step_status_info(WorkflowStatus.SUCCESSFUL))

    if isinstance(volatile_output, Workflow):
        # A step method was called inside the virtual actor; it has to be
        # run to obtain the final result.
        assert step_type == StepType.ACTOR_METHOD
        current_workflow_id = workflow_context.get_current_workflow_id()
        volatile_output = volatile_output.run_async(current_workflow_id)
    return persisted_output, volatile_output
Пример #6
0
def _workflow_step_executor(step_type: StepType, func: Callable,
                            context: workflow_context.WorkflowStepContext,
                            step_id: "StepID",
                            baked_inputs: "_BakedWorkflowInputs",
                            catch_exceptions: bool, max_retries: int) -> Any:
    """Run one workflow step and return its outputs.

    Args:
        step_type: The type of workflow step.
        func: The workflow step function.
        context: Workflow step context to install before running the step.
        step_id: The ID of the step.
        baked_inputs: The processed inputs for the step.
        catch_exceptions: Forwarded to ``_wrap_run``. If set to be true, the
            step returns (Optional[Result], Optional[Error]) instead of
            Result.
        max_retries: Forwarded to ``_wrap_run``. Max number of retries on
            failure.

    Returns:
        A ``(persisted_output, volatile_output)`` pair.

    Raises:
        TypeError: If a readonly actor method produces a ``Workflow`` as its
            volatile output.
        Exception: Anything raised while running the step is committed via
            ``commit_step`` and then re-raised.
    """
    workflow_context.update_workflow_step_context(context, step_id)
    args, kwargs = _resolve_step_inputs(baked_inputs)
    store = workflow_storage.get_workflow_storage()
    try:
        outputs = _wrap_run(func, step_type, step_id, catch_exceptions,
                            max_retries, *args, **kwargs)
        persisted_output, volatile_output = outputs
    except Exception as e:
        # Record the failure before propagating it.
        commit_step(store, step_id, None, e)
        raise e

    if step_type != StepType.READONLY_ACTOR_METHOD:
        store = workflow_storage.get_workflow_storage()
        commit_step(store, step_id, persisted_output, None)
        outer_most_step_id = context.outer_most_step_id
        if isinstance(persisted_output, Workflow):
            if step_type == StepType.FUNCTION and not outer_most_step_id:
                # This step returns a nested workflow and has no outer step
                # itself, so it becomes the outer most step that all inner
                # nested steps will refer to.
                outer_most_step_id = workflow_context.get_current_step_id()
            assert volatile_output is None
            # Run the returned sub-workflow with "outer_most_step_id"
            # handed down.
            with workflow_context.fork_workflow_step_context(
                    outer_most_step_id=outer_most_step_id):
                nested_result = execute_workflow(persisted_output)
            # When a virtual actor method returns a workflow, both outputs
            # come from the nested execution.
            persisted_output = nested_result.persisted_output
            volatile_output = nested_result.volatile_output
        elif context.last_step_of_workflow:
            # Advance the progress of the workflow.
            store.advance_progress(step_id)
        _record_step_status(step_id, WorkflowStatus.SUCCESSFUL)
    else:
        if isinstance(volatile_output, Workflow):
            raise TypeError(
                "Returning a Workflow from a readonly virtual actor "
                "is not allowed.")
        assert not isinstance(persisted_output, Workflow)

    logger.info(get_step_status_info(WorkflowStatus.SUCCESSFUL))

    if isinstance(volatile_output, Workflow):
        # A step method was called inside the virtual actor; it has to be
        # run to obtain the final result.
        assert step_type == StepType.ACTOR_METHOD
        current_workflow_id = workflow_context.get_current_workflow_id()
        volatile_output = volatile_output.run_async(current_workflow_id)
    return persisted_output, volatile_output
Пример #7
0
def execute_workflow(workflow: "Workflow") -> "WorkflowExecutionResult":
    """Execute workflow.

    The work is done in four stages: execute (or reuse) the input workflows
    under a derived checkpoint context, pick an executor based on the step
    options, run the step, then post-process the outputs.

    Args:
        workflow: The workflow to be executed.

    Returns:
        A ``WorkflowExecutionResult`` holding the persisted and volatile
        outputs. The result is also stored on ``workflow._result``; when
        ``workflow.executed`` is already true, ``workflow.result`` is
        returned without executing anything.
    """
    if workflow.executed:
        return workflow.result

    # Stage 1: prepare inputs
    workflow_data = workflow.data
    inputs = workflow_data.inputs
    # Here A is the outer workflow step, B & C are the inner steps.
    # C is the output step for A, because C produces the output for A.
    #
    # @workflow.step
    # def A():
    #     b = B.step()
    #     return C.step(b)
    #
    # If the outer workflow step skips checkpointing, it would
    # update the checkpoint context of all inner steps except
    # the output step, marking them "detached" from the DAG.
    # Output step is not detached from the DAG because once
    # completed, it replaces the output of the outer step.
    step_context = workflow_context.get_workflow_step_context()
    checkpoint_context = step_context.checkpoint_context.copy()
    # "detached" could be defined recursively:
    # detached := already detached or the outer step skips checkpointing
    checkpoint_context.detached_from_dag = (
        checkpoint_context.detached_from_dag
        or not step_context.checkpoint_context.checkpoint)
    # Apply checkpoint context to input steps. Since input steps
    # further apply them to their inputs, this would eventually
    # apply to all steps except the output step. This avoids
    # detaching the output step.
    workflow_outputs = []
    with workflow_context.fork_workflow_step_context(
            outer_most_step_id=None,
            last_step_of_workflow=False,
            checkpoint_context=checkpoint_context,
    ):
        for w in inputs.workflows:
            static_ref = w.ref
            if static_ref is None:
                # The input workflow is not a reference to an executed
                # workflow, so execute it now.
                output = execute_workflow(w).persisted_output
                static_ref = WorkflowStaticRef(step_id=w.step_id, ref=output)
            workflow_outputs.append(static_ref)

    baked_inputs = _BakedWorkflowInputs(
        args=inputs.args,
        workflow_outputs=workflow_outputs,
        workflow_refs=inputs.workflow_refs,
    )

    # Stage 2: match executors
    step_options = workflow_data.step_options
    if step_options.allow_inplace:
        # TODO(suquark): For inplace execution, it is impossible
        # to get the ObjectRef of the output before execution.
        # Here we use a dummy ObjectRef, because _record_step_status does not
        # even use it (?!).
        _record_step_status(workflow.step_id, WorkflowStatus.RUNNING,
                            [ray.put(None)])
        # Note: we need to be careful about workflow context when
        # calling the executor directly.
        # TODO(suquark): We still have recursive Python calls.
        # This would cause stack overflow if we have a really
        # deep recursive call. We should fix it later.
        if step_options.step_type == StepType.WAIT:
            executor = _workflow_wait_executor
        else:
            executor = _workflow_step_executor
    else:
        if step_options.step_type == StepType.WAIT:
            # This is very important to set "num_cpus=0" to
            # ensure "workflow.wait" is not blocked by other
            # tasks.
            executor = _workflow_wait_executor_remote.options(
                num_cpus=0).remote
        else:
            executor = _workflow_step_executor_remote.options(
                **step_options.ray_options).remote

    # Stage 3: execution
    # The step context captured in Stage 1 (outside the fork used for the
    # inputs) is the one handed to the executor.
    persisted_output, volatile_output = executor(
        workflow_data.func_body,
        step_context,
        workflow.step_id,
        baked_inputs,
        workflow_data.step_options,
    )

    # Stage 4: post processing outputs
    # Each output is tested on its own. Re-testing ``persisted_output`` for
    # the volatile branch would never fire, since the line above it has
    # already converted that variable.
    if not isinstance(persisted_output, WorkflowOutputType):
        persisted_output = ray.put(persisted_output)
    if not isinstance(volatile_output, WorkflowOutputType):
        volatile_output = ray.put(volatile_output)

    if step_options.step_type != StepType.READONLY_ACTOR_METHOD:
        if not step_options.allow_inplace:
            # TODO: [Possible flaky bug] Here the RUNNING state may
            # be recorded earlier than SUCCESSFUL. This caused some
            # confusion during development.
            _record_step_status(workflow.step_id, WorkflowStatus.RUNNING,
                                [volatile_output])

    result = WorkflowExecutionResult(persisted_output, volatile_output)
    # Remember the result on the workflow object itself.
    workflow._result = result
    workflow._executed = True
    return result
Пример #8
0
def _workflow_step_executor(
    func: Callable,
    context: "WorkflowStepContext",
    step_id: "StepID",
    baked_inputs: "_BakedWorkflowInputs",
    runtime_options: "WorkflowStepRuntimeOptions",
    inplace: bool = False,
) -> Tuple[Any, Any]:
    """Executor function for workflow step.

    Installs the step context, resolves the inputs, runs the step function
    and then commits / post-processes its outputs.

    Args:
        func: The workflow step function.
        context: Workflow step context. Used to access correct storage etc.
        step_id: ID of the step.
        baked_inputs: The processed inputs for the step.
        runtime_options: Parameters for workflow step execution.
        inplace: Execute the workflow inplace. When true and the step's
            persisted output is a ``Workflow``, that workflow is not executed
            here; it is handed back wrapped in ``InplaceReturnedWorkflow``.

    Returns:
        A ``(persisted_output, volatile_output)`` pair. In the inplace case
        described above the pair is ``(InplaceReturnedWorkflow, None)``.

    Raises:
        TypeError: If a readonly actor method produces a ``Workflow`` as its
            volatile output.
        Exception: Anything raised while running the step is committed via
            ``commit_step`` and then re-raised.
    """
    # Part 1: update the context for the step
    workflow_context.update_workflow_step_context(context, step_id)
    # Re-read the context after the update; the rest of the function uses
    # this one rather than the argument.
    context = workflow_context.get_workflow_step_context()
    step_type = runtime_options.step_type
    # Publish this step's checkpoint option on the context.
    context.checkpoint_context.checkpoint = runtime_options.checkpoint

    # Part 2: resolve inputs
    args, kwargs = baked_inputs.resolve()

    # Part 3: execute the step
    store = workflow_storage.get_workflow_storage()
    try:
        step_prerun_metadata = {"start_time": time.time()}
        store.save_step_prerun_metadata(step_id, step_prerun_metadata)
        with workflow_context.workflow_execution():
            persisted_output, volatile_output = _wrap_run(
                func, runtime_options, *args, **kwargs)
        step_postrun_metadata = {"end_time": time.time()}
        store.save_step_postrun_metadata(step_id, step_postrun_metadata)
    except Exception as e:
        # Always checkpoint the exception.
        commit_step(store, step_id, None, exception=e)
        raise e

    # Part 4: save outputs
    if step_type == StepType.READONLY_ACTOR_METHOD:
        # Readonly actor methods are never committed and must not produce
        # workflows.
        if isinstance(volatile_output, Workflow):
            raise TypeError(
                "Returning a Workflow from a readonly virtual actor is not allowed."
            )
        assert not isinstance(persisted_output, Workflow)
    else:
        # TODO(suquark): Validate checkpoint options before
        # commit the step.
        store = workflow_storage.get_workflow_storage()
        # The output is committed only in SYNC checkpoint mode.
        if CheckpointMode(runtime_options.checkpoint) == CheckpointMode.SYNC:
            commit_step(
                store,
                step_id,
                persisted_output,
                exception=None,
            )
        if isinstance(persisted_output, Workflow):
            sub_workflow = persisted_output
            outer_most_step_id = context.outer_most_step_id
            assert volatile_output is None
            if step_type == StepType.FUNCTION:
                # Passing down outer most step so inner nested steps would
                # access the same outer most step.
                if not context.outer_most_step_id:
                    # The current workflow step returns a nested workflow, and
                    # there is no outer step for the current step. So the
                    # current step is the outer most step for the inner nested
                    # workflow steps.
                    outer_most_step_id = workflow_context.get_current_step_id()
            if inplace:
                _step_options = sub_workflow.data.step_options
                # Warn when a non-WAIT sub-workflow was configured with Ray
                # options that differ from those of the calling step.
                if (_step_options.step_type != StepType.WAIT
                        and runtime_options.ray_options !=
                        _step_options.ray_options):
                    logger.warning(
                        f"Workflow step '{sub_workflow.step_id}' uses "
                        f"a Ray option different to its caller step '{step_id}' "
                        f"and will be executed inplace. Ray assumes it still "
                        f"consumes the same resource as the caller. This may result "
                        f"in oversubscribing resources.")
                # Early return: the sub-workflow is handed back unexecuted,
                # together with the context to run it under. Note that the
                # SUCCESSFUL status recording and log line below are not
                # reached on this path.
                return (
                    InplaceReturnedWorkflow(
                        sub_workflow,
                        {"outer_most_step_id": outer_most_step_id}),
                    None,
                )
            # Execute sub-workflow. Pass down "outer_most_step_id".
            with workflow_context.fork_workflow_step_context(
                    outer_most_step_id=outer_most_step_id):
                result = execute_workflow(sub_workflow)
            # When virtual actor returns a workflow in the method,
            # the volatile_output and persisted_output will be put together
            persisted_output = result.persisted_output
            volatile_output = result.volatile_output
        elif context.last_step_of_workflow:
            # advance the progress of the workflow
            store.advance_progress(step_id)
        _record_step_status(step_id, WorkflowStatus.SUCCESSFUL)
    logger.info(get_step_status_info(WorkflowStatus.SUCCESSFUL))
    if isinstance(volatile_output, Workflow):
        # This is the case where a step method is called in the virtual actor.
        # We need to run the method to get the final result.
        assert step_type == StepType.ACTOR_METHOD
        volatile_output = volatile_output.run_async(
            workflow_context.get_current_workflow_id())
    return persisted_output, volatile_output
Пример #9
0
def _execute_workflow(job_id,
                      workflow: "Workflow") -> "WorkflowExecutionResult":
    """Internal function of workflow execution.

    Executes (or reuses) the input workflows, selects an executor from the
    step options, runs the step and records the result on ``workflow``.

    Args:
        job_id: Forwarded to the input executions, the baked inputs and the
            executor.
        workflow: The workflow to be executed.

    Returns:
        A ``WorkflowExecutionResult`` wrapping the executor's output. When
        ``workflow.executed`` is already true, ``workflow.result`` is
        returned without executing anything.
    """
    if workflow.executed:
        return workflow.result

    # Stage 1: prepare inputs
    workflow_data = workflow.data
    inputs = workflow_data.inputs
    # Consider an outer workflow step A with inner steps B and C, where C is
    # the output step of A because C produces A's output:
    #
    # @workflow.step
    # def A():
    #     b = B.step()
    #     return C.step(b)
    #
    # When the outer step skips checkpointing, the checkpoint context of
    # every inner step except the output step is updated to mark it
    # "detached" from the DAG. The output step stays attached because, once
    # completed, it replaces the output of the outer step.
    step_context = workflow_context.get_workflow_step_context()
    checkpoint_context = step_context.checkpoint_context.copy()
    # "detached" is recursive:
    # detached := already detached or the outer step skips checkpointing
    if not checkpoint_context.detached_from_dag:
        checkpoint_context.detached_from_dag = (
            not step_context.checkpoint_context.checkpoint)
    # The checkpoint context is applied to the input steps only. They in turn
    # apply it to their own inputs, so it eventually reaches every step
    # except the output step, which therefore is never detached.
    workflow_outputs = []
    with workflow_context.fork_workflow_step_context(
            outer_most_step_id=None,
            last_step_of_workflow=False,
            checkpoint_context=checkpoint_context,
    ):
        for upstream in inputs.workflows:
            static_ref = upstream.ref
            if static_ref is None:
                # The input workflow is not a reference to an executed
                # workflow, so it is executed here.
                upstream_ray_options = upstream.data.step_options.ray_options
                static_ref = execute_workflow(job_id, upstream).output
                static_ref._resolve_like_object_ref_in_args = (
                    upstream_ray_options.get(
                        "_resolve_like_object_ref_in_args", False))
            workflow_outputs.append(static_ref)

    baked_inputs = _BakedWorkflowInputs(
        args=inputs.args,
        workflow_outputs=workflow_outputs,
        workflow_refs=inputs.workflow_refs,
        job_id=job_id,
    )

    # Stage 2: match executors
    step_options = workflow_data.step_options
    is_wait_step = step_options.step_type == StepType.WAIT
    if step_options.allow_inplace:
        # TODO(suquark): For inplace execution, it is impossible
        # to get the ObjectRef of the output before execution.
        # Here we use a dummy ObjectRef, because _record_step_status does not
        # even use it (?!).
        _record_step_status(workflow.step_id, WorkflowStatus.RUNNING,
                            [ray.put(None)])
        # Note: we need to be careful about workflow context when
        # calling the executor directly.
        # TODO(suquark): We still have recursive Python calls.
        # This would cause stack overflow if we have a really
        # deep recursive call. We should fix it later.
        if is_wait_step:
            executor = _workflow_wait_executor
        else:
            # Tell the executor that we are running inplace. This enables
            # tail-recursion optimization.
            executor = functools.partial(_workflow_step_executor, inplace=True)
    elif is_wait_step:
        # This is very important to set "num_cpus=0" to
        # ensure "workflow.wait" is not blocked by other
        # tasks.
        wait_remote = _workflow_wait_executor_remote.options(num_cpus=0)
        executor = wait_remote.remote
    else:
        ray_options = step_options.ray_options.copy()
        # "_resolve_like_object_ref_in_args" is not a Ray option; strip it
        # from the copy before handing the options to Ray.
        ray_options.pop("_resolve_like_object_ref_in_args", None)
        step_remote = _workflow_step_executor_remote.options(**ray_options)
        executor = step_remote.remote

    # Stage 3: execution
    output = executor(
        workflow_data.func_body,
        step_context,
        job_id,
        workflow.step_id,
        baked_inputs,
        workflow_data.step_options,
    )

    # Stage 4: post processing outputs
    if not step_options.allow_inplace:
        # TODO: [Possible flaky bug] Here the RUNNING state may
        # be recorded earlier than SUCCESSFUL. This caused some
        # confusion during development.
        _record_step_status(workflow.step_id, WorkflowStatus.RUNNING, [None])

    result = WorkflowExecutionResult(output)
    # Remember the result on the workflow object itself.
    workflow._result = result
    workflow._executed = True
    return result