def _run_step(self, args, kwargs):
    """Submit this step's remote function and return its wrapped outputs.

    A fresh UUID is generated as the task ID for every submission.

    Args:
        args: Forwarded to the remote function as its third positional
            argument.
        kwargs: Forwarded to the remote function as its fourth positional
            argument (it is NOT expanded with ``**``).

    Returns:
        The first element of the pair returned by ``wrap_step_outputs``;
        the second element of that pair is ignored.
    """
    new_task_id = uuid.uuid4()
    submit = self._remote_function.remote
    pending_refs = submit(
        workflow_context.get_workflow_step_context(),
        new_task_id,
        args,
        kwargs,
    )
    # wrap_step_outputs must yield exactly two values; only the first is used.
    wrapped_outputs, _ = wrap_step_outputs(pending_refs)
    return wrapped_outputs
def execute_virtual_actor_step(step_id: "StepID",
                               workflow_data: "WorkflowData",
                               readonly: bool) -> "WorkflowOutputType":
    """Submit a virtual actor method as a workflow step.

    Args:
        step_id: ID passed to the step executor; also used to record the
            RUNNING status for non-readonly steps.
        workflow_data: Supplies the function body, the inputs, the Ray
            options, ``catch_exceptions`` and ``max_retries``.
        readonly: When true, the step is submitted as
            ``StepType.READONLY_ACTOR_METHOD`` and no status is recorded;
            otherwise it is submitted as ``StepType.ACTOR_METHOD``.

    Returns:
        For readonly steps, element ``1`` of the executor's result (the
        output, skipping the state). Otherwise the executor's result as is.
    """
    from ray.experimental.workflow.common import WorkflowStatus

    if not readonly:
        _record_step_status(step_id, WorkflowStatus.RUNNING)

    inputs = workflow_data.inputs
    # Upstream workflows are executed first, in order.
    upstream_outputs = [upstream.execute() for upstream in inputs.workflows]
    step_inputs = (inputs.args, upstream_outputs, inputs.object_refs)

    # An empty outer-most step ID is always passed for actor methods.
    outer_most_step_id = ""
    step_type = (StepType.READONLY_ACTOR_METHOD
                 if readonly else StepType.ACTOR_METHOD)

    submit = _workflow_step_executor.options(**workflow_data.ray_options).remote
    ret = submit(
        step_type,
        workflow_data.func_body,
        workflow_context.get_workflow_step_context(),
        step_id,
        step_inputs,
        outer_most_step_id,
        workflow_data.catch_exceptions,
        workflow_data.max_retries,
        last_step_of_workflow=True,
    )
    # Readonly callers only get the output; the state (index 0) is skipped.
    return ret[1] if readonly else ret
def execute_workflow_step(
        step_func: Callable,
        step_id: "StepID",
        step_inputs: "WorkflowInputTuple",
        catch_exceptions: bool,
        step_max_retries: int,
        ray_options: Dict[str, Any],
        outer_most_step_id: "StepID") -> "WorkflowOutputType":
    """Submit a workflow step to the remote step executor.

    Args:
        step_func: The step function handed to the executor.
        step_id: ID of the step being submitted.
        step_inputs: Inputs forwarded unchanged to the executor.
        catch_exceptions: Forwarded unchanged to the executor.
        step_max_retries: Forwarded unchanged to the executor.
        ray_options: Keyword options applied to the executor through
            ``.options(**ray_options)`` before submission.
        outer_most_step_id: Forwarded unchanged to the executor.

    Returns:
        Whatever the executor's ``.remote(...)`` call returns.
    """
    submit = _workflow_step_executor.options(**ray_options).remote
    return submit(
        step_func,
        workflow_context.get_workflow_step_context(),
        step_id,
        step_inputs,
        outer_most_step_id,
        catch_exceptions,
        step_max_retries,
    )
def _run_step(
        self,
        step_id: StepID,
        step_inputs: WorkflowInputTuple,
        outer_most_step_id: Optional[StepID] = None) -> WorkflowOutputType:
    """Submit this object's function to ``workflow_step_executor``.

    Args:
        step_id: ID of the step being submitted.
        step_inputs: Inputs forwarded unchanged to the executor.
        outer_most_step_id: Forwarded unchanged to the executor; defaults
            to ``None``.

    Returns:
        Whatever ``workflow_step_executor.remote(...)`` returns.
    """
    submit = workflow_step_executor.remote
    return submit(
        self._func,
        workflow_context.get_workflow_step_context(),
        step_id,
        step_inputs,
        outer_most_step_id,
    )
def __init__(self,
             workflow_id: Optional[str] = None,
             store: Optional[storage.Storage] = None):
    """Bind this object to a workflow ID and a storage backend.

    Args:
        workflow_id: ID of the workflow. When ``None``, it is read from the
            current workflow step context.
        store: Storage backend. When ``None``, the global storage returned
            by ``storage.get_global_storage()`` is used.
    """
    # Resolve both defaults (workflow ID first, then storage) before
    # touching any attribute.
    resolved_id = (workflow_context.get_workflow_step_context().workflow_id
                   if workflow_id is None else workflow_id)
    resolved_store = storage.get_global_storage() if store is None else store
    self._storage = resolved_store
    self._workflow_id = resolved_id
def execute_workflow_step(
        step_func: Callable,
        step_id: "StepID",
        step_inputs: "WorkflowInputTuple",
        catch_exceptions: bool,
        step_max_retries: int,
        ray_options: Dict[str, Any],
        outer_most_step_id: "StepID") -> "WorkflowOutputType":
    """Record the step as RUNNING, then submit it to the step executor.

    Args:
        step_func: The step function handed to the executor.
        step_id: ID of the step; also used when recording its status.
        step_inputs: Inputs forwarded unchanged to the executor.
        catch_exceptions: Forwarded unchanged to the executor.
        step_max_retries: Forwarded unchanged to the executor.
        ray_options: Keyword options applied to the executor through
            ``.options(**ray_options)`` before submission.
        outer_most_step_id: Forwarded unchanged to the executor.

    Returns:
        Whatever the executor's ``.remote(...)`` call returns.
    """
    from ray.experimental.workflow.common import WorkflowStatus

    # The status is recorded before the step is submitted.
    _record_step_status(step_id, WorkflowStatus.RUNNING)

    submit = _workflow_step_executor.options(**ray_options).remote
    return submit(
        step_func,
        workflow_context.get_workflow_step_context(),
        step_id,
        step_inputs,
        outer_most_step_id,
        catch_exceptions,
        step_max_retries,
    )
def execute_workflow(
        workflow: "Workflow",
        outer_most_step_id: Optional[str] = None,
        last_step_of_workflow: bool = False) -> "WorkflowExecutionResult":
    """Execute a workflow, reusing its result if it already ran.

    Notation used below:

    * Dependencies between steps, "A.step(B.step())", are written "A - B".
    * Nested steps, "def A(): return B.step()", are written "A / B".

    In a chain/DAG of step dependencies, the "output step" is the step of
    last (topological) order. For example, in "A - B - C", C is the output
    step.

    In a chain of nested workflow steps, the initial "output step" is
    called the "outer most step" for the other "output steps". For example,
    in "A / B / C / D", "A" is the outer most step for "B", "C", "D"; in the
    hybrid workflow "((A - B) / C / D) - (E / (F - G) / H)", "B" is the
    outer most step for "C", "D"; "E" is the outer most step for "G", "H".

    Args:
        workflow: The workflow to be executed.
        outer_most_step_id: The ID of the outer most step (see above).
            None if it does not exist.
        last_step_of_workflow: Whether this step generates the output of
            the workflow (including nested steps).

    Returns:
        A ``WorkflowExecutionResult`` built from the persisted and volatile
        outputs returned by the step executor.

    Raises:
        TypeError: If the persisted output is not a ``WorkflowOutputType``.
    """
    # An already-executed workflow returns its stored result.
    if workflow.executed:
        return workflow.result

    data = workflow.data
    # Readonly actor methods do not get a RUNNING status recorded.
    if data.step_type != StepType.READONLY_ACTOR_METHOD:
        _record_step_status(workflow.id, WorkflowStatus.RUNNING)

    baked_inputs = _BakedWorkflowInputs.from_workflow_inputs(data.inputs)

    submit = _workflow_step_executor.options(**data.ray_options).remote
    persisted_output, volatile_output = submit(
        data.step_type,
        data.func_body,
        workflow_context.get_workflow_step_context(),
        workflow.id,
        baked_inputs,
        outer_most_step_id,
        data.catch_exceptions,
        data.max_retries,
        last_step_of_workflow,
    )

    if not isinstance(persisted_output, WorkflowOutputType):
        raise TypeError("Unexpected return type of the workflow.")

    execution_result = WorkflowExecutionResult(persisted_output,
                                               volatile_output)
    # Store the result on the workflow so later calls return it directly.
    workflow._result = execution_result
    workflow._executed = True
    return execution_result
def get_workflow_storage(workflow_id: Optional[str] = None) -> WorkflowStorage:
    """Build a ``WorkflowStorage`` on top of the global storage.

    Args:
        workflow_id: ID of the workflow. When ``None``, the ID is read from
            the current workflow step context.

    Returns:
        A ``WorkflowStorage`` for the resolved workflow ID.
    """
    # The global storage is fetched first, before the ID is resolved.
    global_store = storage.get_global_storage()
    resolved_id = (workflow_context.get_workflow_step_context().workflow_id
                   if workflow_id is None else workflow_id)
    return WorkflowStorage(resolved_id, global_store)
def execute_workflow_step(step_id: "StepID",
                          workflow_data: "WorkflowData",
                          outer_most_step_id: "StepID",
                          last_step_of_workflow: bool) -> "WorkflowOutputType":
    """Record the step as RUNNING and submit it as a function step.

    Args:
        step_id: ID of the step; also used when recording its status.
        workflow_data: Supplies the function body, the inputs, the Ray
            options, ``catch_exceptions`` and ``max_retries``.
        outer_most_step_id: Forwarded unchanged to the executor.
        last_step_of_workflow: Forwarded unchanged to the executor.

    Returns:
        Element ``0`` of what the executor's ``.remote(...)`` call returns.
    """
    _record_step_status(step_id, WorkflowStatus.RUNNING)

    inputs = workflow_data.inputs
    upstream_outputs = [upstream.execute() for upstream in inputs.workflows]
    # NOTE: The input placeholder can only be deserialized under a proper
    # serialization context; deserializing it without one raises an
    # exception. Passing it to _step_execution_function as a direct
    # argument would make Ray deserialize it without that context, so it is
    # wrapped inside a tuple instead.
    step_inputs = (inputs.args, upstream_outputs, inputs.object_refs)

    submit = _workflow_step_executor.options(**workflow_data.ray_options).remote
    executor_result = submit(
        StepType.FUNCTION,
        workflow_data.func_body,
        workflow_context.get_workflow_step_context(),
        step_id,
        step_inputs,
        outer_most_step_id,
        workflow_data.catch_exceptions,
        workflow_data.max_retries,
        last_step_of_workflow,
    )
    return executor_result[0]
def _resolve_dynamic_workflow_refs(workflow_refs: "List[WorkflowRef]"):
    """Resolve workflow refs to step outputs at runtime, by step ID.

    Each ref is resolved by trying these sources in order:

    1. The step output cached in the workflow management actor; the
       physical output object is fetched from it.
    2. If that fetch fails (or nothing is cached), the checkpointed step
       output in the workflow storage.
    3. If the checkpoint cannot be loaded, the step is resumed and its
       persisted output is resolved.

    Args:
        workflow_refs: The refs to resolve; each exposes a ``step_id``.

    Returns:
        A list of outputs, in the same order as ``workflow_refs``.
    """
    manager = get_or_create_management_actor()
    step_context = workflow_context.get_workflow_step_context()
    workflow_id = step_context.workflow_id
    storage_url = step_context.storage_url

    resolved_outputs = []
    for workflow_ref in workflow_refs:
        cached_ref = ray.get(
            manager.get_cached_step_output.remote(workflow_id,
                                                  workflow_ref.step_id))

        # 1. Try the cached output first.
        cache_hit = False
        if cached_ref is not None:
            try:
                output, _ = _resolve_object_ref(cached_ref)
            except Exception:
                # Best effort: any failure here falls through to the
                # storage/resume path below.
                pass
            else:
                cache_hit = True

        if not cache_hit:
            step_store = workflow_storage.get_workflow_storage()
            try:
                # 2. Fall back to the checkpointed output.
                output = step_store.load_step_output(workflow_ref.step_id)
            except DataLoadError:
                # 3. No usable checkpoint: resume the step itself.
                current_step_id = workflow_context.get_current_step_id()
                logger.warning("Failed to get the output of step "
                               f"{workflow_ref.step_id}. Trying to resume it. "
                               f"Current step: '{current_step_id}'")
                resumed_ref = recovery.resume_workflow_step(
                    workflow_id, workflow_ref.step_id,
                    storage_url).persisted_output
                output, _ = _resolve_object_ref(resumed_ref)

        resolved_outputs.append(output)
    return resolved_outputs
def execute_workflow_step(step_func: Callable,
                          step_id: StepID,
                          step_inputs: WorkflowInputTuple,
                          outer_most_step_id: StepID) -> WorkflowOutputType:
    """Submit a workflow step to the remote step executor.

    Args:
        step_func: The step function handed to the executor.
        step_id: ID of the step being submitted.
        step_inputs: Inputs forwarded unchanged to the executor.
        outer_most_step_id: Forwarded unchanged to the executor.

    Returns:
        Whatever ``_workflow_step_executor.remote(...)`` returns.
    """
    submit = _workflow_step_executor.remote
    return submit(
        step_func,
        workflow_context.get_workflow_step_context(),
        step_id,
        step_inputs,
        outer_most_step_id,
    )
def _run_step(self, step_id: StepID,
              step_inputs: WorkflowInputTuple) -> WorkflowOutputType:
    """Submit this object's remote function for the given step.

    Args:
        step_id: ID of the step being submitted.
        step_inputs: Inputs forwarded unchanged to the remote function.

    Returns:
        Whatever ``self._remote_function.remote(...)`` returns.
    """
    submit = self._remote_function.remote
    return submit(
        workflow_context.get_workflow_step_context(),
        step_id,
        step_inputs,
    )