def _inspect_step(self, step_id: StepID) -> StepInspectResult:
    items = self._scan(self._key_step_prefix(step_id), ignore_errors=True)
    keys = set(items)
    # does this step contain an output checkpoint file?
    if STEP_OUTPUT in keys:
        return StepInspectResult(output_object_valid=True)
    # do we know where the output comes from?
    if STEP_OUTPUTS_METADATA in keys:
        output_step_id = self._locate_output_step_id(step_id)
        return StepInspectResult(output_step_id=output_step_id)

    # read inputs metadata
    try:
        metadata = self._get(self._key_step_input_metadata(step_id), True)
        return StepInspectResult(
            args_valid=(STEP_ARGS in keys),
            func_body_valid=(STEP_FUNC_BODY in keys),
            workflows=metadata["workflows"],
            workflow_refs=metadata["workflow_refs"],
            step_options=WorkflowStepRuntimeOptions.from_dict(
                metadata["step_options"]),
            step_raised_exception=(STEP_EXCEPTION in keys),
        )
    except Exception:
        return StepInspectResult(
            args_valid=(STEP_ARGS in keys),
            func_body_valid=(STEP_FUNC_BODY in keys),
            step_raised_exception=(STEP_EXCEPTION in keys),
        )
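# --- Usage sketch (not part of the original source) ---------------------------
# Hedged illustration of how a recovery routine could branch on the fields that
# "_inspect_step" fills in. "_sketch_recover" is hypothetical; only
# "inspect_step", "StepInspectResult", "is_recoverable", and "load_step_output"
# appear in the surrounding code.
def _sketch_recover(wf_storage, step_id):
    result = wf_storage.inspect_step(step_id)
    if result.output_object_valid:
        # The checkpointed output exists; load it directly.
        return wf_storage.load_step_output(step_id)
    if result.output_step_id is not None:
        # The output comes from another (dynamic) step; follow the reference.
        return _sketch_recover(wf_storage, result.output_step_id)
    if result.is_recoverable():
        # Args and function body are checkpointed; the step could be re-run.
        raise NotImplementedError("re-execution is outside this sketch")
    raise ValueError(f"step {step_id!r} is not recoverable")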
def options(
    self,
    *,
    max_retries=None,
    catch_exceptions=None,
    name=None,
    metadata=None,
    **ray_options,
) -> "_VirtualActorMethodHelper":
    validate_user_metadata(metadata)

    options = WorkflowStepRuntimeOptions.make(
        step_type=self._options.step_type,
        catch_exceptions=catch_exceptions
        if catch_exceptions is not None
        else self._options.catch_exceptions,
        max_retries=max_retries
        if max_retries is not None
        else self._options.max_retries,
        ray_options={
            **self._options.ray_options,
            **(ray_options if ray_options is not None else {}),
        },
    )
    _self = _VirtualActorMethodHelper(
        self._original_class,
        self._original_method,
        self._method_name,
        runtime_options=options,
    )
    _self._name = name if name is not None else self._name
    _self._user_metadata = {
        **self._user_metadata,
        **(metadata if metadata is not None else {}),
    }
    return _self
def _build_workflow(*args, **kwargs) -> Workflow:
    flattened_args = signature.flatten_args(self._func_signature, args, kwargs)

    def prepare_inputs():
        ensure_ray_initialized()
        return serialization_context.make_workflow_inputs(flattened_args)

    nonlocal step_options
    if step_options is None:
        step_options = WorkflowStepRuntimeOptions.make(
            step_type=StepType.FUNCTION)
    # We could have "checkpoint=None" when we use @workflow.step
    # with arguments. Avoid this by updating it here.
    step_options.checkpoint = _inherit_checkpoint_option(
        step_options.checkpoint)

    workflow_data = WorkflowData(
        func_body=self._func,
        inputs=None,
        step_options=step_options,
        name=self._name,
        user_metadata=self._user_metadata,
    )
    return Workflow(workflow_data, prepare_inputs)
def _make_workflow_step_function(node: FunctionNode):
    from ray.workflow.step_function import WorkflowStepFunction

    bound_options = node._bound_options.copy()
    workflow_options = bound_options.pop("_metadata", {}).get(WORKFLOW_OPTIONS, {})
    # "_resolve_like_object_ref_in_args" indicates we should resolve the
    # workflow like an ObjectRef, when included in the arguments of
    # another workflow.
    bound_options["_resolve_like_object_ref_in_args"] = True
    step_options = WorkflowStepRuntimeOptions.make(
        step_type=StepType.FUNCTION,
        catch_exceptions=workflow_options.get("catch_exceptions", None),
        max_retries=workflow_options.get("max_retries", None),
        allow_inplace=workflow_options.get("allow_inplace", False),
        checkpoint=workflow_options.get("checkpoint", None),
        ray_options=bound_options,
    )

    return WorkflowStepFunction(
        node._body,
        step_options=step_options,
        name=workflow_options.get("name", None),
        metadata=workflow_options.pop("metadata", None),
    )
def options(
    self,
    *,
    max_retries: int = None,
    catch_exceptions: bool = None,
    name: str = None,
    metadata: Dict[str, Any] = None,
    allow_inplace: bool = None,
    checkpoint: "Optional[CheckpointModeType]" = None,
    **ray_options,
) -> "WorkflowStepFunction":
    """This function sets how the step function is going to be executed.

    Args:
        max_retries: Number of retries of the step for application-level
            errors.
        catch_exceptions: Whether the user wants to handle the failure
            manually. If set to True, (Optional[R], Optional[E]) is
            returned; if False, the normal result is returned.
        name: The name of this step, which will be used to generate the
            step_id of the step. The name will be used directly as the
            step id if possible, otherwise deduplicated by appending
            .N suffixes.
        metadata: Metadata to add to the step.
        allow_inplace: Execute the workflow step inplace.
        checkpoint: The option for checkpointing.
        **ray_options: All parameters in this field will be passed to
            the Ray remote function options.

    Returns:
        The step function itself.
    """
    validate_user_metadata(metadata)
    name = name if name is not None else self._name
    metadata = {
        **self._user_metadata,
        **(metadata if metadata is not None else {}),
    }
    step_options = WorkflowStepRuntimeOptions.make(
        step_type=StepType.FUNCTION,
        catch_exceptions=catch_exceptions
        if catch_exceptions is not None
        else self._step_options.catch_exceptions,
        max_retries=max_retries
        if max_retries is not None
        else self._step_options.max_retries,
        allow_inplace=allow_inplace
        if allow_inplace is not None
        else self._step_options.allow_inplace,
        checkpoint=_inherit_checkpoint_option(checkpoint),
        ray_options={
            **self._step_options.ray_options,
            **(ray_options if ray_options is not None else {}),
        },
    )
    return WorkflowStepFunction(
        self._func, step_options=step_options, name=name, metadata=metadata)
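# --- Usage sketch (not part of the original module) ---------------------------
# Hedged illustration of how the ".options()" override above is chained in user
# code, following the doctest examples elsewhere in this section. "book_hotel"
# is a hypothetical step; assumes Ray with the workflow library is installed
# and workflow storage has been initialized (e.g. via "workflow.init()").
from ray import workflow


@workflow.step
def book_hotel(dest: str) -> str:
    return f"hotel@{dest}"


# Per-call overrides; anything left unspecified is inherited from the current
# step options (see the "is not None" fallbacks above).
result, err = (
    book_hotel.options(max_retries=5, catch_exceptions=True).step("SFO").run()
)
assert err is None and result == "hotel@SFO"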
def step(*args, **kwargs):
    """A decorator used for creating workflow steps.

    Examples:
        >>> from ray import workflow
        >>> Flight, Hotel = ...  # doctest: +SKIP
        >>> @workflow.step  # doctest: +SKIP
        ... def book_flight(origin: str, dest: str) -> Flight:  # doctest: +SKIP
        ...     return Flight(...)  # doctest: +SKIP
        >>> @workflow.step(max_retries=3, catch_exceptions=True)  # doctest: +SKIP
        ... def book_hotel(dest: str) -> Hotel:  # doctest: +SKIP
        ...     return Hotel(...)  # doctest: +SKIP
    """
    if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
        options = WorkflowStepRuntimeOptions.make(step_type=StepType.FUNCTION)
        return make_step_decorator(options)(args[0])
    if len(args) != 0:
        raise ValueError(f"Invalid arguments for step decorator {args}")
    max_retries = kwargs.pop("max_retries", None)
    catch_exceptions = kwargs.pop("catch_exceptions", None)
    name = kwargs.pop("name", None)
    metadata = kwargs.pop("metadata", None)
    allow_inplace = kwargs.pop("allow_inplace", False)
    checkpoint = kwargs.pop("checkpoint", None)
    ray_options = kwargs

    options = WorkflowStepRuntimeOptions.make(
        step_type=StepType.FUNCTION,
        catch_exceptions=catch_exceptions,
        max_retries=max_retries,
        allow_inplace=allow_inplace,
        checkpoint=checkpoint,
        ray_options=ray_options,
    )
    return make_step_decorator(options, name, metadata)
def options(
    self,
    *,
    max_retries: int = 3,
    catch_exceptions: bool = False,
    name: str = None,
    metadata: Dict[str, Any] = None,
    allow_inplace: bool = False,
    checkpoint: "Optional[CheckpointModeType]" = None,
    **ray_options,
) -> "WorkflowStepFunction":
    """This function sets how the step function is going to be executed.

    Args:
        max_retries: Number of retries of the step for application-level
            errors.
        catch_exceptions: Whether the user wants to handle the failure
            manually. If set to True, (Optional[R], Optional[E]) is
            returned; if False, the normal result is returned.
        name: The name of this step, which will be used to generate the
            step_id of the step. The name will be used directly as the
            step id if possible, otherwise deduplicated by appending
            .N suffixes.
        metadata: Metadata to add to the step.
        allow_inplace: Execute the workflow step inplace.
        checkpoint: The option for checkpointing.
        **ray_options: All parameters in this field will be passed to
            the Ray remote function options.

    Returns:
        The step function itself.
    """
    # TODO(suquark): The options seem to drop items that we did not
    #   specify (e.g., the name becomes "None" if we did not pass
    #   name to the options). This does not seem correct to me.
    step_options = WorkflowStepRuntimeOptions.make(
        step_type=StepType.FUNCTION,
        catch_exceptions=catch_exceptions,
        max_retries=max_retries,
        allow_inplace=allow_inplace,
        checkpoint=_inherit_checkpoint_option(checkpoint),
        ray_options=ray_options,
    )
    return WorkflowStepFunction(
        self._func, step_options=step_options, name=name, metadata=metadata
    )
def __init__(
    self,
    func: Callable,
    *,
    step_options: "WorkflowStepRuntimeOptions" = None,
    name: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
):
    if metadata is not None:
        if not isinstance(metadata, dict):
            raise ValueError("metadata must be a dict.")
        for k, v in metadata.items():
            try:
                json.dumps(v)
            except TypeError as e:
                raise ValueError(
                    "metadata values must be JSON serializable, "
                    "however '{}' has a value whose {}.".format(k, e)
                )
    if step_options is None:
        step_options = WorkflowStepRuntimeOptions.make(step_type=StepType.FUNCTION)
    self._func = func
    self._step_options = step_options
    self._func_signature = signature.extract_signature(func)
    self._name = name or ""
    self._user_metadata = metadata or {}

    # Override signature and docstring
    @functools.wraps(func)
    def _build_workflow(*args, **kwargs) -> Workflow:
        flattened_args = signature.flatten_args(self._func_signature, args, kwargs)

        def prepare_inputs():
            ensure_ray_initialized()
            return serialization_context.make_workflow_inputs(flattened_args)

        workflow_data = WorkflowData(
            func_body=self._func,
            inputs=None,
            step_options=step_options,
            name=self._name,
            user_metadata=self._user_metadata,
        )
        return Workflow(workflow_data, prepare_inputs)

    self.step = _build_workflow
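# --- Usage sketch (not part of the original module) ---------------------------
# The "@workflow.step" decorator in this section returns instances of this
# class, but it can also be constructed directly, which is occasionally handy
# in tests. "double" is a hypothetical function; step options fall back to
# StepType.FUNCTION exactly as in the constructor above.
def double(x: int) -> int:
    return x * 2


double_step = WorkflowStepFunction(double, name="double", metadata={"owner": "demo"})
wf = double_step.step(21)  # builds a Workflow object; nothing executes yet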
def __init__(self, original_class: type):
    actor_methods = inspect.getmembers(original_class, is_function_or_method)

    self.cls = original_class
    self.module = original_class.__module__
    self.name = original_class.__name__
    self.qualname = original_class.__qualname__
    self.methods = {}
    for method_name, method in actor_methods:
        self._readonly = getattr(method, "__virtual_actor_readonly__", False)
        if self._readonly:
            step_type = StepType.READONLY_ACTOR_METHOD
        else:
            step_type = StepType.ACTOR_METHOD
        options = WorkflowStepRuntimeOptions.make(step_type=step_type)
        self.methods[method_name] = _VirtualActorMethodHelper(
            original_class, method, method_name, runtime_options=options
        )
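# --- Illustration (not part of the original module) ---------------------------
# Hedged sketch of how the "__virtual_actor_readonly__" marker read above maps
# to step types. The class is hypothetical; in practice the marker is set by a
# decorator in the virtual-actor API rather than assigned by hand.
class CounterDemo:
    def add(self, delta: int) -> int:  # becomes StepType.ACTOR_METHOD
        self.n = getattr(self, "n", 0) + delta
        return self.n

    def get(self) -> int:  # becomes StepType.READONLY_ACTOR_METHOD
        return getattr(self, "n", 0)

    get.__virtual_actor_readonly__ = True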
def test_workflow_storage(workflow_start_regular):
    workflow_id = test_workflow_storage.__name__
    wf_storage = workflow_storage.WorkflowStorage(workflow_id)
    step_id = "some_step"
    step_options = WorkflowStepRuntimeOptions.make(step_type=StepType.FUNCTION)
    input_metadata = {
        "name": "test_basic_workflows.append1",
        "workflows": ["def"],
        "workflow_refs": ["some_ref"],
        "step_options": step_options.to_dict(),
    }
    output_metadata = {
        "output_step_id": "a12423",
        "dynamic_output_step_id": "b1234"
    }
    root_output_metadata = {"output_step_id": "c123"}
    flattened_args = [
        signature.DUMMY_TYPE, 1, signature.DUMMY_TYPE, "2", "k", b"543"
    ]
    args = signature.recover_args(flattened_args)
    output = ["the_answer"]
    object_resolved = 42
    obj_ref = ray.put(object_resolved)

    # test basics
    wf_storage._put(
        wf_storage._key_step_input_metadata(step_id), input_metadata, True)
    wf_storage._put(wf_storage._key_step_function_body(step_id), some_func)
    wf_storage._put(wf_storage._key_step_args(step_id), flattened_args)
    wf_storage._put(wf_storage._key_obj_id(obj_ref.hex()), ray.get(obj_ref))
    wf_storage._put(
        wf_storage._key_step_output_metadata(step_id), output_metadata, True)
    wf_storage._put(
        wf_storage._key_step_output_metadata(""), root_output_metadata, True)
    wf_storage._put(wf_storage._key_step_output(step_id), output)

    assert wf_storage.load_step_output(step_id) == output
    assert wf_storage.load_step_args(step_id, [], []) == args
    assert wf_storage.load_step_func_body(step_id)(33) == 34
    assert ray.get(wf_storage.load_object_ref(
        obj_ref.hex())) == object_resolved

    # test s3 path
    # here we hardcode the path to make sure the s3 path is parsed correctly
    from ray._private.storage import _storage_uri

    if _storage_uri.startswith("s3://"):
        assert wf_storage._get("steps/outputs.json", True) == root_output_metadata

    # test "inspect_step"
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        output_object_valid=True)
    assert inspect_result.is_recoverable()

    step_id = "some_step2"
    wf_storage._put(
        wf_storage._key_step_input_metadata(step_id), input_metadata, True)
    wf_storage._put(wf_storage._key_step_function_body(step_id), some_func)
    wf_storage._put(wf_storage._key_step_args(step_id), args)
    wf_storage._put(
        wf_storage._key_step_output_metadata(step_id), output_metadata, True)
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        output_step_id=output_metadata["dynamic_output_step_id"])
    assert inspect_result.is_recoverable()

    step_id = "some_step3"
    wf_storage._put(
        wf_storage._key_step_input_metadata(step_id), input_metadata, True)
    wf_storage._put(wf_storage._key_step_function_body(step_id), some_func)
    wf_storage._put(wf_storage._key_step_args(step_id), args)
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        args_valid=True,
        func_body_valid=True,
        workflows=input_metadata["workflows"],
        workflow_refs=input_metadata["workflow_refs"],
        step_options=step_options,
    )
    assert inspect_result.is_recoverable()

    step_id = "some_step4"
    wf_storage._put(
        wf_storage._key_step_input_metadata(step_id), input_metadata, True)
    wf_storage._put(wf_storage._key_step_function_body(step_id), some_func)
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        func_body_valid=True,
        workflows=input_metadata["workflows"],
        workflow_refs=input_metadata["workflow_refs"],
        step_options=step_options,
    )
    assert not inspect_result.is_recoverable()

    step_id = "some_step5"
    wf_storage._put(
        wf_storage._key_step_input_metadata(step_id), input_metadata, True)
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        workflows=input_metadata["workflows"],
        workflow_refs=input_metadata["workflow_refs"],
        step_options=step_options,
    )
    assert not inspect_result.is_recoverable()

    step_id = "some_step6"
    inspect_result = wf_storage.inspect_step(step_id)
    print(inspect_result)
    assert inspect_result == workflow_storage.StepInspectResult()
    assert not inspect_result.is_recoverable()
def _node_visitor(node: Any) -> Any:
    if isinstance(node, FunctionNode):
        bound_options = node._bound_options.copy()
        num_returns = bound_options.get("num_returns", 1)
        if num_returns is None:  # Ray could use `None` as the default value.
            num_returns = 1
        if num_returns > 1:
            raise ValueError("Workflow steps can only have one return.")

        workflow_options = bound_options.pop("_metadata", {}).get(
            WORKFLOW_OPTIONS, {})

        # If the checkpoint option is not specified, inherit the checkpoint
        # options from the context (i.e. the checkpoint options of the outer
        # step). If it is still not specified, it is True by default.
        checkpoint = workflow_options.get("checkpoint", None)
        if checkpoint is None:
            checkpoint = context.checkpoint if context is not None else True
        # When it returns a nested workflow, catch_exceptions
        # should be passed recursively.
        catch_exceptions = workflow_options.get("catch_exceptions", None)
        if catch_exceptions is None:
            # TODO(suquark): should we also handle exceptions from a "leaf
            #   node" in the continuation? For example, we have a workflow
            #   > @ray.remote
            #   > def A(): pass
            #   > @ray.remote
            #   > def B(x): return x
            #   > @ray.remote
            #   > def C(x): return workflow.continuation(B.bind(A.bind()))
            #   > dag = C.options(**workflow.options(catch_exceptions=True)).bind()
            #   Should C catch exceptions of A?
            if node.get_stable_uuid() == dag_node.get_stable_uuid():
                # The 'catch_exceptions' context should be passed down to
                # its direct continuation task.
                # In this case, the direct continuation is the output node.
                catch_exceptions = (
                    context.catch_exceptions if context is not None else False)
            else:
                catch_exceptions = False

        max_retries = bound_options.get("max_retries", 3)
        if not isinstance(max_retries, int) or max_retries < -1:
            raise ValueError(
                "'max_retries' only accepts 0, -1 or a positive integer.")

        step_options = WorkflowStepRuntimeOptions(
            step_type=StepType.FUNCTION,
            catch_exceptions=catch_exceptions,
            max_retries=max_retries,
            allow_inplace=False,
            checkpoint=checkpoint,
            ray_options=bound_options,
        )

        workflow_refs: List[WorkflowRef] = []
        with serialization_context.workflow_args_serialization_context(
                workflow_refs):
            _func_signature = signature.extract_signature(node._body)
            flattened_args = signature.flatten_args(
                _func_signature, node._bound_args, node._bound_kwargs)
            # NOTE: When calling 'ray.put', we trigger python object
            # serialization. Under our serialization context,
            # workflows are separated from the arguments,
            # leaving a placeholder object with all other python objects.
            # Then we put the placeholder object to the object store,
            # so it won't be mutated later. This guarantees correct
            # semantics. See "tests/test_variable_mutable.py" as
            # an example.
            input_placeholder: ray.ObjectRef = ray.put(flattened_args)

        name = workflow_options.get("name")
        if name is None:
            name = f"{get_module(node._body)}.{slugify(get_qualname(node._body))}"
        task_id = ray.get(mgr.gen_step_id.remote(workflow_id, name))
        state.add_dependencies(task_id, [s.task_id for s in workflow_refs])
        state.task_input_args[task_id] = input_placeholder

        user_metadata = workflow_options.pop("metadata", {})
        validate_user_metadata(user_metadata)
        state.tasks[task_id] = Task(
            name=name,
            options=step_options,
            user_metadata=user_metadata,
            func_body=node._body,
        )
        return WorkflowRef(task_id)

    if isinstance(node, InputAttributeNode):
        return node._execute_impl()  # get data from the input node
    if isinstance(node, InputNode):
        return input_context  # replace the input node with input data
    if not isinstance(node, DAGNode):
        return node  # return normal objects
    raise TypeError(f"Unsupported DAG node: {node}")
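# --- Illustration (not part of the original module) ---------------------------
# A small DAG mirroring the TODO comment above: options set on the outer task
# propagate to its direct continuation through the context handling in
# "_node_visitor". The functions are hypothetical; this assumes the
# "workflow.options(...)" / ".bind()" / "workflow.continuation(...)" API
# referenced in that comment.
import ray
from ray import workflow


@ray.remote
def inner() -> int:
    return 1


@ray.remote
def outer():
    # "inner" is the direct continuation of "outer", so it inherits
    # catch_exceptions (and, if unspecified, checkpoint) from outer's context.
    return workflow.continuation(inner.bind())


dag = outer.options(**workflow.options(catch_exceptions=True, checkpoint=False)).bind()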
def wait(workflows: List[Workflow],
         num_returns: int = 1,
         timeout: Optional[float] = None) -> Workflow[WaitResult]:
    """Return a list of results of the workflows that are ready and a list
    of workflows that are pending.

    Examples:
        >>> tasks = [task.step() for _ in range(3)]
        >>> wait_step = workflow.wait(tasks, num_returns=1)
        >>> print(wait_step.run())
        ([result_1], [<Workflow object>, <Workflow object>])

        >>> tasks = [task.step() for _ in range(2)] + [forever.step()]
        >>> wait_step = workflow.wait(tasks, num_returns=3, timeout=10)
        >>> print(wait_step.run())
        ([result_1, result_2], [<Workflow object>])

    If timeout is set, the function returns either when the requested number
    of workflows are ready or when the timeout is reached, whichever occurs
    first. If it is not set, the function simply waits until that number of
    workflows is ready and returns that exact number of workflows.

    This method returns two lists. The first list consists of workflow
    references that correspond to workflows that are ready. The second list
    corresponds to the rest of the workflows (which may or may not be ready).

    Ordering of the input list of workflows is preserved. That is, if A
    precedes B in the input list, and both are in the ready list, then A will
    precede B in the ready list. This also holds true if A and B are both in
    the remaining list.

    This method will issue a warning if it's running inside an async context.

    Args:
        workflows (List[Workflow]): List of workflows that may or may not be
            ready. Note that these workflows must be unique.
        num_returns (int): The number of workflows that should be returned.
        timeout (float): The maximum amount of time in seconds to wait before
            returning.

    Returns:
        A list of ready workflow results and a list of the remaining
        workflows.
    """
    from ray.workflow import serialization_context
    from ray.workflow.common import WorkflowData

    for w in workflows:
        if not isinstance(w, Workflow):
            raise TypeError("The input of workflow.wait should be a list "
                            "of workflows.")
    wait_inputs = serialization_context.make_workflow_inputs(workflows)
    step_options = WorkflowStepRuntimeOptions.make(
        step_type=StepType.WAIT,
        # Pass the options through Ray options. "num_returns" conflicts with
        # the "num_returns" for Ray remote functions, so we need to wrap it
        # under "wait_options".
        ray_options={
            "wait_options": {
                "num_returns": num_returns,
                "timeout": timeout,
            }
        },
    )
    workflow_data = WorkflowData(func_body=None,
                                 inputs=wait_inputs,
                                 step_options=step_options,
                                 name="workflow.wait",
                                 user_metadata={})
    return Workflow(workflow_data)
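# --- Usage sketch (not part of the original module) ---------------------------
# Following the docstring examples above; "make_int" is a hypothetical step and
# this assumes Ray with the workflow library is installed and initialized.
from ray import workflow


@workflow.step
def make_int(i: int) -> int:
    return i


tasks = [make_int.step(i) for i in range(3)]
# Returns after two of the three steps finish, or after 10 seconds.
ready, remaining = workflow.wait(tasks, num_returns=2, timeout=10).run()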
def test_workflow_storage(workflow_start_regular):
    workflow_id = test_workflow_storage.__name__
    wf_storage = workflow_storage.WorkflowStorage(workflow_id,
                                                  storage.get_global_storage())
    step_id = "some_step"
    step_options = WorkflowStepRuntimeOptions(
        step_type=StepType.FUNCTION,
        catch_exceptions=False,
        max_retries=1,
        ray_options={})
    input_metadata = {
        "name": "test_basic_workflows.append1",
        "workflows": ["def"],
        "workflow_refs": ["some_ref"],
        "step_options": step_options.to_dict(),
    }
    output_metadata = {
        "output_step_id": "a12423",
        "dynamic_output_step_id": "b1234"
    }
    flattened_args = [
        signature.DUMMY_TYPE, 1, signature.DUMMY_TYPE, "2", "k", b"543"
    ]
    args = signature.recover_args(flattened_args)
    output = ["the_answer"]
    object_resolved = 42
    obj_ref = ray.put(object_resolved)

    # test basics
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_input_metadata(step_id), input_metadata,
            True))
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_function_body(step_id), some_func))
    asyncio_run(
        wf_storage._put(wf_storage._key_step_args(step_id), flattened_args))
    asyncio_run(
        wf_storage._put(
            wf_storage._key_obj_id(obj_ref.hex()), ray.get(obj_ref)))
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_output_metadata(step_id), output_metadata,
            True))
    asyncio_run(wf_storage._put(wf_storage._key_step_output(step_id), output))

    assert wf_storage.load_step_output(step_id) == output
    assert wf_storage.load_step_args(step_id, [], []) == args
    assert wf_storage.load_step_func_body(step_id)(33) == 34
    assert ray.get(wf_storage.load_object_ref(
        obj_ref.hex())) == object_resolved

    # test "inspect_step"
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        output_object_valid=True)
    assert inspect_result.is_recoverable()

    step_id = "some_step2"
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_input_metadata(step_id), input_metadata,
            True))
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_function_body(step_id), some_func))
    asyncio_run(wf_storage._put(wf_storage._key_step_args(step_id), args))
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_output_metadata(step_id), output_metadata,
            True))
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        output_step_id=output_metadata["dynamic_output_step_id"])
    assert inspect_result.is_recoverable()

    step_id = "some_step3"
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_input_metadata(step_id), input_metadata,
            True))
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_function_body(step_id), some_func))
    asyncio_run(wf_storage._put(wf_storage._key_step_args(step_id), args))
    inspect_result = wf_storage.inspect_step(step_id)
    step_options = WorkflowStepRuntimeOptions(
        step_type=StepType.FUNCTION,
        catch_exceptions=False,
        max_retries=1,
        ray_options={})
    assert inspect_result == workflow_storage.StepInspectResult(
        args_valid=True,
        func_body_valid=True,
        workflows=input_metadata["workflows"],
        workflow_refs=input_metadata["workflow_refs"],
        step_options=step_options)
    assert inspect_result.is_recoverable()

    step_id = "some_step4"
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_input_metadata(step_id), input_metadata,
            True))
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_function_body(step_id), some_func))
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        func_body_valid=True,
        workflows=input_metadata["workflows"],
        workflow_refs=input_metadata["workflow_refs"],
        step_options=step_options)
    assert not inspect_result.is_recoverable()

    step_id = "some_step5"
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_input_metadata(step_id), input_metadata,
            True))
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        workflows=input_metadata["workflows"],
        workflow_refs=input_metadata["workflow_refs"],
        step_options=step_options)
    assert not inspect_result.is_recoverable()

    step_id = "some_step6"
    inspect_result = wf_storage.inspect_step(step_id)
    print(inspect_result)
    assert inspect_result == workflow_storage.StepInspectResult()
    assert not inspect_result.is_recoverable()