def wait(self, num_returns: int = 1, timeout: Optional[float] = None
         ) -> Tuple[List[Workflow], List[Workflow]]:
    """Split the output workflows into a ready list and a not-ready list.

    See `api.wait()` for details.

    Args:
        num_returns (int): The number of workflows that should be returned.
        timeout (float): The maximum amount of time in seconds to wait
            before returning.

    Returns:
        A list of workflows that are ready and a list of the remaining
        workflows.
    """
    # Dynamic workflow refs (typically produced by virtual actors) are not
    # supported by wait, so reject them up front.
    if self.workflow_refs:
        raise ValueError("Currently, we do not support wait operations "
                         "on dynamic workflow refs. They are typically "
                         "generated by virtual actors.")
    # Map each underlying object ref back to its static workflow output so
    # we can translate ray.wait's results into Workflow objects.
    outputs_by_ref = {out.ref: out for out in self.workflow_outputs}
    ready_refs, pending_refs = ray.wait(
        list(outputs_by_ref), num_returns=num_returns, timeout=timeout)
    ready = [Workflow.from_ref(outputs_by_ref[r]) for r in ready_refs]
    pending = [Workflow.from_ref(outputs_by_ref[r]) for r in pending_refs]
    return ready, pending
def _reconstruct_wait_step(
        reader: workflow_storage.WorkflowStorage,
        result: workflow_storage.StepInspectResult,
        input_map: Dict[StepID, Any],
):
    """Reconstruct a 'workflow.wait' step from its inspected storage record.

    Each input step of the wait step is either resumed as a workflow, or — if
    its output is already checkpointed — loaded and wrapped in a static-ref
    workflow so 'workflow.wait' can consume it uniformly.

    Args:
        reader: Storage reader for the workflow being recovered.
        result: The inspection result of the wait step.
        input_map: Cache of already-loaded step inputs, keyed by step ID;
            mutated in place to deduplicate work across steps.

    Returns:
        A reconstructed 'workflow.wait' workflow over the input steps.
    """
    input_workflows = []
    wait_options = result.step_options.ray_options.get("wait_options", {})
    # Fix: the loop index from enumerate() was never used; iterate directly.
    for _step_id in result.workflows:
        # Check whether the step has been loaded or not to avoid duplication.
        if _step_id in input_map:
            r = input_map[_step_id]
        else:
            # NOTE(review): this passes (reader, step_id, input_map), but the
            # `_construct_resume_workflow_from_step` visible in this file
            # takes (workflow_id, step_id) — confirm which revision of that
            # helper this function is meant to call.
            r = _construct_resume_workflow_from_step(reader, _step_id,
                                                     input_map)
            input_map[_step_id] = r
        if isinstance(r, Workflow):
            input_workflows.append(r)
        else:
            assert isinstance(r, StepID)
            # TODO (Alex): We should consider caching these outputs too.
            output = reader.load_step_output(r)
            # Simulate a workflow with a workflow reference so it could be
            # used directly by 'workflow.wait'.
            static_ref = WorkflowStaticRef(step_id=r, ref=ray.put(output))
            wf = Workflow.from_ref(static_ref)
            input_workflows.append(wf)

    from ray import workflow
    return workflow.wait(input_workflows, **wait_options)
def _construct_resume_workflow_from_step(
        workflow_id: str, step_id: StepID) -> Union[Workflow, Any]:
    """Try to construct a workflow (step) that recovers the workflow step.

    If the workflow step already has an output checkpointing file, we return
    the checkpointed output instead of a recovery workflow.

    Args:
        workflow_id: The ID of the workflow.
        step_id: The ID of the step we want to recover.

    Returns:
        A workflow that recovers the step, or the output of the step
        if it has been checkpointed.

    Raises:
        WorkflowStepNotRecoverableError: If any step reachable from
            `step_id` cannot be recovered from storage.
    """
    reader = workflow_storage.WorkflowStorage(workflow_id)

    # Step 1: construct dependency of the DAG (BFS).
    # Fix: renamed the misspelled local `inpsect_results` -> `inspect_results`.
    inspect_results = {}
    dependency_map = defaultdict(list)
    num_in_edges = {}

    dag_visit_queue = deque([step_id])
    while dag_visit_queue:
        s: StepID = dag_visit_queue.popleft()
        if s in inspect_results:
            continue
        r = reader.inspect_step(s)
        inspect_results[s] = r
        if not r.is_recoverable():
            raise WorkflowStepNotRecoverableError(s)
        # Determine which steps this step depends on:
        # - a checkpointed output needs nothing;
        # - a forwarded output depends on that single step;
        # - otherwise it depends on all of its input workflows.
        if r.output_object_valid:
            deps = []
        elif isinstance(r.output_step_id, str):
            deps = [r.output_step_id]
        else:
            deps = r.workflows
        for w in deps:
            dependency_map[w].append(s)
        num_in_edges[s] = len(deps)
        dag_visit_queue.extend(deps)

    # Step 2: topological sort to determine the execution order
    # (Kahn's algorithm).
    execution_queue: List[StepID] = []

    start_nodes = deque(k for k, v in num_in_edges.items() if v == 0)
    while start_nodes:
        n = start_nodes.popleft()
        execution_queue.append(n)
        for m in dependency_map[n]:
            num_in_edges[m] -= 1
            assert num_in_edges[m] >= 0, (m, n)
            if num_in_edges[m] == 0:
                start_nodes.append(m)

    # Step 3: recover the workflow by the order of the execution queue.
    with serialization.objectref_cache():
        # "input_map" is a context storing the input which has been loaded.
        # This context is important for deduplicating step inputs.
        input_map: Dict[StepID, Any] = {}

        for _step_id in execution_queue:
            result = inspect_results[_step_id]
            # Case 1: the step output is checkpointed — load it directly.
            if result.output_object_valid:
                input_map[_step_id] = reader.load_step_output(_step_id)
                continue
            # Case 2: the step forwards the output of another step, which
            # has already been processed (topological order guarantees it).
            if isinstance(result.output_step_id, str):
                input_map[_step_id] = input_map[result.output_step_id]
                continue
            # Process the wait step as a special case.
            if result.step_options.step_type == StepType.WAIT:
                wait_input_workflows = []
                for w in result.workflows:
                    output = input_map[w]
                    if isinstance(output, Workflow):
                        wait_input_workflows.append(output)
                    else:
                        # Simulate a workflow with a workflow reference so it
                        # could be used directly by 'workflow.wait'.
                        static_ref = WorkflowStaticRef(
                            step_id=w, ref=ray.put(output))
                        wait_input_workflows.append(
                            Workflow.from_ref(static_ref))
                recovery_workflow = ray.workflow.wait(
                    wait_input_workflows,
                    **result.step_options.ray_options.get("wait_options", {}),
                )
            else:
                args, kwargs = reader.load_step_args(
                    _step_id,
                    workflows=[input_map[w] for w in result.workflows],
                    workflow_refs=list(map(WorkflowRef, result.workflow_refs)),
                )
                func: Callable = reader.load_step_func_body(_step_id)
                # TODO(suquark): Use an alternative function when
                # "workflow.step" is fully deprecated.
                recovery_workflow = ray.workflow.step(func).step(
                    *args, **kwargs)
            # Override step_options so the recovered step keeps the
            # original step's identity and options.
            recovery_workflow._step_id = _step_id
            recovery_workflow.data.step_options = result.step_options
            input_map[_step_id] = recovery_workflow

    # Step 4: return the output of the requested step.
    return input_map[step_id]