def update_step_status(self, workflow_id: str, step_id: str,
                       status: common.WorkflowStatus):
    """Record a step's new status and finalize the workflow when due.

    A step reported as SUCCESSFUL is dropped from the workflow's tracked
    steps; any other status is stored for that step. Unless the status is
    RUNNING, the step's entry in the step output cache is discarded.

    The workflow is finalized when a step reports FAILED or when no
    tracked steps remain: the final status is written to storage and the
    workflow's step table is removed. On FAILED, the tracked workflow
    output (if there is one) is removed and cancelled as well.

    Args:
        workflow_id: The ID of the workflow the step belongs to.
        step_id: The ID of the step whose status changed.
        status: The new status of the step.
    """
    statuses = common.WorkflowStatus

    # Note: For virtual actor, we could add more steps even if
    # the workflow finishes, so the step table is created on demand.
    pending = self._step_status.setdefault(workflow_id, {})
    if status == statuses.SUCCESSFUL:
        pending.pop(step_id, None)
    else:
        pending[step_id] = status

    if status != statuses.RUNNING:
        self._step_output_cache.pop((workflow_id, step_id), None)

    has_failed = status == statuses.FAILED
    if pending and not has_failed:
        # Other steps are still tracked and nothing failed: not done yet.
        return

    storage = workflow_storage.WorkflowStorage(workflow_id, self._store)
    if has_failed:
        if workflow_id in self._workflow_outputs:
            cancel_job(self._workflow_outputs.pop(workflow_id).output)
        final_status = statuses.FAILED
    else:
        # No tracked steps are left, so the last update was SUCCESSFUL.
        final_status = statuses.SUCCESSFUL
    storage.save_workflow_meta(common.WorkflowMetaData(final_status))
    self._step_status.pop(workflow_id)
def get_output(self, workflow_id: str) -> "ray.ObjectRef":
    """Return the output of a workflow, resuming it when it is not tracked.

    If an output is already tracked for the workflow it is returned
    directly. Otherwise the workflow metadata is loaded from storage, the
    workflow is resumed from its entrypoint step, the new output is
    tracked, and the workflow is marked as RUNNING in storage.

    Args:
        workflow_id: The ID of a workflow job.

    Returns:
        An object reference that can be used to retrieve the workflow
        result.

    Raises:
        ValueError: If no metadata is stored for the workflow, or the
            loaded metadata compares equal to the FAILED status.
    """
    if workflow_id in self._workflow_outputs:
        tracked = self._workflow_outputs[workflow_id]
        return tracked.output

    storage = workflow_storage.WorkflowStorage(workflow_id, self._store)
    metadata = storage.load_workflow_meta()
    if metadata is None:
        raise ValueError(f"No such workflow {workflow_id}")
    # NOTE(review): the loaded metadata is compared directly against a
    # WorkflowStatus member, while writes go through WorkflowMetaData;
    # confirm load_workflow_meta() returns something comparable to it.
    if metadata == common.WorkflowStatus.FAILED:
        raise ValueError(
            f"Workflow {workflow_id} failed, please resume it")

    entry_step_id = storage.get_entrypoint_step_id()
    resumed = recovery.resume_workflow_step(
        workflow_id, entry_step_id, self._store.storage_url)
    # "persisted_output" is the return value of a step or the state of
    # a virtual actor.
    persisted = resumed.persisted_output
    self._workflow_outputs[workflow_id] = LatestWorkflowOutput(
        persisted, workflow_id, entry_step_id)

    # A second storage handle is built for the metadata write
    # (NOTE(review): presumably the first handle could be reused).
    meta_writer = workflow_storage.WorkflowStorage(workflow_id, self._store)
    meta_writer.save_workflow_meta(
        common.WorkflowMetaData(common.WorkflowStatus.RUNNING))
    self._step_status.setdefault(workflow_id, {})
    return persisted
def run_or_resume(self, workflow_id: str, ignore_existing: bool = False
                  ) -> "WorkflowExecutionResult":
    """Run or resume a workflow from its entrypoint step.

    The resumed output is tracked both as the workflow's latest output
    and in the step output cache, and the workflow is marked as RUNNING
    in storage.

    Args:
        workflow_id: The ID of the workflow.
        ignore_existing: When False, refuse to start if an output is
            already tracked for this workflow ID. When True, the existing
            output is replaced.

    Returns:
        Workflow execution result that contains the state and output.

    Raises:
        RuntimeError: If an output is already tracked for the workflow
            and ``ignore_existing`` is False.
    """
    already_tracked = workflow_id in self._workflow_outputs
    if already_tracked and not ignore_existing:
        raise RuntimeError(f"The output of workflow[id={workflow_id}] "
                           "already exists.")

    storage = workflow_storage.WorkflowStorage(workflow_id, self._store)
    entry_step_id = storage.get_entrypoint_step_id()
    execution = recovery.resume_workflow_step(
        workflow_id, entry_step_id, self._store.storage_url)

    tracked = LatestWorkflowOutput(
        execution.persisted_output, workflow_id, entry_step_id)
    self._workflow_outputs[workflow_id] = tracked
    self._step_output_cache[workflow_id, entry_step_id] = tracked

    running = common.WorkflowMetaData(common.WorkflowStatus.RUNNING)
    storage.save_workflow_meta(running)

    # NOTE(review): the start message is emitted only when the step table
    # is first created here - confirm this matches the intended nesting.
    if workflow_id not in self._step_status:
        self._step_status[workflow_id] = {}
        logger.info(f"Workflow job [id={workflow_id}] started.")
    return execution
def update_step_status(self, workflow_id: str, step_id: str,
                       status: common.WorkflowStatus):
    """Record a step's new status and finalize the workflow when due.

    A step reported as FINISHED is dropped from the workflow's tracked
    steps; any other status is stored for that step. The workflow is
    finalized when a step reports RESUMABLE or when no tracked steps
    remain: the final status is written to storage and the workflow's
    step table is removed. On RESUMABLE, the tracked workflow output (if
    there is one) is removed and cancelled as well.

    Args:
        workflow_id: The ID of the workflow the step belongs to.
        step_id: The ID of the step whose status changed.
        status: The new status of the step.
    """
    statuses = common.WorkflowStatus

    if status == statuses.FINISHED:
        # NOTE(review): this lookup fails (presumably with KeyError) when
        # the workflow has no step table - confirm that steps never
        # report FINISHED after the workflow has been finalized.
        pending = self._step_status[workflow_id]
        pending.pop(step_id, None)
    else:
        pending = self._step_status.setdefault(workflow_id, {})
        pending[step_id] = status

    needs_resume = status == statuses.RESUMABLE
    if pending and not needs_resume:
        # Other steps are still tracked and nothing failed: not done yet.
        return

    storage = workflow_storage.WorkflowStorage(workflow_id, self._store)
    if needs_resume:
        if workflow_id in self._workflow_outputs:
            cancel_job(self._workflow_outputs.pop(workflow_id))
        final_status = statuses.RESUMABLE
    else:
        # No tracked steps are left, so the last update was FINISHED.
        final_status = statuses.FINISHED
    storage.save_workflow_meta(common.WorkflowMetaData(final_status))
    self._step_status.pop(workflow_id)
def run_or_resume(self, workflow_id: str) -> ray.ObjectRef:
    """Start or resume a workflow job and begin tracking it.

    The job is launched remotely, its output reference is tracked under
    the workflow ID, the workflow is marked as RUNNING in storage, and an
    empty step table is created for it.

    Args:
        workflow_id: The ID of the workflow.

    Returns:
        An object reference that can be used to retrieve the workflow
        result.

    Raises:
        ValueError: If an output is already tracked for the workflow.
    """
    if workflow_id in self._workflow_outputs:
        raise ValueError(f"The output of workflow[id={workflow_id}] "
                         "already exists.")

    job_ref = recovery.resume_workflow_job.remote(
        workflow_id, self._store.storage_url)
    self._workflow_outputs[workflow_id] = job_ref

    storage = workflow_storage.WorkflowStorage(workflow_id, self._store)
    running = common.WorkflowMetaData(common.WorkflowStatus.RUNNING)
    storage.save_workflow_meta(running)

    # Start from a clean step table for this run.
    self._step_status[workflow_id] = {}
    logger.info(f"Workflow job [id={workflow_id}] started.")
    return job_ref
def cancel_workflow(self, workflow_id: str) -> None:
    """Cancel a tracked workflow and mark it as CANCELED in storage.

    The workflow's step table and tracked output are removed, the job
    behind the output is cancelled, and the CANCELED status is persisted.

    Both removals use ``pop`` without a default, so the call fails
    (presumably with KeyError) when the workflow is not tracked; the step
    table is removed before the output lookup is attempted.

    Args:
        workflow_id: The ID of the workflow to cancel.
    """
    self._step_status.pop(workflow_id)
    tracked = self._workflow_outputs.pop(workflow_id)
    cancel_job(tracked.output)

    storage = workflow_storage.WorkflowStorage(workflow_id, self._store)
    canceled = common.WorkflowMetaData(common.WorkflowStatus.CANCELED)
    storage.save_workflow_meta(canceled)