Пример #1
0
def _resume_workflow_step_executor(
    job_id: str,
    workflow_id: str,
    step_id: "StepID",
    current_output: [ray.ObjectRef],
) -> Tuple[ray.ObjectRef, ray.ObjectRef]:
    """Rebuild a workflow from a step and execute it.

    Args:
        job_id: ID of the job; used to enter the workflow logging context.
        workflow_id: ID of the workflow being resumed.
        step_id: ID of the step the workflow is reconstructed from.
        current_output: Object refs that are waited on (best effort) before
            the workflow is reconstructed.

    Returns:
        ``(persisted_output, volatile_output)`` of the executed workflow when
        reconstruction yields a ``Workflow``; otherwise ``(r, None)`` where
        ``r`` is whatever reconstruction returned.

    Raises:
        WorkflowNotResumableError: If reconstructing the workflow fails.
    """
    with workflow_context.workflow_logging_context(job_id):
        # TODO (yic): We need better dependency management for virtual actor.
        # For a normal workflow ``current_output`` is always empty. For a
        # virtual actor a non-empty value means the previous job is still
        # running, so we wait for it here. This is a really bad one.
        for pending in current_output:
            try:
                # Keep unwrapping while the resolved value is itself a ref.
                while isinstance(pending, ray.ObjectRef):
                    pending = ray.get(pending)
            except Exception:
                # Failures of the previous job are deliberately ignored.
                pass

        try:
            r = _construct_resume_workflow_from_step(workflow_id, step_id)
        except Exception as e:
            raise WorkflowNotResumableError(workflow_id) from e

        if isinstance(r, Workflow):
            with workflow_context.workflow_step_context(
                    workflow_id, last_step_of_workflow=True):
                from ray.workflow.step_executor import execute_workflow

                execution = execute_workflow(job_id, r)
                return execution.persisted_output, execution.volatile_output
        # Not a workflow: hand the reconstructed value back as-is.
        return r, None
Пример #2
0
def _resume_workflow_step_executor(workflow_id: str, step_id: "StepID",
                                   store_url: str, current_output: [
                                       ray.ObjectRef
                                   ]) -> Tuple[ray.ObjectRef, ray.ObjectRef]:
    """Rebuild a workflow from a step in the given storage and execute it.

    Args:
        workflow_id: ID of the workflow being resumed.
        step_id: ID of the step the workflow is reconstructed from.
        store_url: URL passed to ``storage.create_storage``.
        current_output: Object refs that are waited on (best effort) before
            the workflow is reconstructed.

    Returns:
        ``(persisted_output, volatile_output)`` of the executed workflow when
        reconstruction yields a ``Workflow``; otherwise the stored output of
        the returned step ID paired with ``None``.

    Raises:
        WorkflowNotResumableError: If creating the storage or reconstructing
            the workflow fails.
    """
    # TODO (yic): We need better dependency management for virtual actor.
    # For a normal workflow ``current_output`` is always empty. For a virtual
    # actor a non-empty value means the previous job is still running, so we
    # wait for it here. This is a really bad one.
    for pending in current_output:
        try:
            # Keep unwrapping while the resolved value is itself a ref.
            while isinstance(pending, ray.ObjectRef):
                pending = ray.get(pending)
        except Exception:
            # Failures of the previous job are deliberately ignored.
            pass

    try:
        store = storage.create_storage(store_url)
        wf_store = workflow_storage.WorkflowStorage(workflow_id, store)
        r = _construct_resume_workflow_from_step(wf_store, step_id)
    except Exception as e:
        raise WorkflowNotResumableError(workflow_id) from e

    if not isinstance(r, Workflow):
        # Reconstruction returned a step ID: load that step's output.
        assert isinstance(r, StepID)
        return wf_store.load_step_output(r), None

    with workflow_context.workflow_step_context(workflow_id,
                                                store.storage_url):
        from ray.workflow.step_executor import (execute_workflow)
        execution = execute_workflow(r, last_step_of_workflow=True)
        return execution.persisted_output, execution.volatile_output
Пример #3
0
def run(entry_workflow: Workflow,
        workflow_id: Optional[str] = None,
        metadata: Optional[Dict] = None) -> ray.ObjectRef:
    """Run a workflow asynchronously.

    Args:
        entry_workflow: The entry workflow to run.
        workflow_id: ID of the workflow. When ``None`` an ID of the form
            ``{uuid4}.{unix time with 9 decimals}`` is generated.
        metadata: Optional user metadata. Must be a dict whose values are
            JSON serializable; ``None`` is treated as an empty dict.

    Returns:
        The flattened workflow output: the persisted output when the entry
        step type is ``StepType.FUNCTION``, the volatile output otherwise.

    Raises:
        ValueError: If ``metadata`` is not a dict, or one of its values is
            not JSON serializable (chained to the underlying ``TypeError``).
    """
    if metadata is not None:
        if not isinstance(metadata, dict):
            raise ValueError("metadata must be a dict.")
        for k, v in metadata.items():
            try:
                json.dumps(v)
            except TypeError as e:
                # Chain explicitly so the traceback shows the serialization
                # failure as the direct cause of the validation error.
                raise ValueError("metadata values must be JSON serializable, "
                                 "however '{}' has a value whose {}.".format(
                                     k, e)) from e
    metadata = metadata or {}

    store = get_global_storage()
    assert ray.is_initialized()
    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{str(uuid.uuid4())}.{time.time():.9f}"

    logger.info(
        f"Workflow job created. [id=\"{workflow_id}\", storage_url="
        f"\"{store.storage_url}\"]. Type: {entry_workflow.data.step_type} ")

    with workflow_context.workflow_step_context(workflow_id,
                                                store.storage_url):
        # checkpoint the workflow
        ws = workflow_storage.get_workflow_storage(workflow_id)
        ws.save_workflow_user_metadata(metadata)

        # Any failure to read the entrypoint step ID is treated as
        # "the workflow does not exist yet".
        wf_exists = True
        try:
            ws.get_entrypoint_step_id()
        except Exception:
            wf_exists = False

        # We only commit for
        #  - virtual actor tasks: it's dynamic tasks, so we always add
        #  - it's a new workflow
        # TODO (yic): follow up with force rerun
        if entry_workflow.data.step_type != StepType.FUNCTION or not wf_exists:
            commit_step(ws, "", entry_workflow, exception=None)
        workflow_manager = get_or_create_management_actor()
        ignore_existing = (entry_workflow.data.step_type != StepType.FUNCTION)
        # NOTE: It is important to 'ray.get' the returned output. This
        # ensures caller of 'run()' holds the reference to the workflow
        # result. Otherwise if the actor removes the reference of the
        # workflow output, the caller may fail to resolve the result.
        result: "WorkflowExecutionResult" = ray.get(
            workflow_manager.run_or_resume.remote(workflow_id,
                                                  ignore_existing))
        if entry_workflow.data.step_type == StepType.FUNCTION:
            return flatten_workflow_output(workflow_id,
                                           result.persisted_output)
        else:
            return flatten_workflow_output(workflow_id, result.volatile_output)
Пример #4
0
def run(
    entry_workflow: Workflow,
    workflow_id: Optional[str] = None,
    metadata: Optional[Dict] = None,
) -> ray.ObjectRef:
    """Run a workflow asynchronously.

    Args:
        entry_workflow: The entry workflow to run.
        workflow_id: ID of the workflow. When ``None`` an ID of the form
            ``{uuid4}.{unix time with 9 decimals}`` is generated.
        metadata: Optional user metadata, checked by
            ``validate_user_metadata``; ``None`` is treated as an empty dict.

    Returns:
        The flattened workflow output: the volatile output when the workflow
        is "growing" (entry step type is neither ``FUNCTION`` nor ``WAIT``),
        the persisted output otherwise.
    """
    validate_user_metadata(metadata)
    metadata = metadata or {}

    from ray.workflow.api import _ensure_workflow_initialized

    _ensure_workflow_initialized()

    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        entry_uuid = uuid.uuid4()
        workflow_id = f"{str(entry_uuid)}.{time.time():.9f}"
    step_type = entry_workflow.data.step_options.step_type

    logger.info(
        f'Workflow job created. [id="{workflow_id}"]. Type: {step_type}.')

    with workflow_context.workflow_step_context(workflow_id):
        # Checkpoint the user metadata for this workflow.
        ws = workflow_storage.get_workflow_storage(workflow_id)
        ws.save_workflow_user_metadata(metadata)

        # Any failure to read the entrypoint step ID is treated as
        # "the workflow does not exist yet".
        try:
            ws.get_entrypoint_step_id()
        except Exception:
            wf_exists = False
        else:
            wf_exists = True

        # A "growing" workflow may have steps added to its top level
        # dynamically at runtime.
        is_growing = step_type not in (StepType.FUNCTION, StepType.WAIT)

        # The entry workflow is committed when
        #  - it is a virtual actor task (dynamic, so always added), or
        #  - the workflow is new.
        # TODO (yic): follow up with force rerun
        if is_growing or not wf_exists:
            commit_step(ws, "", entry_workflow, exception=None)
        manager = get_or_create_management_actor()
        # NOTE: It is important to 'ray.get' the returned output. This
        # ensures the caller of 'run()' holds the reference to the workflow
        # result. Otherwise, if the actor drops its reference to the
        # workflow output, the caller may fail to resolve the result.
        job_id = ray.get_runtime_context().job_id.hex()
        result: "WorkflowExecutionResult" = ray.get(
            manager.run_or_resume.remote(job_id, workflow_id, is_growing))
        output = (result.volatile_output
                  if is_growing else result.persisted_output)
        return flatten_workflow_output(workflow_id, output)
Пример #5
0
 def _actor_method_call(self, method_helper: _VirtualActorMethodHelper,
                        args, kwargs) -> "ObjectRef":
     """Build a workflow step from an actor method and launch it.

     Args:
         method_helper: Helper whose ``step`` builds the workflow and whose
             ``readonly`` flag selects how it is executed.
         args: Positional arguments forwarded to ``method_helper.step``.
         kwargs: Keyword arguments forwarded to ``method_helper.step``.

     Returns:
         The volatile output of ``execute_workflow`` for readonly methods;
         otherwise the result of ``run_async`` keyed by this actor's ID.
     """
     step_ctx = workflow_context.workflow_step_context(
         self._actor_id, self._storage.storage_url)
     with step_ctx:
         workflow = method_helper.step(*args, **kwargs)
         if not method_helper.readonly:
             return workflow.run_async(self._actor_id)
         return execute_workflow(workflow).volatile_output
Пример #6
0
def _workflow_step_executor(
    func: Callable,
    context: "WorkflowStepContext",
    task_id: "TaskID",
    baked_inputs: "_BakedWorkflowInputs",
    runtime_options: "WorkflowStepRuntimeOptions",
) -> Tuple[Any, Any]:
    """Execute a single workflow step inside its step context.

    Args:
        func: The workflow step function.
        context: Workflow step context entered for the whole execution.
        task_id: ID of the step.
        baked_inputs: The processed inputs; resolved against the workflow
            storage to obtain the call arguments.
        runtime_options: Parameters for workflow step execution, including
            the checkpoint mode.

    Returns:
        A pair ``(execution_metadata, output)``. When the step function
        returns a ``DAGNode``, ``output`` is the workflow state built from it
        and the metadata is created with ``is_output_workflow=True``.

    Raises:
        Exception: Any exception raised while saving run metadata or running
            the step is checkpointed as the step output and re-raised.
    """
    with workflow_context.workflow_step_context(context):
        wf_store = workflow_storage.get_workflow_storage()

        # Stage 1: resolve the inputs.
        step_args, step_kwargs = baked_inputs.resolve(wf_store)

        # Stage 2: run the step, recording start/end timestamps.
        try:
            prerun = {"start_time": time.time()}
            wf_store.save_step_prerun_metadata(task_id, prerun)
            with workflow_context.workflow_execution():
                output = _wrap_run(func, runtime_options, *step_args,
                                   **step_kwargs)
            postrun = {"end_time": time.time()}
            wf_store.save_step_postrun_metadata(task_id, postrun)
        except Exception as e:
            # Always checkpoint the exception.
            wf_store.save_step_output(task_id, None, exception=e)
            raise e

        if not isinstance(output, DAGNode):
            execution_metadata = WorkflowExecutionMetadata()
        else:
            # The step returned a DAG: convert it to a workflow state.
            output = workflow_state_from_dag(output, None, context.workflow_id)
            execution_metadata = WorkflowExecutionMetadata(
                is_output_workflow=True)

        # Stage 3: persist the output (synchronous checkpointing only).
        # TODO(suquark): Validate checkpoint options before commit the task.
        checkpoint_now = (
            CheckpointMode(runtime_options.checkpoint) == CheckpointMode.SYNC)
        if checkpoint_now:
            if isinstance(output, WorkflowExecutionState):
                wf_store.save_workflow_execution_state(task_id, output)
            else:
                wf_store.save_step_output(task_id, output, exception=None)
        return execution_metadata, output
Пример #7
0
 def _actor_method_call(self, method_name: str, args,
                        kwargs) -> "ObjectRef":
     """Look up an actor method by name and launch it as a workflow.

     Args:
         method_name: Name of the method on the actor class.
         args: Positional arguments forwarded to the method's ``step``.
         kwargs: Keyword arguments forwarded to the method's ``step``.

     Returns:
         The volatile output of ``execute_workflow`` when the method carries
         a truthy ``__virtual_actor_readonly__`` attribute; otherwise the
         result of ``run_async`` keyed by this actor's ID.

     Raises:
         AttributeError: If the actor class has no such method.
     """
     method = getattr(self._metadata.cls, method_name, None)
     if method is None:
         raise AttributeError(f"Method '{method_name}' does not exist.")

     step_ctx = workflow_context.workflow_step_context(
         self._actor_id, self._storage.storage_url)
     with step_ctx:
         workflow = method.step(*args, **kwargs)
         is_readonly = getattr(method, "__virtual_actor_readonly__", False)
         if not is_readonly:
             return workflow.run_async(self._actor_id)
         return execute_workflow(workflow).volatile_output
Пример #8
0
def run(
    dag: DAGNode,
    dag_inputs: DAGInputData,
    workflow_id: Optional[str] = None,
    metadata: Optional[Dict] = None,
) -> ray.ObjectRef:
    """Run a workflow asynchronously.

    Args:
        dag: The DAG the workflow state is built from.
        dag_inputs: Input data passed along with ``dag`` when building the
            workflow state.
        workflow_id: ID of the workflow. When ``None`` an ID of the form
            ``{uuid4}.{unix time with 9 decimals}`` is generated.
        metadata: Optional user metadata, checked by
            ``validate_user_metadata``; ``None`` is treated as an empty dict.

    Returns:
        The result of ``resume(workflow_id)`` when the workflow already has
        an entrypoint step; otherwise the ref returned by the management
        actor's ``execute_workflow``.

    Raises:
        RuntimeError: If the management actor reports the workflow as
            already running.
    """
    validate_user_metadata(metadata)
    metadata = metadata or {}

    from ray.workflow.api import _ensure_workflow_initialized

    _ensure_workflow_initialized()

    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        entry_uuid = uuid.uuid4()
        workflow_id = f"{str(entry_uuid)}.{time.time():.9f}"

    state = workflow_state_from_dag(dag, dag_inputs, workflow_id)
    logger.info(f'Workflow job created. [id="{workflow_id}"].')

    context = workflow_context.WorkflowStepContext(workflow_id=workflow_id)
    with workflow_context.workflow_step_context(context):
        # Checkpoint the user metadata for this workflow.
        ws = workflow_storage.get_workflow_storage(workflow_id)
        ws.save_workflow_user_metadata(metadata)

        job_id = ray.get_runtime_context().job_id.hex()

        try:
            ws.get_entrypoint_step_id()
        except Exception:
            # The workflow does not exist. We must checkpoint entry workflow.
            ws.save_workflow_execution_state("", state)
            wf_exists = False
        else:
            wf_exists = True

        manager = get_or_create_management_actor()
        already_running = ray.get(
            manager.is_workflow_running.remote(workflow_id))
        if already_running:
            raise RuntimeError(f"Workflow '{workflow_id}' is already running.")
        if wf_exists:
            return resume(workflow_id)

        no_status_yet = ws.load_workflow_status() == WorkflowStatus.NONE
        submission = manager.submit_workflow.remote(
            workflow_id, state, ignore_existing=no_status_yet)
        # Block until the submission has been accepted before executing.
        ray.get(submission)
        return manager.execute_workflow.remote(job_id, context)
Пример #9
0
def run(entry_workflow: Workflow,
        workflow_id: Optional[str] = None) -> ray.ObjectRef:
    """Run a workflow asynchronously.

    Args:
        entry_workflow: The entry workflow to run.
        workflow_id: ID of the workflow. When ``None`` an ID of the form
            ``{uuid4}.{unix time with 9 decimals}`` is generated.

    Returns:
        The flattened workflow output: the persisted output when the entry
        step type is ``StepType.FUNCTION``, the volatile output otherwise.
    """
    store = get_global_storage()
    assert ray.is_initialized()
    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        entry_uuid = uuid.uuid4()
        workflow_id = f"{str(entry_uuid)}.{time.time():.9f}"

    logger.info(f"Workflow job created. [id=\"{workflow_id}\", storage_url="
                f"\"{store.storage_url}\"].")

    with workflow_context.workflow_step_context(workflow_id,
                                                store.storage_url):
        ws = workflow_storage.get_workflow_storage(workflow_id)

        # Any failure to read the entrypoint step ID is treated as
        # "the workflow does not exist yet".
        try:
            ws.get_entrypoint_step_id()
        except Exception:
            wf_exists = False
        else:
            wf_exists = True

        not_function_step = (
            entry_workflow.data.step_type != StepType.FUNCTION)

        # The entry workflow is committed when
        #  - it is a virtual actor task (dynamic, so always added), or
        #  - the workflow is new.
        # TODO (yic): follow up with force rerun
        if not_function_step or not wf_exists:
            commit_step(ws, "", entry_workflow, None)
        manager = get_or_create_management_actor()
        # NOTE: It is important to 'ray.get' the returned output. This
        # ensures the caller of 'run()' holds the reference to the workflow
        # result. Otherwise, if the actor drops its reference to the
        # workflow output, the caller may fail to resolve the result.
        result: "WorkflowExecutionResult" = ray.get(
            manager.run_or_resume.remote(workflow_id, not_function_step))
        output = (result.volatile_output
                  if not_function_step else result.persisted_output)
        return flatten_workflow_output(workflow_id, output)
Пример #10
0
def run_async(
    dag: DAGNode,
    *args,
    workflow_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    **kwargs,
) -> ray.ObjectRef:
    """Run a workflow asynchronously.

    If a workflow with the given id already exists, it is resumed instead.

    Args:
        dag: The DAG to run as a workflow. Must be a ``DAGNode``.
        *args: Positional inputs, packed into ``DAGInputData``.
        workflow_id: A unique identifier that can be used to resume the
            workflow. If not specified, a random id will be generated.
        metadata: The metadata to add to the workflow. It has to be
            serializable to JSON.
        **kwargs: Keyword inputs, packed into ``DAGInputData``.

    Returns:
        The running result as ray.ObjectRef.

    Raises:
        TypeError: If ``dag`` is not a ``DAGNode``.
        RuntimeError: If the management actor reports the workflow as
            non-terminating (already running or pending).
    """
    _ensure_workflow_initialized()
    if not isinstance(dag, DAGNode):
        raise TypeError("Input should be a DAG.")
    input_data = DAGInputData(*args, **kwargs)
    validate_user_metadata(metadata)
    metadata = metadata or {}

    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        entry_uuid = uuid.uuid4()
        workflow_id = f"{str(entry_uuid)}.{time.time():.9f}"

    state = workflow_state_from_dag(dag, input_data, workflow_id)
    logger.info(f'Workflow job created. [id="{workflow_id}"].')

    context = workflow_context.WorkflowStepContext(workflow_id=workflow_id)
    with workflow_context.workflow_step_context(context):
        # Checkpoint the user metadata for this workflow.
        ws = WorkflowStorage(workflow_id)
        ws.save_workflow_user_metadata(metadata)

        job_id = ray.get_runtime_context().job_id.hex()

        try:
            ws.get_entrypoint_step_id()
        except Exception:
            # The workflow does not exist. We must checkpoint entry workflow.
            ws.save_workflow_execution_state("", state)
            wf_exists = False
        else:
            wf_exists = True

        manager = workflow_access.get_management_actor()
        still_active = ray.get(
            manager.is_workflow_non_terminating.remote(workflow_id))
        if still_active:
            raise RuntimeError(
                f"Workflow '{workflow_id}' is already running or pending.")
        if wf_exists:
            return resume_async(workflow_id)

        no_status_yet = ws.load_workflow_status() == WorkflowStatus.NONE
        submission = manager.submit_workflow.remote(
            workflow_id, state, ignore_existing=no_status_yet)
        # Block until the submission has been accepted before executing.
        ray.get(submission)
        return manager.execute_workflow.remote(job_id, context)