Пример #1
0
def run(entry_workflow: Workflow,
        workflow_id: Optional[str] = None,
        metadata: Optional[Dict] = None) -> ray.ObjectRef:
    """Run a workflow asynchronously.

    Args:
        entry_workflow: The workflow to run. Its entry step is checkpointed
            when the step type is not ``StepType.FUNCTION`` or when no
            entrypoint step is recorded yet for ``workflow_id``.
        workflow_id: ID of the workflow. When None, an ID of the form
            "{uuid4}.{unix time with 9 decimals}" is generated.
        metadata: Optional user metadata saved alongside the workflow. Must
            be a dict whose values can be serialized by ``json.dumps``.
            None is treated as an empty dict.

    Returns:
        The result of ``flatten_workflow_output`` applied to the persisted
        output (``StepType.FUNCTION``) or to the volatile output (any other
        step type) of the execution result.

    Raises:
        ValueError: If ``metadata`` is not a dict, or if one of its values
            is rejected by ``json.dumps`` with a TypeError. In the latter
            case the TypeError is attached as the exception's cause.
    """
    if metadata is not None:
        if not isinstance(metadata, dict):
            raise ValueError("metadata must be a dict.")
        for k, v in metadata.items():
            try:
                json.dumps(v)
            except TypeError as e:
                # Chain explicitly so the serialization error stays attached
                # as the cause instead of being reported as an unrelated
                # error raised while handling another one.
                raise ValueError("metadata values must be JSON serializable, "
                                 "however '{}' has a value whose {}.".format(
                                     k, e)) from e
    metadata = metadata or {}

    store = get_global_storage()
    assert ray.is_initialized()
    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{str(uuid.uuid4())}.{time.time():.9f}"

    logger.info(
        f"Workflow job created. [id=\"{workflow_id}\", storage_url="
        f"\"{store.storage_url}\"]. Type: {entry_workflow.data.step_type} ")

    with workflow_context.workflow_step_context(workflow_id,
                                                store.storage_url):
        # checkpoint the workflow
        ws = workflow_storage.get_workflow_storage(workflow_id)
        ws.save_workflow_user_metadata(metadata)

        # Any failure to look up the entrypoint step is treated as "this
        # workflow has not been stored yet".
        wf_exists = True
        try:
            ws.get_entrypoint_step_id()
        except Exception:
            wf_exists = False

        # We only commit for
        #  - virtual actor tasks: it's dynamic tasks, so we always add
        #  - it's a new workflow
        # TODO (yic): follow up with force rerun
        if entry_workflow.data.step_type != StepType.FUNCTION or not wf_exists:
            commit_step(ws, "", entry_workflow, exception=None)
        workflow_manager = get_or_create_management_actor()
        ignore_existing = (entry_workflow.data.step_type != StepType.FUNCTION)
        # NOTE: It is important to 'ray.get' the returned output. This
        # ensures caller of 'run()' holds the reference to the workflow
        # result. Otherwise if the actor removes the reference of the
        # workflow output, the caller may fail to resolve the result.
        result: "WorkflowExecutionResult" = ray.get(
            workflow_manager.run_or_resume.remote(workflow_id,
                                                  ignore_existing))
        if entry_workflow.data.step_type == StepType.FUNCTION:
            return flatten_workflow_output(workflow_id,
                                           result.persisted_output)
        else:
            return flatten_workflow_output(workflow_id, result.volatile_output)
Пример #2
0
def run(
    entry_workflow: Workflow,
    workflow_id: Optional[str] = None,
    metadata: Optional[Dict] = None,
) -> ray.ObjectRef:
    """Run a workflow asynchronously.

    Args:
        entry_workflow: The workflow to run; its step type decides whether
            the entry step is (re)committed and which output is returned.
        workflow_id: ID of the workflow. When None, an ID of the form
            "{uuid4}.{unix time with 9 decimals}" is generated.
        metadata: Optional user metadata, checked by
            ``validate_user_metadata`` and saved with the workflow. None is
            stored as an empty dict.

    Returns:
        The result of ``flatten_workflow_output`` applied to the volatile
        output when the workflow is "growing", otherwise to the persisted
        output.
    """
    validate_user_metadata(metadata)
    user_metadata = metadata or {}

    from ray.workflow.api import _ensure_workflow_initialized

    _ensure_workflow_initialized()

    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{uuid.uuid4()}.{time.time():.9f}"
    step_type = entry_workflow.data.step_options.step_type

    logger.info(
        f'Workflow job created. [id="{workflow_id}"]. Type: {step_type}.')

    with workflow_context.workflow_step_context(workflow_id):
        # Checkpoint the workflow: persist the user metadata first.
        storage = workflow_storage.get_workflow_storage(workflow_id)
        storage.save_workflow_user_metadata(user_metadata)

        # Any failure while reading the entrypoint step ID is taken to mean
        # the workflow has not been stored yet.
        try:
            storage.get_entrypoint_step_id()
        except Exception:
            already_stored = False
        else:
            already_stored = True

        # A "growing" workflow is one where steps can be added to the
        # (top-level) workflow dynamically at runtime.
        is_growing = step_type not in (StepType.FUNCTION, StepType.WAIT)

        # The entry workflow is committed only when
        #  - it is a virtual actor task (dynamic, so it is always added), or
        #  - the workflow is new.
        # TODO (yic): follow up with force rerun
        if is_growing or not already_stored:
            commit_step(storage, "", entry_workflow, exception=None)

        manager = get_or_create_management_actor()
        job_id = ray.get_runtime_context().job_id.hex()
        # NOTE: 'ray.get' on the returned output is required. It makes the
        # caller of 'run()' hold the reference to the workflow result;
        # otherwise, if the actor drops its reference to the workflow
        # output, the caller may fail to resolve the result.
        pending = manager.run_or_resume.remote(job_id, workflow_id,
                                               is_growing)
        result: "WorkflowExecutionResult" = ray.get(pending)

        output = (result.volatile_output
                  if is_growing else result.persisted_output)
        return flatten_workflow_output(workflow_id, output)
Пример #3
0
 async def _resume_one(wid: str) -> Tuple[str, Optional[ray.ObjectRef]]:
     """Resume one workflow and pair its ID with its flattened output.

     Args:
         wid: ID of the workflow to resume.

     Returns:
         ``(wid, output)`` where ``output`` is the result of
         ``flatten_workflow_output`` on the persisted output, or
         ``(wid, None)`` if anything raised while resuming.
     """
     try:
         result: "WorkflowExecutionResult" = (
             await workflow_manager.run_or_resume.remote(wid))
         obj = flatten_workflow_output(wid, result.persisted_output)
         return wid, obj
     except Exception:
         # Best effort: a failure is reported as (wid, None) rather than
         # raised. Log with the traceback so the reason for the failure is
         # not lost (assumes `logger` is a standard logging.Logger).
         logger.exception(f"Failed to resume workflow {wid}")
         return (wid, None)
Пример #4
0
def run(entry_workflow: Workflow,
        workflow_id: Optional[str] = None) -> ray.ObjectRef:
    """Run a workflow asynchronously.

    Args:
        entry_workflow: The workflow to run. Its entry step is committed
            when the step type is not ``StepType.FUNCTION`` or when no
            entrypoint step is recorded yet for ``workflow_id``.
        workflow_id: ID of the workflow. When None, an ID of the form
            "{uuid4}.{unix time with 9 decimals}" is generated.

    Returns:
        The result of ``flatten_workflow_output`` applied to the persisted
        output for ``StepType.FUNCTION`` and to the volatile output for any
        other step type.
    """
    store = get_global_storage()
    assert ray.is_initialized()
    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{uuid.uuid4()}.{time.time():.9f}"

    logger.info(
        f'Workflow job created. [id="{workflow_id}", '
        f'storage_url="{store.storage_url}"].')

    with workflow_context.workflow_step_context(workflow_id,
                                                store.storage_url):
        # Checkpoint the workflow.
        storage = workflow_storage.get_workflow_storage(workflow_id)

        # Any failure while reading the entrypoint step ID is taken to mean
        # the workflow has not been stored yet.
        try:
            storage.get_entrypoint_step_id()
        except Exception:
            already_stored = False
        else:
            already_stored = True

        # Non-FUNCTION steps are the dynamic (virtual actor) case.
        is_dynamic = entry_workflow.data.step_type != StepType.FUNCTION

        # The entry workflow is committed only when
        #  - it is a virtual actor task (dynamic, so it is always added), or
        #  - the workflow is new.
        # TODO (yic): follow up with force rerun
        if is_dynamic or not already_stored:
            commit_step(storage, "", entry_workflow, None)

        manager = get_or_create_management_actor()
        # NOTE: 'ray.get' on the returned output is required. It makes the
        # caller of 'run()' hold the reference to the workflow result;
        # otherwise, if the actor drops its reference to the workflow
        # output, the caller may fail to resolve the result.
        pending = manager.run_or_resume.remote(workflow_id, is_dynamic)
        result: "WorkflowExecutionResult" = ray.get(pending)

        output = (result.volatile_output
                  if is_dynamic else result.persisted_output)
        return flatten_workflow_output(workflow_id, output)
Пример #5
0
 async def _resume_one(wid: str) -> Tuple[str, Optional[ray.ObjectRef]]:
     """Resume one workflow and pair its ID with its flattened output.

     Args:
         wid: ID of the workflow to resume.

     Returns:
         ``(wid, output)`` where ``output`` is the result of
         ``flatten_workflow_output`` on the execution result's output, or
         ``(wid, None)`` if anything raised while resuming.
     """
     try:
         # The management actor is told which job requested the resume.
         job_id = ray.get_runtime_context().job_id.hex()
         result: "WorkflowExecutionResult" = (
             await workflow_manager.run_or_resume.remote(job_id, wid)
         )
         obj = flatten_workflow_output(wid, result.output)
         return wid, obj
     except Exception:
         # Best effort: a failure is reported as (wid, None) rather than
         # raised. Log with the traceback so the reason for the failure is
         # not lost (assumes `logger` is a standard logging.Logger).
         logger.exception(f"Failed to resume workflow {wid}")
         return (wid, None)
Пример #6
0
def test_workflow_output_resolving(workflow_start_regular_shared):
    """Check that a deeply nested workflow output flattens to its value.

    ``workflow_access._resolve_workflow_output`` is temporarily replaced by
    the ``_resolve_workflow_output`` visible in this scope, and is always
    put back afterwards.
    """
    saved_resolver = workflow_access._resolve_workflow_output
    # Build the deeply nested output that will be flattened.
    deep_ref = deep_nested.remote(30)
    # Swap in a resolver that does not involve a named actor.
    workflow_access._resolve_workflow_output = _resolve_workflow_output
    try:
        flat_ref = workflow_access.flatten_workflow_output(
            "fake_workflow_id", deep_ref)
    finally:
        # Put the original resolver back even if flattening failed.
        workflow_access._resolve_workflow_output = saved_resolver
    resolved = ray.get(flat_ref)
    assert resolved == 42
Пример #7
0
def get_output(workflow_id: str, name: Optional[str]) -> ray.ObjectRef:
    """Get the output of a running workflow.

    See "api.get_output()" for details.

    Args:
        workflow_id: ID of the workflow to query.
        name: Passed through unchanged to the management actor's
            ``get_output``; may be None.

    Returns:
        The result of ``flatten_workflow_output`` applied to the output
        fetched from the management actor.

    Raises:
        ValueError: If ``get_management_actor()`` raises ValueError; the
            original error is chained as the cause.
    """
    assert ray.is_initialized()
    try:
        manager = get_management_actor()
    except ValueError as err:
        message = (
            "Failed to connect to the workflow management actor. "
            "The workflow could have already failed. "
            "You can use workflow.resume() to resume the workflow.")
        raise ValueError(message) from err
    pending_output = manager.get_output.remote(workflow_id, name)
    return flatten_workflow_output(workflow_id, ray.get(pending_output))
Пример #8
0
def resume(workflow_id: str) -> ray.ObjectRef:
    """Resume a workflow asynchronously. See "api.resume()" for details.

    Args:
        workflow_id: ID of the workflow to resume.

    Returns:
        The result of ``flatten_workflow_output`` applied to the persisted
        output of the resumed workflow.
    """
    logger.info(f'Resuming workflow [id="{workflow_id}"].')
    manager = get_or_create_management_actor()
    job_id = ray.get_runtime_context().job_id.hex()
    # NOTE: 'ray.get' on the returned output is required. It makes the
    # caller hold the reference to the workflow result; otherwise, if the
    # actor drops its reference to the workflow output, the caller may fail
    # to resolve the result.
    pending = manager.run_or_resume.remote(
        job_id, workflow_id, ignore_existing=False)
    result: "WorkflowExecutionResult" = ray.get(pending)
    logger.info(f"Workflow job {workflow_id} resumed.")
    return flatten_workflow_output(workflow_id, result.persisted_output)