Example #1
def test_nested_workflow_no_download(workflow_start_regular):
    """Test that we _only_ load from storage on recovery. For a nested workflow
    step, we should checkpoint the input/output, but continue to reuse the
    in-memory value.
    """
    @ray.remote
    def recursive(ref, count):
        if count == 0:
            return ref
        return workflow.continuation(recursive.bind(ref, count - 1))

    with tempfile.TemporaryDirectory() as temp_dir:
        debug_store = DebugStorage(get_global_storage(), temp_dir)
        utils._alter_storage(debug_store)

        ref = ray.put("hello")
        result = workflow.create(recursive.bind([ref], 10)).run()

        ops = debug_store._logged_storage.get_op_counter()
        get_objects_count = 0
        for key in ops["get"]:
            if "objects" in key:
                get_objects_count += 1
        assert get_objects_count == 1, "We should only get once when resuming."
        put_objects_count = 0
        for key in ops["put"]:
            if "objects" in key:
                print(key)
                put_objects_count += 1
        assert (put_objects_count == 1
                ), "We should detect the object exists before uploading"
        assert ray.get(result) == ["hello"]
Example #2
            def step(method_name, method, *args, **kwargs):
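                # Builds a Workflow step from a virtual-actor method call.
                # Non-readonly methods (except "__init__") get the actor's
                # latest state injected as a hidden WorkflowRef argument.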
                readonly = getattr(method, "__virtual_actor_readonly__", False)
                flattened_args = self.flatten_args(method_name, args, kwargs)
                actor_id = workflow_context.get_current_workflow_id()
                if not readonly:
                    if method_name == "__init__":
                        state_ref = None
                    else:
                        ws = WorkflowStorage(actor_id, get_global_storage())
                        state_ref = WorkflowRef(ws.get_entrypoint_step_id())
                    # This is a hack to insert a positional argument.
                    flattened_args = [signature.DUMMY_TYPE, state_ref
                                      ] + flattened_args
                workflow_inputs = serialization_context.make_workflow_inputs(
                    flattened_args)

                if readonly:
                    _actor_method = _wrap_readonly_actor_method(
                        actor_id, self.cls, method_name)
                    step_type = StepType.READONLY_ACTOR_METHOD
                else:
                    _actor_method = _wrap_actor_method(self.cls, method_name)
                    step_type = StepType.ACTOR_METHOD
                # TODO(suquark): Support actor options.
                workflow_data = WorkflowData(
                    func_body=_actor_method,
                    step_type=step_type,
                    inputs=workflow_inputs,
                    max_retries=1,
                    catch_exceptions=False,
                    ray_options={},
                    name=None,
                )
                wf = Workflow(workflow_data)
                return wf
Example #3
def test_wait_recovery_step_id(workflow_start_regular_shared):
    # This test ensures the workflow reuses the original directory and
    # step id for "workflow.wait" during recovery.

    @workflow.step
    def identity(x: int):
        # block the step by a global mark
        assert utils.check_global_mark()
        return x

    w = workflow.wait([identity.step(42)], num_returns=1, timeout=None)
    utils.unset_global_mark()
    with pytest.raises(RaySystemError):
        _ = w.run(workflow_id="test_wait_recovery_step_id")
    utils.set_global_mark()
    ready, unready = ray.get(workflow.resume("test_wait_recovery_step_id"))
    assert ready == [42]

    from ray.workflow import storage, workflow_storage

    global_storage = storage.get_global_storage()
    wf_storage = workflow_storage.WorkflowStorage("test_wait_recovery_step_id",
                                                  global_storage)
    index = wf_storage.gen_step_id("workflow.wait")
    # no new step id
    assert index <= 1
Example #4
def test_crash_after_commit(workflow_start_regular_shared):
    """Ensure that we don't re-call poll_for_event after `event_checkpointed`
    returns, even after a crash. Here we must call `event_checkpointed`
    twice, because there's no way to know if we called it after
    checkpointing.
    """
    _storage = storage.get_global_storage()
    class MyEventListener(workflow.EventListener):
        async def poll_for_event(self):
            assert not utils.check_global_mark("committed")

        async def event_checkpointed(self, event):
            utils.set_global_mark("committed")
            if utils.check_global_mark("first"):
                utils.set_global_mark("second")
            else:
                utils.set_global_mark("first")
                await asyncio.sleep(1000000)

    event_promise = workflow.wait_for_event(MyEventListener)
    event_promise.run_async("workflow")

    while not utils.check_global_mark("first"):
        time.sleep(0.1)

    ray.shutdown()
    subprocess.check_output(["ray", "stop", "--force"])

    ray.init(num_cpus=4)
    workflow.init(storage=_storage)
    workflow.resume("workflow")

    ray.get(workflow.get_output("workflow"))
    assert utils.check_global_mark("second")
Example #5
def test_failure_with_storage(workflow_start_regular):
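    # Replays the debug storage log record by record: resuming with too few
    # records must raise, while any replay at or past the initial commit must
    # reproduce the original workflow result.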
    with tempfile.TemporaryDirectory() as temp_dir:
        debug_store = DebugStorage(get_global_storage(), temp_dir)
        _alter_storage(debug_store)

        wf = construct_workflow(length=3)
        result = wf.run(workflow_id="complex_workflow")
        index = _locate_initial_commit(debug_store) + 1
        debug_store.log_off()

        def resume(num_records_replayed):
            key = debug_store.wrapped_storage.make_key("complex_workflow")
            asyncio_run(debug_store.wrapped_storage.delete_prefix(key))

            async def replay():
                # We need to replay one by one to avoid conflicts.
                for i in range(num_records_replayed):
                    await debug_store.replay(i)

            asyncio_run(replay())
            return ray.get(workflow.resume(workflow_id="complex_workflow"))

        with pytest.raises(ValueError):
            # In this case, the replayed records are too few to resume the
            # workflow.
            resume(index - 1)

        if isinstance(debug_store.wrapped_storage, FilesystemStorageImpl):
            # filesystem is faster, so we can cover all cases
            step_len = 1
        else:
            step_len = max((len(debug_store) - index) // 5, 1)

        for j in range(index, len(debug_store), step_len):
            assert resume(j) == result
Example #6
def test_embedded_objectrefs(workflow_start_regular):
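    # Serializes a wrapper object holding nested ObjectRefs to storage,
    # restarts the cluster, and verifies the refs are restored on load.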
    workflow_id = test_embedded_objectrefs.__name__
    base_storage = storage.get_global_storage()

    class ObjectRefsWrapper:
        def __init__(self, refs):
            self.refs = refs

    url = base_storage.storage_url

    wrapped = ObjectRefsWrapper([ray.put(1), ray.put(2)])

    promise = serialization.dump_to_storage(["key"], wrapped, workflow_id,
                                            base_storage)
    workflow_storage.asyncio_run(promise)

    # Be extremely explicit about shutting down. We want to make sure the
    # `_get` call deserializes the full object and puts it in the object store.
    # Shutting down the cluster should guarantee we don't accidently get the
    # old object and pass the test.
    ray.shutdown()
    subprocess.check_output("ray stop --force", shell=True)

    workflow.init(url)
    storage2 = workflow_storage.get_workflow_storage(workflow_id)

    result = workflow_storage.asyncio_run(storage2._get(["key"]))
    assert ray.get(result.refs) == [1, 2]
Example #7
    def step(self, *args, **kwargs):
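        # Builds a Workflow step from this virtual-actor method call.
        # Non-readonly methods (except "__init__") get the actor's latest
        # state injected as a hidden WorkflowRef argument.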
        flattened_args = signature.flatten_args(self._signature, args, kwargs)
        actor_id = workflow_context.get_current_workflow_id()
        if not self.readonly:
            if self._method_name == "__init__":
                state_ref = None
            else:
                ws = WorkflowStorage(actor_id, get_global_storage())
                state_ref = WorkflowRef(ws.get_entrypoint_step_id())
            # This is a hack to insert a positional argument.
            flattened_args = [signature.DUMMY_TYPE, state_ref] + flattened_args
        workflow_inputs = serialization_context.make_workflow_inputs(
            flattened_args)

        if self.readonly:
            _actor_method = _wrap_readonly_actor_method(
                actor_id, self._original_class, self._method_name)
        else:
            _actor_method = _wrap_actor_method(self._original_class,
                                               self._method_name)
        workflow_data = WorkflowData(
            func_body=_actor_method,
            inputs=workflow_inputs,
            name=self._name,
            step_options=self._options,
            user_metadata=self._user_metadata,
        )
        wf = Workflow(workflow_data)
        return wf
Example #8
def run(entry_workflow: Workflow,
        workflow_id: Optional[str] = None,
        metadata: Optional[Dict] = None) -> ray.ObjectRef:
    """Run a workflow asynchronously.
    """
    if metadata is not None:
        if not isinstance(metadata, dict):
            raise ValueError("metadata must be a dict.")
        for k, v in metadata.items():
            try:
                json.dumps(v)
            except TypeError as e:
                raise ValueError("metadata values must be JSON serializable, "
                                 "however '{}' has a value whose {}.".format(
                                     k, e))
    metadata = metadata or {}

    store = get_global_storage()
    assert ray.is_initialized()
    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{str(uuid.uuid4())}.{time.time():.9f}"

    logger.info(
        f"Workflow job created. [id=\"{workflow_id}\", storage_url="
        f"\"{store.storage_url}\"]. Type: {entry_workflow.data.step_type} ")

    with workflow_context.workflow_step_context(workflow_id,
                                                store.storage_url):
        # checkpoint the workflow
        ws = workflow_storage.get_workflow_storage(workflow_id)
        ws.save_workflow_user_metadata(metadata)

        wf_exists = True
        try:
            ws.get_entrypoint_step_id()
        except Exception:
            wf_exists = False

        # We only commit the entry workflow when:
        #  - it is a virtual actor task (dynamic tasks are always committed), or
        #  - it is a new workflow.
        # TODO (yic): follow up with force rerun
        if entry_workflow.data.step_type != StepType.FUNCTION or not wf_exists:
            commit_step(ws, "", entry_workflow, exception=None)
        workflow_manager = get_or_create_management_actor()
        ignore_existing = (entry_workflow.data.step_type != StepType.FUNCTION)
        # NOTE: It is important to 'ray.get' the returned output. This
        # ensures caller of 'run()' holds the reference to the workflow
        # result. Otherwise if the actor removes the reference of the
        # workflow output, the caller may fail to resolve the result.
        result: "WorkflowExecutionResult" = ray.get(
            workflow_manager.run_or_resume.remote(workflow_id,
                                                  ignore_existing))
        if entry_workflow.data.step_type == StepType.FUNCTION:
            return flatten_workflow_output(workflow_id,
                                           result.persisted_output)
        else:
            return flatten_workflow_output(workflow_id, result.volatile_output)
Example #9
def run(
    entry_workflow: Workflow,
    workflow_id: Optional[str] = None,
    metadata: Optional[Dict] = None,
) -> ray.ObjectRef:
    """Run a workflow asynchronously."""
    validate_user_metadata(metadata)
    metadata = metadata or {}

    store = get_global_storage()
    assert ray.is_initialized()
    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{str(uuid.uuid4())}.{time.time():.9f}"
    step_type = entry_workflow.data.step_options.step_type

    logger.info(
        f'Workflow job created. [id="{workflow_id}", storage_url='
        f'"{store.storage_url}"]. Type: {step_type}.'
    )

    with workflow_context.workflow_step_context(workflow_id, store.storage_url):
        # checkpoint the workflow
        ws = workflow_storage.get_workflow_storage(workflow_id)
        ws.save_workflow_user_metadata(metadata)

        wf_exists = True
        try:
            ws.get_entrypoint_step_id()
        except Exception:
            wf_exists = False

        # "Is growing" means we could adding steps to the (top-level)
        # workflow to grow the workflow dynamically at runtime.
        is_growing = step_type not in (StepType.FUNCTION, StepType.WAIT)

        # We only commit the entry workflow when:
        #  - it is a virtual actor task (dynamic tasks are always committed), or
        #  - it is a new workflow.
        # TODO (yic): follow up with force rerun
        if is_growing or not wf_exists:
            # We must checkpoint entry workflow.
            commit_step(ws, "", entry_workflow, exception=None)
        workflow_manager = get_or_create_management_actor()
        ignore_existing = is_growing
        # NOTE: It is important to 'ray.get' the returned output. This
        # ensures caller of 'run()' holds the reference to the workflow
        # result. Otherwise if the actor removes the reference of the
        # workflow output, the caller may fail to resolve the result.
        result: "WorkflowExecutionResult" = ray.get(
            workflow_manager.run_or_resume.remote(workflow_id, ignore_existing)
        )
        if not is_growing:
            return flatten_workflow_output(workflow_id, result.persisted_output)
        else:
            return flatten_workflow_output(workflow_id, result.volatile_output)
Example #10
def get_actor(actor_id: str) -> "VirtualActor":
    """Get an virtual actor.

    Args:
        actor_id: The ID of the actor.

    Returns:
        A virtual actor.
    """
    ensure_ray_initialized()
    return virtual_actor_class.get_actor(actor_id, storage_base.get_global_storage())
Example #11
def get_workflow_storage(workflow_id: Optional[str] = None) -> WorkflowStorage:
    """Get the storage for the workflow.

    Args:
        workflow_id: The ID of the workflow.

    Returns:
        A workflow storage.
    """
    store = storage.get_global_storage()
    if workflow_id is None:
        workflow_id = workflow_context.get_workflow_step_context().workflow_id
    return WorkflowStorage(workflow_id, store)
Example #12
 def _readonly_actor_method(*args, **kwargs):
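     # Recreates the actor instance without calling __init__, restores its
     # latest checkpointed state from storage, and invokes the readonly method.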
     storage = get_global_storage()
     instance = cls.__new__(cls)
     try:
         state = get_latest_output(actor_id, storage)
     except Exception as e:
         raise VirtualActorNotInitializedError(
             f"Virtual actor '{actor_id}' has not been initialized. "
             "We cannot get the latest state for the "
             "readonly virtual actor.") from e
     __setstate(instance, state)
     method = getattr(instance, method_name)
     return method(*args, **kwargs)
Example #13
def test_checkpoint_dag_full(workflow_start_regular):
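    # Runs checkpoint_dag with full checkpointing: every step must have an
    # output checkpoint, and resuming must reproduce the same result.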
    global_storage = storage.get_global_storage()

    outputs = utils.run_workflow_dag_with_options(
        checkpoint_dag, (True,), workflow_id="checkpoint_whole", name="checkpoint_dag"
    )
    assert np.isclose(outputs, 8388607.5)
    recovered = ray.get(workflow.resume("checkpoint_whole"))
    assert np.isclose(recovered, 8388607.5)

    wf_storage = workflow_storage.WorkflowStorage("checkpoint_whole", global_storage)
    _assert_step_checkpoints(wf_storage, "checkpoint_dag", mode="checkpointed")
    _assert_step_checkpoints(wf_storage, "large_input", mode="checkpointed")
    _assert_step_checkpoints(wf_storage, "identity", mode="checkpointed")
    _assert_step_checkpoints(wf_storage, "average", mode="checkpointed")
Example #14
def resume(workflow_id: str) -> ray.ObjectRef:
    """Resume a workflow asynchronously. See "api.resume()" for details."""
    storage = get_global_storage()
    logger.info(f'Resuming workflow [id="{workflow_id}", storage_url='
                f'"{storage.storage_url}"].')
    workflow_manager = get_or_create_management_actor()
    # NOTE: It is important to 'ray.get' the returned output. This
    # ensures caller of 'run()' holds the reference to the workflow
    # result. Otherwise if the actor removes the reference of the
    # workflow output, the caller may fail to resolve the result.
    result: "WorkflowExecutionResult" = ray.get(
        workflow_manager.run_or_resume.remote(workflow_id,
                                              ignore_existing=False))
    logger.info(f"Workflow job {workflow_id} resumed.")
    return flatten_workflow_output(workflow_id, result.persisted_output)
Example #15
def test_dedupe_download_raw_ref(workflow_start_regular):
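    # 100 steps share the same raw ObjectRef argument; the op counter must
    # record exactly one storage "get" on object keys, i.e. the download is
    # deduplicated.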
    with tempfile.TemporaryDirectory() as temp_dir:
        debug_store = DebugStorage(get_global_storage(), temp_dir)
        utils._alter_storage(debug_store)

        ref = ray.put("hello")
        workflows = [identity.bind(ref) for _ in range(100)]

        workflow.create(gather.bind(*workflows)).run()

        ops = debug_store._logged_storage.get_op_counter()
        get_objects_count = 0
        for key in ops["get"]:
            if "objects" in key:
                get_objects_count += 1
        assert get_objects_count == 1
Example #16
def test_dedupe_downloads_list(workflow_start_regular):
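    # 100 steps share a list of five ObjectRefs; each object should be fetched
    # from storage only once, hence exactly five "get" ops on object keys.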
    with tempfile.TemporaryDirectory() as temp_dir:
        debug_store = DebugStorage(get_global_storage(), temp_dir)
        utils._alter_storage(debug_store)

        numbers = [ray.put(i) for i in range(5)]
        workflows = [identity.step(numbers) for _ in range(100)]

        gather.step(*workflows).run()

        ops = debug_store._logged_storage.get_op_counter()
        get_objects_count = 0
        for key in ops["get"]:
            if "objects" in key:
                get_objects_count += 1
        assert get_objects_count == 5
Example #17
async def test_kv_storage(workflow_start_regular):
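    # Exercises the raw key-value API of the global storage: JSON and binary
    # put/get, KeyNotFoundError for a missing key, and prefix scans.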
    kv_store = storage.get_global_storage()
    json_data = {"hello": "world"}
    bin_data = (31416).to_bytes(8, "big")
    key_1 = kv_store.make_key("aaa", "bbb", "ccc")
    key_2 = kv_store.make_key("aaa", "ddd")
    key_3 = kv_store.make_key("aaa", "eee")
    await kv_store.put(key_1, json_data, is_json=True)
    await kv_store.put(key_2, bin_data, is_json=False)
    assert json_data == await kv_store.get(key_1, is_json=True)
    assert bin_data == await kv_store.get(key_2, is_json=False)
    with pytest.raises(storage.KeyNotFoundError):
        await kv_store.get(key_3)
    prefix = kv_store.make_key("aaa")
    assert set(await kv_store.scan_prefix(prefix)) == {"bbb", "ddd"}
    assert set(await kv_store.scan_prefix(kv_store.make_key(""))) == {"aaa"}
Example #18
def test_checkpoint_dag_skip_partial(workflow_start_regular):
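    # Runs checkpoint_dag with inner checkpointing disabled: large_input and
    # identity skip their output checkpoints while checkpoint_dag and average
    # remain checkpointed; recovery must still yield the same result.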
    global_storage = storage.get_global_storage()

    outputs = (
        checkpoint_dag.options(name="checkpoint_dag")
        .step(False)
        .run(workflow_id="checkpoint_partial")
    )
    assert np.isclose(outputs, 8388607.5)
    recovered = ray.get(workflow.resume("checkpoint_partial"))
    assert np.isclose(recovered, 8388607.5)

    wf_storage = workflow_storage.WorkflowStorage("checkpoint_partial", global_storage)
    _assert_step_checkpoints(wf_storage, "checkpoint_dag", mode="checkpointed")
    _assert_step_checkpoints(wf_storage, "large_input", mode="output_skipped")
    _assert_step_checkpoints(wf_storage, "identity", mode="output_skipped")
    _assert_step_checkpoints(wf_storage, "average", mode="checkpointed")
Example #19
def init_management_actor() -> None:
    """Initialize WorkflowManagementActor"""
    store = storage.get_global_storage()
    try:
        workflow_manager = get_management_actor()
        storage_url = ray.get(workflow_manager.get_storage_url.remote())
        if storage_url != store.storage_url:
            raise RuntimeError("The workflow is using a storage "
                               f"({store.storage_url}) different from the "
                               f"workflow manager({storage_url}).")
    except ValueError:
        logger.info("Initializing workflow manager...")
        # the actor does not exist
        WorkflowManagementActor.options(
            name=common.MANAGEMENT_ACTOR_NAME,
            namespace=common.MANAGEMENT_ACTOR_NAMESPACE,
            lifetime="detached").remote(store)
Example #20
def test_crash_during_event_checkpointing(workflow_start_regular_shared):
    """Ensure that if the cluster dies while the event is being checkpointed, we
    properly re-poll for the event."""
    _storage = storage.get_global_storage()
    """Ensure that we don't re-call poll_for_event after `event_checkpointed`
       returns, even after a crash."""

    class MyEventListener(workflow.EventListener):
        async def poll_for_event(self):
            assert not utils.check_global_mark("committed")
            if utils.check_global_mark("first"):
                utils.set_global_mark("second")
            utils.set_global_mark("first")

            utils.set_global_mark("time_to_die")
            while not utils.check_global_mark("resume"):
                time.sleep(0.1)

        async def event_checkpointed(self, event):
            utils.set_global_mark("committed")

    @workflow.step
    def wait_then_finish(arg):
        pass

    event_promise = workflow.wait_for_event(MyEventListener)
    wait_then_finish.step(event_promise).run_async("workflow")

    while not utils.check_global_mark("time_to_die"):
        time.sleep(0.1)

    assert utils.check_global_mark("first")
    ray.shutdown()
    subprocess.check_output(["ray", "stop", "--force"])

    # Give the workflow some time to kill the cluster.
    # time.sleep(3)

    ray.init(num_cpus=4)
    workflow.init(storage=_storage)
    workflow.resume("workflow")
    utils.set_global_mark("resume")

    ray.get(workflow.get_output("workflow"))
    assert utils.check_global_mark("second")
Example #21
def run(entry_workflow: Workflow,
        workflow_id: Optional[str] = None) -> ray.ObjectRef:
    """Run a workflow asynchronously.
    """
    store = get_global_storage()
    assert ray.is_initialized()
    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{str(uuid.uuid4())}.{time.time():.9f}"

    logger.info(f"Workflow job created. [id=\"{workflow_id}\", storage_url="
                f"\"{store.storage_url}\"].")

    with workflow_context.workflow_step_context(workflow_id,
                                                store.storage_url):
        # checkpoint the workflow
        ws = workflow_storage.get_workflow_storage(workflow_id)

        wf_exists = True
        try:
            ws.get_entrypoint_step_id()
        except Exception:
            wf_exists = False

        # We only commit the entry workflow when:
        #  - it is a virtual actor task (dynamic tasks are always committed), or
        #  - it is a new workflow.
        # TODO (yic): follow up with force rerun
        if entry_workflow.data.step_type != StepType.FUNCTION or not wf_exists:
            commit_step(ws, "", entry_workflow, None)
        workflow_manager = get_or_create_management_actor()
        ignore_existing = (entry_workflow.data.step_type != StepType.FUNCTION)
        # NOTE: It is important to 'ray.get' the returned output. This
        # ensures caller of 'run()' holds the reference to the workflow
        # result. Otherwise if the actor removes the reference of the
        # workflow output, the caller may fail to resolve the result.
        result: "WorkflowExecutionResult" = ray.get(
            workflow_manager.run_or_resume.remote(workflow_id,
                                                  ignore_existing))
        if entry_workflow.data.step_type == StepType.FUNCTION:
            return flatten_workflow_output(workflow_id,
                                           result.persisted_output)
        else:
            return flatten_workflow_output(workflow_id, result.volatile_output)
Example #22
def get_or_create_management_actor() -> "ActorHandle":
    """Get or create WorkflowManagementActor"""
    # TODO(suquark): We should not get the actor every time. We also need to
    # resume the actor if it failed. A global variable caching the actor is
    # not enough to resume it, because there is no liveness detection for an
    # actor.
    try:
        workflow_manager = get_management_actor()
    except ValueError:
        store = storage.get_global_storage()
        # the actor does not exist
        logger.warning("Cannot access workflow manager. It could be because "
                       "the workflow manager exited unexpectedly. A new "
                       "workflow manager is being created with storage "
                       f"'{store}'.")
        workflow_manager = WorkflowManagementActor.options(
            name=common.MANAGEMENT_ACTOR_NAME,
            namespace=common.MANAGEMENT_ACTOR_NAMESPACE,
            lifetime="detached").remote(store)
    return workflow_manager
Example #23
def get_or_create_manager(warn_on_creation: bool = True) -> "ActorHandle":
    """Get or create the storage manager."""
    # TODO(suquark): We should not get the actor every time. We also need to
    # resume the actor if it failed. A global variable caching the actor is
    # not enough to resume it, because there is no liveness detection for an
    # actor.
    try:
        return ray.get_actor(common.STORAGE_ACTOR_NAME,
                             namespace=common.MANAGEMENT_ACTOR_NAMESPACE)
    except ValueError:
        store = storage.get_global_storage()
        if warn_on_creation:
            logger.warning("Cannot access workflow serialization manager. It "
                           "could be because "
                           "the workflow manager exited unexpectedly. A new "
                           "workflow manager is being created with storage "
                           f"'{store}'.")
        handle = Manager.options(name=common.STORAGE_ACTOR_NAME,
                                 namespace=common.MANAGEMENT_ACTOR_NAMESPACE,
                                 lifetime="detached").remote(store)
        ray.get(handle.ping.remote())
        return handle
Example #24
def init(storage: "Optional[Union[str, Storage]]" = None) -> None:
    """Initialize workflow.

    Args:
        storage: The external storage URL or a custom storage class. If not
            specified, ``/tmp/ray/workflow_data`` will be used.
    """
    if storage is None:
        storage = os.environ.get("RAY_WORKFLOW_STORAGE")

    if storage is None:
        # We should use get_temp_dir_path, but it is not available for the Ray
        # client. We need a flag telling whether this is a client or a driver
        # so we can pick the right directory.
        # For now, just use /tmp/ray/workflow_data.
        storage = "file:///tmp/ray/workflow_data"
    if isinstance(storage, str):
        logger.info(f"Using storage: {storage}")
        storage = storage_base.create_storage(storage)
    elif not isinstance(storage, Storage):
        raise TypeError("'storage' should be None, str, or Storage type.")

    try:
        _storage = storage_base.get_global_storage()
    except RuntimeError:
        pass
    else:
        # We have to use the 'else' branch because the RuntimeError raised
        # below must not be caught by the 'except' clause above.
        if _storage.storage_url == storage.storage_url:
            logger.warning("Calling 'workflow.init()' again with the same "
                           "storage.")
        else:
            raise RuntimeError("Calling 'workflow.init()' again with a "
                               "different storage")
    storage_base.set_global_storage(storage)
    workflow_access.init_management_actor()
    serialization.init_manager()
Example #25
 def get_or_create(self, actor_id: str, *args, **kwargs):
     return actor_cls._get_or_create(actor_id,
                                     args=args,
                                     kwargs=kwargs,
                                     storage=get_global_storage())
Example #26
 def get_or_create(self, actor_id: str, *args, **kwargs) -> "VirtualActor":
     """Create an actor. See `VirtualActorClassBase.create()`."""
     return self._get_or_create(actor_id,
                                args=args,
                                kwargs=kwargs,
                                storage=get_global_storage())
Example #27
def test_workflow_storage(workflow_start_regular):
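    # Writes step metadata, function bodies, args and outputs through the
    # private _put API, then checks that the load_* helpers and inspect_step
    # recover them and report recoverability correctly.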
    workflow_id = test_workflow_storage.__name__
    wf_storage = workflow_storage.WorkflowStorage(workflow_id,
                                                  storage.get_global_storage())
    step_id = "some_step"
    step_options = WorkflowStepRuntimeOptions(
        step_type=StepType.FUNCTION,
        catch_exceptions=False,
        max_retries=1,
        ray_options={})
    input_metadata = {
        "name": "test_basic_workflows.append1",
        "workflows": ["def"],
        "workflow_refs": ["some_ref"],
        "step_options": step_options.to_dict(),
    }
    output_metadata = {
        "output_step_id": "a12423",
        "dynamic_output_step_id": "b1234"
    }
    flattened_args = [
        signature.DUMMY_TYPE, 1, signature.DUMMY_TYPE, "2", "k", b"543"
    ]
    args = signature.recover_args(flattened_args)
    output = ["the_answer"]
    object_resolved = 42
    obj_ref = ray.put(object_resolved)

    # test basics
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_input_metadata(step_id), input_metadata,
            True))
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_function_body(step_id), some_func))
    asyncio_run(
        wf_storage._put(wf_storage._key_step_args(step_id), flattened_args))

    asyncio_run(
        wf_storage._put(
            wf_storage._key_obj_id(obj_ref.hex()), ray.get(obj_ref)))
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_output_metadata(step_id), output_metadata,
            True))
    asyncio_run(wf_storage._put(wf_storage._key_step_output(step_id), output))

    assert wf_storage.load_step_output(step_id) == output
    assert wf_storage.load_step_args(step_id, [], []) == args
    assert wf_storage.load_step_func_body(step_id)(33) == 34
    assert ray.get(wf_storage.load_object_ref(
        obj_ref.hex())) == object_resolved

    # test "inspect_step"
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        output_object_valid=True)
    assert inspect_result.is_recoverable()

    step_id = "some_step2"
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_input_metadata(step_id), input_metadata,
            True))
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_function_body(step_id), some_func))
    asyncio_run(wf_storage._put(wf_storage._key_step_args(step_id), args))
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_output_metadata(step_id), output_metadata,
            True))

    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        output_step_id=output_metadata["dynamic_output_step_id"])
    assert inspect_result.is_recoverable()

    step_id = "some_step3"
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_input_metadata(step_id), input_metadata,
            True))
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_function_body(step_id), some_func))
    asyncio_run(wf_storage._put(wf_storage._key_step_args(step_id), args))
    inspect_result = wf_storage.inspect_step(step_id)
    step_options = WorkflowStepRuntimeOptions(
        step_type=StepType.FUNCTION,
        catch_exceptions=False,
        max_retries=1,
        ray_options={})
    assert inspect_result == workflow_storage.StepInspectResult(
        args_valid=True,
        func_body_valid=True,
        workflows=input_metadata["workflows"],
        workflow_refs=input_metadata["workflow_refs"],
        step_options=step_options)
    assert inspect_result.is_recoverable()

    step_id = "some_step4"
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_input_metadata(step_id), input_metadata,
            True))
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_function_body(step_id), some_func))
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        func_body_valid=True,
        workflows=input_metadata["workflows"],
        workflow_refs=input_metadata["workflow_refs"],
        step_options=step_options)
    assert not inspect_result.is_recoverable()

    step_id = "some_step5"
    asyncio_run(
        wf_storage._put(
            wf_storage._key_step_input_metadata(step_id), input_metadata,
            True))
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        workflows=input_metadata["workflows"],
        workflow_refs=input_metadata["workflow_refs"],
        step_options=step_options)
    assert not inspect_result.is_recoverable()

    step_id = "some_step6"
    inspect_result = wf_storage.inspect_step(step_id)
    print(inspect_result)
    assert inspect_result == workflow_storage.StepInspectResult()
    assert not inspect_result.is_recoverable()
Example #28
def get_metadata(paths, is_json=True):
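    # Reads a value from the global storage under the key built from `paths`,
    # driving the async `get` to completion on the current event loop.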
    store = get_global_storage()
    key = store.make_key(*paths)
    return asyncio.get_event_loop().run_until_complete(store.get(key, is_json))
Example #29
def test_delete(workflow_start_regular):
    _storage = storage.get_global_storage()

    # Try deleting a random workflow that never existed.
    with pytest.raises(WorkflowNotFoundError):
        workflow.delete(workflow_id="never_existed")

    # Delete a workflow that has not finished and is not running.
    @workflow.step
    def never_ends(x):
        utils.set_global_mark()
        time.sleep(1000000)
        return x

    never_ends.step("hello world").run_async("never_finishes")

    # Make sure the step is actually executing before killing the cluster
    while not utils.check_global_mark():
        time.sleep(0.1)

    # Restart
    ray.shutdown()
    subprocess.check_output("ray stop --force", shell=True)
    workflow.init(storage=_storage)

    with pytest.raises(ray.exceptions.RaySystemError):
        result = workflow.get_output("never_finishes")
        ray.get(result)

    workflow.delete("never_finishes")

    with pytest.raises(ValueError):
        output = workflow.get_output("never_finishes")

    # TODO(Alex): Uncomment after
    # https://github.com/ray-project/ray/issues/19481.
    # with pytest.raises(WorkflowNotFoundError):
    #     workflow.resume("never_finishes")

    with pytest.raises(WorkflowNotFoundError):
        workflow.delete(workflow_id="never_finishes")

    # Delete a workflow which has finished.
    @workflow.step
    def basic_step(arg):
        return arg

    result = basic_step.step("hello world").run(workflow_id="finishes")
    assert result == "hello world"
    output = workflow.get_output("finishes")
    assert ray.get(output) == "hello world"

    workflow.delete(workflow_id="finishes")

    with pytest.raises(ValueError):
        output = workflow.get_output("finishes")

    # TODO(Alex): Uncomment after
    # https://github.com/ray-project/ray/issues/19481.
    # with pytest.raises(ValueError):
    #     workflow.resume("finishes")

    with pytest.raises(WorkflowNotFoundError):
        workflow.delete(workflow_id="finishes")

    assert workflow.list_all() == []

    # The workflow can be re-run as if it was never run before.
    assert basic_step.step("123").run(workflow_id="finishes") == "123"
Example #30
def test_workflow_storage(workflow_start_regular):
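    # Same round trip as above through the private _put API, additionally
    # covering root output metadata and a hardcoded S3 key layout check.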
    workflow_id = test_workflow_storage.__name__
    wf_storage = workflow_storage.WorkflowStorage(workflow_id,
                                                  storage.get_global_storage())
    step_id = "some_step"
    step_options = WorkflowStepRuntimeOptions.make(step_type=StepType.FUNCTION)
    input_metadata = {
        "name": "test_basic_workflows.append1",
        "workflows": ["def"],
        "workflow_refs": ["some_ref"],
        "step_options": step_options.to_dict(),
    }
    output_metadata = {
        "output_step_id": "a12423",
        "dynamic_output_step_id": "b1234"
    }
    root_output_metadata = {"output_step_id": "c123"}
    flattened_args = [
        signature.DUMMY_TYPE, 1, signature.DUMMY_TYPE, "2", "k", b"543"
    ]
    args = signature.recover_args(flattened_args)
    output = ["the_answer"]
    object_resolved = 42
    obj_ref = ray.put(object_resolved)

    # test basics
    asyncio_run(
        wf_storage._put(wf_storage._key_step_input_metadata(step_id),
                        input_metadata, True))
    asyncio_run(
        wf_storage._put(wf_storage._key_step_function_body(step_id),
                        some_func))
    asyncio_run(
        wf_storage._put(wf_storage._key_step_args(step_id), flattened_args))

    asyncio_run(
        wf_storage._put(wf_storage._key_obj_id(obj_ref.hex()),
                        ray.get(obj_ref)))
    asyncio_run(
        wf_storage._put(wf_storage._key_step_output_metadata(step_id),
                        output_metadata, True))
    asyncio_run(
        wf_storage._put(wf_storage._key_step_output_metadata(""),
                        root_output_metadata, True))
    asyncio_run(wf_storage._put(wf_storage._key_step_output(step_id), output))

    assert wf_storage.load_step_output(step_id) == output
    assert wf_storage.load_step_args(step_id, [], []) == args
    assert wf_storage.load_step_func_body(step_id)(33) == 34
    assert ray.get(wf_storage.load_object_ref(
        obj_ref.hex())) == object_resolved

    # test s3 path
    # here we hardcode the path to make sure s3 path is parsed correctly
    if isinstance(wf_storage._storage, S3StorageImpl):
        assert (asyncio_run(
            wf_storage._storage.get(
                "workflow/test_workflow_storage/steps/outputs.json",
                True)) == root_output_metadata)

    # test "inspect_step"
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        output_object_valid=True)
    assert inspect_result.is_recoverable()

    step_id = "some_step2"
    asyncio_run(
        wf_storage._put(wf_storage._key_step_input_metadata(step_id),
                        input_metadata, True))
    asyncio_run(
        wf_storage._put(wf_storage._key_step_function_body(step_id),
                        some_func))
    asyncio_run(wf_storage._put(wf_storage._key_step_args(step_id), args))
    asyncio_run(
        wf_storage._put(wf_storage._key_step_output_metadata(step_id),
                        output_metadata, True))

    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        output_step_id=output_metadata["dynamic_output_step_id"])
    assert inspect_result.is_recoverable()

    step_id = "some_step3"
    asyncio_run(
        wf_storage._put(wf_storage._key_step_input_metadata(step_id),
                        input_metadata, True))
    asyncio_run(
        wf_storage._put(wf_storage._key_step_function_body(step_id),
                        some_func))
    asyncio_run(wf_storage._put(wf_storage._key_step_args(step_id), args))
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        args_valid=True,
        func_body_valid=True,
        workflows=input_metadata["workflows"],
        workflow_refs=input_metadata["workflow_refs"],
        step_options=step_options,
    )
    assert inspect_result.is_recoverable()

    step_id = "some_step4"
    asyncio_run(
        wf_storage._put(wf_storage._key_step_input_metadata(step_id),
                        input_metadata, True))
    asyncio_run(
        wf_storage._put(wf_storage._key_step_function_body(step_id),
                        some_func))
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        func_body_valid=True,
        workflows=input_metadata["workflows"],
        workflow_refs=input_metadata["workflow_refs"],
        step_options=step_options,
    )
    assert not inspect_result.is_recoverable()

    step_id = "some_step5"
    asyncio_run(
        wf_storage._put(wf_storage._key_step_input_metadata(step_id),
                        input_metadata, True))
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        workflows=input_metadata["workflows"],
        workflow_refs=input_metadata["workflow_refs"],
        step_options=step_options,
    )
    assert not inspect_result.is_recoverable()

    step_id = "some_step6"
    inspect_result = wf_storage.inspect_step(step_id)
    print(inspect_result)
    assert inspect_result == workflow_storage.StepInspectResult()
    assert not inspect_result.is_recoverable()