Example #1
def test_failure_with_storage(workflow_start_regular):
    with tempfile.TemporaryDirectory() as temp_dir:
        debug_store = DebugStorage(temp_dir)
        _alter_storage(debug_store)

        wf = construct_workflow(length=3)
        result = wf.run(workflow_id="complex_workflow")
        index = _locate_initial_commit(debug_store) + 1
        debug_store.log_off()

        def resume(num_records_replayed):
            key = debug_store.wrapped_storage.make_key("complex_workflow")
            asyncio_run(debug_store.wrapped_storage.delete_prefix(key))

            async def replay():
                # We need to replay the records one by one to avoid conflicts
                for i in range(num_records_replayed):
                    await debug_store.replay(i)

            asyncio_run(replay())
            return ray.get(workflow.resume(workflow_id="complex_workflow"))

        with pytest.raises(ValueError):
            # In this case, the replayed records are too few to resume
            # the workflow.
            resume(index - 1)

        if isinstance(debug_store.wrapped_storage, FilesystemStorageImpl):
            # filesystem is faster, so we can cover all cases
            step_len = 1
        else:
            step_len = max((len(debug_store) - index) // 5, 1)

        for j in range(index, len(debug_store), step_len):
            assert resume(j) == result
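
The asyncio_run helper used above is not defined in this snippet; a minimal sketch of what it is assumed to do (drive a coroutine to completion and return its result):

import asyncio

def asyncio_run(coro):
    # Hypothetical stand-in for the helper used above: run the coroutine
    # on a fresh event loop and return its result.
    loop = asyncio.new_event_loop()
    try:
        return loop.run_until_complete(coro)
    finally:
        loop.close()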
Example #2
def test_nested_workflow_no_download(workflow_start_regular):
    """Test that we _only_ load from storage on recovery. For a nested workflow
    step, we should checkpoint the input/output, but continue to reuse the
    in-memory value.
    """
    @ray.remote
    def recursive(ref, count):
        if count == 0:
            return ref
        return workflow.continuation(recursive.bind(ref, count - 1))

    with tempfile.TemporaryDirectory() as temp_dir:
        debug_store = DebugStorage(temp_dir)
        utils._alter_storage(debug_store)

        ref = ray.put("hello")
        result = workflow.create(recursive.bind([ref], 10)).run()

        ops = debug_store._logged_storage.get_op_counter()
        get_objects_count = 0
        for key in ops["get"]:
            if "objects" in key:
                get_objects_count += 1
        assert get_objects_count == 1, (
            "We should only get the object once when resuming.")
        put_objects_count = 0
        for key in ops["put"]:
            if "objects" in key:
                print(key)
                put_objects_count += 1
        assert put_objects_count == 1, (
            "We should detect that the object exists before uploading.")
        assert ray.get(result) == ["hello"]
Example #3
def create_storage(storage_url: str) -> Storage:
    """A factory function that creates different type of storage according
    to the URL.

    Args:
        storage_url: A URL indicates the storage type and root path.
        Currently only two types of storages are supported: local fs and s3
        For local fs, a path is needed, it can be either a URI with scheme
        file:// or just a local path, i.e.:
           file:///local_path
           local_path

        For s3, bucket, path are necessary. In the meantime, other parameters
        can be passed as well, like credientials or regions, i.e.:
           s3://bucket/path?region_name=str&endpoint_url=str&aws_access_key_id=str&
               aws_secret_access_key=str&aws_session_token=str

        All parameters are optional and have the same meaning as boto3.client

    Returns:
        A storage instance.
    """
    parsed_url = parse.urlparse(storage_url)
    if parsed_url.scheme == "file" or parsed_url.scheme == "":
        from ray.workflow.storage.filesystem import FilesystemStorageImpl

        return FilesystemStorageImpl(parsed_url.path)
    elif parsed_url.scheme == "s3":
        from ray.workflow.storage.s3 import S3StorageImpl

        bucket = parsed_url.netloc
        s3_path = parsed_url.path.lstrip("/")
        if not s3_path:
            raise ValueError(f"Invalid s3 path: {s3_path}")
        params = dict(parse.parse_qsl(parsed_url.query))
        return S3StorageImpl(bucket, s3_path, **params)
    elif parsed_url.scheme == "debug":
        from ray.workflow.storage.debug import DebugStorage

        params = dict(parse.parse_qsl(parsed_url.query))
        return DebugStorage(create_storage(params["storage"]),
                            path=parsed_url.path)
    else:
        extra_msg = ""
        if os.name == "nt":
            extra_msg = (
                " Try using file://{} or file:///{} for Windows file paths.".
                format(storage_url, storage_url))
        raise ValueError(f"Invalid url: {storage_url}." + extra_msg)
Example #4
def test_dedupe_download_raw_ref(workflow_start_regular):
    with tempfile.TemporaryDirectory() as temp_dir:
        debug_store = DebugStorage(temp_dir)
        utils._alter_storage(debug_store)

        ref = ray.put("hello")
        workflows = [identity.bind(ref) for _ in range(100)]

        workflow.create(gather.bind(*workflows)).run()

        ops = debug_store._logged_storage.get_op_counter()
        get_objects_count = 0
        for key in ops["get"]:
            if "objects" in key:
                get_objects_count += 1
        assert get_objects_count == 1
Example #5
def test_dedupe_downloads_list(workflow_start_regular):
    with tempfile.TemporaryDirectory() as temp_dir:
        debug_store = DebugStorage(get_global_storage(), temp_dir)
        utils._alter_storage(debug_store)

        numbers = [ray.put(i) for i in range(5)]
        workflows = [identity.bind(numbers) for _ in range(100)]

        workflow.create(gather.bind(*workflows)).run()

        ops = debug_store._logged_storage.get_op_counter()
        get_objects_count = 0
        for key in ops["get"]:
            if "objects" in key:
                get_objects_count += 1
        assert get_objects_count == 5
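
The counting loop is identical in the last two examples; a small helper (hypothetical, not part of the test suite) would make the dedupe check reusable:

def count_object_ops(debug_store, op: str) -> int:
    # Count logged operations of the given kind ("get" or "put") that
    # touched an object key.
    ops = debug_store._logged_storage.get_op_counter()
    return sum(1 for key in ops[op] if "objects" in key)

# Example: assert count_object_ops(debug_store, "get") == 5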
Example #6
def _locate_initial_commit(debug_store: DebugStorage) -> int:
    """Return the index of the first log record that commits step outputs
    metadata, or -1 if no such record exists."""
    for i in range(len(debug_store)):
        log = debug_store.get_log(i)
        if log["key"].endswith(STEP_OUTPUTS_METADATA):
            return i
    return -1