Пример #1
0
def test_file_handling_local_file_gets_force_no_copy():
    """A FlyteFile created with ``remote_path=False`` must not be uploaded.

    The task wraps this test module in a ``FlyteFile`` with
    ``remote_path=False``. After the workflow runs, the sandbox's
    ``mock_remote`` folder must still be empty and the returned path must be
    the original local path.
    """
    @task
    def t1() -> FlyteFile:
        # This test module is a convenient file that is known to exist.
        return FlyteFile(__file__, remote_path=False)

    @workflow
    def my_wf() -> FlyteFile:
        return t1()

    current_ctx = context_manager.FlyteContext.current_context()
    sandbox_dir = current_ctx.file_access.get_random_local_directory()
    provider = FileAccessProvider(local_sandbox_dir=sandbox_dir)
    mock_remote_dir = os.path.join(sandbox_dir, "mock_remote")

    with current_ctx.new_file_access_context(file_access_provider=provider):
        # Only the mock_remote folder and the local folder exist so far.
        assert len(os.listdir(sandbox_dir)) == 2

        # Nothing has been "uploaded" before the workflow runs.
        assert os.listdir(mock_remote_dir) == []

        result = my_wf()

        # remote_path=False forces no copy, so the mock remote location must
        # still be empty after the run.
        assert os.listdir(mock_remote_dir) == []

        # Flyte doesn't presume to handle a uri that looks like a raw path,
        # so the path handed back is the original one.
        assert result.path == __file__
Пример #2
0
def test_file_handling_local_file_gets_copied():
    """A plain local path returned as a FlyteFile is copied to the mock remote.

    The task returns ``__file__`` as a bare string. After the workflow runs,
    exactly one entry must appear under the sandbox's ``mock_remote`` folder
    and the output path must live inside the sandbox directory.
    """
    @task
    def t1() -> FlyteFile:
        # This test module is a convenient file that is known to exist.
        return __file__

    @workflow
    def my_wf() -> FlyteFile:
        return t1()

    current_ctx = context_manager.FlyteContext.current_context()
    sandbox_dir = current_ctx.file_access.get_random_local_directory()
    provider = FileAccessProvider(local_sandbox_dir=sandbox_dir)
    mock_remote_dir = os.path.join(sandbox_dir, "mock_remote")

    with current_ctx.new_file_access_context(file_access_provider=provider):
        # Only the mock_remote folder and the local folder exist so far.
        assert len(os.listdir(sandbox_dir)) == 2

        # The mock_remote folder itself starts out empty.
        assert os.listdir(mock_remote_dir) == []

        workflow_output = my_wf()

        # Running the workflow should have copied this test file into the
        # mock remote location.
        uploaded = os.listdir(mock_remote_dir)
        assert len(uploaded) == 1

        # The returned path points somewhere inside the sandbox.
        assert workflow_output.path.startswith(sandbox_dir)
Пример #3
0
def test_engine_file_output():
    """Round-trip a local text file through ``TypeEngine`` as a single blob.

    A file is written outside the sandbox directory, converted to a literal
    with ``TypeEngine.to_literal``, read back from the literal's blob uri,
    and finally converted back with ``TypeEngine.to_python_value``.
    """
    expected_line = "Hello World\n"
    single_blob = _core_types.BlobType(
        format="",
        dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE,
    )

    provider = FileAccessProvider(local_sandbox_dir="/tmp/flytetesting")
    base_ctx = context_manager.FlyteContext.current_context()
    with base_ctx.new_file_access_context(file_access_provider=provider) as ctx:
        # The source file deliberately lives outside the sandbox directory.
        source_path = "/tmp/sample.txt"
        with open(source_path, "w") as out:
            out.write(expected_line)

        blob_literal_type = LiteralType(blob=single_blob)
        lit = TypeEngine.to_literal(ctx, source_path, os.PathLike, blob_literal_type)

        # Local storage stands in for remote storage here, so the 'remote'
        # uri can be opened directly.
        remote_uri = lit.scalar.blob.uri
        with open(remote_uri, "r") as remote_fh:
            first_line = remote_fh.readline()
            assert first_line == expected_line

        # Converting the literal back must yield a readable local path with
        # the same contents.
        local_copy = TypeEngine.to_python_value(ctx, lit, os.PathLike)
        with open(local_copy, "r") as local_fh:
            first_line = local_fh.readline()
            assert first_line == expected_line
Пример #4
0
def test_file_handling_remote_file_handling_flyte_file():
    """A FlyteFile wrapping an https url is returned lazily, without uploading.

    The workflow output must point inside the sandbox, keep the https url as
    its ``remote_source``, and only materialize on disk once it is opened.
    """
    sample_data_url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"

    @task
    def t1() -> FlyteFile:
        # Return the remote address wrapped in a FlyteFile object.
        return FlyteFile(sample_data_url)

    @workflow
    def my_wf() -> FlyteFile:
        return t1()

    base_ctx = context_manager.FlyteContext.current_context()
    # A freshly generated random directory that is known to be empty.
    sandbox_dir = base_ctx.file_access.get_random_local_directory()
    # Creating a new FileAccessProvider adds two folders to the random dir.
    provider = FileAccessProvider(local_sandbox_dir=sandbox_dir)
    mock_remote_dir = os.path.join(sandbox_dir, "mock_remote")
    local_dir = os.path.join(sandbox_dir, "local_flytekit")

    sandboxed_ctx = base_ctx.with_file_access(provider)
    with context_manager.FlyteContextManager.with_context(sandboxed_ctx):
        # The mock_remote folder and the local folder.
        assert len(os.listdir(sandbox_dir)) == 2

        # The mock_remote folder itself is empty.
        assert os.listdir(mock_remote_dir) == []

        result = my_wf()

        # The mock remote dir is still empty after the run, since the
        # workflow output has not been used yet.
        assert os.listdir(mock_remote_dir) == []

        # The literal returned by t1 carries the web address as its uri, but
        # because that is a remote address flytekit translates it back into a
        # FlyteFile on the local drive (without downloading it)...
        assert result.path.startswith(sandbox_dir)
        # ...while the remote source remains the https address.
        assert result.remote_source == sample_data_url

        # Running the workflow creates the engine dir and the directory that
        # will hold the file, but not the file itself. This second layer
        # should contain two dirs: a random one generated by the
        # new_execution_context call, and an empty folder created by the
        # FlyteFile transformer's to_python_value function, which gets
        # populated once the file is opened.
        local_entries = os.listdir(local_dir)
        assert len(local_entries) == 2

        assert not os.path.exists(result.path)
        # Downloading is lazy, so opening the file is what triggers it.
        with open(result, "rb"):
            pass
        assert os.path.exists(result.path)

        # The file name is preserved on download.
        expected_name = os.path.basename(sample_data_url)
        assert str(result).endswith(expected_name)
0
def test_transformer_to_literal_local():
    """Check ``FlyteDirToMultipartBlobTransformer.to_literal`` on local inputs.

    Covers four cases: an empty local directory, a directory holding one
    file, a non-string primitive (rejected with ``AssertionError``), and a
    path that is a regular file rather than a directory (also rejected with
    ``AssertionError``).
    """
    def _recreate_empty_dir(path):
        # Start from a clean slate so earlier runs cannot leak files in.
        if os.path.exists(path):
            shutil.rmtree(path)
        pathlib.Path(path).mkdir(parents=True)

    random_dir = context_manager.FlyteContext.current_context(
    ).file_access.get_random_local_directory()
    fs = FileAccessProvider(local_sandbox_dir=random_dir)
    ctx = context_manager.FlyteContext.current_context()
    with context_manager.FlyteContextManager.with_context(
            ctx.with_file_access(fs)) as ctx:
        # Use a separate directory that we know won't be the same as anything
        # generated by flytekit itself, lest we accidentally try to
        # cp -R /some/folder /some/folder/sub which causes exceptions.
        p = "/tmp/flyte/test_fd_transformer"

        # Create an empty directory and call to_literal on it.
        _recreate_empty_dir(p)

        tf = FlyteDirToMultipartBlobTransformer()
        lt = tf.get_literal_type(FlyteDirectory)
        literal = tf.to_literal(ctx, FlyteDirectory(p), FlyteDirectory, lt)
        assert literal.scalar.blob.uri.startswith(random_dir)

        # Create a directory with one file in it.
        _recreate_empty_dir(p)
        with open(os.path.join(p, "xyz"), "w") as fh:
            fh.write("Hello world\n")
        literal = tf.to_literal(ctx, FlyteDirectory(p), FlyteDirectory, lt)

        mock_remote_files = os.listdir(literal.scalar.blob.uri)
        assert mock_remote_files == ["xyz"]

        # The only primitives allowed are strings.
        with pytest.raises(AssertionError):
            tf.to_literal(ctx, 3, FlyteDirectory, lt)

        # A regular file cannot be used as a directory. The fixture is built
        # outside the pytest.raises block so that only the to_literal call
        # itself can satisfy the expected AssertionError.
        not_a_dir = "/tmp/flyte/xyz"
        try:
            pathlib.Path(not_a_dir).unlink()
        except OSError:
            # Best effort: the file may simply not exist from an earlier run.
            pass
        with open(not_a_dir, "w") as fh:
            fh.write("hello world\n")

        with pytest.raises(AssertionError):
            tf.to_literal(ctx, FlyteDirectory(not_a_dir), FlyteDirectory, lt)
Пример #6
0
def test_transformer_to_literal_remote():
    """A remote ``FlyteDirectory`` uri must pass through ``to_literal`` unchanged."""
    base_ctx = context_manager.FlyteContext.current_context()
    sandbox_dir = base_ctx.file_access.get_random_local_directory()
    provider = FileAccessProvider(local_sandbox_dir=sandbox_dir)

    with base_ctx.new_file_access_context(file_access_provider=provider) as ctx:
        # Use a separate directory that cannot clash with anything flytekit
        # generates itself, to avoid accidentally attempting something like
        # cp -R /some/folder /some/folder/sub.
        scratch_dir = "/tmp/flyte/test_fd_transformer"
        # Recreate it as an empty directory.
        if os.path.exists(scratch_dir):
            shutil.rmtree(scratch_dir)
        pathlib.Path(scratch_dir).mkdir(parents=True)

        transformer = FlyteDirToMultipartBlobTransformer()
        literal_type = transformer.get_literal_type(FlyteDirectory)

        # Remote directories should be carried over as-is.
        remote_uri = "s3://anything"
        remote_dir = FlyteDirectory(remote_uri)
        literal = transformer.to_literal(ctx, remote_dir, FlyteDirectory, literal_type)
        assert literal.scalar.blob.uri == remote_uri