def test_file_handling_local_file_gets_force_no_copy():
    """A local ``FlyteFile`` built with ``remote_path=False`` is not uploaded.

    The mock remote folder must stay empty after the workflow runs, and the
    workflow output must still point at the original local path.
    """

    @task
    def t1() -> FlyteFile:
        # This test module doubles as the payload because it certainly exists.
        return FlyteFile(__file__, remote_path=False)

    @workflow
    def my_wf() -> FlyteFile:
        return t1()

    current_ctx = context_manager.FlyteContext.current_context()
    sandbox_dir = current_ctx.file_access.get_random_local_directory()
    mock_remote_dir = os.path.join(sandbox_dir, "mock_remote")
    provider = FileAccessProvider(local_sandbox_dir=sandbox_dir)

    with current_ctx.new_file_access_context(file_access_provider=provider):
        # Expect exactly two entries: the mock_remote folder and the local folder.
        assert len(os.listdir(sandbox_dir)) == 2
        # Nothing has been written to the mock remote folder yet.
        assert os.listdir(mock_remote_dir) == []

        workflow_output = my_wf()

        # Copying was explicitly disabled, so the mock remote folder is still empty.
        assert os.listdir(mock_remote_dir) == []

        # Flyte does not presume to handle a uri that looks like a raw path,
        # so the returned path is the original one.
        assert workflow_output.path == __file__
def test_file_handling_local_file_gets_copied():
    """A bare local path returned as a ``FlyteFile`` is copied to the mock remote.

    After the workflow runs, the mock remote folder holds exactly one file and
    the output path lives under the sandbox directory.
    """

    @task
    def t1() -> FlyteFile:
        # This test module doubles as the payload because it certainly exists.
        return __file__

    @workflow
    def my_wf() -> FlyteFile:
        return t1()

    current_ctx = context_manager.FlyteContext.current_context()
    sandbox_dir = current_ctx.file_access.get_random_local_directory()
    mock_remote_dir = os.path.join(sandbox_dir, "mock_remote")
    provider = FileAccessProvider(local_sandbox_dir=sandbox_dir)

    with current_ctx.new_file_access_context(file_access_provider=provider):
        # Expect exactly two entries: the mock_remote folder and the local folder.
        assert len(os.listdir(sandbox_dir)) == 2
        # Nothing has been written to the mock remote folder yet.
        assert os.listdir(mock_remote_dir) == []

        workflow_output = my_wf()

        # Running the workflow copies this test file to the mock remote location.
        uploaded = os.listdir(mock_remote_dir)
        assert len(uploaded) == 1

        # The output refers to a location inside the sandbox, i.e. the copy.
        assert workflow_output.path.startswith(sandbox_dir)
def test_engine_file_output():
    """Round-trip a local file through the type engine as a single blob.

    The file is converted to a literal with ``TypeEngine.to_literal``, read
    back directly from the literal's blob uri (the "remote" is a local
    folder here), and finally converted back to a native value with
    ``TypeEngine.to_python_value`` whose content is checked again.

    Isolated directories are used instead of fixed ``/tmp`` paths so that
    repeated or parallel runs cannot interfere with each other.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    basic_blob_type = _core_types.BlobType(
        format="",
        dimensionality=_core_types.BlobType.BlobDimensionality.SINGLE,
    )

    # Same sandbox pattern as the other tests in this module: a fresh random
    # directory rather than a hard-coded shared location.
    sandbox_dir = context_manager.FlyteContext.current_context().file_access.get_random_local_directory()
    fs = FileAccessProvider(local_sandbox_dir=sandbox_dir)

    # The source file lives in its own temporary directory, i.e. outside the
    # sandbox above, and is removed automatically when the test finishes.
    with tempfile.TemporaryDirectory() as source_dir:
        with context_manager.FlyteContext.current_context().new_file_access_context(
            file_access_provider=fs
        ) as ctx:
            test_file_location = os.path.join(source_dir, "sample.txt")
            with open(test_file_location, "w") as fh:
                fh.write("Hello World\n")

            lit = TypeEngine.to_literal(ctx, test_file_location, os.PathLike, LiteralType(blob=basic_blob_type))

            # Since we're using local as remote, we should be able to just read
            # the file from the 'remote' location.
            with open(lit.scalar.blob.uri, "r") as fh:
                assert fh.readline() == "Hello World\n"

            # We should also be able to turn the literal back into a regular
            # python native value.
            redownloaded_local_file_location = TypeEngine.to_python_value(ctx, lit, os.PathLike)
            with open(redownloaded_local_file_location, "r") as fh:
                assert fh.readline() == "Hello World\n"
def test_file_handling_remote_file_handling_flyte_file():
    """A ``FlyteFile`` wrapping an https url is downloaded lazily.

    The workflow output maps to a local path inside the sandbox while keeping
    the url as ``remote_source``. The file itself only appears on disk once
    the output is opened.
    """
    SAMPLE_DATA = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"

    @task
    def t1() -> FlyteFile:
        # The remote address is handed back wrapped in a FlyteFile object.
        return FlyteFile(SAMPLE_DATA)

    @workflow
    def my_wf() -> FlyteFile:
        return t1()

    # A freshly created random directory, known to be empty.
    sandbox_dir = context_manager.FlyteContext.current_context().file_access.get_random_local_directory()
    mock_remote_dir = os.path.join(sandbox_dir, "mock_remote")
    # Constructing a FileAccessProvider adds two folders to the random directory.
    provider = FileAccessProvider(local_sandbox_dir=sandbox_dir)
    outer_ctx = context_manager.FlyteContext.current_context()

    with context_manager.FlyteContextManager.with_context(outer_ctx.with_file_access(provider)):
        # Expect exactly two entries: the mock_remote folder and the local folder.
        assert len(os.listdir(sandbox_dir)) == 2
        # Nothing has been written to the mock remote folder yet.
        assert os.listdir(mock_remote_dir) == []

        workflow_output = my_wf()

        # The output has not been used yet, so the mock remote dir is still empty.
        assert os.listdir(mock_remote_dir) == []

        # The literal returned by t1 carries the web address as its uri. Because
        # that is a remote address, flytekit translates it back into a FlyteFile
        # located on the local drive, without downloading it.
        assert workflow_output.path.startswith(sandbox_dir)
        # The remote source is still the https address.
        assert workflow_output.remote_source == SAMPLE_DATA

        # Running the workflow creates the engine dir and the directory that will
        # hold the file, but the file itself is not downloaded yet.
        local_entries = os.listdir(os.path.join(sandbox_dir, "local_flytekit"))
        # This second layer holds two dirs: a random one generated by the
        # new_execution_context call, and an empty folder created by the FlyteFile
        # transformer's to_python_value function. The latter gets content once the
        # file is opened.
        assert len(local_entries) == 2
        assert not os.path.exists(workflow_output.path)

        # Downloading is lazy, so opening the file is what triggers it.
        with open(workflow_output, "rb"):
            pass
        assert os.path.exists(workflow_output.path)

        # The file name is maintained on download.
        assert str(workflow_output).endswith(os.path.basename(SAMPLE_DATA))
def test_transformer_to_literal_local():
    """Exercise ``FlyteDirToMultipartBlobTransformer.to_literal`` on local input.

    Covers four cases: an empty local directory, a directory holding a single
    file, a non-string primitive (rejected with ``AssertionError``) and a path
    that is a regular file rather than a directory (also rejected with
    ``AssertionError``).
    """
    random_dir = context_manager.FlyteContext.current_context().file_access.get_random_local_directory()
    fs = FileAccessProvider(local_sandbox_dir=random_dir)
    ctx = context_manager.FlyteContext.current_context()
    with context_manager.FlyteContextManager.with_context(ctx.with_file_access(fs)) as ctx:
        # Use a separate directory that we know won't be the same as anything
        # generated by flytekit itself, lest we accidentally try to
        # cp -R /some/folder /some/folder/sub, which causes exceptions.
        p = "/tmp/flyte/test_fd_transformer"

        # Create an empty directory and call to_literal on it.
        if os.path.exists(p):
            shutil.rmtree(p)
        pathlib.Path(p).mkdir(parents=True)

        tf = FlyteDirToMultipartBlobTransformer()
        lt = tf.get_literal_type(FlyteDirectory)
        literal = tf.to_literal(ctx, FlyteDirectory(p), FlyteDirectory, lt)
        assert literal.scalar.blob.uri.startswith(random_dir)

        # Create a directory with one file in it.
        if os.path.exists(p):
            shutil.rmtree(p)
        pathlib.Path(p).mkdir(parents=True)
        with open(os.path.join(p, "xyz"), "w") as fh:
            fh.write("Hello world\n")
        literal = tf.to_literal(ctx, FlyteDirectory(p), FlyteDirectory, lt)

        mock_remote_files = os.listdir(literal.scalar.blob.uri)
        assert mock_remote_files == ["xyz"]

        # The only primitives allowed are strings.
        with pytest.raises(AssertionError):
            tf.to_literal(ctx, 3, FlyteDirectory, lt)

        # Can't use it if it's not a directory. The fixture file is prepared
        # outside of pytest.raises so that only the call under test can
        # satisfy the expected AssertionError.
        file_location = "/tmp/flyte/xyz"
        try:
            pathlib.Path(file_location).unlink()
        except OSError:
            # Best-effort cleanup: the file may simply not exist yet.
            pass
        with open(file_location, "w") as fh:
            fh.write("hello world\n")

        with pytest.raises(AssertionError):
            tf.to_literal(ctx, FlyteDirectory(file_location), FlyteDirectory, lt)
def test_transformer_to_literal_remote():
    """A ``FlyteDirectory`` with a remote uri keeps that uri in its literal."""
    current_ctx = context_manager.FlyteContext.current_context()
    sandbox_dir = current_ctx.file_access.get_random_local_directory()
    provider = FileAccessProvider(local_sandbox_dir=sandbox_dir)

    with current_ctx.new_file_access_context(file_access_provider=provider) as ctx:
        # A dedicated directory that cannot coincide with anything flytekit
        # generates itself; otherwise we might end up attempting
        # cp -R /some/folder /some/folder/sub, which raises.
        scratch_dir = "/tmp/flyte/test_fd_transformer"

        # Start from a freshly created, empty directory.
        if os.path.exists(scratch_dir):
            shutil.rmtree(scratch_dir)
        pathlib.Path(scratch_dir).mkdir(parents=True)

        transformer = FlyteDirToMultipartBlobTransformer()
        literal_type = transformer.get_literal_type(FlyteDirectory)

        # Remote directories should be copied as is.
        remote_dir = FlyteDirectory("s3://anything")
        literal = transformer.to_literal(ctx, remote_dir, FlyteDirectory, literal_type)
        assert literal.scalar.blob.uri == "s3://anything"