def main():
    """Run the OpenCV example end to end.

    Creates the `images` repo, builds an `edges` Python pipeline and a
    `montage` pipeline, commits the content found under `relpath("images")`,
    waits for the downstream commits to finish, and copies `/montage.png`
    from `montage/master` into a temporary file whose path is printed.
    """
    # With no arguments the client targets the default host:port
    # (`localhost:30650`). That works for certain environments (e.g. k8s
    # running on docker for mac) and whenever port forwarding is in use.
    # For other setups, pick one of the alternatives:
    #   1) `python_pachyderm.Client.new_in_cluster()` when this script runs
    #      inside the cluster.
    #   2) `python_pachyderm.Client.new_from_pachd_address` to connect via a
    #      pachd address.
    #   3) `python_pachyderm.Client()` with an explicit host and port.
    pachyderm = python_pachyderm.Client()

    # The repo that receives the raw images.
    pachyderm.create_repo("images")

    # A pipeline specifically designed for executing python code; it is the
    # equivalent of the edges pipeline in the standard opencv example.
    edges_input = python_pachyderm.Input(
        pfs=python_pachyderm.PFSInput(glob="/*", repo="images"),
    )
    python_pachyderm.create_python_pipeline(
        pachyderm,
        relpath("edges"),
        input=edges_input,
    )

    # The montage pipeline: the shell command is fed to `sh` on stdin and
    # reads from the cross of the `images` and `edges` repos.
    montage_transform = python_pachyderm.Transform(
        cmd=["sh"],
        image="v4tech/imagemagick",
        stdin=[
            "montage -shadow -background SkyBlue -geometry 300x300+2+2 $(find /pfs -type f | sort) /pfs/out/montage.png"
        ],
    )
    montage_input = python_pachyderm.Input(
        cross=[
            python_pachyderm.Input(
                pfs=python_pachyderm.PFSInput(glob="/", repo="images"),
            ),
            python_pachyderm.Input(
                pfs=python_pachyderm.PFSInput(glob="/", repo="edges"),
            ),
        ],
    )
    pachyderm.create_pipeline(
        "montage",
        transform=montage_transform,
        input=montage_input,
    )

    with pachyderm.commit("images", "master") as images_commit:
        # Recursively insert the content of the images directory. As an
        # alternative, `client.put_file_url` or `client.put_file_bytes`
        # could be used.
        python_pachyderm.put_files(pachyderm, relpath("images"), images_commit, "/")

    # Drain the iterator: this waits for the commit (and its downstream
    # commits) to finish.
    pending = pachyderm.flush_commit([images_commit])
    for _ in pending:
        pass

    # Fetch the montage and copy it to a temporary file. `delete=False`
    # keeps the file on disk after the `with` block closes it.
    montage_stream = pachyderm.get_file("montage/master", "/montage.png")
    with tempfile.NamedTemporaryFile(suffix="montage.png", delete=False) as out_file:
        shutil.copyfileobj(montage_stream, out_file)
        print("montage written to {}".format(out_file.name))
def test_create_python_pipeline_bad_path():
    """Check that `create_python_pipeline` raises when given the path "./foobar2000".

    The test seeds a fresh repo with one file and then expects pipeline
    creation from that path to raise.
    """
    pachyderm = python_pachyderm.Client()
    source_repo = util.create_test_repo(pachyderm, "create_python_pipeline_bad_path")

    # Seed the repo with some sample data.
    with pachyderm.commit(source_repo, "master") as seed_commit:
        pachyderm.put_file_bytes(seed_commit, "file.dat", b"DATA")

    # Creating a pipeline from a file that does not exist should fail.
    # Everything below stays inside the `raises` block so the set of
    # operations it guards is the same as before.
    with pytest.raises(Exception):
        repo_input = python_pachyderm.Input(
            pfs=python_pachyderm.PFSInput(glob="/", repo=source_repo),
        )
        python_pachyderm.create_python_pipeline(
            pachyderm,
            "./foobar2000",
            input=repo_input,
        )
def main():
    """Set up the spout example: a `producer` pipeline and a Python `consumer` pipeline.

    The producer runs `python3 /app/main.py` in the
    `ysimonson/pachyderm_spout_producer` image and is configured with a
    `Spout`. The consumer is built from `relpath("consumer")` and reads the
    `producer` repo.
    """
    pachyderm = python_pachyderm.Client()

    # Producer: configured with a Spout rather than an input.
    producer_transform = python_pachyderm.Transform(
        cmd=["python3", "/app/main.py"],
        image="ysimonson/pachyderm_spout_producer",
    )
    producer_spout = python_pachyderm.Spout(
        overwrite=False,
        marker="marker",
    )
    pachyderm.create_pipeline(
        pipeline_name="producer",
        transform=producer_transform,
        spout=producer_spout,
    )

    # Consumer: a python pipeline fed by the producer's output repo.
    producer_output = python_pachyderm.Input(
        pfs=python_pachyderm.PFSInput(glob="/", repo="producer"),
    )
    python_pachyderm.create_python_pipeline(
        pachyderm,
        relpath("consumer"),
        input=producer_output,
    )
def test_create_python_pipeline():
    """Create a Python pipeline, update it, and verify its files after each step.

    Step 1 builds the pipeline from a directory holding `main.py` and
    `requirements.txt`. Step 2 updates it from a directory holding only
    `main.py`. After each step the source, build and output file listings
    are checked and the content of the output `file.dat` is asserted.
    """
    pachyderm = python_pachyderm.Client()
    input_repo = util.create_test_repo(pachyderm, "create_python_pipeline")
    repo_input = python_pachyderm.Input(
        pfs=python_pachyderm.PFSInput(glob="/", repo=input_repo),
    )
    pipeline = util.test_repo_name("create_python_pipeline", prefix="pipeline")

    # Seed the input repo with some sample data.
    with pachyderm.commit(input_repo, "master") as seed_commit:
        pachyderm.put_file_bytes(seed_commit, "file.dat", b"DATA")

    def verify_pipeline_files(extra_source_files, extra_build_files):
        """Flush the pipeline's commits, then check the expected file listings."""
        pipeline_commits = [c.commit for c in pachyderm.list_commit(pipeline)]
        # Exhaust the iterator so every listed commit has been flushed.
        for _ in pachyderm.flush_commit(pipeline_commits):
            pass

        expectations = (
            (
                "{}_build/source".format(pipeline),
                {"/", "/main.py", *extra_source_files},
            ),
            (
                "{}_build/build".format(pipeline),
                {"/", "/run.sh", *extra_build_files},
            ),
            (
                "{}/master".format(pipeline),
                {"/", "/file.dat"},
            ),
        )
        for commit_ref, expected_paths in expectations:
            check_expected_files(pachyderm, commit_ref, expected_paths)

    def write_text(directory, filename, contents):
        """Write `contents` to `filename` inside `directory`."""
        with open(os.path.join(directory, filename), "w") as handle:
            handle.write(contents)

    def read_output():
        """Return the chunks of `file.dat` from the pipeline's master branch."""
        return list(pachyderm.get_file("{}/master".format(pipeline), "file.dat"))

    # 1) create a pipeline from a directory with a main.py and requirements.txt
    with tempfile.TemporaryDirectory(suffix="python_pachyderm") as src_dir:
        write_text(src_dir, "main.py", TEST_LIB_SOURCE.format(input_repo))
        write_text(src_dir, "requirements.txt", TEST_REQUIREMENTS_SOURCE)

        python_pachyderm.create_python_pipeline(
            pachyderm,
            src_dir,
            input=repo_input,
            pipeline_name=pipeline,
        )

    verify_pipeline_files(
        ["/requirements.txt"],
        ["/leftpad-0.1.2-py3-none-any.whl", "/termcolor-1.1.0-py3-none-any.whl"],
    )
    # NOTE(review): the leading space is presumably produced by the
    # TEST_LIB_SOURCE script -- confirm against that template.
    first_output = read_output()
    assert first_output == [b' DATA']

    # 2) update pipeline from a directory without a requirements.txt
    with tempfile.TemporaryDirectory(suffix="python_pachyderm") as src_dir:
        write_text(src_dir, "main.py", TEST_STDLIB_SOURCE.format(input_repo))

        python_pachyderm.create_python_pipeline(
            pachyderm,
            src_dir,
            input=repo_input,
            pipeline_name=pipeline,
            update=True,
        )

    # With no requirements.txt, no extra source or build files are expected.
    verify_pipeline_files([], [])
    second_output = read_output()
    assert second_output == [b'DATA']