def test_docker_agent_deploy_flow_run_config(api, image_on_run_config):
    """Deploy a flow run carrying a ``DockerRun`` and check the container request.

    When ``image_on_run_config`` is truthy the image is set on the run config
    and storage is ``Local``; otherwise the image comes from ``Docker``
    storage. In both cases the run config's env must reach ``create_container``.
    """
    run_env = {"TESTING": "VALUE"}
    if not image_on_run_config:
        storage = Docker(
            registry_url="testing", image_name="on-storage", image_tag="tag"
        )
        image = "testing/on-storage:tag"
        run = DockerRun(env=run_env)
    else:
        storage = Local()
        image = "on-run-config"
        run = DockerRun(image=image, env=run_env)

    agent = DockerAgent()
    flow_payload = GraphQLResult(
        {
            "id": "foo",
            "storage": storage.serialize(),
            "run_config": run.serialize(),
            "core_version": "0.13.11",
        }
    )
    flow_run = GraphQLResult({"flow": flow_payload, "id": "id", "name": "name"})
    agent.deploy_flow(flow_run=flow_run)

    assert api.create_container.called
    # ``call_args`` unpacks into (positional args, keyword args).
    container_args, container_kwargs = api.create_container.call_args
    assert container_args[0] == image
    assert container_kwargs["environment"]["TESTING"] == "VALUE"
def test_docker_agent_deploy_flow_run_config(api, run_kind, has_docker_storage):
    """Deploy a flow run and verify the image, environment and host config used.

    ``run_kind`` selects the run config: ``"docker"`` builds a ``DockerRun``,
    ``"missing"`` sends no run config, and any other value uses
    ``UniversalRun``. ``has_docker_storage`` switches between ``Docker`` and
    ``Local`` storage, which changes the expected image.
    """
    uses_docker_run = run_kind == "docker"

    if not has_docker_storage:
        storage = Local()
        image = "on-run-config" if uses_docker_run else "prefecthq/prefect:0.13.11"
    else:
        storage = Docker(
            registry_url="testing", image_name="on-storage", image_tag="tag"
        )
        image = "testing/on-storage:tag"

    if uses_docker_run:
        env = {"TESTING": "VALUE"}
        exp_host_config = {
            "auto_remove": False,
            "extra_hosts": {"host.docker.internal": "host-gateway"},
            "shm_size": "128m",
        }
        run = DockerRun(
            image=image,
            env=env,
            host_config={"auto_remove": False, "shm_size": "128m"},
        )
    else:
        env = {}
        exp_host_config = {
            "auto_remove": True,
            "extra_hosts": {"host.docker.internal": "host-gateway"},
        }
        run = UniversalRun() if run_kind != "missing" else None

    agent = DockerAgent()
    flow_payload = GraphQLResult(
        {
            "id": "foo",
            "name": "flow-name",
            "storage": storage.serialize(),
            "core_version": "0.13.11",
        }
    )
    agent.deploy_flow(
        flow_run=GraphQLResult(
            {
                "flow": flow_payload,
                "run_config": run.serialize() if run else None,
                "id": "id",
                "name": "name",
            }
        )
    )

    assert api.create_container.called
    container_args, container_kwargs = api.create_container.call_args
    assert container_args[0] == image

    # Only the keys this test cares about are compared; extra keys are allowed.
    res_env = container_kwargs["environment"]
    assert {key: res_env[key] for key in env} == env

    res_host_config = api.create_host_config.call_args[1]
    assert {key: res_host_config[key] for key in exp_host_config} == exp_host_config
def test_no_args():
    """A ``DockerRun`` built without arguments has empty labels and ``None`` elsewhere."""
    config = DockerRun()
    for attribute in ("env", "image", "ports", "host_config"):
        assert getattr(config, attribute) is None
    assert config.labels == set()
def test_get_flow_image_run_config():
    """``get_flow_image`` returns the image set on the flow's ``DockerRun``."""
    expected_image = "repo/name:tag"
    run_config = DockerRun(image=expected_image)
    flow = Flow("test", run_config=run_config, storage=Local())
    assert get_flow_image(flow=flow) == expected_image
def test_api_url_can_be_overridden_with_run_config(api):
    """The run config's ``PREFECT__CLOUD__API`` wins over the agent-level value."""
    agent = DockerAgent(env_vars={"PREFECT__CLOUD__API": "FOO"})
    run = DockerRun(env={"PREFECT__CLOUD__API": "BAR"})

    flow_run = GraphQLResult(
        {
            "id": "id",
            "name": "name",
            "flow": {"id": "foo"},
            "run_config": run.serialize(),
        }
    )
    populated = agent.populate_env_vars(flow_run, "test-image", run_config=run)

    assert populated["PREFECT__CLOUD__API"] == "BAR"
def configure_docker():
    """Point the module-level ``flow`` at Docker storage and a Docker run config.

    This file is copied into the image under ``/flow/`` and registered as the
    script the flow is loaded from.
    """
    # Using Docker
    this_file = Path(__file__)
    # The same in-image location is used both as the copy target and the flow path.
    path_in_image = f"/flow/{this_file.name}"

    flow.storage = Docker(
        python_dependencies=["git+https://github.com/steph-ben/datafetch.git"],
        stored_as_script=True,
        path=path_in_image,
        files={this_file.absolute(): path_in_image},
        build_kwargs={'nocache': False},
    )
    flow.run_config = DockerRun()
def test_all_args(tmpdir):
    """``DockerRun`` stores the env, image and labels it is constructed with.

    Labels are passed as a list and compared as a set. ``tmpdir`` is kept in
    the signature but is not used by the test body.
    """
    config = DockerRun(
        env={"hello": "world"},
        image="testing",
        labels=["a", "b"],
    )
    assert config.env == {"hello": "world"}
    assert config.image == "testing"
    assert config.labels == {"a", "b"}
def test_populate_env_vars_from_run_config(api):
    """Run-config env entries are merged over the agent's own ``env_vars``.

    ``KEY1`` is only set on the agent, ``KEY2`` is set on both (the run config
    value must win), and the logging level is only set on the run config.
    """
    agent = DockerAgent(env_vars={"KEY1": "VAL1", "KEY2": "VAL2"})
    run = DockerRun(env={"KEY2": "OVERRIDE", "PREFECT__LOGGING__LEVEL": "TEST"})

    flow_run = GraphQLResult(
        {
            "id": "id",
            "name": "name",
            "flow": {"id": "foo", "run_config": run.serialize()},
        }
    )
    env_vars = agent.populate_env_vars(flow_run, run)

    expected = {
        "KEY1": "VAL1",
        "KEY2": "OVERRIDE",
        "PREFECT__LOGGING__LEVEL": "TEST",
    }
    for key, value in expected.items():
        assert env_vars[key] == value
def test_docker_agent_deploy_flow_run_config(api, run_kind, has_docker_storage):
    """Deploy a flow run and verify the image and environment passed to Docker.

    ``run_kind`` selects the run config: ``"docker"`` builds a ``DockerRun``,
    ``"missing"`` sends no run config, and any other value uses
    ``UniversalRun``. ``has_docker_storage`` switches between ``Docker`` and
    ``Local`` storage, which changes the expected image.
    """
    uses_docker_run = run_kind == "docker"

    if not has_docker_storage:
        storage = Local()
        image = "on-run-config" if uses_docker_run else "prefecthq/prefect:0.13.11"
    else:
        storage = Docker(
            registry_url="testing", image_name="on-storage", image_tag="tag"
        )
        image = "testing/on-storage:tag"

    if uses_docker_run:
        env = {"TESTING": "VALUE"}
        run = DockerRun(image=image, env=env)
    else:
        env = {}
        run = UniversalRun() if run_kind != "missing" else None

    agent = DockerAgent()
    flow_payload = GraphQLResult(
        {
            "id": "foo",
            "name": "flow-name",
            "storage": storage.serialize(),
            "core_version": "0.13.11",
        }
    )
    agent.deploy_flow(
        flow_run=GraphQLResult(
            {
                "flow": flow_payload,
                "run_config": run.serialize() if run else None,
                "id": "id",
                "name": "name",
            }
        )
    )

    assert api.create_container.called
    container_args, container_kwargs = api.create_container.call_args
    assert container_args[0] == image

    # Only the expected keys are compared; the agent may add further variables.
    res_env = container_kwargs["environment"]
    assert {key: res_env[key] for key in env} == env
def test_prefect_logging_level_override_logic(
    config, agent_env_vars, run_config_env_vars, expected_logging_level, api
):
    """Check which source supplies ``PREFECT__LOGGING__LEVEL`` for the flow run.

    The temporary ``config``, the agent's ``env_vars`` and the run config's
    ``env`` are all supplied by the caller; the populated environment must
    contain ``expected_logging_level``.
    """
    with set_temporary_config(config):
        agent = DockerAgent(env_vars=agent_env_vars)
        run = DockerRun(env=run_config_env_vars)
        flow_run = GraphQLResult(
            {
                "id": "id",
                "name": "name",
                "flow": {"id": "foo"},
                "run_config": run.serialize(),
            }
        )
        populated = agent.populate_env_vars(flow_run, "test-image", run_config=run)
        assert populated["PREFECT__LOGGING__LEVEL"] == expected_logging_level
def test_all_args(tmpdir):
    """``DockerRun`` stores every argument it is constructed with.

    Labels are passed as a list and compared as a set. ``tmpdir`` is kept in
    the signature but is not used by the test body.
    """
    config = DockerRun(
        env={"hello": "world"},
        image="testing",
        labels=["a", "b"],
        ports=[12001],
        host_config={"host": "config"},
    )
    assert config.env == {"hello": "world"}
    assert config.image == "testing"
    assert config.labels == {"a", "b"}
    assert config.ports == [12001]
    assert config.host_config == {"host": "config"}
def test_docker_agent_networks_as_modes_can_be_overriden_by_run_config(
    api, network
):
    """A ``network_mode`` in the run config's host config is passed to Docker.

    The agent is created with ``networks=[network]``; the test checks that the
    networking config is still built for that network while the host config
    receives the run config's ``network_mode``.
    """
    api.create_networking_config.return_value = {network: "config1"}
    api.create_endpoint_config.return_value = "endpoint-config"

    agent = DockerAgent(networks=[network])
    storage = Docker(registry_url="test", image_name="name", image_tag="tag")
    run = DockerRun(host_config={"network_mode": "foobar"})
    flow_payload = GraphQLResult(
        {
            "id": "foo",
            "name": "flow-name",
            "storage": storage.serialize(),
            "core_version": "0.13.0",
        }
    )
    agent.deploy_flow(
        flow_run=GraphQLResult(
            {
                "flow": flow_payload,
                "run_config": run.serialize(),
                "id": "id",
                "name": "name",
            }
        )
    )

    assert network in agent.networks
    api.create_networking_config.assert_called_once_with(
        {network: "endpoint-config"}
    )

    # Index 1 of ``call_args`` holds the keyword arguments of the last call.
    container_create_kwargs = api.create_container.call_args[1]
    assert container_create_kwargs["networking_config"] == {network: "config1"}

    host_config_kwargs = api.create_host_config.call_args[1]
    assert host_config_kwargs["network_mode"] == "foobar"
str(Path(__file__).parent.parent.resolve()) / Path("data/event_data.csv"): "/data/event_data.csv", }, env_vars={ # append modules directory to PYTHONPATH "PYTHONPATH": "$PYTHONPATH:modules/" }, python_dependencies=[ "python-dotenv", "boto3", "botocore", ], ignore_healthchecks=True, # only an extreme poweruser should use this ^ ) run_config = DockerRun( env={"sample_key": "sample_value"}, labels=["docker"], ) with Flow( "Upload to S3", storage=storage, run_config=run_config ) as flow: files_to_download = Parameter( name="File List", default=["data/test_data.csv", "data/user_data.csv", "data/event_data.csv"] ) conn = connect_to_s3() upload_to_s3.map( s3_client=unmapped(conn), file_path=create_filepath.map(files_to_download)
task1 = task_1() task2 = task_2() task3 = task_3() task2.set_upstream(task1) task3.set_upstream(task2) flow2.register(project_name="developer-flows") with Flow( "Staging Environment ML Training", storage=GitHub( repo="kmoonwright/utility_flows", path="enterprise_demo/filler_flows.py", access_token_secret="GITHUB_ACCESS_TOKEN" ), # schedule=Schedule(clocks=[IntervalClock(timedelta(minutes=2))]), run_config=DockerRun(labels=["staging"]) ) as flow3: task1 = task_1() task2 = task_2() task3 = task_3() task2.set_upstream(task1) task3.set_upstream(task2) flow3.register(project_name="staging-flows") with Flow( "Production Environment Pipeline", storage=GitHub( repo="kmoonwright/utility_flows", path="enterprise_demo/filler_flows.py", access_token_secret="GITHUB_ACCESS_TOKEN" ),
print("Here's your data: {}".format(data)) # Some configuration is required, see https://docs.prefect.io/orchestration/flow_config/overview.html with Flow( "ETL", storage=GitHub( repo="dylanbhughes/pgr_examples_2", path="my_flow.py", secrets=["GITHUB_ACCESS_TOKEN"], ref=os.environ["PREFECT_FLOW_BRANCH_NAME"], ), run_config=DockerRun( image="prefecthq/prefect:latest", labels=[os.environ["PREFECT_FLOW_LABEL"]], env={ "PREFECT_FLOW_BRANCH_NAME": os.environ["PREFECT_FLOW_BRANCH_NAME"], "PREFECT_FLOW_LABEL": os.environ["PREFECT_FLOW_LABEL"], "PREFECT_PROJECT_NAME": os.environ["PREFECT_PROJECT_NAME"], }, ), executor=LocalDaskExecutor(scheduler="threads", num_workers=3), ) as flow: e = extract() t = transform.map(e) l = load(t) if __name__ == "__main__": flow.register(os.environ["PREFECT_PROJECT_NAME"])
), ], ) def test_serialize_local_run(config): msg = RunConfigSchema().dump(config) config2 = RunConfigSchema().load(msg) assert sorted(config.labels) == sorted(config2.labels) fields = ["env", "working_dir"] for field in fields: assert getattr(config, field) == getattr(config2, field) @pytest.mark.parametrize( "config", [ DockerRun(), DockerRun(env={"test": "foo"}, image="testing", labels=["a", "b"], ports=[12001]), ], ) def test_serialize_docker_run(config): msg = RunConfigSchema().dump(config) config2 = RunConfigSchema().load(msg) assert sorted(config.labels) == sorted(config2.labels) fields = ["env", "image", "ports"] for field in fields: assert getattr(config, field) == getattr(config2, field)
import numpy as np

from prefect import Flow, task
from prefect.run_configs import DockerRun
from prefect.storage import Docker


@task(log_stdout=True)
def use_numpy():
    """Return the integers 0 through 9 as a NumPy array."""
    return np.arange(10)


with Flow("use_numpy") as flow:
    nptask = use_numpy()

# Docker storage is given numpy as a Python dependency; the run uses a default DockerRun.
flow.storage = Docker(python_dependencies=["numpy"])
flow.run_config = DockerRun()

# flow.register(project_name="dockerflow")
from minimal_repo import all_flows

LOCAL_CONTAINER_TAG = "local"
IMAGE_NAME = "prefect_base"
DEV_PROJECT = "dev"


def log_stdout_stream(stream):
    """Log the ``"stream"`` field of every JSON line yielded by *stream*.

    Each item is decoded as UTF-8 and parsed as JSON. Items that cannot be
    decoded or parsed, or that carry no ``"stream"`` entry, are skipped so
    that logging stays best-effort.
    """
    for raw_line in stream:
        try:
            message = json.loads(raw_line.decode("utf8"))["stream"]
        except (AttributeError, ValueError, KeyError, TypeError):
            # Narrower than a bare ``except`` so KeyboardInterrupt and
            # SystemExit still propagate. ValueError covers both
            # UnicodeDecodeError and json.JSONDecodeError.
            continue
        logging.info(message)


# Build the image from the current directory and log the build output.
client = docker.from_env().api
stream = client.build(path=".", tag=f"{IMAGE_NAME}:{LOCAL_CONTAINER_TAG}")
log_stdout_stream(stream)

# Run every flow in the freshly built image and register it in the dev project.
for flow_name, flow in all_flows.items():
    flow.run_config = DockerRun(
        image=f"{IMAGE_NAME}:{LOCAL_CONTAINER_TAG}",
        env={"PREFECT__FLOWS__CHECKPOINTING": "true"},
    )
    flow.register(project_name=DEV_PROJECT, labels=[os.environ["SHARED_LABEL"]])
def say_bye():
    """Log ``"Bye!"`` at info level with the logger taken from the Prefect context."""
    prefect.context.get("logger").info("Bye!")


STORAGE = Git(
    repo_host="github.com",
    repo="dyvenia/viadot",
    flow_path="viadot/examples/hello_world.py",
    branch_name="0.2.3",
    # name of the Prefect secret with the GitHub token
    git_token_secret_name="github_token",
)
RUN_CONFIG = DockerRun(
    image="prefecthq/prefect",
    env={"SOME_VAR": "value"},
    labels=["dev"],
)

with Flow("Hello, world!", storage=STORAGE, run_config=RUN_CONFIG) as flow:
    hello = say_hello()
    print_answer = show_answer()
    bye = say_bye()

    # Chain the tasks: hello -> print_answer -> bye.
    print_answer.set_upstream(hello, flow=flow)
    bye.set_upstream(print_answer, flow=flow)


if __name__ == "__main__":
    # flow.run()  # run locally
    flow.register(project_name="dev")  # deploy
), ], ) def test_serialize_local_run(config): msg = RunConfigSchema().dump(config) config2 = RunConfigSchema().load(msg) assert sorted(config.labels) == sorted(config2.labels) fields = ["env", "working_dir"] for field in fields: assert getattr(config, field) == getattr(config2, field) @pytest.mark.parametrize( "config", [ DockerRun(), DockerRun( env={"test": "foo"}, image="testing", labels=["a", "b"], ), ], ) def test_serialize_docker_run(config): msg = RunConfigSchema().dump(config) config2 = RunConfigSchema().load(msg) assert sorted(config.labels) == sorted(config2.labels) fields = ["env", "image"] for field in fields: assert getattr(config, field) == getattr(config2, field)
return response.text @task def load_file(filename: str) -> str: with open(filename, "r", encoding="utf-8") as file: return file.read() @task def printa(stuff): print(stuff) task = ShellTask(return_all=True) with Flow("shell") as f: translation_server_url = "http://localhost:1969" bibtex = load_file("./workspace/aksw-short.bib") zotero = import_translation(bibtex, translation_server_url) rdf = export_translation(zotero, translation_server_url, "rdf_bibliontology") turtle = task(command="rapper - -o turtle -I www.test.com > tests.ttl") printa(turtle) f.run_config = DockerRun(image="prefecthq/prefect") f.register(project_name="tutoriala") # Configure extra environment variables for this flow, # and set a custom image # f.run()
@task(name="Create DF")
def create_df(data):
    """Placeholder task: sleeps for 8 seconds and returns ``None``."""
    time.sleep(8)


@task(name="Connect to Snowflake")
def connect_to_snowflake():
    """Placeholder task: sleeps for 4 seconds and returns ``None``."""
    time.sleep(4)


@task(name="Upload to Snowflake")
def upload_to_snowflake(client, data):
    """Placeholder task: sleeps for 8 seconds and returns ``None``."""
    time.sleep(8)


with Flow(
    "ETL PostgreSQL to Snowflake",
    storage=GitHub(
        repo="kmoonwright/utility_flows",
        path="enterprise_demo/postgres_to_snowflake.py",
        access_token_secret="GITHUB_ACCESS_TOKEN",
    ),
    # schedule=Schedule(clocks=[IntervalClock(timedelta(minutes=2))]),
    run_config=DockerRun(
        image="prefecthq/prefect:latest",
        labels=["production"],
    ),
) as flow:
    postgres_table = Parameter(name="Table Name Input", default="User")

    # Extract from Postgres.
    pg_client = connect_to_postgres()
    query = execute_query(pg_client, postgres_table)

    # Transform, then load into Snowflake.
    df = create_df(query)
    sf_client = connect_to_snowflake()
    update_warehouse = upload_to_snowflake(sf_client, df)

flow.register(project_name="production-flows")
# Build one Prefect flow per model run (00 and 12) from the shared settings
flow_nwp_00 = create_flow_download(run=0, **settings)
flow_nwp_12 = create_flow_download(run=12, **settings)
flow_list = [flow_nwp_00, flow_nwp_12]

for flow in flow_list:
    # Storage: how this code reaches the Prefect agents.
    # Here the flow file is fetched from GitHub at a configurable ref.
    repo_ref = os.getenv("DATAFETCH__STORAGE__REPO__REF", default="master")
    print(f"Registering Using GitHub repo ref {repo_ref}")
    flow.storage = GitHub(
        repo="steph-ben/datafetch-config",  # name of repo
        ref=repo_ref,
        path="projects/gfs/fetch.py",  # location of flow file in repo
        secrets=["GITHUB_ACCESS_TOKEN"],  # name of personal access token secret
    )

    # Run config: how this code is executed.
    # Here the flow runs inside a Docker container.
    flow.run_config = DockerRun(image="stephben/datafetch")

    # Result: where task statuses are stored.
    # See:
    #  - https://docs.prefect.io/core/concepts/results.html
    #  - https://docs.prefect.io/core/advanced_tutorials/using-results.html
    flow.result = PrefectResult()


if __name__ == "__main__":
    show_prefect_cli_helper(flow_list=flow_list)
@task
def build_df(star_counts):
    """Join the per-repo star counts column-wise, filling missing values with 0."""
    combined = pd.concat(star_counts, axis=1)
    return combined.fillna(0)


@task
def make_plot(df):
    """Plot the star counts as a line chart and publish the SVG as a markdown artifact."""
    axes = df.plot.line(title="GitHub Stars")
    axes.set(xlabel="Date", ylabel="Stars")

    # Render into memory so the SVG text can be handed to create_markdown.
    svg_buffer = BytesIO()
    plt.savefig(svg_buffer, format="svg")
    create_markdown(svg_buffer.getvalue().decode("utf-8"))


with Flow("github_stars") as flow:
    repos = Parameter("repos", default=["prefecthq/prefect", "dagster-io/dagster"])
    token = PrefectSecret("GITHUB_API_TOKEN")
    # One get_stars call per repo; the token is shared across the mapped calls.
    star_counts = get_stars.map(repos, token=unmapped(token))
    df = build_df(star_counts)
    make_plot(df)

flow.storage = GitHub("jcrist/prefect-github-example", "flows/github_stars.py")
flow.run_config = DockerRun(image="jcrist/prefect-github-example")
flow_name="ETL - Docker", project_name="PGR Examples", ) local = StartFlowRun( flow_name="ETL - Local", project_name="PGR Examples", ) with Flow( "Orchestrator Flow", storage=GitHub( repo="dylanbhughes/pgr_examples_3", path="orchestrator.py", secrets=["GITHUB_ACCESS_TOKEN"], ), run_config=DockerRun(image="prefecthq/prefect:latest", labels=["pgr docker"]), executor=LocalDaskExecutor(scheduler="threads", num_workers=3), ) as flow: input_string = Parameter(name="input_string", required=True) manual_switch = Parameter(name="cloud_or_local", required=False, default=None) cloud_or_local_result = run_locally_or_in_cloud( input_string=input_string, manual_switch=manual_switch) switch( cloud_or_local_result, dict( cloud=cloud(parameters={"input_string": input_string}), local=local(parameters={"input_string": input_string}), ),