def test_populate_env_vars_from_run_config(tmpdir):
    agent = LocalAgent(env_vars={"KEY1": "VAL1", "KEY2": "VAL2"})
    working_dir = str(tmpdir)

    run = LocalRun(
        env={"KEY2": "OVERRIDE", "PREFECT__LOGGING__LEVEL": "TEST"},
        working_dir=working_dir,
    )

    env_vars = agent.populate_env_vars(
        GraphQLResult(
            {
                "id": "id",
                "name": "name",
                "flow": {"id": "foo"},
                "run_config": run.serialize(),
            }
        ),
        run,
    )
    assert env_vars["KEY1"] == "VAL1"
    assert env_vars["KEY2"] == "OVERRIDE"
    assert env_vars["PREFECT__LOGGING__LEVEL"] == "TEST"
    assert working_dir in env_vars["PYTHONPATH"]
def test_local_agent_deploy_run_config_missing_working_dir(monkeypatch, tmpdir):
    popen = MagicMock()
    monkeypatch.setattr("prefect.agent.local.agent.Popen", popen)
    working_dir = str(tmpdir.join("missing"))
    agent = LocalAgent()
    with pytest.raises(ValueError, match="nonexistent `working_dir`"):
        agent.deploy_flow(
            flow_run=GraphQLResult(
                {
                    "id": "id",
                    "flow": {
                        "storage": Local().serialize(),
                        "id": "foo",
                        "core_version": "0.13.0",
                    },
                    "run_config": LocalRun(working_dir=working_dir).serialize(),
                },
            )
        )
    assert not popen.called
    assert not agent.processes
def test_generate_job_spec_errors_if_non_kubernetesrun_run_config(self):
    with pytest.raises(
        TypeError,
        match="`run_config` of type `LocalRun`, only `KubernetesRun` is supported",
    ):
        self.agent.generate_job_spec(self.build_flow_run(LocalRun()))
def test_deploy_flow_errors_if_not_ecs_run_config(self):
    with pytest.raises(
        TypeError,
        match="`run_config` of type `LocalRun`, only `ECSRun` is supported",
    ):
        self.deploy_flow(LocalRun())
def test_deploy_flow_errors_if_not_vertex_run_config(self, agent):
    with pytest.raises(
        TypeError,
        match="`run_config` of type `LocalRun`, only `VertexRun` is supported",
    ):
        self.deploy_flow(agent, LocalRun())
def test_docker_agent_deploy_flow_unsupported_run_config(api):
    agent = DockerAgent()
    with pytest.raises(
        TypeError,
        match="`run_config` of type `LocalRun`, only `DockerRun` is supported",
    ):
        agent.deploy_flow(
            flow_run=GraphQLResult(
                {
                    "flow": GraphQLResult(
                        {
                            "storage": Local().serialize(),
                            "id": "foo",
                            "name": "flow-name",
                            "core_version": "0.13.0",
                        }
                    ),
                    "run_config": LocalRun().serialize(),
                    "id": "id",
                    "name": "name",
                    "version": "version",
                }
            )
        )
    assert not api.pull.called
# Parametrization assumed from the `working_dir is not None` branch below;
# the decorator was elided in this excerpt.
@pytest.mark.parametrize("working_dir", [None, "provided"])
def test_local_agent_deploy_run_config_working_dir(monkeypatch, working_dir, tmpdir):
    popen = MagicMock()
    monkeypatch.setattr("prefect.agent.local.agent.Popen", popen)
    if working_dir is not None:
        working_dir = str(tmpdir)
    agent = LocalAgent()
    agent.deploy_flow(
        flow_run=GraphQLResult(
            {
                "id": "id",
                "flow": {
                    "storage": Local().serialize(),
                    "id": "foo",
                    "core_version": "0.13.0",
                },
                "run_config": LocalRun(working_dir=working_dir).serialize(),
            },
        )
    )
    assert popen.called
    assert len(agent.processes) == 1
    assert popen.call_args[1]["cwd"] == working_dir
# Parametrization assumed from the `use_run_config` / `compressed` branches
# below; the decorators were elided in this excerpt.
@pytest.mark.parametrize("use_run_config", [True, False])
@pytest.mark.parametrize("compressed", [True, False])
def test_client_register_flow_id_output(
    patch_post, use_run_config, compressed, monkeypatch, capsys, cloud_api, tmpdir
):
    if compressed:
        response = {
            "data": {
                "project": [{"id": "proj-id"}],
                "create_flow_from_compressed_string": {"id": "long-id"},
                "flow_by_pk": {"flow_group_id": "fg-id"},
            }
        }
    else:
        response = {
            "data": {
                "project": [{"id": "proj-id"}],
                "create_flow": {"id": "long-id"},
                "flow_by_pk": {"flow_group_id": "fg-id"},
            }
        }
    patch_post(response)

    monkeypatch.setattr(
        "prefect.client.Client.get_default_tenant_slug", MagicMock(return_value="tslug")
    )

    with set_temporary_config(
        {
            "cloud.api": "http://my-cloud.foo",
            "cloud.auth_token": "secret_token",
            "backend": "cloud",
        }
    ):
        client = Client()

    labels = ["test1", "test2"]
    storage = Local(tmpdir)
    if use_run_config:
        flow = prefect.Flow(
            name="test", storage=storage, run_config=LocalRun(labels=labels)
        )
        flow.environment = None
    else:
        flow = prefect.Flow(
            name="test", storage=storage, environment=LocalEnvironment(labels=labels)
        )
    flow.result = flow.storage.result

    flow_id = client.register(
        flow,
        project_name="my-default-project",
        compressed=compressed,
        version_group_id=str(uuid.uuid4()),
    )
    assert flow_id == "long-id"

    captured = capsys.readouterr()
    assert "Flow URL: https://cloud.prefect.io/tslug/flow/fg-id\n" in captured.out
    assert f"Labels: {labels}" in captured.out
# Parametrized externally with (config, agent_env_vars, run_config_env_vars,
# expected_logging_level); the decorator was elided in this excerpt.
def test_prefect_logging_level_override_logic(
    config, agent_env_vars, run_config_env_vars, expected_logging_level, tmpdir
):
    with set_temporary_config(config):
        agent = LocalAgent(env_vars=agent_env_vars)
        run = LocalRun(working_dir=str(tmpdir), env=run_config_env_vars)
        env_vars = agent.populate_env_vars(
            GraphQLResult(
                {
                    "id": "id",
                    "name": "name",
                    "flow": {"id": "foo"},
                    "run_config": run.serialize(),
                }
            ),
            run,
        )
        assert env_vars["PREFECT__LOGGING__LEVEL"] == expected_logging_level
def test_all_args(tmpdir):
    working_dir = str(tmpdir)
    config = LocalRun(
        env={"hello": "world"},
        working_dir=working_dir,
        labels=["a", "b"],
    )
    assert config.env == {"hello": "world"}
    assert config.working_dir == working_dir
    assert config.labels == {"a", "b"}
def get_local_run_config() -> LocalRun:
    """
    Return a LocalRun configuration to attach to a flow.

    Returns:
        - prefect.run_configs.LocalRun: The local run configuration to be
          applied to a flow
    """
    return LocalRun(
        working_dir=ROOT_DIR,
        env={
            "PREFECT__USER_CONFIG_PATH": MFP_CONFIG_PATH,
            "PYTHONPATH": PYTHONPATH,
        },
    )
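# For illustration, a minimal sketch of attaching this shared configuration
# to a flow. The `say_hello` task and the flow name are hypothetical, not from
# the source module; only `get_local_run_config` above is real.
from prefect import Flow, task


@task
def say_hello():
    print("hello")


with Flow("example-flow") as flow:  # hypothetical flow
    say_hello()

# Reuse the shared LocalRun configuration defined above.
flow.run_config = get_local_run_config()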
def create_flow() -> Flow:
    """Creates and returns flow object"""
    # Haven't used different executors enough to know the difference
    with Flow(FLOW_NAME, run_config=LocalRun()) as flow:
        country = Parameter("country", default=DEFAULT_COUNTRY)

        # covid_df = extract_whole_covid_data()
        covid_df = extract_covid_data_from_file()
        filtered_covid_df = filter_data(covid_df, country)
        # Only for whole data, not latest
        full_df = extract_full_country_data(filtered_covid_df)

        base_y = extract_label_column(full_df, 'new_cases')
        cleaned_y = clean_NaN(base_y)
        # print_head(cleaned_y)

        base_X = remove_overfit_columns(full_df, DROP_COLUMNS)
        cleaned_X = clean_NaN(base_X)
        optimal_X = optimize_feature_columns(cleaned_X, 10, cleaned_y)
        scaled_X = scale_data(optimal_X)
        # print_head(scaled_X)

        train_test_data = split_data(scaled_X, cleaned_y)
        check_data(train_test_data)
        # check_for_infinity(train_test_data)

        # Some issue with my data's format & type while being processed
        # within the model.
        # Going back to Kaggle. Maybe I'm using the wrong model?
        # Just don't know enough yet.
        # train_model = grid_search(train_test_data)

        # save_data(covid_df, 'raw', 'whole')
    return flow
def test_deploy_flow_errors_if_not_ecs_run_config(self):
    with pytest.raises(TypeError, match="Unsupported RunConfig"):
        self.deploy_flow(LocalRun())
import pytest

from prefect.storage import Docker, Local
from prefect.run_configs import KubernetesRun, LocalRun
from prefect.utilities.agent import get_flow_image, get_flow_run_command
from prefect.utilities.graphql import GraphQLResult


@pytest.mark.parametrize("run_config", [KubernetesRun(), LocalRun(), None])
def test_get_flow_image_run_config_docker_storage(run_config):
    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "storage": Docker(
                        registry_url="test", image_name="name", image_tag="tag"
                    ).serialize(),
                    "id": "id",
                }
            ),
            "run_config": run_config.serialize() if run_config else None,
            "id": "id",
        }
    )
    image = get_flow_image(flow_run)
    assert image == "test/name:tag"


@pytest.mark.parametrize("run_config", [KubernetesRun(), LocalRun(), None])
@pytest.mark.parametrize("version", ["0.13.0", "0.10.0+182.g385a32514.dirty", None])
"cpu_limit", "cpu_request", "memory_limit", "memory_request", "service_account_name", "image_pull_secrets", "image_pull_policy", ] for field in fields: assert getattr(config, field) == getattr(config2, field) @pytest.mark.parametrize( "config", [ LocalRun(), LocalRun( env={"test": "foo"}, working_dir="/path/to/dir", labels=["a", "b"], ), ], ) def test_serialize_local_run(config): msg = RunConfigSchema().dump(config) config2 = RunConfigSchema().load(msg) assert sorted(config.labels) == sorted(config2.labels) fields = ["env", "working_dir"] for field in fields: assert getattr(config, field) == getattr(config2, field)
                          table_name='example_data')
    data: dd.DataFrame = ML.feature_engineering(data=data)
    data: dd.DataFrame = ML.inference(data=data)
    IO.write_to_S3(bucket_name=bucket_name, folder_name='out',
                   table_name='example_data')

# prefect register -f _flow.py
flow.storage = GitHub(
    repo="Brontomerus/ml-workflows",
    ref="master",
    path="/workflows/ml/flow.py",
    secrets=["GITHUB_ACCESS_TOKEN"])
flow.run_config = LocalRun(labels=['dev'])
flow.executor = DaskExecutor(
    cluster_class="dask_cloudprovider.aws.FargateCluster",
    cluster_kwargs={
        "image": "daskdev/dask:2021.4.1",
        "fargate_use_private_ip": True,
        "n_workers": 2,
        "scheduler_timeout": "4 minutes",
        "worker_cpu": 2048,  # 2048
        "worker_mem":
with Flow(
    "map_100_docker",
    # storage=Docker(
    #     registry_url="joshmeek18",
    #     image_name="flows",
    # ),
) as flow:
    v1 = values()
    do_something.map(v1)
    v2 = values()
    do_something.map(v2)
    v3 = values()
    do_something.map(v3)
    v4 = values()
    do_something.map(v4)

flow.run_config = LocalRun()

# from prefect.environments import LocalEnvironment
from prefect.engine.executors import LocalDaskExecutor

flow.executor = LocalDaskExecutor()
# flow.environment
flow.register(project_name="Demo")
import os

from prefect import Flow, task, Parameter
from prefect.storage import GitHub
from prefect.run_configs import LocalRun


@task(log_stdout=True)
def greet(name):
    greeting = os.environ.get("GREETING", "Hello")
    print(f"{greeting}, {name}!")


with Flow("test-github") as flow:
    name = Parameter("name")
    greet(name)

flow.storage = GitHub("jcrist/prefect-hacking", path="test_github.py")
flow.run_config = LocalRun(env={"GREETING": "Hello"})
name="SQL-stuff" # commit: bool = False, ) #-------------------------------------------------------------- # Flow context #-------------------------------------------------------------- with Flow("github_flow") as f: password = EnvVarSecret(prefect.config.sql_server.password_var) logger = prefect.context.get("logger") thing = Parameter("thing", default=["Thing 1"]) d = dog(thing) s = sql_task(password=password) v = view_sql(s) #-------------------------------------------------------------- # Closing Details #-------------------------------------------------------------- f.run_config = LocalRun(env={ "PREFECT__USER_CONFIG_PATH": '/Users/peytonrunyan/TRP/prefect/config.toml' }) f.storage = GitHub(repo="peyton-trp/prefect-test", path="simple_flow.py", secrets=["GITHUB_ACCESS_TOKEN"]) f.register("cat_flow")
def test_no_args():
    config = LocalRun()
    assert config.env is None
    assert config.working_dir is None
    assert config.labels == set()
import time

from prefect import Flow, task
from prefect.executors import DaskExecutor
from prefect.run_configs import LocalRun


@task
def get_vals():
    return [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]


@task
def print_val(v):
    time.sleep(10)
    print(v)


with Flow("dask-test", executor=DaskExecutor(), run_config=LocalRun()) as f:
    v = get_vals()
    print_val.map(v)

f.register("Demo")
def run_config(self) -> RunConfig:
    return LocalRun(env=self._generate_env())
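# A minimal sketch of the class context the `run_config` method above implies.
# The class name, constructor, and `_generate_env` body are assumptions, not
# from the source; only the `run_config` method itself is.
from typing import Optional

from prefect.run_configs import LocalRun, RunConfig


class FlowDeployment:  # hypothetical wrapper class
    def __init__(self, env: Optional[dict] = None):
        self._env = env or {}

    def _generate_env(self) -> dict:
        # Assumed behavior: overlay instance-specific variables on a default.
        return {"PREFECT__LOGGING__LEVEL": "INFO", **self._env}

    def run_config(self) -> RunConfig:
        return LocalRun(env=self._generate_env())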
@task(log_stdout=True)
def extract(input_string):
    print(input_string)
    return [1, 2, 3, 4, 5, 6]


@task
def transform(number):
    return number * 2


@task
def load(numbers):
    print(f"Uploaded {numbers} to Snowflake")


with Flow(
    "ETL - Local",
    storage=GitHub(
        repo="dylanbhughes/pgr_examples_3",
        path="local_flow.py",
        secrets=["GITHUB_ACCESS_TOKEN"],
    ),
    run_config=LocalRun(labels=["pgr local"]),
    executor=LocalDaskExecutor(scheduler="threads", num_workers=3),
) as flow:
    input_string = Parameter(name="input_string", required=True)
    numbers = extract(input_string=input_string)
    transformed_numbers = transform.map(numbers)
    result = load(numbers=transformed_numbers)
    interval = randrange(0, 60)
    logger.info(interval)
    time.sleep(interval)
    if interval > 50:
        logger.info("Failing flow...")
        raise signals.FAIL()


with Flow(
    "Data Warehouse ETL",
    storage=GitHub(
        repo="kmoonwright/utility_flows",
        path="enterprise_demo/filler_flows.py",
        access_token_secret="GITHUB_ACCESS_TOKEN",
    ),
    # schedule=Schedule(clocks=[IntervalClock(timedelta(minutes=2))]),
    run_config=LocalRun(labels=["local"]),
) as flow1:
    task1 = task_1()
    task2 = task_2()
    task3 = task_3()
    task2.set_upstream(task1)
    task3.set_upstream(task2)

flow1.register(project_name="data-warehouse")

with Flow(
    "Dev Environment ML Training",
    storage=GitHub(
        repo="kmoonwright/utility_flows",
        path="enterprise_demo/filler_flows.py",
        access_token_secret="GITHUB_ACCESS_TOKEN",
    ),
def test_working_dir_relpath_to_abspath():
    relpath = os.path.join("local", "path")
    abspath = os.path.abspath(relpath)
    config = LocalRun(working_dir=relpath)
    assert config.working_dir == abspath