def test_docker_agent_deploy_flow_does_not_include_host_gateway_for_old_engine_versions(
    api, docker_engine_version
):
    """Deploying against an old Docker engine warns and omits `extra_hosts`.

    The mocked API reports `docker_engine_version`. The agent is expected to
    emit a `UserWarning` that names that version, and the keyword arguments
    passed to `create_host_config` must not contain `extra_hosts`.
    """
    api.version.return_value = {"Version": docker_engine_version}

    run = UniversalRun()
    storage = Local()
    agent = DockerAgent()

    expected_warning = (
        "`host.docker.internal` could not be automatically resolved.*"
        f"feature is not supported on Docker Engine v{docker_engine_version}"
    )

    with pytest.warns(UserWarning, match=expected_warning):
        flow = GraphQLResult(
            {
                "id": "foo",
                "name": "flow-name",
                "storage": storage.serialize(),
                "core_version": "0.13.11",
            }
        )
        flow_run = GraphQLResult(
            {
                "flow": flow,
                "run_config": run.serialize() if run else None,
                "id": "id",
                "name": "name",
            }
        )
        agent.deploy_flow(flow_run=flow_run)

    host_config_kwargs = api.create_host_config.call_args[1]
    assert "extra_hosts" not in host_config_kwargs
def prepare_flows(flows: "List[FlowLike]", labels: List[str] = None) -> None:
    """Apply the final, in-place tweaks a flow needs before its storage is built.

    This is the flow-preparation step shared by `register` and `build`.
    Everything is done up front, before any storage is built, so that flows
    sharing a storage instance hash stably.

    Args:
        - flows (List[FlowLike]): flow objects, or already-serialized flows
            given as dicts; each one is modified in place
        - labels (List[str], optional): extra labels to add to every flow
    """
    extra_labels = set(labels or ())

    for flow in flows:
        if not isinstance(flow, dict):
            # Fall back to the storage's result when the flow has none
            if not flow.result:
                flow.result = flow.storage.result

            # Every flow object ends up with a run config
            if flow.run_config is None:
                flow.run_config = UniversalRun()

            # Extra labels first, then the labels required by the storage
            flow.run_config.labels.update(extra_labels)
            flow.run_config.labels.update(flow.storage.labels)

            flow.storage.add_flow(flow)
            continue

        # Serialized flow: merge the extra labels into its run config and
        # store them sorted
        existing = set(flow["run_config"].get("labels") or [])
        flow["run_config"]["labels"] = sorted(existing.union(extra_labels))
def test_environment_overrides(self, project, region):
    """Check how agent-level and run-config-level env vars are merged.

    Both sides define `a` and `PREFECT__LOGGING__LEVEL`; the expected result
    takes the run config's value for both, keeps `b` from the agent and `c`
    from the run config.
    """
    agent_env = {"a": 0, "b": 2, "PREFECT__LOGGING__LEVEL": "test0"}
    run_env = {"a": 1, "c": 2, "PREFECT__LOGGING__LEVEL": "test1"}

    agent = VertexAgent(project, region, env_vars=agent_env)
    flow_run = graphql_result(UniversalRun(env=run_env))

    env = agent.populate_env_vars(flow_run)

    expected = self.DEFAULT.copy()
    expected.update(
        {
            "PREFECT__LOGGING__LEVEL": "test1",
            "a": 1,
            "b": 2,
            "c": 2,
        }
    )
    assert env == expected
def test_initialization(self, cloud_api):
    """Every constructor argument of `StartFlowRun` is stored on the task."""
    now = pendulum.now()
    run_config = UniversalRun()

    settings = dict(
        name="My Flow Run Task",
        checkpoint=False,
        project_name="Test Project",
        flow_name="Test Flow",
        new_flow_context={"foo": "bar"},
        parameters={"test": "ing"},
        run_config=run_config,
        run_name="test-run",
        scheduled_start_time=now,
    )
    task = StartFlowRun(**settings)

    for attribute, expected in settings.items():
        if attribute == "checkpoint":
            # Identity check: must be the literal `False`, not merely falsy
            assert task.checkpoint is False
        else:
            assert getattr(task, attribute) == expected
def test_flow_run_task_submit_args(self, client, cloud_api, idempotency_key, task_run_id):
    """Running `StartFlowRun` looks up the flow and creates a run with the task's settings."""
    run_config = UniversalRun()
    task = StartFlowRun(
        project_name="Test Project",
        flow_name="Test Flow",
        parameters={"test": "ing"},
        run_config=run_config,
        run_name="test-run",
    )

    # The task returns the ID of the newly created flow run
    with prefect.context(task_run_id=task_run_id):
        flow_run_id = task.run(idempotency_key=idempotency_key)
    assert flow_run_id == "xyz890"

    # The flow lookup query mentions both the project and the flow name
    first_call_positional = client.graphql.call_args_list[0][0]
    query_args = list(first_call_positional[0]["query"].keys())[0]
    for needle in ("Test Project", "Test Flow"):
        assert needle in query_args

    # `create_flow_run` receives the task's settings; the idempotency key
    # falls back to the task run ID when none is given explicitly
    expected_kwargs = {
        "flow_id": "abc123",
        "parameters": {"test": "ing"},
        "run_config": run_config,
        "idempotency_key": idempotency_key or task_run_id,
        "context": None,
        "run_name": "test-run",
        "scheduled_start_time": None,
    }
    assert client.create_flow_run.call_args[1] == expected_kwargs
async def test_create_flow_run_run_config_and_labels(
    self,
    tenant_id,
    project_id,
    set_run_config,
    set_group_run_config,
    set_labels,
    set_group_labels,
):
    """Check the precedence of a flow run's run config and labels.

    - run_config: flow run, flow group, flow
    - labels: flow run, flow run run_config, flow group,
        flow group run_config, flow run_config

    The expected values start at the lowest-precedence source and are
    overwritten by each higher-precedence source that is enabled.
    """
    expected_labels = ["from-flow"]
    serialized_flow = prefect.Flow(
        name="test", run_config=UniversalRun(labels=expected_labels)
    ).serialize()
    flow_id = await api.flows.create_flow(
        project_id=project_id, serialized_flow=serialized_flow
    )
    flow = await models.Flow.where(id=flow_id).first({"flow_group_id", "run_config"})

    expected_run_config = flow.run_config
    run_kwargs = {}

    if set_group_run_config:
        expected_labels = ["from-group-run-config"]
        expected_run_config = UniversalRun(labels=expected_labels).serialize()
        await api.flow_groups.set_flow_group_run_config(
            flow_group_id=flow.flow_group_id, run_config=expected_run_config
        )

    if set_group_labels:
        expected_labels = ["from-group"]
        await api.flow_groups.set_flow_group_labels(
            flow_group_id=flow.flow_group_id, labels=expected_labels
        )

    if set_run_config:
        expected_labels = ["from-run-config"]
        expected_run_config = UniversalRun(labels=expected_labels).serialize()
        run_kwargs["run_config"] = expected_run_config

    if set_labels:
        expected_labels = ["from-run"]
        run_kwargs["labels"] = expected_labels

    # Create the run and compare what was stored against the expectation
    flow_run_id = await api.runs.create_flow_run(flow_id=flow_id, **run_kwargs)
    flow_run = await models.FlowRun.where(id=flow_run_id).first(
        {"labels", "run_config"}
    )

    assert flow_run.labels == expected_labels
    assert flow_run.run_config == expected_run_config
def test_get_flow_image_raises_on_missing_info():
    """`get_flow_image` raises `ValueError` for a `UniversalRun` flow on `Local` storage."""
    run_config = UniversalRun()
    storage = Local()
    flow = Flow("test", run_config=run_config, storage=storage)

    with pytest.raises(ValueError):
        get_flow_image(flow=flow)
def test_docker_agent_deploy_flow_run_config(api, run_kind, has_docker_storage):
    """Deploying with a docker, universal or missing run config uses the right image, env and host config.

    The image comes from `Docker` storage when present, otherwise from the
    `DockerRun` config, otherwise from the flow's core version. The host
    config is expected to always carry the `host-gateway` extra host.
    """
    is_docker_run = run_kind == "docker"

    if has_docker_storage:
        storage = Docker(
            registry_url="testing", image_name="on-storage", image_tag="tag"
        )
        image = "testing/on-storage:tag"
    else:
        storage = Local()
        image = "on-run-config" if is_docker_run else "prefecthq/prefect:0.13.11"

    extra_hosts = {"host.docker.internal": "host-gateway"}
    if is_docker_run:
        env = {"TESTING": "VALUE"}
        host_config = {"auto_remove": False, "shm_size": "128m"}
        exp_host_config = {
            "auto_remove": False,
            "extra_hosts": extra_hosts,
            "shm_size": "128m",
        }
        run = DockerRun(image=image, env=env, host_config=host_config)
    else:
        env = {}
        exp_host_config = {"auto_remove": True, "extra_hosts": extra_hosts}
        run = None if run_kind == "missing" else UniversalRun()

    agent = DockerAgent()
    flow = GraphQLResult(
        {
            "id": "foo",
            "name": "flow-name",
            "storage": storage.serialize(),
            "core_version": "0.13.11",
        }
    )
    flow_run = GraphQLResult(
        {
            "flow": flow,
            "run_config": run.serialize() if run else None,
            "id": "id",
            "name": "name",
        }
    )
    agent.deploy_flow(flow_run=flow_run)

    assert api.create_container.called
    container_args, container_kwargs = api.create_container.call_args
    assert container_args[0] == image

    # Only the keys this test sets are checked; the agent may add others
    res_env = container_kwargs["environment"]
    for key, value in env.items():
        assert res_env[key] == value

    res_host_config = api.create_host_config.call_args[1]
    for key, value in exp_host_config.items():
        assert res_host_config[key] == value
def test_get_flow_image_docker_storage():
    """With `Docker` storage the image is `<registry_url>/<image_name>:<image_tag>`."""
    run_config = UniversalRun()
    storage = Docker(registry_url="test", image_name="name", image_tag="tag")
    flow = Flow("test", run_config=run_config, storage=storage)

    assert get_flow_image(flow=flow) == "test/name:tag"
def test_environment_has_agent_token_from_config(self, agent):
    """A configured `cloud.agent.auth_token` is exported as `PREFECT__CLOUD__AUTH_TOKEN`."""
    token_config = {"cloud.agent.auth_token": "TEST_TOKEN"}

    with set_temporary_config(token_config):
        flow_run = graphql_result(UniversalRun())
        env = agent.populate_env_vars(flow_run)

    expected = self.DEFAULT.copy()
    expected.update({"PREFECT__CLOUD__AUTH_TOKEN": "TEST_TOKEN"})
    assert env == expected
def test_env_list(self, agent):
    """Run-config env vars appear in the container spec as `{"name": ..., "value": ...}` entries."""
    flow_run = graphql_result(UniversalRun(env={"a": 1}))

    task_def = agent.generate_task_definition(flow_run)

    first_pool = task_def["job_spec"]["worker_pool_specs"][0]
    env_list = first_pool["container_spec"]["env"]
    expected_entry = {"name": "a", "value": 1}
    assert expected_entry in env_list
def test_environment_has_api_key_from_config(self, agent, config_with_api_key):
    """The API key and tenant ID from the config are exported to the flow run.

    The expected environment is the class-level `DEFAULT` with the API key
    (also mirrored into the auth-token variable) and the tenant ID replaced
    by the values from the `config_with_api_key` fixture.
    """
    run_config = UniversalRun()
    flow_run = graphql_result(run_config)
    env = agent.populate_env_vars(flow_run)

    expected = self.DEFAULT.copy()
    # These must be assignments. The previous `==` comparisons were evaluated
    # and discarded, so the test silently compared `env` against the
    # unmodified defaults.
    expected["PREFECT__CLOUD__API_KEY"] = config_with_api_key.cloud.api_key
    expected["PREFECT__CLOUD__AUTH_TOKEN"] = config_with_api_key.cloud.api_key
    expected["PREFECT__CLOUD__TENANT_ID"] = config_with_api_key.cloud.tenant_id
    assert env == expected
def test_environment_has_api_key_from_config(self, agent, tenant_id):
    """The API key and tenant ID from the config are exported to the flow run.

    The config is temporarily given an API key and the `tenant_id` fixture
    value, with no agent auth token. The expected environment is the
    class-level `DEFAULT` with those values filled in.
    """
    with set_temporary_config(
        {
            "cloud.api_key": "TEST_KEY",
            "cloud.tenant_id": tenant_id,
            "cloud.agent.auth_token": None,
        }
    ):
        run_config = UniversalRun()
        flow_run = graphql_result(run_config)
        env = agent.populate_env_vars(flow_run)

    expected = self.DEFAULT.copy()
    # These must be assignments. The previous `==` comparisons were evaluated
    # and discarded, so the expectations were never applied.
    expected["PREFECT__CLOUD__API_KEY"] = "TEST_KEY"
    expected["PREFECT__CLOUD__AUTH_TOKEN"] = "TEST_KEY"
    # Expect the value that was actually placed in the config rather than a
    # hard-coded literal.
    expected["PREFECT__CLOUD__TENANT_ID"] = tenant_id
    assert env == expected
def test_deploy_flow_job_spec(self, aiplatform, agent, region, project):
    """Deploying a default run creates one custom job with the default spec."""
    job_name = "/projects/abc/locations/us-central1/customJobs/custom_job_name"
    aiplatform.create_custom_job.return_value = box.Box(name=job_name)

    result = self.deploy_flow(agent, UniversalRun())

    # The job name from the Vertex response is respected in the returned URL info
    expected_suffix = f"{agent.region_name}/training/custom_job_name"
    assert result.endswith(expected_suffix)

    aiplatform.create_custom_job.assert_called_once()
    call_kwargs = aiplatform.create_custom_job.call_args[1]

    # Correct region and project
    assert call_kwargs["parent"] == f"projects/{project}/locations/{region}"

    # Correct job spec for a default job
    assert call_kwargs["custom_job"] == self.DEFAULT_JOB
def test_docker_agent_deploy_flow_run_config(api, run_kind, has_docker_storage):
    """Deploying with a docker, universal or missing run config uses the right image and env.

    The image comes from `Docker` storage when present, otherwise from the
    `DockerRun` config, otherwise from the flow's core version.
    """
    is_docker_run = run_kind == "docker"

    if has_docker_storage:
        storage = Docker(
            registry_url="testing", image_name="on-storage", image_tag="tag"
        )
        image = "testing/on-storage:tag"
    else:
        storage = Local()
        image = "on-run-config" if is_docker_run else "prefecthq/prefect:0.13.11"

    if is_docker_run:
        env = {"TESTING": "VALUE"}
        run = DockerRun(image=image, env=env)
    else:
        env = {}
        run = None if run_kind == "missing" else UniversalRun()

    agent = DockerAgent()
    flow = GraphQLResult(
        {
            "id": "foo",
            "name": "flow-name",
            "storage": storage.serialize(),
            "core_version": "0.13.11",
        }
    )
    flow_run = GraphQLResult(
        {
            "flow": flow,
            "run_config": run.serialize() if run else None,
            "id": "id",
            "name": "name",
        }
    )
    agent.deploy_flow(flow_run=flow_run)

    assert api.create_container.called
    container_args, container_kwargs = api.create_container.call_args
    assert container_args[0] == image

    # Only the keys this test sets are checked; the agent may add others
    res_env = container_kwargs["environment"]
    for key, value in env.items():
        assert res_env[key] == value
def test_generate_task_definition_defaults(self, agent):
    """A bare `UniversalRun` yields a task definition made of defaults only."""
    flow_run = graphql_result(UniversalRun())

    task_def = agent.generate_task_definition(flow_run)
    job_spec = task_def["job_spec"]
    pool_spec = job_spec["worker_pool_specs"][0]
    env_list = agent._to_env_list(agent.populate_env_vars(flow_run))

    assert task_def["display_name"]

    # Optional job-level settings stay unset
    for unspecified in ("network", "service_account", "scheduling"):
        assert job_spec.get(unspecified) is None

    assert pool_spec["machine_spec"] == {"machine_type": "e2-standard-4"}
    assert pool_spec["replica_count"] == 1

    expected_container_spec = {
        # The image tag follows the flow's core version
        "image_uri": "prefecthq/prefect:0.13.0",
        "command": ["prefect", "execute", "flow-run"],
        "args": [],
        "env": env_list,
    }
    assert pool_spec["container_spec"] == expected_container_spec
def test_environment_defaults(self, agent):
    """With an empty `UniversalRun` the populated environment equals `DEFAULT`."""
    flow_run = graphql_result(UniversalRun())

    assert agent.populate_env_vars(flow_run) == self.DEFAULT
def test_build(self, tmpdir, filter_names, update):
    """Run `prefect build` on a generated source file and check the JSON output.

    A source file defining two flows is written to `tmpdir`. With `update`,
    an existing output file holding "flow 2" and "flow 3" is written first and
    `--update` is passed. With `filter_names`, only "flow 2" is built. The
    test then checks the exit code, the written flow names, the labels and
    run-config type of "flow 2", and the exact CLI stdout.
    """
    path = str(tmpdir.join("test.py"))
    # NOTE(review): this literal (and the expected stdout below) appears to
    # have lost its original newlines/indentation in this copy of the file --
    # confirm against the upstream source before relying on it.
    source = textwrap.dedent(""" from prefect import Flow from prefect.run_configs import LocalRun flow1 = Flow("flow 1") flow2 = Flow("flow 2", run_config=LocalRun(labels=["new"])) """)
    with open(path, "w") as f:
        f.write(source)

    out_path = str(tmpdir.join("flows.json"))

    if update:
        # Pre-populate the output file so `--update` has something to merge into
        orig_flows = [
            Flow("flow 2", run_config=UniversalRun(labels=["orig"])),
            Flow("flow 3"),
        ]
        orig = {
            "version": 1,
            "flows": [f.serialize(build=False) for f in orig_flows],
        }
        with open(out_path, "w") as f:
            json.dump(orig, f)

    # Two extra labels ("a" and "b") are always requested on the command line
    cmd = ["build", "--path", path, "-l", "a", "-l", "b", "-o", out_path]
    if filter_names:
        cmd.extend(["--name", "flow 2"])
    if update:
        cmd.append("--update")
    result = CliRunner().invoke(cli, cmd)
    assert result.exit_code == 0

    with open(out_path, "rb") as f:
        out = json.load(f)

    assert out["version"] == 1
    assert out["flows"]

    # "flow 2" is first when it is the only flow built, second otherwise
    if filter_names:
        build_names = ["flow 2"]
        flow2 = out["flows"][0]
    else:
        build_names = ["flow 1", "flow 2"]
        flow2 = out["flows"][1]
    # With `--update`, the pre-existing "flow 3" is expected after the built flows
    exp_names = build_names + ["flow 3"] if update else build_names
    written_names = [f["name"] for f in out["flows"]]
    assert written_names == exp_names

    # "flow 2" carries the CLI labels, its own label and the storage labels,
    # and keeps the run-config type from the source file
    storage_labels = Local().labels
    assert set(flow2["run_config"]["labels"]) == {
        "a", "b", "new", *storage_labels
    }
    assert flow2["run_config"]["type"] == "LocalRun"

    build_logs = "\n".join(
        f" Building `Local` storage...\n Building '{name}'... Done"
        for name in build_names)
    out = (
        f"Collecting flows...\n"
        f"Processing {path!r}:\n"
        f"{build_logs}\n"
        f"Writing output to {out_path!r}\n"
        f"========================== {len(build_names)} built ==========================\n"
    )
    assert result.stdout == out
def test_generate_flow_run_environ():
    """Check which source wins for each variable in a flow run's environment.

    Values come from three places: keyword arguments, the (temporary) prefect
    config, and the run config's `env`. Run-config values win for ordinary
    variables but cannot override the identifiers and cloud settings, and
    null values are dropped.
    """
    temporary_config = {
        "cloud.send_flow_run_logs": "CONFIG_SEND_RUN_LOGS",
        "backend": "CONFIG_BACKEND",
        "cloud.api": "CONFIG_API",
        "cloud.tenant_id": "CONFIG_TENANT_ID",
        # Deprecated tokens are included if available but overridden by `run_api_key`
        "cloud.agent.auth_token": "CONFIG_AUTH_TOKEN",
        "cloud.auth_token": None,
    }
    run_config_env = {
        # Run config should take precedence for these values
        "A": "RUN_CONFIG",
        "B": "RUN_CONFIG",
        "C": None,  # Null values are excluded
        # Should not be overridable using a run config
        "PREFECT__CONTEXT__FLOW_RUN_ID": "RUN_CONFIG",
        "PREFECT__CONTEXT__FLOW_ID": "RUN_CONFIG",
        "PREFECT__CLOUD__API_KEY": "RUN_CONFIG",
        "PREFECT__CLOUD__TENANT_ID": "RUN_CONFIG",
        "PREFECT__CLOUD__API": "RUN_CONFIG",
        "PREFECT__BACKEND": "RUN_CONFIG",
    }

    with set_temporary_config(temporary_config):
        result = generate_flow_run_environ(
            flow_run_id="flow-run-id",
            flow_id="flow-id",
            run_config=UniversalRun(env=run_config_env),
            run_api_key="api-key",
        )

    expected = {
        # Passed via kwargs directly
        "PREFECT__CONTEXT__FLOW_RUN_ID": "flow-run-id",
        "PREFECT__CONTEXT__FLOW_ID": "flow-id",
        "PREFECT__CLOUD__API_KEY": "api-key",
        # Backwards compatibility for tokens
        "PREFECT__CLOUD__AUTH_TOKEN": "api-key",
        # Set from prefect config
        "PREFECT__LOGGING__LEVEL": prefect.config.logging.level,
        "PREFECT__LOGGING__FORMAT": prefect.config.logging.format,
        "PREFECT__LOGGING__DATEFMT": prefect.config.logging.datefmt,
        "PREFECT__CLOUD__SEND_FLOW_RUN_LOGS": "CONFIG_SEND_RUN_LOGS",
        "PREFECT__BACKEND": "CONFIG_BACKEND",
        "PREFECT__CLOUD__API": "CONFIG_API",
        "PREFECT__CLOUD__TENANT_ID": "CONFIG_TENANT_ID",
        # Overridden by run config
        "A": "RUN_CONFIG",
        "B": "RUN_CONFIG",
        # Hard-coded
        "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudFlowRunner",
        "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudTaskRunner",
    }
    assert result == expected
from prefect.run_configs import KubernetesRun, LocalRun, DockerRun, ECSRun, UniversalRun
from prefect.serialization.run_config import RunConfigSchema, RunConfigSchemaBase


def test_serialized_run_config_sorts_labels():
    """Dumping through `RunConfigSchemaBase` returns the labels in sorted order."""
    assert RunConfigSchemaBase().dump({"labels": ["b", "c", "a"]})["labels"] == [
        "a",
        "b",
        "c",
    ]


@pytest.mark.parametrize(
    "config", [UniversalRun(), UniversalRun(env={"FOO": "BAR"}, labels=["a", "b"])])
def test_serialize_universal_run(config):
    """A `UniversalRun` keeps its `env` and labels through a dump/load round trip."""
    msg = RunConfigSchema().dump(config)
    config2 = RunConfigSchema().load(msg)
    assert (config.env) == config2.env
    # Labels are compared sorted, so their order is not part of the check
    assert sorted(config.labels) == sorted(config2.labels)


# NOTE(review): the definition below is cut off at the end of this chunk and
# is left exactly as found.
@pytest.mark.parametrize( "config", [ KubernetesRun(), KubernetesRun( job_template_path="s3://bucket/test.yaml", image="myimage",
def build_and_register(
    client: "prefect.Client",
    flows: "List[prefect.Flow]",
    project: str,
    labels: List[str] = None,
    force: bool = False,
) -> Counter:
    """Build and register all flows.

    Args:
        - client (prefect.Client): the prefect client to use
        - flows (List[prefect.Flow]): the flows to register
        - project (str): the project in which to register the flows
        - labels (List[str], optional): Any extra labels to set on all flows
        - force (bool, optional): If false (default), an idempotency key will
            be used to avoid unnecessary register calls.

    Returns:
        - Counter: stats about the number of successful, failed, and skipped flows.
    """
    # Always normalize to a (possibly empty) set. Using `None` for "no extra
    # labels" made the `labels.update(labels)` calls below raise `TypeError`
    # whenever the caller passed no labels.
    labels = set(labels or ())

    # Finish setting up all flows before building, to ensure a stable hash
    # for flows sharing storage instances
    for flow in flows:
        # Set the default flow result if not specified
        if not flow.result:
            flow.result = flow.storage.result

        # Add a `run_config` if not configured explicitly
        # Also add any extra labels to the flow
        if flow.run_config is None:
            if flow.environment is not None:
                flow.environment.labels.update(labels)
            else:
                flow.run_config = UniversalRun(labels=labels)
        else:
            flow.run_config.labels.update(labels)

    # Group flows by storage instance.
    # Also adds all flows to their respective storage instance.
    storage_to_flows = defaultdict(list)
    for flow in flows:
        flow.storage.add_flow(flow)
        storage_to_flows[flow.storage].append(flow)

    # Register each flow, building storage as needed.
    # Stats on success/fail/skip rates are kept for later display
    stats = Counter(registered=0, errored=0, skipped=0)
    # `storage_flows` rather than `flows`, so the parameter is not shadowed
    for storage, storage_flows in storage_to_flows.items():
        # Build storage
        click.echo(f" Building `{type(storage).__name__}` storage...")
        try:
            storage.build()
        except Exception as exc:
            # A failed build marks every flow on this storage as errored;
            # the remaining storages are still processed
            click.secho(" Error building storage:", fg="red")
            log_exception(exc, indent=6)
            red_error = click.style("Error", fg="red")
            for flow in storage_flows:
                click.echo(f" Registering {flow.name!r}... {red_error}")
                stats["errored"] += 1
            continue

        for flow in storage_flows:
            click.echo(f" Registering {flow.name!r}...", nl=False)
            try:
                # Get most recent flow id for this flow. This can be removed once
                # the registration graphql routes return more information
                resp = client.graphql(
                    {
                        "query": {
                            with_args(
                                "flow",
                                {
                                    "where": {
                                        "_and": {
                                            "name": {"_eq": flow.name},
                                            "project": {"name": {"_eq": project}},
                                        }
                                    },
                                    "order_by": {"version": EnumValue("desc")},
                                    "limit": 1,
                                },
                            ): {"id", "version"}
                        }
                    }
                )
                if resp.data.flow:
                    prev_id = resp.data.flow[0].id
                    prev_version = resp.data.flow[0].version
                else:
                    prev_id = None
                    prev_version = 0

                new_id = client.register(
                    flow=flow,
                    project_name=project,
                    build=False,
                    no_url=True,
                    idempotency_key=(None if force else flow.serialized_hash()),
                )
            except Exception as exc:
                click.secho(" Error", fg="red")
                log_exception(exc, indent=4)
                stats["errored"] += 1
            else:
                # An unchanged ID means the idempotency key matched and no
                # new version was created
                if new_id == prev_id:
                    click.secho(" Skipped", fg="yellow")
                    stats["skipped"] += 1
                else:
                    click.secho(" Done", fg="green")
                    click.echo(f" └── ID: {new_id}")
                    click.echo(f" └── Version: {prev_version + 1}")
                    stats["registered"] += 1
    return stats
def build_example(path):
    """Render one example source file and collect the flows it defines.

    The file is executed in-process to discover its `Flow` objects, and run
    once more as a subprocess to capture its output.

    Args:
        - path (str): the path to the example source file.

    Returns:
        - markdown (str): the rendered example in markdown
        - flows (Dict[str, Flow]): the flows found in the example
    """
    from prefect import Flow
    from prefect.storage import GitHub
    from prefect.run_configs import UniversalRun

    # Use the current commit (if specified in the environment)
    ref = os.getenv("GIT_SHA", "master")

    with open(path, "r", encoding="utf-8") as source_file:
        contents = source_file.read()

    namespace = {}
    exec(contents, namespace)

    try:
        header = namespace["__doc__"]
        # The statement after the module docstring marks where the
        # displayed source starts
        first_statement = ast.parse(contents).body[1]
        offset = first_statement.lineno - 1
    except Exception as exc:
        raise ValueError(
            f"No docstring header found for example at {path}") from exc

    relpath = os.path.relpath(path, start=ROOT)
    flows = {}
    for candidate in namespace.values():
        if not isinstance(candidate, Flow):
            continue
        # Point every flow at its source file on GitHub and tag it
        candidate.storage = GitHub("PrefectHQ/prefect", path=relpath, ref=ref)
        if not candidate.run_config:
            candidate.run_config = UniversalRun()
        candidate.run_config.labels.add("prefect-examples")
        flows[candidate.name] = candidate.serialize(build=True)

    source_lines = contents.splitlines()[offset:]
    source = "\n".join(source_lines).strip()

    # Run the example for real to capture what it prints
    completed = subprocess.run(
        [sys.executable, path],
        capture_output=True,
        check=True,
        env={"PREFECT__LOGGING__FORMAT": "%(levelname)s | %(message)s"},
    )
    output = completed.stdout.decode("utf-8").strip()

    register_lines = [
        "prefect register --json https://docs.prefect.io/examples.json"
    ]
    register_lines.extend(f" --name {name!r}" for name in sorted(flows))
    register_lines.append(" --project 'Prefect Examples'")
    register_cmd = " \\\n".join(register_lines)

    rendered = EXAMPLE_TEMPLATE.format(
        header=header,
        source=source,
        output=output,
        ref=ref,
        relpath=relpath,
        register_cmd=register_cmd,
    ).lstrip()

    return rendered, flows
{ "timestamp": pendulum.now().subtract(seconds=20).isoformat(), "serialized_state": Submitted(message="past-state").serialize(), }, ], "parameters": { "param": "value" }, "context": { "foo": "bar" }, "labels": ["label"], "updated": pendulum.now().isoformat(), "run_config": UniversalRun().serialize(), } FLOW_RUN_DATA_2 = { "id": "id-2", "name": "name-2", "flow_id": "flow_id-2", "serialized_state": Success(message="state-2").serialize(), "states": [ { "timestamp": pendulum.now().subtract(seconds=10).isoformat(), "serialized_state": Running(message="past-state").serialize(), },
def test_build_and_register(self, capsys, monkeypatch, force):
    """Build and register a few flows:
    - 1 new flow
    - 1 updated flow
    - 1 skipped flow
    - 1 error during registration
    - 2 sharing the same storage (which fails to build properly)

    The client is a `MagicMock` whose `graphql` and `register` side effects
    are consumed in call order, so the order of `flows` below matters.
    """
    build_call_count = 0

    class MyModule(Module):
        # Storage that only counts how often it is built
        def build(self):
            nonlocal build_call_count
            build_call_count += 1

    class BadStorage(Module):
        # Storage whose build always fails
        def build(self):
            raise ValueError("whoops!")

    client = MagicMock()
    # One lookup result per registered flow: no previous version for flow 1,
    # an existing version for flows 2-4
    client.graphql.side_effect = [
        GraphQLResult({"data": {"flow": []}}),
        GraphQLResult({"data": {"flow": [{"id": "old-id-2", "version": 1}]}}),
        GraphQLResult({"data": {"flow": [{"id": "old-id-3", "version": 2}]}}),
        GraphQLResult({"data": {"flow": [{"id": "old-id-4", "version": 3}]}}),
    ]
    # Flow 2 gets its old ID back (skipped); flow 4 fails to register
    client.register.side_effect = [
        "new-id-1",
        "old-id-2",
        "new-id-3",
        ValueError("Oh no!"),
    ]

    storage1 = MyModule("testing")
    storage1.result = LocalResult()
    flow1 = Flow("flow 1", storage=storage1, run_config=UniversalRun(labels=["a"]))
    flow2 = Flow(
        "flow 2",
        storage=MyModule("testing"),
        environment=LocalEnvironment(labels=["a"]),
    )
    storage2 = MyModule("testing")
    flow3 = Flow("flow 3", storage=storage2)
    flow4 = Flow("flow 4", storage=storage2)
    storage3 = BadStorage("testing")
    flow5 = Flow("flow 5", storage=storage3)
    flow6 = Flow("flow 6", storage=storage3)
    flows = [flow1, flow2, flow3, flow4, flow5, flow6]

    stats = build_and_register(
        client, flows, "testing", labels=["b", "c"], force=force
    )

    # 3 calls (one for each unique `MyModule` storage object)
    assert build_call_count == 3
    # 4 register calls (6 - 2 that failed to build storage)
    assert client.register.call_count == 4
    # `zip` stops after the 4 recorded calls, which line up with flows 1-4
    for flow, (args, kwargs) in zip(flows, client.register.call_args_list):
        assert not args
        assert kwargs["flow"] is flow
        assert kwargs["project_name"] == "testing"
        assert kwargs["build"] is False
        assert kwargs["no_url"] is True
        # No idempotency key when forcing; a truthy key otherwise
        if force:
            assert kwargs["idempotency_key"] is None
        else:
            assert kwargs["idempotency_key"]

    # Stats are recorded properly
    assert dict(stats) == {"registered": 2, "skipped": 1, "errored": 3}

    # Flows are properly configured
    assert flow1.result is storage1.result
    assert flow1.run_config.labels == {"a", "b", "c"}
    assert flow2.environment.labels == {"a", "b", "c"}
    assert isinstance(flow3.run_config, UniversalRun)
    assert flow3.run_config.labels == {"b", "c"}
    assert isinstance(flow4.run_config, UniversalRun)
    assert flow4.run_config.labels == {"b", "c"}

    # The output contains a traceback, which will vary between machines
    # We only check that the following fixed sections exist in the output
    # NOTE(review): the whitespace inside these expected strings may have
    # been collapsed in this copy of the file -- confirm against upstream.
    parts = [
        (
            " Building `MyModule` storage...\n"
            " Registering 'flow 1'... Done\n"
            " └── ID: new-id-1\n"
            " └── Version: 1\n"
            " Building `MyModule` storage...\n"
            " Registering 'flow 2'... Skipped\n"
            " Building `MyModule` storage...\n"
            " Registering 'flow 3'... Done\n"
            " └── ID: new-id-3\n"
            " └── Version: 3\n"
            " Registering 'flow 4'... Error\n"
            " Traceback (most recent call last):\n"
        ),
        (
            " ValueError: Oh no!\n"
            "\n"
            " Building `BadStorage` storage...\n"
            " Error building storage:\n"
            " Traceback (most recent call last):\n"
        ),
        (
            " ValueError: whoops!\n"
            "\n"
            " Registering 'flow 5'... Error\n"
            " Registering 'flow 6'... Error\n"
        ),
    ]
    out, err = capsys.readouterr()
    assert not err
    for part in parts:
        assert part in out
class TestCreateFlowRun:
    """Tests for the `create_flow_run` task, run against mocked `FlowView` and `Client`."""

    @staticmethod
    def _expected_call(**overrides):
        # Keyword arguments `Client.create_flow_run` receives when the task
        # is given only a flow ID, with selected values replaced.
        expected = {
            "flow_id": "flow-id",
            "parameters": None,
            "run_name": None,
            "labels": None,
            "context": None,
            "run_config": None,
            "scheduled_start_time": None,
            "idempotency_key": None,
        }
        expected.update(overrides)
        return expected

    def test_does_not_accept_both_id_and_name(self):
        expected_message = "Received both `flow_id` and `flow_name`"
        with pytest.raises(ValueError, match=expected_message):
            create_flow_run.run(flow_id=uuid4(), flow_name="foo")

    def test_requires_id_or_name(self):
        expected_message = "`flow_id` and `flow_name` are null"
        with pytest.raises(ValueError, match=expected_message):
            create_flow_run.run(flow_id=None, flow_name=None)

    @pytest.mark.parametrize(
        "kwargs",
        [
            {"flow_id": "flow-id"},
            {"flow_name": "flow-name"},
            {"flow_name": "flow-name", "project_name": "project-name"},
        ],
    )
    def test_lookup_uses_given_identifiers(self, kwargs, MockFlowView, MockClient):
        create_flow_run.run(**kwargs)

        if "flow_id" in kwargs:
            MockFlowView.from_id.assert_called_once_with("flow-id")
            return
        if "flow_name" in kwargs:
            # A missing project name is passed on as an empty string
            project_name = kwargs.get("project_name", "")
            MockFlowView.from_flow_name.assert_called_once_with(
                "flow-name", project_name=project_name
            )

    def test_creates_flow_run_with_defaults(self, MockFlowView, MockClient):
        MockFlowView.from_id.return_value.flow_id = "flow-id"

        create_flow_run.run(flow_id="flow-id")

        MockClient().create_flow_run.assert_called_once_with(**self._expected_call())

    @pytest.mark.parametrize(
        "kwargs",
        [
            {"parameters": dict(x=1, y="foo")},
            {"run_name": "run-name"},
            {"labels": ["a", "b"]},
            {"context": {"var": "val"}},
            {"run_config": UniversalRun(env={"x"})},
            {"scheduled_start_time": pendulum.now().add(days=1)},
        ],
    )
    def test_creates_flow_with_given_settings(self, MockFlowView, MockClient, kwargs):
        MockFlowView.from_id.return_value.flow_id = "flow-id"

        create_flow_run.run(flow_id="flow-id", **kwargs)

        # Each setting is forwarded untouched; everything else stays `None`
        passed_through = (
            "parameters",
            "run_name",
            "labels",
            "context",
            "run_config",
            "scheduled_start_time",
        )
        expected = self._expected_call(
            **{key: kwargs.get(key) for key in passed_through}
        )
        MockClient().create_flow_run.assert_called_once_with(**expected)

    @pytest.mark.parametrize(
        "kwargs",
        [
            {"scheduled_start_time": pendulum.duration(days=1)},
        ],
    )
    def test_creates_flow_in_future(self, MockFlowView, MockClient, monkeypatch, kwargs):
        MockFlowView.from_id.return_value.flow_id = "flow-id"

        # Mocking the concept of "now" so we can have consistent assertions
        now = pendulum.now("utc")
        mock_now = MagicMock(return_value=now)
        monkeypatch.setattr("prefect.client.client.pendulum.now", mock_now)

        create_flow_run.run(flow_id="flow-id", **kwargs)

        # A duration is turned into an absolute start time relative to "now"
        expected_start = pendulum.now("utc") + kwargs.get("scheduled_start_time")
        expected = self._expected_call(
            parameters=kwargs.get("parameters"),
            run_name=kwargs.get("run_name"),
            labels=kwargs.get("labels"),
            context=kwargs.get("context"),
            run_config=kwargs.get("run_config"),
            scheduled_start_time=expected_start,
        )
        MockClient().create_flow_run.assert_called_once_with(**expected)

    def test_generates_run_name_from_parent_and_child(self, MockFlowView, MockClient):
        flow_view = MockFlowView.from_id.return_value
        flow_view.flow_id = "flow-id"
        flow_view.name = "child-name"

        parent_context = dict(flow_run_name="parent-run", task_run_id="parent-task-run")
        with prefect.context(**parent_context):
            create_flow_run.run(flow_id="flow-id")

        # The run name joins the parent run name and the child flow name; the
        # parent task run ID is used as the idempotency key
        expected = self._expected_call(
            run_name="parent-run-child-name",
            idempotency_key="parent-task-run",
        )
        MockClient().create_flow_run.assert_called_once_with(**expected)

    def test_returns_flow_run_idl(self, MockFlowView, MockClient):
        MockClient().create_flow_run.return_value = "flow-run-id"

        assert create_flow_run.run(flow_id="flow-id") == "flow-run-id"

    def test_displays_flow_run_url(self, MockFlowView, MockClient, caplog):
        mock_client = MockClient()
        mock_client.create_flow_run.return_value = "flow-run-id"
        mock_client.get_cloud_url.return_value = "fake-url"

        create_flow_run.run(flow_id="flow-id")

        MockClient().get_cloud_url.assert_called_once_with(
            "flow-run", "flow-run-id", as_user=False
        )
        assert "Created flow run '<generated-name>': fake-url" in caplog.text
agent.deploy_flow(flow_run=GraphQLResult( { "id": "id", "flow": { "storage": Local().serialize(), "id": "foo", "core_version": "0.13.0", }, "run_config": KubernetesRun().serialize(), }, )) assert not popen.called assert len(agent.processes) == 0 @pytest.mark.parametrize("run_config", [None, UniversalRun()]) def test_local_agent_deploy_null_or_univeral_run_config( monkeypatch, run_config): popen = MagicMock() monkeypatch.setattr("prefect.agent.local.agent.Popen", popen) agent = LocalAgent() agent.deploy_flow(flow_run=GraphQLResult( { "id": "id", "flow": { "storage": Local().serialize(), "id": "foo", "core_version": "0.13.0", },
import pytest <<<<<<< HEAD from prefect.run_configs import KubernetesRun, LocalRun, DockerRun, ECSRun, UniversalRun from prefect.serialization.run_config import RunConfigSchema, RunConfigSchemaBase def test_serialized_run_config_sorts_labels(): assert RunConfigSchemaBase().dump({"labels": ["b", "c", "a"]})["labels"] == [ "a", "b", "c", ] @pytest.mark.parametrize("config", [UniversalRun(), UniversalRun(labels=["a", "b"])]) def test_serialize_universal_run(config): msg = RunConfigSchema().dump(config) config2 = RunConfigSchema().load(msg) assert sorted(config.labels) == sorted(config2.labels) ======= from prefect.run_configs import KubernetesRun, LocalRun, DockerRun, ECSRun from prefect.serialization.run_config import RunConfigSchema >>>>>>> prefect clone @pytest.mark.parametrize( "config", [ KubernetesRun(), KubernetesRun(
class TestDeployFlow:
    """Tests for `ECSAgent.deploy_flow`, run against a mocked `aws` fixture."""

    @staticmethod
    def _stub_run_task(aws):
        # Make the mocked `run_task` report one started task.
        aws.ecs.run_task.return_value = {"tasks": [{"taskArn": "my-task-arn"}]}

    @staticmethod
    def _stub_register_task_definition(aws):
        # Make the mocked `register_task_definition` report a new ARN.
        aws.ecs.register_task_definition.return_value = {
            "taskDefinition": {"taskDefinitionArn": "my-taskdef-arn"}
        }

    def deploy_flow(self, run_config, storage=None, **kwargs):
        """Deploy a minimal flow run through a fresh `ECSAgent`.

        `storage` defaults to `Local()`; extra keyword arguments go to the
        agent constructor. Returns whatever `deploy_flow` returns.
        """
        agent = ECSAgent(**kwargs)
        flow = GraphQLResult(
            {
                "storage": (storage or Local()).serialize(),
                "id": "flow-id",
                "version": 1,
                "name": "Test Flow",
                "core_version": "0.13.0",
            }
        )
        flow_run = GraphQLResult(
            {
                "flow": flow,
                "run_config": run_config.serialize() if run_config else None,
                "id": "flow-run-id",
            }
        )
        return agent.deploy_flow(flow_run)

    def test_deploy_flow_errors_if_not_ecs_run_config(self):
        expected_message = "`run_config` of type `LocalRun`, only `ECSRun` is supported"
        with pytest.raises(TypeError, match=expected_message):
            self.deploy_flow(LocalRun())

    def test_deploy_flow_errors_if_mix_task_definition_arn_and_docker_storage(self):
        expected_message = (
            "Cannot provide `task_definition_arn` when using `Docker` storage"
        )
        with pytest.raises(ValueError, match=expected_message):
            self.deploy_flow(
                ECSRun(task_definition_arn="my-taskdef-arn"),
                storage=Docker(
                    registry_url="test", image_name="name", image_tag="tag"
                ),
            )

    @pytest.mark.parametrize("run_config", [ECSRun(), UniversalRun(), None])
    def test_deploy_flow_registers_new_task_definition(self, run_config, aws):
        self._stub_register_task_definition(aws)
        self._stub_run_task(aws)

        res = self.deploy_flow(run_config)

        # A task definition is registered, named after the flow and flow run
        assert aws.ecs.register_task_definition.called
        register_kwargs = aws.ecs.register_task_definition.call_args[1]
        assert register_kwargs["family"] == "prefect-test-flow-flow-run-id"

        # The task runs on the new definition
        assert aws.ecs.run_task.called
        assert aws.ecs.run_task.call_args[1]["taskDefinition"] == "my-taskdef-arn"
        assert "my-task-arn" in res

        # The definition is deregistered again afterwards
        assert aws.ecs.deregister_task_definition.called
        deregister_kwargs = aws.ecs.deregister_task_definition.call_args[1]
        assert deregister_kwargs["taskDefinition"] == "my-taskdef-arn"

    def test_deploy_flow_uses_provided_task_definition_arn(self, aws):
        self._stub_run_task(aws)

        res = self.deploy_flow(ECSRun(task_definition_arn="my-taskdef-arn"))

        # Nothing is registered or deregistered for a user-provided definition
        assert not aws.ecs.register_task_definition.called
        assert aws.ecs.run_task.called
        assert aws.ecs.run_task.call_args[1]["taskDefinition"] == "my-taskdef-arn"
        assert not aws.ecs.deregister_task_definition.called
        assert "my-task-arn" in res

    def test_deploy_flow_run_task_fails(self, aws):
        aws.ecs.run_task.return_value = {
            "tasks": [],
            "failures": [{"reason": "my-reason"}],
        }

        with pytest.raises(ValueError) as exc:
            self.deploy_flow(ECSRun())

        # The definition is cleaned up and the failure reason is surfaced
        assert aws.ecs.run_task.called
        assert aws.ecs.deregister_task_definition.called
        assert "my-reason" in str(exc.value)

    def test_deploy_flow_forwards_run_task_kwargs(self, aws):
        self._stub_register_task_definition(aws)
        self._stub_run_task(aws)

        res = self.deploy_flow(
            ECSRun(run_task_kwargs={"enableECSManagedTags": True})
        )

        assert aws.ecs.run_task.called
        run_kwargs = aws.ecs.run_task.call_args[1]
        assert run_kwargs["taskDefinition"] == "my-taskdef-arn"
        assert run_kwargs["enableECSManagedTags"] is True
        assert "my-task-arn" in res

    def test_deploy_flow_forwards_run_config_settings(self, aws):
        self._stub_register_task_definition(aws)
        self._stub_run_task(aws)

        self.deploy_flow(ECSRun(cpu=8, memory=1024))

        aws.ecs.run_task.assert_called_once()
        # CPU and memory are passed on as strings in the task overrides
        overrides = aws.ecs.run_task.call_args[1]["overrides"]
        assert overrides["cpu"] == "8"
        assert overrides["memory"] == "1024"
def test_build_and_register(self, capsys, monkeypatch, force):
    """Run `build_and_register` over eight flows and check the outcome.

    The flows cover these situations:

    - flow 1 and flow 3 are reported as registered
    - flow 2 is reported as skipped
    - flow 4 raises during registration
    - flow 5 and flow 6 share one storage object whose `build` raises
    - flow 7 and flow 8 are pre-serialized flows wrapped in `box.Box`

    The test checks the number of storage builds, the arguments of every
    registration call, the returned stats, the labels/defaults applied to
    the flows, and fixed sections of the captured output.
    """
    storage_builds = 0

    # NOTE: the class names below appear verbatim in the expected output.
    class MyModule(Module):
        def build(self):
            nonlocal storage_builds
            storage_builds += 1

    class BadStorage(Module):
        def build(self):
            raise ValueError("whoops!")

    client = MagicMock()
    # One entry per flow that reaches registration (flows 1-4, then 7-8).
    register_serialized_flow = MagicMock(
        side_effect=[
            ("new-id-1", 1, True),
            ("old-id-2", 2, False),
            ("new-id-3", 3, True),
            ValueError("Oh no!"),
            ("new-id-7", 1, True),
            ("old-id-8", 2, False),
        ]
    )
    monkeypatch.setattr(
        "prefect.cli.build_register.register_serialized_flow",
        register_serialized_flow,
    )

    storage1 = MyModule("testing")
    storage1.result = LocalResult()
    flow1 = Flow(
        "flow 1", storage=storage1, run_config=UniversalRun(labels=["a"])
    )
    flow2 = Flow(
        "flow 2",
        storage=MyModule("testing"),
        environment=LocalEnvironment(labels=["a"]),
    )

    # flow 3 and flow 4 share a single storage instance.
    storage2 = MyModule("testing")
    flow3 = Flow("flow 3", storage=storage2)
    flow4 = Flow("flow 4", storage=storage2)

    # flow 5 and flow 6 share a storage instance whose build fails.
    storage3 = BadStorage("testing")
    flow5 = Flow("flow 5", storage=storage3)
    flow6 = Flow("flow 6", storage=storage3)

    # flow 7 and flow 8 are already-serialized flows.
    serialized7 = Flow(
        "flow 7", run_config=UniversalRun(labels=["a"])
    ).serialize(build=False)
    flow7 = box.Box(serialized7)
    serialized8 = Flow(
        "flow 8", environment=LocalEnvironment(labels=["a"])
    ).serialize(build=False)
    flow8 = box.Box(serialized8)

    flows = [flow1, flow2, flow3, flow4, flow5, flow6, flow7, flow8]

    stats = build_and_register(
        client, flows, "my-project-id", labels=["b", "c"], force=force
    )

    # 3 calls (one for each unique `MyModule` storage object)
    assert storage_builds == 3

    # 6 register calls (8 - 2 that failed to build storage)
    assert register_serialized_flow.call_count == 6
    recorded_calls = register_serialized_flow.call_args_list
    for _flow, recorded in zip(flows, recorded_calls):
        call_args, call_kwargs = recorded
        assert not call_args
        assert call_kwargs["client"] is client
        assert call_kwargs["serialized_flow"]
        assert call_kwargs["project_id"] == "my-project-id"
        assert call_kwargs["force"] == force

    # Stats are recorded properly
    assert dict(stats) == {"registered": 3, "skipped": 2, "errored": 3}

    # Flows are properly configured
    extended_labels = {"a", "b", "c"}
    cli_labels = {"b", "c"}
    assert flow1.result is storage1.result
    assert flow1.run_config.labels == extended_labels
    assert flow2.environment.labels == extended_labels
    assert isinstance(flow3.run_config, UniversalRun)
    assert flow3.run_config.labels == cli_labels
    assert isinstance(flow4.run_config, UniversalRun)
    assert flow4.run_config.labels == cli_labels
    assert set(flow7["run_config"]["labels"]) == extended_labels
    assert set(flow8["environment"]["labels"]) == extended_labels

    # The output contains a traceback, which will vary between machines
    # We only check that the following fixed sections exist in the output
    expected_sections = [
        (
            " Building `MyModule` storage...\n"
            " Registering 'flow 1'... Done\n"
            " └── ID: new-id-1\n"
            " └── Version: 1\n"
            " Building `MyModule` storage...\n"
            " Registering 'flow 2'... Skipped (metadata unchanged)\n"
            " Building `MyModule` storage...\n"
            " Registering 'flow 3'... Done\n"
            " └── ID: new-id-3\n"
            " └── Version: 3\n"
            " Registering 'flow 4'... Error\n"
            " Traceback (most recent call last):\n"
        ),
        (
            " ValueError: Oh no!\n"
            "\n"
            " Building `BadStorage` storage...\n"
            " Error building storage:\n"
            " Traceback (most recent call last):\n"
        ),
        (
            " ValueError: whoops!\n"
            "\n"
            " Registering 'flow 5'... Error\n"
            " Registering 'flow 6'... Error\n"
            " Registering 'flow 7'... Done\n"
            " └── ID: new-id-7\n"
            " └── Version: 1\n"
            " Registering 'flow 8'... Skipped (metadata unchanged)\n"
        ),
    ]

    captured = capsys.readouterr()
    assert not captured.err
    for section in expected_sections:
        assert section in captured.out