Пример #1
0
def test_docker_agent_deploy_flow_does_not_include_host_gateway_for_old_engine_versions(
        api, docker_engine_version):
    """Deploying against an old Docker engine warns and omits `extra_hosts`.

    The mocked API reports `docker_engine_version`. Deploying a flow run must
    emit a `UserWarning` that names this version, and `create_host_config`
    must not be called with an `extra_hosts` keyword.
    """
    api.version.return_value = {"Version": docker_engine_version}

    run = UniversalRun()
    storage = Local()
    agent = DockerAgent()

    expected_warning = (
        "`host.docker.internal` could not be automatically resolved.*"
        f"feature is not supported on Docker Engine v{docker_engine_version}")

    with pytest.warns(UserWarning, match=expected_warning):
        flow_payload = GraphQLResult({
            "id": "foo",
            "name": "flow-name",
            "storage": storage.serialize(),
            "core_version": "0.13.11",
        })
        flow_run = GraphQLResult({
            "flow": flow_payload,
            "run_config": run.serialize() if run else None,
            "id": "id",
            "name": "name",
        })
        agent.deploy_flow(flow_run=flow_run)

    host_config_kwargs = api.create_host_config.call_args[1]
    assert "extra_hosts" not in host_config_kwargs
Пример #2
0
def prepare_flows(flows: "List[FlowLike]", labels: List[str] = None) -> None:
    """Apply the final, pre-build modifications to each flow, in place.

    Used by both `register` and `build`. All flows are fully configured here,
    before any storage is built, so that flows sharing a storage instance
    hash consistently.

    Args:
        - flows (List[FlowLike]): flow objects or already-serialized flow
            dicts; each one is modified in-place
        - labels (List[str], optional): extra labels to add to every flow
    """
    extra_labels = set(labels or ())

    for item in flows:
        if not isinstance(item, dict):
            # Fall back to the storage's result when the flow has none
            if not item.result:
                item.result = item.storage.result

            # Flows without an explicit `run_config` get the universal one
            if item.run_config is None:
                item.run_config = UniversalRun()

            # Merge in the extra labels, then the labels required by storage
            item.run_config.labels.update(extra_labels)
            item.run_config.labels.update(item.storage.labels)

            # Finally attach the flow to its storage
            item.storage.add_flow(item)
            continue

        # Serialized flow: merge the extra labels into its run config and
        # store them sorted
        merged = set(item["run_config"].get("labels") or []) | extra_labels
        item["run_config"]["labels"] = sorted(merged)
Пример #3
0
    def test_environment_overrides(self, project, region):
        """Run-config env values override the agent's `env_vars`."""
        # Precedence order: agent values first, then run-config values on
        # top, except for the required env vars
        agent_env = {"a": 0, "b": 2, "PREFECT__LOGGING__LEVEL": "test0"}
        run_env = {"a": 1, "c": 2, "PREFECT__LOGGING__LEVEL": "test1"}

        agent = VertexAgent(project, region, env_vars=agent_env)
        flow_run = graphql_result(UniversalRun(env=run_env))

        env = agent.populate_env_vars(flow_run)

        expected = self.DEFAULT.copy()
        expected.update({
            "PREFECT__LOGGING__LEVEL": "test1",
            "a": 1,
            "b": 2,
            "c": 2,
        })
        assert env == expected
Пример #4
0
    def test_initialization(self, cloud_api):
        """Every constructor argument is exposed as a task attribute."""
        now = pendulum.now()
        run_config = UniversalRun()

        task = StartFlowRun(
            name="My Flow Run Task",
            checkpoint=False,
            project_name="Test Project",
            flow_name="Test Flow",
            new_flow_context={"foo": "bar"},
            parameters={"test": "ing"},
            run_config=run_config,
            run_name="test-run",
            scheduled_start_time=now,
        )

        # `checkpoint` must be the literal `False`, not merely falsy
        assert task.name == "My Flow Run Task"
        assert task.checkpoint is False

        # Remaining attributes are compared by equality against fresh values
        expected_attrs = [
            ("project_name", "Test Project"),
            ("flow_name", "Test Flow"),
            ("new_flow_context", {"foo": "bar"}),
            ("parameters", {"test": "ing"}),
            ("run_config", run_config),
            ("run_name", "test-run"),
            ("scheduled_start_time", now),
        ]
        for attr, expected in expected_attrs:
            assert getattr(task, attr) == expected
Пример #5
0
    def test_flow_run_task_submit_args(self, client, cloud_api,
                                       idempotency_key, task_run_id):
        """Running the task queries for the flow and creates a flow run."""
        run_config = UniversalRun()

        task = StartFlowRun(
            project_name="Test Project",
            flow_name="Test Flow",
            parameters={"test": "ing"},
            run_config=run_config,
            run_name="test-run",
        )

        # `run` must return the ID of the new flow run
        with prefect.context(task_run_id=task_run_id):
            new_run_id = task.run(idempotency_key=idempotency_key)
            assert new_run_id == "xyz890"

        # The first GraphQL call must reference both the project and the flow
        first_positional_args = client.graphql.call_args_list[0][0]
        query = first_positional_args[0]["query"]
        query_args = list(query.keys())[0]
        for fragment in ("Test Project", "Test Flow"):
            assert fragment in query_args

        # `create_flow_run` must receive exactly these keyword arguments; the
        # idempotency key falls back to the task run ID when not provided
        expected_kwargs = {
            "flow_id": "abc123",
            "parameters": {"test": "ing"},
            "run_config": run_config,
            "idempotency_key": idempotency_key or task_run_id,
            "context": None,
            "run_name": "test-run",
            "scheduled_start_time": None,
        }
        assert client.create_flow_run.call_args[1] == expected_kwargs
Пример #6
0
 async def test_create_flow_run_run_config_and_labels(
     self,
     tenant_id,
     project_id,
     set_run_config,
     set_group_run_config,
     set_labels,
     set_group_labels,
 ):
     """Verify the precedence of run configs and labels on a new flow run.

     Highest priority first:
     - run_config: flow run, flow group, flow
     - labels: flow run, flow run run_config, flow group, flow group
       run_config, flow run_config
     """
     # Lowest priority: labels carried by the flow's own run config
     expected_labels = ["from-flow"]
     serialized_flow = prefect.Flow(
         name="test",
         run_config=UniversalRun(labels=expected_labels),
     ).serialize()
     flow_id = await api.flows.create_flow(
         project_id=project_id,
         serialized_flow=serialized_flow,
     )
     flow = await models.Flow.where(id=flow_id).first(
         {"flow_group_id", "run_config"})

     expected_run_config = flow.run_config
     create_kwargs = {}

     # Each enabled source below overrides the expectations set by the
     # previous ones, so the order of these checks encodes the precedence.
     if set_group_run_config:
         expected_labels = ["from-group-run-config"]
         expected_run_config = UniversalRun(
             labels=expected_labels).serialize()
         await api.flow_groups.set_flow_group_run_config(
             flow_group_id=flow.flow_group_id,
             run_config=expected_run_config,
         )
     if set_group_labels:
         expected_labels = ["from-group"]
         await api.flow_groups.set_flow_group_labels(
             flow_group_id=flow.flow_group_id,
             labels=expected_labels,
         )
     if set_run_config:
         expected_labels = ["from-run-config"]
         expected_run_config = UniversalRun(
             labels=expected_labels).serialize()
         create_kwargs["run_config"] = expected_run_config
     if set_labels:
         expected_labels = ["from-run"]
         create_kwargs["labels"] = expected_labels

     # Create the run and compare what was stored against the expectations
     flow_run_id = await api.runs.create_flow_run(
         flow_id=flow_id, **create_kwargs)
     flow_run = await models.FlowRun.where(id=flow_run_id).first(
         {"labels", "run_config"})
     assert flow_run.labels == expected_labels
     assert flow_run.run_config == expected_run_config
Пример #7
0
def test_get_flow_image_raises_on_missing_info():
    """`get_flow_image` raises `ValueError` for a `UniversalRun`/`Local` flow."""
    run_config = UniversalRun()
    storage = Local()
    flow = Flow("test", run_config=run_config, storage=storage)

    with pytest.raises(ValueError):
        get_flow_image(flow=flow)
def test_docker_agent_deploy_flow_run_config(api, run_kind,
                                             has_docker_storage):
    """The agent honors image, env and host config from storage/run config.

    `run_kind` selects a `DockerRun`, a `UniversalRun`, or no run config at
    all (`"missing"`); `has_docker_storage` toggles between `Docker` and
    `Local` storage.
    """
    is_docker_run = run_kind == "docker"

    # Image precedence: Docker storage, then the DockerRun image, then the
    # default image for the flow's core version
    if has_docker_storage:
        storage = Docker(registry_url="testing",
                         image_name="on-storage",
                         image_tag="tag")
        image = "testing/on-storage:tag"
    elif is_docker_run:
        storage = Local()
        image = "on-run-config"
    else:
        storage = Local()
        image = "prefecthq/prefect:0.13.11"

    if is_docker_run:
        env = {"TESTING": "VALUE"}
        host_config = {"auto_remove": False, "shm_size": "128m"}
        exp_host_config = {
            "auto_remove": False,
            "extra_hosts": {"host.docker.internal": "host-gateway"},
            "shm_size": "128m",
        }
        run = DockerRun(image=image, env=env, host_config=host_config)
    else:
        env = {}
        exp_host_config = {
            "auto_remove": True,
            "extra_hosts": {"host.docker.internal": "host-gateway"},
        }
        run = UniversalRun() if run_kind != "missing" else None

    agent = DockerAgent()
    flow_payload = GraphQLResult({
        "id": "foo",
        "name": "flow-name",
        "storage": storage.serialize(),
        "core_version": "0.13.11",
    })
    flow_run = GraphQLResult({
        "flow": flow_payload,
        "run_config": run.serialize() if run else None,
        "id": "id",
        "name": "name",
    })
    agent.deploy_flow(flow_run=flow_run)

    assert api.create_container.called
    assert api.create_container.call_args[0][0] == image

    # Only the keys we set are checked; the agent may add others
    res_env = api.create_container.call_args[1]["environment"]
    for key, value in env.items():
        assert res_env[key] == value

    res_host_config = api.create_host_config.call_args[1]
    for key, value in exp_host_config.items():
        assert res_host_config[key] == value
Пример #9
0
def test_get_flow_image_docker_storage():
    """With `Docker` storage the image is `<registry>/<name>:<tag>`."""
    run_config = UniversalRun()
    storage = Docker(registry_url="test", image_name="name", image_tag="tag")
    flow = Flow("test", run_config=run_config, storage=storage)

    assert get_flow_image(flow=flow) == "test/name:tag"
Пример #10
0
    def test_environment_has_agent_token_from_config(self, agent):
        """The configured agent auth token is passed on to the flow run."""
        token_config = {"cloud.agent.auth_token": "TEST_TOKEN"}
        with set_temporary_config(token_config):
            flow_run = graphql_result(UniversalRun())
            env = agent.populate_env_vars(flow_run)

        expected = self.DEFAULT.copy()
        expected.update({"PREFECT__CLOUD__AUTH_TOKEN": "TEST_TOKEN"})
        assert env == expected
Пример #11
0
    def test_env_list(self, agent):
        """Env entries in the container spec are `{"name", "value"}` dicts."""
        flow_run = graphql_result(UniversalRun(env={"a": 1}))
        task_def = agent.generate_task_definition(flow_run)

        first_pool = task_def["job_spec"]["worker_pool_specs"][0]
        env_list = first_pool["container_spec"]["env"]

        expected_entry = {"name": "a", "value": 1}
        assert expected_entry in env_list
Пример #12
0
    def test_environment_has_api_key_from_config(self, agent,
                                                 config_with_api_key):
        """The API key and tenant ID from config are passed to the flow run.

        The expected environment is the class-level `DEFAULT` with the three
        cloud credentials replaced by the values from `config_with_api_key`.
        """
        run_config = UniversalRun()
        flow_run = graphql_result(run_config)
        env = agent.populate_env_vars(flow_run)

        cloud = config_with_api_key.cloud
        expected = self.DEFAULT.copy()
        # These must be assignments: a bare `==` comparison is evaluated and
        # discarded, leaving `expected` equal to `DEFAULT` and the final
        # assertion blind to the configured credentials.
        expected["PREFECT__CLOUD__API_KEY"] = cloud.api_key
        expected["PREFECT__CLOUD__AUTH_TOKEN"] = cloud.api_key
        expected["PREFECT__CLOUD__TENANT_ID"] = cloud.tenant_id
        assert env == expected
Пример #13
0
    def test_environment_has_api_key_from_config(self, agent, tenant_id):
        """The API key and tenant ID from config are passed to the flow run.

        The expected environment is the class-level `DEFAULT` with the three
        cloud credentials replaced by the temporarily configured values.
        """
        with set_temporary_config({
                "cloud.api_key": "TEST_KEY",
                "cloud.tenant_id": tenant_id,
                "cloud.agent.auth_token": None,
        }):
            run_config = UniversalRun()
            flow_run = graphql_result(run_config)
            env = agent.populate_env_vars(flow_run)

        expected = self.DEFAULT.copy()
        # These must be assignments: a bare `==` comparison is evaluated and
        # discarded, leaving `expected` equal to `DEFAULT`.
        expected["PREFECT__CLOUD__API_KEY"] = "TEST_KEY"
        expected["PREFECT__CLOUD__AUTH_TOKEN"] = "TEST_KEY"
        # The tenant ID expectation is the value written into the config above
        expected["PREFECT__CLOUD__TENANT_ID"] = tenant_id
        assert env == expected
Пример #14
0
    def test_deploy_flow_job_spec(self, aiplatform, agent, region, project):
        """A default deployment submits `DEFAULT_JOB` to the right parent."""
        job_name = (
            "/projects/abc/locations/us-central1/customJobs/custom_job_name")
        aiplatform.create_custom_job.return_value = box.Box(name=job_name)

        result = self.deploy_flow(agent, UniversalRun())

        # The job name from the Vertex response must be respected in the
        # returned URL info
        expected_suffix = f"{agent.region_name}/training/custom_job_name"
        assert result.endswith(expected_suffix)

        aiplatform.create_custom_job.assert_called_once()
        call_kwargs = aiplatform.create_custom_job.call_args[1]

        # Correct region and project
        assert call_kwargs["parent"] == (
            f"projects/{project}/locations/{region}")
        # Correct job spec for a default job
        assert call_kwargs["custom_job"] == self.DEFAULT_JOB
Пример #15
0
def test_docker_agent_deploy_flow_run_config(api, run_kind, has_docker_storage):
    """The agent picks the image and env from the storage and run config.

    `run_kind` selects a `DockerRun`, a `UniversalRun`, or no run config at
    all (`"missing"`); `has_docker_storage` toggles between `Docker` and
    `Local` storage.
    """
    is_docker_run = run_kind == "docker"

    # Image precedence: Docker storage, then the DockerRun image, then the
    # default image for the flow's core version
    if has_docker_storage:
        storage = Docker(
            registry_url="testing", image_name="on-storage", image_tag="tag"
        )
        image = "testing/on-storage:tag"
    elif is_docker_run:
        storage = Local()
        image = "on-run-config"
    else:
        storage = Local()
        image = "prefecthq/prefect:0.13.11"

    if is_docker_run:
        env = {"TESTING": "VALUE"}
        run = DockerRun(image=image, env=env)
    else:
        env = {}
        run = UniversalRun() if run_kind != "missing" else None

    agent = DockerAgent()
    flow_payload = GraphQLResult(
        {
            "id": "foo",
            "name": "flow-name",
            "storage": storage.serialize(),
            "core_version": "0.13.11",
        }
    )
    flow_run = GraphQLResult(
        {
            "flow": flow_payload,
            "run_config": run.serialize() if run else None,
            "id": "id",
            "name": "name",
        }
    )
    agent.deploy_flow(flow_run=flow_run)

    assert api.create_container.called
    assert api.create_container.call_args[0][0] == image

    # Only the variables we set are checked; the agent may add others
    res_env = api.create_container.call_args[1]["environment"]
    for key, value in env.items():
        assert res_env[key] == value
Пример #16
0
    def test_generate_task_definition_defaults(self, agent):
        """A bare `UniversalRun` yields the default task definition."""
        flow_run = graphql_result(UniversalRun())
        task_def = agent.generate_task_definition(flow_run)

        job_spec = task_def["job_spec"]
        pool_spec = job_spec["worker_pool_specs"][0]
        env_list = agent._to_env_list(agent.populate_env_vars(flow_run))

        assert task_def["display_name"]

        # Optional job settings stay unset by default
        for unspecified in ("network", "service_account", "scheduling"):
            assert job_spec.get(unspecified) is None

        assert pool_spec["machine_spec"] == {"machine_type": "e2-standard-4"}
        assert pool_spec["replica_count"] == 1

        expected_container_spec = {
            # from the core version above
            "image_uri": "prefecthq/prefect:0.13.0",
            "command": ["prefect", "execute", "flow-run"],
            "args": [],
            "env": env_list,
        }
        assert pool_spec["container_spec"] == expected_container_spec
Пример #17
0
    def test_environment_defaults(self, agent):
        """A bare `UniversalRun` produces exactly the `DEFAULT` environment."""
        flow_run = graphql_result(UniversalRun())

        assert agent.populate_env_vars(flow_run) == self.DEFAULT
    def test_build(self, tmpdir, filter_names, update):
        """`build` writes the selected flows, their labels and a summary.

        `filter_names` restricts the build to "flow 2"; `update` pre-seeds
        the output file and passes `--update` so existing entries are kept.
        """
        path = str(tmpdir.join("test.py"))
        source = textwrap.dedent("""
            from prefect import Flow
            from prefect.run_configs import LocalRun

            flow1 = Flow("flow 1")
            flow2 = Flow("flow 2", run_config=LocalRun(labels=["new"]))
            """)
        with open(path, "w") as source_file:
            source_file.write(source)

        out_path = str(tmpdir.join("flows.json"))

        if update:
            # Pre-existing output: an older "flow 2" plus an unrelated flow
            existing_flows = [
                Flow("flow 2", run_config=UniversalRun(labels=["orig"])),
                Flow("flow 3"),
            ]
            existing = {
                "version": 1,
                "flows": [
                    flow.serialize(build=False) for flow in existing_flows
                ],
            }
            with open(out_path, "w") as out_file:
                json.dump(existing, out_file)

        cmd = ["build", "--path", path, "-l", "a", "-l", "b", "-o", out_path]
        if filter_names:
            cmd += ["--name", "flow 2"]
        if update:
            cmd += ["--update"]
        result = CliRunner().invoke(cli, cmd)

        assert result.exit_code == 0

        with open(out_path, "rb") as out_file:
            written = json.load(out_file)

        assert written["version"] == 1
        assert written["flows"]

        if filter_names:
            build_names = ["flow 2"]
            flow2_index = 0
        else:
            build_names = ["flow 1", "flow 2"]
            flow2_index = 1
        flow2 = written["flows"][flow2_index]

        # With `--update`, the untouched "flow 3" follows the built flows
        exp_names = list(build_names)
        if update:
            exp_names.append("flow 3")
        assert [entry["name"] for entry in written["flows"]] == exp_names

        # CLI labels, the flow's own label and the storage labels are merged
        expected_labels = {"a", "b", "new"} | set(Local().labels)
        assert set(flow2["run_config"]["labels"]) == expected_labels
        assert flow2["run_config"]["type"] == "LocalRun"

        build_logs = "\n".join(
            f"  Building `Local` storage...\n  Building '{name}'... Done"
            for name in build_names)
        expected_stdout = (
            f"Collecting flows...\n"
            f"Processing {path!r}:\n"
            f"{build_logs}\n"
            f"Writing output to {out_path!r}\n"
            f"========================== {len(build_names)} built ==========================\n"
        )
        assert result.stdout == expected_stdout
Пример #19
0
def test_generate_flow_run_environ():
    """Check which source wins for each generated environment variable."""
    temporary_config = {
        "cloud.send_flow_run_logs": "CONFIG_SEND_RUN_LOGS",
        "backend": "CONFIG_BACKEND",
        "cloud.api": "CONFIG_API",
        "cloud.tenant_id": "CONFIG_TENANT_ID",
        # Deprecated tokens are included if available but overridden by
        # `run_api_key`
        "cloud.agent.auth_token": "CONFIG_AUTH_TOKEN",
        "cloud.auth_token": None,
    }
    run_config_env = {
        # Run config should take precedence for these values
        "A": "RUN_CONFIG",
        "B": "RUN_CONFIG",
        "C": None,  # Null values are excluded
        # Should not be overridable using a run config
        "PREFECT__CONTEXT__FLOW_RUN_ID": "RUN_CONFIG",
        "PREFECT__CONTEXT__FLOW_ID": "RUN_CONFIG",
        "PREFECT__CLOUD__API_KEY": "RUN_CONFIG",
        "PREFECT__CLOUD__TENANT_ID": "RUN_CONFIG",
        "PREFECT__CLOUD__API": "RUN_CONFIG",
        "PREFECT__BACKEND": "RUN_CONFIG",
    }

    with set_temporary_config(temporary_config):
        result = generate_flow_run_environ(
            flow_run_id="flow-run-id",
            flow_id="flow-id",
            run_config=UniversalRun(env=run_config_env),
            run_api_key="api-key",
        )

    expected = {
        # Passed via kwargs directly
        "PREFECT__CONTEXT__FLOW_RUN_ID": "flow-run-id",
        "PREFECT__CONTEXT__FLOW_ID": "flow-id",
        "PREFECT__CLOUD__API_KEY": "api-key",
        # Backwards compatibility for tokens
        "PREFECT__CLOUD__AUTH_TOKEN": "api-key",
        # Set from prefect config
        "PREFECT__LOGGING__LEVEL": prefect.config.logging.level,
        "PREFECT__LOGGING__FORMAT": prefect.config.logging.format,
        "PREFECT__LOGGING__DATEFMT": prefect.config.logging.datefmt,
        "PREFECT__CLOUD__SEND_FLOW_RUN_LOGS": "CONFIG_SEND_RUN_LOGS",
        "PREFECT__BACKEND": "CONFIG_BACKEND",
        "PREFECT__CLOUD__API": "CONFIG_API",
        "PREFECT__CLOUD__TENANT_ID": "CONFIG_TENANT_ID",
        # Overridden by run config
        "A": "RUN_CONFIG",
        "B": "RUN_CONFIG",
        # Hard-coded
        "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS":
        "prefect.engine.cloud.CloudFlowRunner",
        "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS":
        "prefect.engine.cloud.CloudTaskRunner",
    }
    assert result == expected
Пример #20
0
from prefect.run_configs import KubernetesRun, LocalRun, DockerRun, ECSRun, UniversalRun
from prefect.serialization.run_config import RunConfigSchema, RunConfigSchemaBase


def test_serialized_run_config_sorts_labels():
    """Dumping through `RunConfigSchemaBase` returns the labels sorted."""
    unsorted_labels = ["b", "c", "a"]
    dumped = RunConfigSchemaBase().dump({"labels": unsorted_labels})

    assert dumped["labels"] == ["a", "b", "c"]


@pytest.mark.parametrize(
    "config",
    [
        UniversalRun(),
        UniversalRun(env={"FOO": "BAR"}, labels=["a", "b"]),
    ],
)
def test_serialize_universal_run(config):
    """A `UniversalRun` keeps its env and labels across dump and load."""
    serialized = RunConfigSchema().dump(config)
    restored = RunConfigSchema().load(serialized)

    assert config.env == restored.env
    # Label order is not significant, so compare the sorted sequences
    assert sorted(config.labels) == sorted(restored.labels)


@pytest.mark.parametrize(
    "config",
    [
        KubernetesRun(),
        KubernetesRun(
            job_template_path="s3://bucket/test.yaml",
            image="myimage",
Пример #21
0
def build_and_register(
    client: "prefect.Client",
    flows: "List[prefect.Flow]",
    project: str,
    labels: List[str] = None,
    force: bool = False,
) -> Counter:
    """Build and register all flows.

    Flows are first finalized (default result, run config, extra labels),
    then grouped by storage instance. Each storage is built once and the
    flows it holds are registered one by one. A storage build failure marks
    every flow using that storage as errored.

    Args:
        - client (prefect.Client): the prefect client to use
        - flows (List[prefect.Flow]): the flows to register
        - project (str): the project in which to register the flows
        - labels (List[str], optional): Any extra labels to set on all flows
        - force (bool, optional): If false (default), an idempotency key will
            be used to avoid unnecessary register calls.

    Returns:
        - Counter: stats about the number of successful, failed, and skipped flows.
    """
    # Always normalize to a set (possibly empty): `set.update(None)` raises
    # `TypeError`, which used to break flows that already carried a run
    # config or environment whenever no extra labels were given.
    labels = set(labels or ())

    # Finish setting up all flows before building, to ensure a stable hash
    # for flows sharing storage instances
    for flow in flows:
        # Set the default flow result if not specified
        if not flow.result:
            flow.result = flow.storage.result

        # Add a `run_config` if not configured explicitly
        # Also add any extra labels to the flow
        if flow.run_config is None:
            if flow.environment is not None:
                flow.environment.labels.update(labels)
            else:
                flow.run_config = UniversalRun(labels=labels)
        else:
            flow.run_config.labels.update(labels)

    # Group flows by storage instance.
    # Also adds all flows to their respective storage instance.
    storage_to_flows = defaultdict(list)
    for flow in flows:
        flow.storage.add_flow(flow)
        storage_to_flows[flow.storage].append(flow)

    # Register each flow, building storage as needed.
    # Stats on success/fail/skip rates are kept for later display
    stats = Counter(registered=0, errored=0, skipped=0)
    for storage, storage_flows in storage_to_flows.items():
        # Build storage
        click.echo(f"  Building `{type(storage).__name__}` storage...")
        try:
            storage.build()
        except Exception as exc:
            click.secho("    Error building storage:", fg="red")
            log_exception(exc, indent=6)
            red_error = click.style("Error", fg="red")
            # None of the flows in this storage can be registered
            for flow in storage_flows:
                click.echo(f"  Registering {flow.name!r}... {red_error}")
                stats["errored"] += 1
            continue

        for flow in storage_flows:
            click.echo(f"  Registering {flow.name!r}...", nl=False)
            try:
                # Get most recent flow id for this flow. This can be removed once
                # the registration graphql routes return more information
                resp = client.graphql({
                    "query": {
                        with_args(
                            "flow",
                            {
                                "where": {
                                    "_and": {
                                        "name": {
                                            "_eq": flow.name
                                        },
                                        "project": {
                                            "name": {
                                                "_eq": project
                                            }
                                        },
                                    }
                                },
                                "order_by": {
                                    "version": EnumValue("desc")
                                },
                                "limit": 1,
                            },
                        ): {"id", "version"}
                    }
                })
                if resp.data.flow:
                    prev_id = resp.data.flow[0].id
                    prev_version = resp.data.flow[0].version
                else:
                    prev_id = None
                    prev_version = 0
                new_id = client.register(
                    flow=flow,
                    project_name=project,
                    build=False,
                    no_url=True,
                    idempotency_key=(None
                                     if force else flow.serialized_hash()),
                )
            except Exception as exc:
                click.secho(" Error", fg="red")
                log_exception(exc, indent=4)
                stats["errored"] += 1
            else:
                # An unchanged ID means the idempotency key matched and no
                # new version was created
                if new_id == prev_id:
                    click.secho(" Skipped", fg="yellow")
                    stats["skipped"] += 1
                else:
                    click.secho(" Done", fg="green")
                    click.echo(f"  └── ID: {new_id}")
                    click.echo(f"  └── Version: {prev_version + 1}")
                    stats["registered"] += 1
    return stats
Пример #22
0
def build_example(path):
    """Render one example file to markdown and collect its flows.

    The example is executed in-process to discover its `Flow` objects and
    run again as a subprocess to capture its output.

    Args:
        - path (str): the path to the example source file.

    Returns:
        - markdown (str): the rendered example in markdown
        - flows (Dict[str, Flow]): the flows found in the example

    Raises:
        - ValueError: if the docstring header or the code following it
            cannot be located in the example
    """
    from prefect import Flow
    from prefect.storage import GitHub
    from prefect.run_configs import UniversalRun

    # Use the current commit (if specified in the environment)
    ref = os.getenv("GIT_SHA", "master")

    with open(path, "r", encoding="utf-8") as source_file:
        contents = source_file.read()

    # NOTE: `exec` runs the example's code in this process; only use this on
    # trusted example sources.
    namespace = {}
    exec(contents, namespace)

    try:
        header = namespace["__doc__"]
        # The second top-level statement marks where the code after the
        # header begins
        second_statement = ast.parse(contents).body[1]
        offset = second_statement.lineno - 1
    except Exception as exc:
        raise ValueError(
            f"No docstring header found for example at {path}") from exc

    flows = {}
    relpath = os.path.relpath(path, start=ROOT)
    for obj in namespace.values():
        if not isinstance(obj, Flow):
            continue
        obj.storage = GitHub("PrefectHQ/prefect", path=relpath, ref=ref)
        if not obj.run_config:
            obj.run_config = UniversalRun()
        obj.run_config.labels.add("prefect-examples")
        flows[obj.name] = obj.serialize(build=True)

    # Everything from the second statement onwards is shown as the source
    source_lines = contents.splitlines()[offset:]
    source = "\n".join(source_lines).strip()

    completed = subprocess.run(
        [sys.executable, path],
        capture_output=True,
        check=True,
        env={"PREFECT__LOGGING__FORMAT": "%(levelname)s | %(message)s"},
    )
    output = completed.stdout.decode("utf-8").strip()

    register_lines = [
        "prefect register --json https://docs.prefect.io/examples.json"
    ]
    register_lines.extend(f"    --name {name!r}" for name in sorted(flows))
    register_lines.append("    --project 'Prefect Examples'")
    register_cmd = " \\\n".join(register_lines)

    rendered = EXAMPLE_TEMPLATE.format(
        header=header,
        source=source,
        output=output,
        ref=ref,
        relpath=relpath,
        register_cmd=register_cmd,
    ).lstrip()

    return rendered, flows
Пример #23
0
        {
            "timestamp": pendulum.now().subtract(seconds=20).isoformat(),
            "serialized_state": Submitted(message="past-state").serialize(),
        },
    ],
    "parameters": {
        "param": "value"
    },
    "context": {
        "foo": "bar"
    },
    "labels": ["label"],
    "updated":
    pendulum.now().isoformat(),
    "run_config":
    UniversalRun().serialize(),
}
FLOW_RUN_DATA_2 = {
    "id":
    "id-2",
    "name":
    "name-2",
    "flow_id":
    "flow_id-2",
    "serialized_state":
    Success(message="state-2").serialize(),
    "states": [
        {
            "timestamp": pendulum.now().subtract(seconds=10).isoformat(),
            "serialized_state": Running(message="past-state").serialize(),
        },
Пример #24
0
        {
            "timestamp": pendulum.now().subtract(seconds=20).isoformat(),
            "serialized_state": Submitted(message="past-state").serialize(),
        },
    ],
    "parameters": {
        "param": "value"
    },
    "context": {
        "foo": "bar"
    },
    "labels": ["label"],
    "updated":
    pendulum.now().isoformat(),
    "run_config":
    UniversalRun().serialize(),
}
FLOW_RUN_DATA_2 = {
    "id":
    "id-2",
    "name":
    "name-2",
    "flow_id":
    "flow_id-2",
    "serialized_state":
    Success(message="state-2").serialize(),
    "states": [
        {
            "timestamp": pendulum.now().subtract(seconds=10).isoformat(),
            "serialized_state": Running(message="past-state").serialize(),
        },
Пример #25
0
    def test_build_and_register(self, capsys, monkeypatch, force):
        """Build and register a few flows:
        - 1 new flow
        - 1 updated flow
        - 1 skipped flow
        - 1 error during registration
        - 2 sharing the same storage (which fails to build properly)

        The mocked client replies are consumed in call order, so the order of
        `flows` below determines which reply each flow receives.
        """
        build_call_count = 0

        # Storage whose `build` only counts how often it was called
        class MyModule(Module):
            def build(self):
                nonlocal build_call_count
                build_call_count += 1

        # Storage whose `build` always fails
        class BadStorage(Module):
            def build(self):
                raise ValueError("whoops!")

        client = MagicMock()
        # One "previous version" lookup result per registered flow (flows 1-4)
        client.graphql.side_effect = [
            GraphQLResult({"data": {"flow": []}}),
            GraphQLResult({"data": {"flow": [{"id": "old-id-2", "version": 1}]}}),
            GraphQLResult({"data": {"flow": [{"id": "old-id-3", "version": 2}]}}),
            GraphQLResult({"data": {"flow": [{"id": "old-id-4", "version": 3}]}}),
        ]
        # Register results for flows 1-4: new ID, unchanged ID (matches the
        # lookup above), new ID, and a failure
        client.register.side_effect = [
            "new-id-1",
            "old-id-2",
            "new-id-3",
            ValueError("Oh no!"),
        ]

        storage1 = MyModule("testing")
        storage1.result = LocalResult()
        flow1 = Flow("flow 1", storage=storage1, run_config=UniversalRun(labels=["a"]))
        flow2 = Flow(
            "flow 2",
            storage=MyModule("testing"),
            environment=LocalEnvironment(labels=["a"]),
        )
        # flow 3 and flow 4 share one storage instance
        storage2 = MyModule("testing")
        flow3 = Flow("flow 3", storage=storage2)
        flow4 = Flow("flow 4", storage=storage2)
        # flow 5 and flow 6 share the storage that fails to build
        storage3 = BadStorage("testing")
        flow5 = Flow("flow 5", storage=storage3)
        flow6 = Flow("flow 6", storage=storage3)
        flows = [flow1, flow2, flow3, flow4, flow5, flow6]

        stats = build_and_register(
            client, flows, "testing", labels=["b", "c"], force=force
        )

        # 3 calls (one for each unique `MyModule` storage object)
        assert build_call_count == 3

        # 4 register calls (6 - 2 that failed to build storage)
        assert client.register.call_count == 4
        # `zip` stops after the 4 recorded calls, so flows 5 and 6 are not checked here
        for flow, (args, kwargs) in zip(flows, client.register.call_args_list):
            assert not args
            assert kwargs["flow"] is flow
            assert kwargs["project_name"] == "testing"
            assert kwargs["build"] is False
            assert kwargs["no_url"] is True
            if force:
                assert kwargs["idempotency_key"] is None
            else:
                assert kwargs["idempotency_key"]

        # Stats are recorded properly
        # (flows 1 and 3 registered, flow 2 skipped, flows 4-6 errored)
        assert dict(stats) == {"registered": 2, "skipped": 1, "errored": 3}

        # Flows are properly configured
        assert flow1.result is storage1.result
        assert flow1.run_config.labels == {"a", "b", "c"}
        assert flow2.environment.labels == {"a", "b", "c"}
        assert isinstance(flow3.run_config, UniversalRun)
        assert flow3.run_config.labels == {"b", "c"}
        assert isinstance(flow4.run_config, UniversalRun)
        assert flow4.run_config.labels == {"b", "c"}

        # The output contains a traceback, which will vary between machines
        # We only check that the following fixed sections exist in the output
        parts = [
            (
                "  Building `MyModule` storage...\n"
                "  Registering 'flow 1'... Done\n"
                "  └── ID: new-id-1\n"
                "  └── Version: 1\n"
                "  Building `MyModule` storage...\n"
                "  Registering 'flow 2'... Skipped\n"
                "  Building `MyModule` storage...\n"
                "  Registering 'flow 3'... Done\n"
                "  └── ID: new-id-3\n"
                "  └── Version: 3\n"
                "  Registering 'flow 4'... Error\n"
                "    Traceback (most recent call last):\n"
            ),
            (
                "    ValueError: Oh no!\n"
                "\n"
                "  Building `BadStorage` storage...\n"
                "    Error building storage:\n"
                "      Traceback (most recent call last):\n"
            ),
            (
                "      ValueError: whoops!\n"
                "\n"
                "  Registering 'flow 5'... Error\n"
                "  Registering 'flow 6'... Error\n"
            ),
        ]
        out, err = capsys.readouterr()
        assert not err
        for part in parts:
            assert part in out
Пример #26
0
class TestCreateFlowRun:
    """Tests for `create_flow_run.run`.

    Covers argument validation, which `MockFlowView` lookup is used, the
    keyword arguments handed to `MockClient().create_flow_run`, the returned
    value, and the logged run URL.
    """

    def test_does_not_accept_both_id_and_name(self):
        """Passing both `flow_id` and `flow_name` raises a `ValueError`."""
        message = "Received both `flow_id` and `flow_name`"
        with pytest.raises(ValueError, match=message):
            create_flow_run.run(flow_id=uuid4(), flow_name="foo")

    def test_requires_id_or_name(self):
        """Passing neither `flow_id` nor `flow_name` raises a `ValueError`."""
        message = "`flow_id` and `flow_name` are null"
        with pytest.raises(ValueError, match=message):
            create_flow_run.run(flow_id=None, flow_name=None)

    @pytest.mark.parametrize(
        "kwargs",
        [
            dict(flow_id="flow-id"),
            dict(flow_name="flow-name"),
            dict(flow_name="flow-name", project_name="project-name"),
        ],
    )
    def test_lookup_uses_given_identifiers(self, kwargs, MockFlowView, MockClient):
        """The flow is looked up by id when given, otherwise by name.

        For name lookups the project name is forwarded, defaulting to `""`.
        """
        create_flow_run.run(**kwargs)

        if "flow_id" in kwargs:
            MockFlowView.from_id.assert_called_once_with("flow-id")
            return

        if "flow_name" in kwargs:
            project_name = kwargs.get("project_name", "")
            MockFlowView.from_flow_name.assert_called_once_with(
                "flow-name", project_name=project_name
            )

    def test_creates_flow_run_with_defaults(self, MockFlowView, MockClient):
        """With only a flow id, every optional setting is passed as `None`."""
        MockFlowView.from_id.return_value.flow_id = "flow-id"

        create_flow_run.run(flow_id="flow-id")

        expected = {
            "flow_id": "flow-id",
            "parameters": None,
            "run_name": None,
            "labels": None,
            "context": None,
            "run_config": None,
            "scheduled_start_time": None,
            "idempotency_key": None,
        }
        MockClient().create_flow_run.assert_called_once_with(**expected)

    @pytest.mark.parametrize(
        "kwargs",
        [
            dict(parameters=dict(x=1, y="foo")),
            dict(run_name="run-name"),
            dict(labels=["a", "b"]),
            dict(context={"var": "val"}),
            dict(run_config=UniversalRun(env={"x"})),
            dict(scheduled_start_time=pendulum.now().add(days=1)),
        ],
    )
    def test_creates_flow_with_given_settings(self, MockFlowView, MockClient, kwargs):
        """Each explicitly given setting is forwarded; the rest stay `None`."""
        MockFlowView.from_id.return_value.flow_id = "flow-id"

        create_flow_run.run(flow_id="flow-id", **kwargs)

        forwarded = (
            "parameters",
            "run_name",
            "labels",
            "context",
            "run_config",
            "scheduled_start_time",
        )
        expected = {key: kwargs.get(key) for key in forwarded}
        MockClient().create_flow_run.assert_called_once_with(
            flow_id="flow-id", idempotency_key=None, **expected
        )

    @pytest.mark.parametrize(
        "kwargs",
        [
            dict(scheduled_start_time=pendulum.duration(days=1)),
        ],
    )
    def test_creates_flow_in_future(self, MockFlowView, MockClient, monkeypatch, kwargs):
        """A duration start time is passed on as "now" plus that duration."""
        MockFlowView.from_id.return_value.flow_id = "flow-id"

        # Mocking the concept of "now" so we can have consistent assertions
        now = pendulum.now("utc")
        mock_now = MagicMock(return_value=now)
        monkeypatch.setattr("prefect.client.client.pendulum.now", mock_now)

        create_flow_run.run(flow_id="flow-id", **kwargs)

        passthrough = ("parameters", "run_name", "labels", "context", "run_config")
        expected = {key: kwargs.get(key) for key in passthrough}
        expected_start = pendulum.now("utc") + kwargs.get("scheduled_start_time")
        MockClient().create_flow_run.assert_called_once_with(
            flow_id="flow-id",
            scheduled_start_time=expected_start,
            idempotency_key=None,
            **expected,
        )

    def test_generates_run_name_from_parent_and_child(self, MockFlowView, MockClient):
        """Inside a parent run context the run name and idempotency key are derived.

        The expected run name joins the context's `flow_run_name` and the
        child flow's name with a dash; the expected idempotency key is the
        context's `task_run_id`.
        """
        child_flow = MockFlowView.from_id.return_value
        child_flow.flow_id = "flow-id"
        child_flow.name = "child-name"

        parent_context = dict(flow_run_name="parent-run", task_run_id="parent-task-run")
        with prefect.context(**parent_context):
            create_flow_run.run(flow_id="flow-id")

        expected = {
            "flow_id": "flow-id",
            "parameters": None,
            "run_name": "parent-run-child-name",
            "labels": None,
            "context": None,
            "run_config": None,
            "scheduled_start_time": None,
            "idempotency_key": "parent-task-run",
        }
        MockClient().create_flow_run.assert_called_once_with(**expected)

    def test_returns_flow_run_idl(self, MockFlowView, MockClient):
        """The task returns the value produced by the client's `create_flow_run`."""
        MockClient().create_flow_run.return_value = "flow-run-id"

        flow_run_id = create_flow_run.run(flow_id="flow-id")

        assert flow_run_id == "flow-run-id"

    def test_displays_flow_run_url(self, MockFlowView, MockClient, caplog):
        """The URL from the client's `get_cloud_url` appears in the log output."""
        MockClient().create_flow_run.return_value = "flow-run-id"
        MockClient().get_cloud_url.return_value = "fake-url"

        create_flow_run.run(flow_id="flow-id")

        MockClient().get_cloud_url.assert_called_once_with(
            "flow-run", "flow-run-id", as_user=False
        )
        expected_log = "Created flow run '<generated-name>': fake-url"
        assert expected_log in caplog.text
Пример #27
0
        agent.deploy_flow(flow_run=GraphQLResult(
            {
                "id": "id",
                "flow": {
                    "storage": Local().serialize(),
                    "id": "foo",
                    "core_version": "0.13.0",
                },
                "run_config": KubernetesRun().serialize(),
            }, ))

    assert not popen.called
    assert len(agent.processes) == 0


@pytest.mark.parametrize("run_config", [None, UniversalRun()])
def test_local_agent_deploy_null_or_univeral_run_config(
        monkeypatch, run_config):
    popen = MagicMock()
    monkeypatch.setattr("prefect.agent.local.agent.Popen", popen)

    agent = LocalAgent()

    agent.deploy_flow(flow_run=GraphQLResult(
        {
            "id": "id",
            "flow": {
                "storage": Local().serialize(),
                "id": "foo",
                "core_version": "0.13.0",
            },
Пример #28
0
import pytest

from prefect.run_configs import KubernetesRun, LocalRun, DockerRun, ECSRun, UniversalRun
from prefect.serialization.run_config import RunConfigSchema, RunConfigSchemaBase


def test_serialized_run_config_sorts_labels():
    """Dumping labels through `RunConfigSchemaBase` yields them sorted."""
    serialized = RunConfigSchemaBase().dump({"labels": ["b", "c", "a"]})
    assert serialized["labels"] == ["a", "b", "c"]


@pytest.mark.parametrize("config", [UniversalRun(), UniversalRun(labels=["a", "b"])])
def test_serialize_universal_run(config):
    """A `UniversalRun` keeps the same labels after a dump/load round trip."""
    msg = RunConfigSchema().dump(config)
    config2 = RunConfigSchema().load(msg)
    # Compare sorted copies so the check does not depend on label ordering.
    assert sorted(config.labels) == sorted(config2.labels)


@pytest.mark.parametrize(
    "config",
    [
        KubernetesRun(),
        KubernetesRun(
Пример #29
0
class TestDeployFlow:
    def deploy_flow(self, run_config, storage=None, **kwargs):
        """Deploy a synthetic flow run through a freshly built `ECSAgent`.

        Args:
            - run_config: run config whose serialized form is placed in the
                flow run; a falsy value is sent as `None`
            - storage: storage whose serialized form is placed in the flow;
                `Local()` is used when this is falsy
            - **kwargs: forwarded to the `ECSAgent` constructor

        Returns:
            - the result of calling `deploy_flow` on the agent
        """
        ecs_agent = ECSAgent(**kwargs)

        flow_storage = storage or Local()
        flow = GraphQLResult(
            {
                "storage": flow_storage.serialize(),
                "id": "flow-id",
                "version": 1,
                "name": "Test Flow",
                "core_version": "0.13.0",
            }
        )
        serialized_run_config = run_config.serialize() if run_config else None

        flow_run = GraphQLResult(
            {
                "flow": flow,
                "run_config": serialized_run_config,
                "id": "flow-run-id",
            }
        )
        return ecs_agent.deploy_flow(flow_run)

    def test_deploy_flow_errors_if_not_ecs_run_config(self):
        """Deploying with a `LocalRun` run config raises a `TypeError`."""
        expected_error = "`run_config` of type `LocalRun`, only `ECSRun` is supported"
        with pytest.raises(TypeError, match=expected_error):
            self.deploy_flow(LocalRun())

    def test_deploy_flow_errors_if_mix_task_definition_arn_and_docker_storage(self):
        """A `task_definition_arn` combined with `Docker` storage raises `ValueError`."""
        expected_error = (
            "Cannot provide `task_definition_arn` when using `Docker` storage"
        )
        with pytest.raises(ValueError, match=expected_error):
            run_config = ECSRun(task_definition_arn="my-taskdef-arn")
            docker_storage = Docker(
                registry_url="test", image_name="name", image_tag="tag"
            )
            self.deploy_flow(run_config, storage=docker_storage)

    @pytest.mark.parametrize("run_config", [ECSRun(), UniversalRun(), None])
    def test_deploy_flow_registers_new_task_definition(self, run_config, aws):
        """Without a task definition ARN, one is registered, run and deregistered.

        Exercised for an `ECSRun`, a `UniversalRun` and no run config at all.
        """
        ecs = aws.ecs
        ecs.register_task_definition.return_value = {
            "taskDefinition": {"taskDefinitionArn": "my-taskdef-arn"}
        }
        ecs.run_task.return_value = {"tasks": [{"taskArn": "my-task-arn"}]}

        res = self.deploy_flow(run_config)

        # Registration happened, with the family name built from flow and run ids
        assert ecs.register_task_definition.called
        register_kwargs = ecs.register_task_definition.call_args[1]
        assert register_kwargs["family"] == "prefect-test-flow-flow-run-id"

        # The task was started from the newly registered definition
        assert ecs.run_task.called
        run_kwargs = ecs.run_task.call_args[1]
        assert run_kwargs["taskDefinition"] == "my-taskdef-arn"
        assert "my-task-arn" in res

        # The registered definition was also deregistered
        assert ecs.deregister_task_definition.called
        deregister_kwargs = ecs.deregister_task_definition.call_args[1]
        assert deregister_kwargs["taskDefinition"] == "my-taskdef-arn"

    def test_deploy_flow_uses_provided_task_definition_arn(self, aws):
        """A provided task definition ARN is run as-is, with no register/deregister."""
        ecs = aws.ecs
        ecs.run_task.return_value = {"tasks": [{"taskArn": "my-task-arn"}]}

        run_config = ECSRun(task_definition_arn="my-taskdef-arn")
        res = self.deploy_flow(run_config)

        assert not ecs.register_task_definition.called
        assert ecs.run_task.called
        run_kwargs = ecs.run_task.call_args[1]
        assert run_kwargs["taskDefinition"] == "my-taskdef-arn"
        assert not ecs.deregister_task_definition.called
        assert "my-task-arn" in res

    def test_deploy_flow_run_task_fails(self, aws):
        """When `run_task` reports failures, a `ValueError` carrying the reason is raised.

        The task definition is still deregistered in that case.
        """
        failed_response = {"tasks": [], "failures": [{"reason": "my-reason"}]}
        aws.ecs.run_task.return_value = failed_response

        with pytest.raises(ValueError) as excinfo:
            self.deploy_flow(ECSRun())

        assert aws.ecs.run_task.called
        assert aws.ecs.deregister_task_definition.called
        assert "my-reason" in str(excinfo.value)

    def test_deploy_flow_forwards_run_task_kwargs(self, aws):
        """Entries of `ECSRun.run_task_kwargs` show up as `run_task` keyword arguments."""
        ecs = aws.ecs
        ecs.register_task_definition.return_value = {
            "taskDefinition": {"taskDefinitionArn": "my-taskdef-arn"}
        }
        ecs.run_task.return_value = {"tasks": [{"taskArn": "my-task-arn"}]}

        run_config = ECSRun(run_task_kwargs={"enableECSManagedTags": True})
        res = self.deploy_flow(run_config)

        assert ecs.run_task.called
        run_kwargs = ecs.run_task.call_args[1]
        assert run_kwargs["taskDefinition"] == "my-taskdef-arn"
        assert ecs.run_task.call_args[1]["enableECSManagedTags"] is True
        assert "my-task-arn" in res

    def test_deploy_flow_forwards_run_config_settings(self, aws):
        """`cpu` and `memory` from the `ECSRun` reach `run_task` overrides as strings."""
        ecs = aws.ecs
        ecs.register_task_definition.return_value = {
            "taskDefinition": {"taskDefinitionArn": "my-taskdef-arn"}
        }
        ecs.run_task.return_value = {"tasks": [{"taskArn": "my-task-arn"}]}

        self.deploy_flow(ECSRun(cpu=8, memory=1024))

        ecs.run_task.assert_called_once()
        overrides = ecs.run_task.call_args[1]["overrides"]
        assert overrides["cpu"] == "8"
        assert ecs.run_task.call_args[1]["overrides"]["memory"] == "1024"
    def test_build_and_register(self, capsys, monkeypatch, force):
        """Build and register a few flows:
        - 1 new flow
        - 1 updated flow
        - 1 skipped flow
        - 1 error during registration
        - 2 sharing the same storage (which fails to build properly)
        - 2 from a pre-built JSON file

        Checks the number of storage builds, the keyword arguments of every
        `register_serialized_flow` call, the returned stats, the labels and
        defaults applied to each flow, and fixed sections of the printed
        output.

        NOTE(review): nothing visible in this chunk supplies `force`;
        presumably a `pytest.mark.parametrize` decorator or a fixture provides
        it - confirm.
        """
        build_call_count = 0

        class MyModule(Module):
            # Storage whose build only counts how many times it was invoked
            def build(self):
                nonlocal build_call_count
                build_call_count += 1

        class BadStorage(Module):
            # Storage whose build always fails
            def build(self):
                raise ValueError("whoops!")

        client = MagicMock()
        register_serialized_flow = MagicMock()
        # One entry per flow that reaches registration, in order:
        # flows 1-4, then the two pre-serialized flows 7 and 8.
        register_serialized_flow.side_effect = [
            ("new-id-1", 1, True),
            ("old-id-2", 2, False),
            ("new-id-3", 3, True),
            ValueError("Oh no!"),
            ("new-id-7", 1, True),
            ("old-id-8", 2, False),
        ]
        monkeypatch.setattr(
            "prefect.cli.build_register.register_serialized_flow",
            register_serialized_flow,
        )

        storage1 = MyModule("testing")
        storage1.result = LocalResult()
        flow1 = Flow("flow 1",
                     storage=storage1,
                     run_config=UniversalRun(labels=["a"]))
        flow2 = Flow(
            "flow 2",
            storage=MyModule("testing"),
            environment=LocalEnvironment(labels=["a"]),
        )
        # flow3 and flow4 share one storage instance
        storage2 = MyModule("testing")
        flow3 = Flow("flow 3", storage=storage2)
        flow4 = Flow("flow 4", storage=storage2)
        # flow5 and flow6 share a storage instance that fails to build
        storage3 = BadStorage("testing")
        flow5 = Flow("flow 5", storage=storage3)
        flow6 = Flow("flow 6", storage=storage3)
        # flow7 and flow8 are already-serialized flows wrapped in `box.Box`
        flow7 = box.Box(
            Flow("flow 7",
                 run_config=UniversalRun(labels=["a"])).serialize(build=False))
        flow8 = box.Box(
            Flow("flow 8", environment=LocalEnvironment(
                labels=["a"])).serialize(build=False))
        flows = [flow1, flow2, flow3, flow4, flow5, flow6, flow7, flow8]

        stats = build_and_register(client,
                                   flows,
                                   "my-project-id",
                                   labels=["b", "c"],
                                   force=force)

        # 3 calls (one for each unique `MyModule` storage object)
        assert build_call_count == 3

        # 6 register calls (8 - 2 that failed to build storage)
        assert register_serialized_flow.call_count == 6
        # Only the recorded calls are inspected. They cannot be paired with
        # `flows` by position: flows 5 and 6 never reach registration, so the
        # last two calls belong to flows 7 and 8.
        for args, kwargs in register_serialized_flow.call_args_list:
            assert not args
            assert kwargs["client"] is client
            assert kwargs["serialized_flow"]
            assert kwargs["project_id"] == "my-project-id"
            assert kwargs["force"] == force

        # Stats are recorded properly
        assert dict(stats) == {"registered": 3, "skipped": 2, "errored": 3}

        # Flows are properly configured
        assert flow1.result is storage1.result
        assert flow1.run_config.labels == {"a", "b", "c"}
        assert flow2.environment.labels == {"a", "b", "c"}
        assert isinstance(flow3.run_config, UniversalRun)
        assert flow3.run_config.labels == {"b", "c"}
        assert isinstance(flow4.run_config, UniversalRun)
        assert flow4.run_config.labels == {"b", "c"}
        assert set(flow7["run_config"]["labels"]) == {"a", "b", "c"}
        assert set(flow8["environment"]["labels"]) == {"a", "b", "c"}

        # The output contains a traceback, which will vary between machines
        # We only check that the following fixed sections exist in the output
        parts = [
            ("  Building `MyModule` storage...\n"
             "  Registering 'flow 1'... Done\n"
             "  └── ID: new-id-1\n"
             "  └── Version: 1\n"
             "  Building `MyModule` storage...\n"
             "  Registering 'flow 2'... Skipped (metadata unchanged)\n"
             "  Building `MyModule` storage...\n"
             "  Registering 'flow 3'... Done\n"
             "  └── ID: new-id-3\n"
             "  └── Version: 3\n"
             "  Registering 'flow 4'... Error\n"
             "    Traceback (most recent call last):\n"),
            ("    ValueError: Oh no!\n"
             "\n"
             "  Building `BadStorage` storage...\n"
             "    Error building storage:\n"
             "      Traceback (most recent call last):\n"),
            ("      ValueError: whoops!\n"
             "\n"
             "  Registering 'flow 5'... Error\n"
             "  Registering 'flow 6'... Error\n"
             "  Registering 'flow 7'... Done\n"
             "  └── ID: new-id-7\n"
             "  └── Version: 1\n"
             "  Registering 'flow 8'... Skipped (metadata unchanged)\n"),
        ]
        out, err = capsys.readouterr()
        assert not err
        for part in parts:
            assert part in out