Example #1
def test_get_flow_image_raises_on_missing_info():
    flow_run = GraphQLResult({
        "flow":
        GraphQLResult({
            "storage": Local().serialize(),
            "environment": LocalEnvironment().serialize(),
            "id": "id",
        }),
        "id":
        "id",
    })
    with pytest.raises(ValueError):
        get_flow_image(flow_run=flow_run)
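Note: taken together, the examples on this page exercise get_flow_image's image-resolution order. The following is a hypothetical, self-contained sketch of that order as the tests imply it — it operates on plain dicts rather than GraphQLResult and is not the actual prefect implementation (for instance, the default tag carries an all_extras- prefix in some releases, as Examples #5-#6 show).

def resolve_image_sketch(flow_run: dict, default: str = None) -> str:
    # 1. An explicit image on the run config wins.
    run_config = flow_run.get("run_config") or flow_run["flow"].get("run_config")
    if run_config and run_config.get("image"):
        return run_config["image"]
    # 2. Docker storage carries a full image reference.
    storage = flow_run["flow"].get("storage") or {}
    if storage.get("image_name"):
        return f"{storage['registry_url']}/{storage['image_name']}:{storage['image_tag']}"
    # 3. Environment metadata may name an image.
    env_meta = (flow_run["flow"].get("environment") or {}).get("metadata") or {}
    if env_meta.get("image"):
        return env_meta["image"]
    # 4. Caller-supplied default, then a version-derived prefect image.
    if default is not None:
        return default
    version = flow_run["flow"].get("core_version")
    if run_config is not None or version:
        return f"prefecthq/prefect:{version.split('+')[0] if version else 'latest'}"
    raise ValueError("no image information attached to this flow run")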
Example #2
File: agent.py Project: shunwen/prefect
    def deploy_flow(self, flow_run: GraphQLResult) -> str:
        """
        Deploy flow runs onto a k8s cluster as jobs

        Args:
            - flow_run (GraphQLResult): A GraphQLResult flow run object

        Returns:
            - str: Information about the deployment
        """
        self.logger.info("Deploying flow run {}".format(flow_run.id))  # type: ignore

        image = get_flow_image(flow_run=flow_run)

        job_spec = self.replace_job_spec_yaml(flow_run=flow_run, image=image)

        self.logger.debug(
            "Creating namespaced job {}".format(job_spec["metadata"]["name"])
        )
        job = self.batch_client.create_namespaced_job(
            namespace=self.namespace, body=job_spec
        )

        self.logger.debug("Job {} created".format(job.metadata.name))

        return "Job {}".format(job.metadata.name)
Example #3
    def generate_task_definition(self, flow_run: GraphQLResult) -> Dict[str, Any]:
        """Generate an Vertex task definition from a flow run

        Args:
            - flow_run (GraphQLResult): A flow run object

        Returns:
            - dict: a dictionary representation of a Vertex task definition
        """

        run_config = self._get_run_config(flow_run, VertexRun)
        assert isinstance(run_config, VertexRun)  # mypy

        image = get_flow_image(flow_run)
        job_name = slugify.slugify(
            flow_run.flow.name + "-" + flow_run.name,
            max_length=255,
            word_boundary=True,
            save_order=True,
        )
        machine_type = run_config.machine_type

        command = get_flow_run_command(flow_run)
        env = self.populate_env_vars(flow_run)
        env_list = self._to_env_list(env)

        # Start with a default taskdef
        taskdef = {
            "display_name": job_name,
            "job_spec": {
                "worker_pool_specs": [
                    {"machine_spec": {"machine_type": machine_type}, "replica_count": 1}
                ]
            },
        }  # type: Dict[str, Any]

        if run_config.worker_pool_specs is not None:
            taskdef["job_spec"]["worker_pool_specs"] = run_config.worker_pool_specs

        if run_config.network is not None:
            taskdef["job_spec"]["network"] = run_config.network

        if run_config.service_account is not None:
            taskdef["job_spec"]["service_account"] = run_config.service_account
        else:
            taskdef["job_spec"]["service_account"] = self.service_account

        if run_config.scheduling is not None:
            taskdef["job_spec"]["scheduling"] = run_config.scheduling

        # We always set the container spec on the zeroth pool spec to ensure it will run the flow
        taskdef["job_spec"]["worker_pool_specs"][0]["container_spec"] = {
            "image_uri": image,
            "command": command.split(),
            "args": [],
            "env": env_list,
        }

        return taskdef
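For orientation, the dictionary this method returns has roughly the following shape (all field values below are made up for illustration; the command in particular depends on get_flow_run_command):

example_taskdef = {
    "display_name": "my-flow-my-run",  # slugified "<flow name>-<run name>"
    "job_spec": {
        "worker_pool_specs": [
            {
                "machine_spec": {"machine_type": "n1-standard-4"},
                "replica_count": 1,
                # container_spec is always set on the zeroth pool spec
                "container_spec": {
                    "image_uri": "prefecthq/prefect:0.15.0",
                    "command": ["prefect", "execute", "flow-run"],
                    "args": [],
                    "env": [{"name": "PREFECT__BACKEND", "value": "cloud"}],
                },
            }
        ],
        "service_account": "agent@example.iam.gserviceaccount.com",
    },
}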
Example #4
def test_get_flow_image_run_config_image_on_RunConfig():
    flow_run = GraphQLResult({
        "flow":
        GraphQLResult({
            "storage": Local().serialize(),
            "id": "id",
        }),
        "run_config":
        KubernetesRun(image="myfancyimage").serialize(),
        "id":
        "id",
    })
    image = get_flow_image(flow_run)
    assert image == "myfancyimage"
Example #5
def test_get_flow_image_run_config_default_value_from_core_version():
    flow_run = GraphQLResult({
        "flow":
        GraphQLResult({
            "core_version": "0.13.0",
            "storage": Local().serialize(),
            "run_config": KubernetesRun().serialize(),
            "id": "id",
        }),
        "id":
        "id",
    })
    image = get_flow_image(flow_run)
    assert image == "prefecthq/prefect:all_extras-0.13.0"
Example #6
def test_get_flow_image_run_config_default_value_from_core_version(version):
    flow_run = GraphQLResult({
        "flow":
        GraphQLResult({
            "core_version": version,
            "storage": Local().serialize(),
            "run_config": KubernetesRun().serialize(),
            "id": "id",
        }),
        "id":
        "id",
    })
    image = get_flow_image(flow_run)
    expected_version = version.split("+")[0] if version else "latest"
    assert image == f"prefecthq/prefect:all_extras-{expected_version}"
Example #7
def test_get_flow_image_env_metadata():
    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "storage": Local().serialize(),
                    "environment": LocalEnvironment(
                        metadata={"image": "repo/name:tag"}
                    ).serialize(),
                    "id": "id",
                }
            ),
            "id": "id",
        }
    )
    image = get_flow_image(flow_run=flow_run)
    assert image == "repo/name:tag"
Example #8
def test_get_flow_image_run_config_docker_storage(run_config):
    flow_run = GraphQLResult({
        "flow":
        GraphQLResult({
            "storage":
            Docker(registry_url="test", image_name="name",
                   image_tag="tag").serialize(),
            "id":
            "id",
        }),
        "run_config":
        run_config.serialize() if run_config else None,
        "id":
        "id",
    })
    image = get_flow_image(flow_run)
    assert image == "test/name:tag"
Example #9
def test_get_flow_image_docker_storage():
    flow_run = GraphQLResult({
        "flow":
        GraphQLResult({
            "storage":
            Docker(registry_url="test", image_name="name",
                   image_tag="tag").serialize(),
            "environment":
            LocalEnvironment().serialize(),
            "id":
            "id",
        }),
        "id":
        "id",
    })
    image = get_flow_image(flow_run=flow_run)
    assert image == "test/name:tag"
Example #10
File: agent.py Project: veseln/prefect
    def deploy_flow(self, flow_run: GraphQLResult) -> str:
        """
        Deploy flow runs onto a k8s cluster as jobs

        Args:
            - flow_run (GraphQLResult): A GraphQLResult flow run object

        Returns:
            - str: Information about the deployment
        """
        import urllib3.exceptions

        self.logger.info("Deploying flow run {}".format(
            flow_run.id))  # type: ignore

        image = get_flow_image(flow_run=flow_run)

        job_spec = self.replace_job_spec_yaml(flow_run=flow_run, image=image)
        job_name = job_spec["metadata"]["name"]

        self.logger.debug("Creating namespaced job {}".format(job_name))
        attempts = 3
        while True:
            try:
                self.batch_client.create_namespaced_job(
                    namespace=self.namespace, body=job_spec)
                break
            except self.k8s_client.rest.ApiException as exc:
                if exc.status == 409:
                    # object already exists, previous submission was successful
                    # even though it errored
                    break
                raise
            except urllib3.exceptions.HTTPError:
                attempts -= 1
                if attempts == 0:
                    raise
                self.logger.warning("Error submitting job %s, retrying...",
                                    job_name,
                                    exc_info=True)
                time.sleep(1)

        self.logger.debug("Job {} created".format(job_name))

        return "Job {}".format(job_name)
Example #11
def test_get_flow_image_run_config_default(run_config, version, default):
    flow_run = GraphQLResult({
        "flow":
        GraphQLResult({
            "core_version": version,
            "storage": Local().serialize(),
            "id": "id",
        }),
        "run_config":
        run_config.serialize() if run_config else None,
        "id":
        "id",
    })
    if default is None:
        expected_version = version.split("+")[0] if version else "latest"
        expected = f"prefecthq/prefect:{expected_version}"
    else:
        expected = default

    image = get_flow_image(flow_run, default=default)
    assert image == expected
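A sketch of the default= escape hatch in isolation, written in the same style as the tests above (a hypothetical test, not from the prefect suite): when a default is supplied and nothing on the run specifies an image, the default is returned verbatim.

def test_get_flow_image_explicit_default_sketch():
    flow_run = GraphQLResult({
        "flow":
        GraphQLResult({
            "core_version": "0.13.0",
            "storage": Local().serialize(),
            "id": "id",
        }),
        "run_config": None,
        "id": "id",
    })
    image = get_flow_image(flow_run, default="registry.example.com/base:1.0")
    assert image == "registry.example.com/base:1.0"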
Example #12
    def deploy_flow(self, flow_run: GraphQLResult) -> str:
        """
        Deploy flow runs to Fargate

        Args:
            - flow_run (GraphQLResult): A GraphQLResult flow run object

        Returns:
            - str: Information about the deployment
        """
        self.logger.info("Deploying flow run {}".format(flow_run.id))  # type: ignore

        # create copies of kwargs to apply overrides as needed
        flow_task_definition_kwargs = copy.deepcopy(self.task_definition_kwargs)
        flow_task_run_kwargs = copy.deepcopy(self.task_run_kwargs)
        flow_container_definitions_kwargs = copy.deepcopy(
            self.container_definitions_kwargs
        )

        # create task_definition_name dict for passing into verify method
        task_definition_dict = {}

        if self.use_external_kwargs:
            # override from external kwargs
            self._override_kwargs(
                flow_run,
                flow_task_definition_kwargs,
                flow_task_run_kwargs,
                flow_container_definitions_kwargs,
            )

        # set proper task_definition_name and tags based on enable_task_revisions flag
        if self.enable_task_revisions:
            # set task definition name
            task_definition_dict["task_definition_name"] = slugify(flow_run.flow.name)
            self._add_flow_tags(flow_run, flow_task_definition_kwargs)

        else:
            task_definition_dict["task_definition_name"] = "prefect-task-{}".format(  # type: ignore
                flow_run.flow.id[:8]  # type: ignore
            )  # type: ignore

        image = get_flow_image(flow_run=flow_run)
        flow_run_command = get_flow_run_command(flow_run=flow_run)

        # check if task definition exists
        self.logger.debug("Checking for task definition")
        if not self._verify_task_definition_exists(flow_run, task_definition_dict):
            self.logger.debug("No task definition found")
            self._create_task_definition(
                image=image,
                flow_task_definition_kwargs=flow_task_definition_kwargs,
                container_definitions_kwargs=flow_container_definitions_kwargs,
                task_definition_name=task_definition_dict["task_definition_name"],
                flow_run_command=flow_run_command,
            )

        # run task
        task_arn = self._run_task(
            flow_run, flow_task_run_kwargs, task_definition_dict["task_definition_name"]
        )

        self.logger.debug("Run created for task {}".format(task_arn))

        return "Task ARN: {}".format(task_arn)
Example #13
    def deploy_flow(self, flow_run: GraphQLResult) -> str:
        """
        Deploy flow runs on your local machine as Docker containers

        Args:
            - flow_run (GraphQLResult): A GraphQLResult flow run object

        Returns:
            - str: Information about the deployment
        """
        self.logger.info("Deploying flow run {}".format(
            flow_run.id)  # type: ignore
                         )

        # 'import docker' is expensive time-wise, we should do this just-in-time to keep
        # the 'import prefect' time low
        import docker

        image = get_flow_image(flow_run=flow_run)
        env_vars = self.populate_env_vars(flow_run=flow_run)

        if not self.no_pull and len(image.split("/")) > 1:
            self.logger.info("Pulling image {}...".format(image))

            pull_output = self.docker_client.pull(image,
                                                  stream=True,
                                                  decode=True)
            for line in pull_output:
                self.logger.debug(line)
            self.logger.info("Successfully pulled image {}...".format(image))

        # Create any named volumes (if they do not already exist)
        for named_volume_name in self.named_volumes:
            try:
                self.docker_client.inspect_volume(name=named_volume_name)
            except docker.errors.APIError:
                self.logger.debug(
                    "Creating named volume {}".format(named_volume_name))
                self.docker_client.create_volume(
                    name=named_volume_name,
                    driver="local",
                    labels={"prefect_created": "true"},
                )

        # Create a container
        self.logger.debug("Creating Docker container {}".format(image))

        host_config = {"auto_remove": True}  # type: dict
        container_mount_paths = self.container_mount_paths
        if container_mount_paths:
            host_config.update(binds=self.host_spec)

        if sys.platform.startswith("linux") and self.docker_interface:
            docker_internal_ip = get_docker_ip()
            host_config.update(
                extra_hosts={"host.docker.internal": docker_internal_ip})

        networking_config = None
        if self.network:
            networking_config = self.docker_client.create_networking_config(
                {self.network: self.docker_client.create_endpoint_config()})

        container = self.docker_client.create_container(
            image,
            command="prefect execute cloud-flow",
            environment=env_vars,
            volumes=container_mount_paths,
            host_config=self.docker_client.create_host_config(**host_config),
            networking_config=networking_config,
        )

        # Start the container
        self.logger.debug("Starting Docker container with ID {}".format(
            container.get("Id")))
        if self.network:
            self.logger.debug("Adding container to docker network: {}".format(
                self.network))

        self.docker_client.start(container=container.get("Id"))

        if self.show_flow_logs:
            proc = multiprocessing.Process(
                target=self.stream_container_logs,
                kwargs={"container_id": container.get("Id")},
            )

            proc.start()
            self.processes.append(proc)

        self.logger.debug("Docker container {} started".format(
            container.get("Id")))

        return "Container ID: {}".format(container.get("Id"))
Example #14
    def deploy_flow(self, flow_run: GraphQLResult) -> str:
        """
        Deploy flow runs on your local machine as Docker containers

        Args:
            - flow_run (GraphQLResult): A GraphQLResult flow run object

        Returns:
            - str: Information about the deployment
        """
        self.logger.info("Deploying flow run {}".format(
            flow_run.id))  # type: ignore

        # 'import docker' is expensive time-wise, we should do this just-in-time to keep
        # the 'import prefect' time low
        import docker

        if getattr(flow_run.flow, "run_config", None) is not None:
            run_config = RunConfigSchema().load(flow_run.flow.run_config)
            if not isinstance(run_config, DockerRun):
                self.logger.error(
                    "Flow run %s has a `run_config` of type `%s`, only `DockerRun` is supported",
                    flow_run.id,
                    type(run_config).__name__,
                )
                raise TypeError("Unsupported RunConfig type: %s" %
                                type(run_config).__name__)
        else:
            run_config = None

        image = get_flow_image(flow_run=flow_run)
        env_vars = self.populate_env_vars(flow_run, run_config=run_config)

        if not self.no_pull and len(image.split("/")) > 1:
            self.logger.info("Pulling image {}...".format(image))
            registry = image.split("/")[0]
            if self.reg_allow_list and registry not in self.reg_allow_list:
                self.logger.error(
                    "Trying to pull image from a Docker registry '{}' which"
                    " is not in the reg_allow_list".format(registry))
                raise ValueError(
                    "Trying to pull image from a Docker registry '{}' which"
                    " is not in the reg_allow_list".format(registry))
            else:
                pull_output = self.docker_client.pull(image,
                                                      stream=True,
                                                      decode=True)
                for line in pull_output:
                    self.logger.debug(line)
                self.logger.info(
                    "Successfully pulled image {}...".format(image))

        # Create any named volumes (if they do not already exist)
        for named_volume_name in self.named_volumes:
            try:
                self.docker_client.inspect_volume(name=named_volume_name)
            except docker.errors.APIError:
                self.logger.debug(
                    "Creating named volume {}".format(named_volume_name))
                self.docker_client.create_volume(
                    name=named_volume_name,
                    driver="local",
                    labels={"prefect_created": "true"},
                )

        # Create a container
        self.logger.debug("Creating Docker container {}".format(image))

        host_config = {"auto_remove": True}  # type: dict
        container_mount_paths = self.container_mount_paths
        if container_mount_paths:
            host_config.update(binds=self.host_spec)

        if sys.platform.startswith("linux") and self.docker_interface:
            docker_internal_ip = get_docker_ip()
            host_config.update(
                extra_hosts={"host.docker.internal": docker_internal_ip})

        networking_config = None
        if self.network:
            networking_config = self.docker_client.create_networking_config(
                {self.network: self.docker_client.create_endpoint_config()})

        container = self.docker_client.create_container(
            image,
            command=get_flow_run_command(flow_run),
            environment=env_vars,
            volumes=container_mount_paths,
            host_config=self.docker_client.create_host_config(**host_config),
            networking_config=networking_config,
        )

        # Start the container
        self.logger.debug("Starting Docker container with ID {}".format(
            container.get("Id")))
        if self.network:
            self.logger.debug("Adding container to docker network: {}".format(
                self.network))

        self.docker_client.start(container=container.get("Id"))

        if self.show_flow_logs:
            self.stream_flow_logs(container.get("Id"))

        self.logger.debug("Docker container {} started".format(
            container.get("Id")))

        return "Container ID: {}".format(container.get("Id"))
Example #15
            - flow_run (GraphQLResult): A GraphQLResult flow run object

        Returns:
            - str: Information about the deployment
        """
        self.logger.info("Deploying flow run {}".format(flow_run.id))  # type: ignore

        # 'import docker' is expensive time-wise, we should do this just-in-time to keep
        # the 'import prefect' time low
        import docker

<<<<<<< HEAD
        run_config = self._get_run_config(flow_run, DockerRun)
        assert run_config is None or isinstance(run_config, DockerRun)  # mypy

        image = get_flow_image(flow_run=flow_run)
        env_vars = self.populate_env_vars(flow_run, image, run_config=run_config)
=======
        if getattr(flow_run.flow, "run_config", None) is not None:
            run_config = RunConfigSchema().load(flow_run.flow.run_config)
            if not isinstance(run_config, DockerRun):
                self.logger.error(
                    "Flow run %s has a `run_config` of type `%s`, only `DockerRun` is supported",
                    flow_run.id,
                    type(run_config).__name__,
                )
                raise TypeError(
                    "Unsupported RunConfig type: %s" % type(run_config).__name__
                )
        else:
            run_config = None
Example #16
    def deploy_flow(self, flow_run: GraphQLResult) -> str:
        """
        Deploy flow runs on your local machine as Docker containers

        Args:
            - flow_run (GraphQLResult): A GraphQLResult flow run object

        Returns:
            - str: Information about the deployment
        """
        self.logger.info("Deploying flow run {}".format(
            flow_run.id))  # type: ignore

        # 'import docker' is expensive time-wise, we should do this just-in-time to keep
        # the 'import prefect' time low
        import docker

        run_config = self._get_run_config(flow_run, DockerRun)
        assert run_config is None or isinstance(run_config, DockerRun)  # mypy

        image = get_flow_image(flow_run=flow_run)
        env_vars = self.populate_env_vars(flow_run,
                                          image,
                                          run_config=run_config)

        if not self.no_pull and len(image.split("/")) > 1:
            self.logger.info("Pulling image {}...".format(image))
            registry = image.split("/")[0]
            if self.reg_allow_list and registry not in self.reg_allow_list:
                self.logger.error(
                    "Trying to pull image from a Docker registry '{}' which"
                    " is not in the reg_allow_list".format(registry))
                raise ValueError(
                    "Trying to pull image from a Docker registry '{}' which"
                    " is not in the reg_allow_list".format(registry))
            else:
                pull_output = self.docker_client.pull(image,
                                                      stream=True,
                                                      decode=True)
                for line in pull_output:
                    self.logger.debug(line)
                self.logger.info(
                    "Successfully pulled image {}...".format(image))

        # Create any named volumes (if they do not already exist)
        for named_volume_name in self.named_volumes:
            try:
                self.docker_client.inspect_volume(name=named_volume_name)
            except docker.errors.APIError:
                self.logger.debug(
                    "Creating named volume {}".format(named_volume_name))
                self.docker_client.create_volume(
                    name=named_volume_name,
                    driver="local",
                    labels={"prefect_created": "true"},
                )

        # Create a container
        self.logger.debug("Creating Docker container {}".format(image))

        host_config = {"auto_remove": True}  # type: dict
        container_mount_paths = self.container_mount_paths
        if container_mount_paths:
            host_config.update(binds=self.host_spec)

        if sys.platform.startswith("linux") and self.docker_interface:
            docker_internal_ip = get_docker_ip()
            host_config.update(
                extra_hosts={"host.docker.internal": docker_internal_ip})

        networking_config = None
        # At the time of creation, you can only connect a container to a single network,
        # however you can create more connections after creation.
        # Connect first network in the creation step. If no network is connected here the container
        # is connected to the default `bridge` network.
        # The rest of the networks are connected after creation.
        if self.networks:
            networking_config = self.docker_client.create_networking_config({
                self.networks[0]:
                self.docker_client.create_endpoint_config()
            })
        # Fall back to the old, deprecated behaviour.
        if self.network:
            networking_config = self.docker_client.create_networking_config(
                {self.network: self.docker_client.create_endpoint_config()})
        labels = {
            "io.prefect.flow-name": flow_run.flow.name,
            "io.prefect.flow-id": flow_run.flow.id,
            "io.prefect.flow-run-id": flow_run.id,
        }
        container = self.docker_client.create_container(
            image,
            command=get_flow_run_command(flow_run),
            environment=env_vars,
            volumes=container_mount_paths,
            host_config=self.docker_client.create_host_config(**host_config),
            networking_config=networking_config,
            labels=labels,
        )
        # Connect the rest of the networks
        if self.networks:
            for network in self.networks[1:]:
                self.docker_client.connect_container_to_network(
                    container=container, net_id=network)
        # Start the container
        self.logger.debug("Starting Docker container with ID {}".format(
            container.get("Id")))
        if self.networks:
            self.logger.debug(
                "Adding container with ID {} to docker networks: {}.".format(
                    container.get("Id"), self.networks))
        if self.network:
            self.logger.debug("Adding container to docker network: {}".format(
                self.network))

        self.docker_client.start(container=container.get("Id"))

        if self.show_flow_logs:
            self.stream_flow_logs(container.get("Id"))

        self.logger.debug("Docker container {} started".format(
            container.get("Id")))

        return "Container ID: {}".format(container.get("Id"))
Example #17
    def deploy_flow(self, flow_run: GraphQLResult) -> str:
        """
        Deploy flow runs on your local machine as Docker containers

        Args:
            - flow_run (GraphQLResult): A GraphQLResult flow run object

        Returns:
            - str: Information about the deployment
        """
        # 'import docker' is expensive time-wise, we should do this just-in-time to keep
        # the 'import prefect' time low
        import docker

        run_config = self._get_run_config(flow_run, DockerRun)
        assert run_config is None or isinstance(run_config, DockerRun)  # mypy

        image = get_flow_image(flow_run=flow_run)
        env_vars = self.populate_env_vars(flow_run,
                                          image,
                                          run_config=run_config)

        if not self.no_pull and len(image.split("/")) > 1:
            self.logger.info("Pulling image {}...".format(image))
            registry = image.split("/")[0]
            if self.reg_allow_list and registry not in self.reg_allow_list:
                self.logger.error(
                    "Trying to pull image from a Docker registry '{}' which"
                    " is not in the reg_allow_list".format(registry))
                raise ValueError(
                    "Trying to pull image from a Docker registry '{}' which"
                    " is not in the reg_allow_list".format(registry))
            else:
                pull_output = self.docker_client.pull(image,
                                                      stream=True,
                                                      decode=True)
                for line in pull_output:
                    self.logger.debug(line)
                self.logger.info("Successfully pulled image {}".format(image))

        # Create any named volumes (if they do not already exist)
        for named_volume_name in self.named_volumes:
            try:
                self.docker_client.inspect_volume(name=named_volume_name)
            except docker.errors.APIError:
                self.logger.debug(
                    "Creating named volume {}".format(named_volume_name))
                self.docker_client.create_volume(
                    name=named_volume_name,
                    driver="local",
                    labels={"prefect_created": "true"},
                )

        # Create a container
        self.logger.debug("Creating Docker container {}".format(image))

        # By default, auto-remove containers
        host_config: Dict[str, Any] = {"auto_remove": True}
        # By default, no ports
        ports = None

        # Set up a host gateway for local communication; check the docker version since
        # this is not supported by older versions
        docker_engine_version = parse_version(
            self.docker_client.version()["Version"])
        host_gateway_version = parse_version("20.10.0")

        if docker_engine_version < host_gateway_version:
            warnings.warn(
                "`host.docker.internal` could not be automatically resolved to your "
                "local host. This feature is not supported on Docker Engine "
                f"v{docker_engine_version}, upgrade to v{host_gateway_version}+ if you "
                "encounter issues.")
        else:
            # Compatibility for linux -- https://github.com/docker/cli/issues/2290
            # Only supported by Docker v20.10.0+ which is our minimum recommend version
            host_config["extra_hosts"] = {
                "host.docker.internal": "host-gateway"
            }

        container_mount_paths = self.container_mount_paths
        if container_mount_paths:
            host_config.update(binds=self.host_spec)
        if run_config is not None and run_config.host_config:
            # The host_config passed from the run_config will overwrite defaults
            host_config.update(run_config.host_config)
        if run_config is not None and run_config.ports:
            ports = run_config.ports

        networking_config = None
        # At the time of creation, you can only connect a container to a single network,
        # however you can create more connections after creation.
        # Connect first network in the creation step. If no network is connected here the container
        # is connected to the default `bridge` network.
        # The rest of the networks are connected after creation.
        if self.networks:
            networking_config = self.docker_client.create_networking_config({
                self.networks[0]:
                self.docker_client.create_endpoint_config()
            })
        labels = {
            "io.prefect.flow-name": flow_run.flow.name,
            "io.prefect.flow-id": flow_run.flow.id,
            "io.prefect.flow-run-id": flow_run.id,
        }

        # Generate a container name to match the flow run name, ensuring it is Docker
        # compatible and unique. The final name must match `[a-zA-Z0-9][a-zA-Z0-9_.-]+`
        container_name = slugified_name = (
            slugify(
                flow_run.name,
                lowercase=False,
                # Docker does not limit length but URL limits apply eventually so
                # limit the length for safety
                max_length=250,
                # Docker allows these characters for container names
                regex_pattern=r"[^a-zA-Z0-9_.-]+",
            ).lstrip(
                # Docker does not allow leading underscore, dash, or period
                "_-.")
            # Docker does not allow 0 character names so use the flow run id if name
            # would be empty after cleaning
            or flow_run.id)

        # Create the container with retries on name conflicts
        index = 0  # will be bumped on name collisions
        while True:
            try:
                container = self.docker_client.create_container(
                    image,
                    command=get_flow_run_command(flow_run),
                    environment=env_vars,
                    name=container_name,
                    volumes=container_mount_paths,
                    host_config=self.docker_client.create_host_config(
                        **host_config),
                    networking_config=networking_config,
                    labels=labels,
                    ports=ports,
                )
            except docker.errors.APIError as exc:
                if "Conflict" in str(exc) and "container name" in str(exc):
                    index += 1
                    container_name = f"{slugified_name}-{index}"
                else:
                    raise
            else:
                break

        # Connect the rest of the networks
        if self.networks:
            for network in self.networks[1:]:
                self.docker_client.connect_container_to_network(
                    container=container, net_id=network)
        # Start the container
        self.logger.debug(
            f"Starting Docker container with ID {container.get('Id')} and "
            f"name {container_name!r}")
        if self.networks:
            self.logger.debug(
                "Adding container with ID {} to docker networks: {}.".format(
                    container.get("Id"), self.networks))
        self.docker_client.start(container=container.get("Id"))

        if self.show_flow_logs:
            self.stream_flow_logs(container.get("Id"))

        self.logger.debug("Docker container {} started".format(
            container.get("Id")))

        return "Container ID: {}".format(container.get("Id"))
Example #18
    def generate_task_definition(self, flow_run: GraphQLResult,
                                 run_config: ECSRun) -> Dict[str, Any]:
        """Generate an ECS task definition from a flow run

        Args:
            - flow_run (GraphQLResult): A flow run object
            - run_config (ECSRun): The flow's run config

        Returns:
            - dict: a dictionary representation of an ECS task definition
        """
        if run_config.task_definition:
            taskdef = deepcopy(run_config.task_definition)
        elif run_config.task_definition_path:
            self.logger.debug(
                "Loading task definition template from %r",
                run_config.task_definition_path,
            )
            template_bytes = read_bytes_from_path(
                run_config.task_definition_path)
            taskdef = yaml.safe_load(template_bytes)
        else:
            taskdef = deepcopy(self.task_definition)
        slug = slugify.slugify(
            f"{flow_run.flow.name}-{flow_run.id}",
            max_length=255 - len("prefect-"),
            word_boundary=True,
            save_order=True,
        )
        taskdef["family"] = f"prefect-{slug}"

        # Add some metadata tags for easier tracking by users
        taskdef.setdefault("tags", []).extend([
            {
                "key": "prefect:flow-id",
                "value": flow_run.flow.id
            },
            {
                "key": "prefect:flow-version",
                "value": str(flow_run.flow.version)
            },
        ])

        # Get the flow container (creating one if it doesn't already exist)
        containers = taskdef.setdefault("containerDefinitions", [])
        for container in containers:
            if container.get("name") == "flow":
                break
        else:
            container = {"name": "flow"}
            containers.append(container)

        # Set flow image
        container["image"] = image = get_flow_image(
            flow_run, default=container.get("image"))

        # Add `PREFECT__CONTEXT__IMAGE` environment variable
        env = {"PREFECT__CONTEXT__IMAGE": image}
        container_env = [{"name": k, "value": v} for k, v in env.items()]
        for entry in container.get("environment", []):
            if entry["name"] not in env:
                container_env.append(entry)
        container["environment"] = container_env

        # Ensure that cpu/memory are strings not integers
        if "cpu" in taskdef:
            taskdef["cpu"] = str(taskdef["cpu"])
        if "memory" in taskdef:
            taskdef["memory"] = str(taskdef["memory"])

        # If we're using Fargate, we need to explicitly set an executionRoleArn on the
        # task definition. If one isn't present, then try to load it from the run_config
        # and then the agent's default.
        if "executionRoleArn" not in taskdef:
            if run_config.execution_role_arn:
                taskdef["executionRoleArn"] = run_config.execution_role_arn
            elif self.execution_role_arn:
                taskdef["executionRoleArn"] = self.execution_role_arn

        # Set requiresCompatibilities if not already set if self.launch_type is set
        if "requiresCompatibilities" not in taskdef and self.launch_type:
            taskdef["requiresCompatibilities"] = [self.launch_type]

        return taskdef
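The flow-container lookup above relies on Python's for/else: the else branch runs only when the loop finishes without break, i.e. when no container named "flow" exists yet. A standalone illustration:

containers = [{"name": "sidecar"}]
for container in containers:
    if container.get("name") == "flow":
        break  # reuse the existing flow container
else:
    container = {"name": "flow"}  # none found: create and register one
    containers.append(container)

assert container["name"] == "flow" and len(containers) == 2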
Example #19
    def deploy_flow(self, flow_run: GraphQLResult) -> str:
        """
        Deploy flow runs on your local machine as Docker containers

        Args:
            - flow_run (GraphQLResult): A GraphQLResult flow run object

        Returns:
            - str: Information about the deployment
        """
        # 'import docker' is expensive time-wise, we should do this just-in-time to keep
        # the 'import prefect' time low
        import docker

        run_config = self._get_run_config(flow_run, DockerRun)
        assert run_config is None or isinstance(run_config, DockerRun)  # mypy

        image = get_flow_image(flow_run=flow_run)
        env_vars = self.populate_env_vars(flow_run,
                                          image,
                                          run_config=run_config)

        if not self.no_pull and len(image.split("/")) > 1:
            self.logger.info("Pulling image {}...".format(image))
            registry = image.split("/")[0]
            if self.reg_allow_list and registry not in self.reg_allow_list:
                self.logger.error(
                    "Trying to pull image from a Docker registry '{}' which"
                    " is not in the reg_allow_list".format(registry))
                raise ValueError(
                    "Trying to pull image from a Docker registry '{}' which"
                    " is not in the reg_allow_list".format(registry))
            else:
                pull_output = self.docker_client.pull(image,
                                                      stream=True,
                                                      decode=True)
                for line in pull_output:
                    self.logger.debug(line)
                self.logger.info("Successfully pulled image {}".format(image))

        # Create any named volumes (if they do not already exist)
        for named_volume_name in self.named_volumes:
            try:
                self.docker_client.inspect_volume(name=named_volume_name)
            except docker.errors.APIError:
                self.logger.debug(
                    "Creating named volume {}".format(named_volume_name))
                self.docker_client.create_volume(
                    name=named_volume_name,
                    driver="local",
                    labels={"prefect_created": "true"},
                )

        # Create a container
        self.logger.debug("Creating Docker container {}".format(image))

        host_config = {"auto_remove": True}  # type: dict
        container_mount_paths = self.container_mount_paths
        if container_mount_paths:
            host_config.update(binds=self.host_spec)

        networking_config = None
        # At the time of creation, you can only connect a container to a single network,
        # however you can create more connections after creation.
        # Connect first network in the creation step. If no network is connected here the container
        # is connected to the default `bridge` network.
        # The rest of the networks are connected after creation.
        if self.networks:
            networking_config = self.docker_client.create_networking_config({
                self.networks[0]:
                self.docker_client.create_endpoint_config()
            })
        # Fall back to the old, deprecated behaviour.
        if self.network:
            networking_config = self.docker_client.create_networking_config(
                {self.network: self.docker_client.create_endpoint_config()})
        labels = {
            "io.prefect.flow-name": flow_run.flow.name,
            "io.prefect.flow-id": flow_run.flow.id,
            "io.prefect.flow-run-id": flow_run.id,
        }

        # Generate a container name to match the flow run name, ensuring it is Docker
        # compatible and unique. The final name must match `[a-zA-Z0-9][a-zA-Z0-9_.-]+`
        container_name = slugified_name = (
            slugify(
                flow_run.name,
                lowercase=False,
                # Docker does not limit length but URL limits apply eventually so
                # limit the length for safety
                max_length=250,
                # Docker allows these characters for container names
                regex_pattern=r"[^a-zA-Z0-9_.-]+",
            ).lstrip(
                # Docker does not allow leading underscore, dash, or period
                "_-.")
            # Docker does not allow 0 character names so use the flow run id if name
            # would be empty after cleaning
            or flow_run.id)

        # Create the container with retries on name conflicts
        index = 0  # will be bumped on name collisions
        while True:
            try:
                container = self.docker_client.create_container(
                    image,
                    command=get_flow_run_command(flow_run),
                    environment=env_vars,
                    name=container_name,
                    volumes=container_mount_paths,
                    host_config=self.docker_client.create_host_config(
                        **host_config),
                    networking_config=networking_config,
                    labels=labels,
                )
            except docker.errors.APIError as exc:
                if "Conflict" in str(exc) and "container name" in str(exc):
                    index += 1
                    container_name = f"{slugified_name}-{index}"
                else:
                    raise
            else:
                break

        # Connect the rest of the networks
        if self.networks:
            for network in self.networks[1:]:
                self.docker_client.connect_container_to_network(
                    container=container, net_id=network)
        # Start the container
        self.logger.debug(
            f"Starting Docker container with ID {container.get('Id')} and "
            f"name {container_name!r}")
        if self.networks:
            self.logger.debug(
                "Adding container with ID {} to docker networks: {}.".format(
                    container.get("Id"), self.networks))
        if self.network:
            self.logger.debug("Adding container to docker network: {}".format(
                self.network))

        self.docker_client.start(container=container.get("Id"))

        if self.show_flow_logs:
            self.stream_flow_logs(container.get("Id"))

        self.logger.debug("Docker container {} started".format(
            container.get("Id")))

        return "Container ID: {}".format(container.get("Id"))
Example #20
    def generate_job_spec_from_environment(self,
                                           flow_run: GraphQLResult,
                                           image: str = None) -> dict:
        """
        Populate a k8s job spec. This spec defines a k8s job that handles
        executing a flow. This method runs each time the agent receives
        a flow to run.

        That job spec can optionally be customized by setting the
        following environment variables on the agent.

        - `NAMESPACE`: the k8s namespace the job will run in, defaults to `"default"`
        - `JOB_MEM_REQUEST`: memory requested, for example, `256Mi` for 256 MB. If this
                environment variable is not set, the cluster's defaults will be used.
        - `JOB_MEM_LIMIT`: memory limit, for example, `512Mi` for 512 MB. If this
                environment variable is not set, the cluster's defaults will be used.
        - `JOB_CPU_REQUEST`: CPU requested, defaults to `"100m"`
        - `JOB_CPU_LIMIT`: CPU limit, defaults to `"100m"`
        - `IMAGE_PULL_POLICY`: policy for pulling images. Defaults to `"IfNotPresent"`.
        - `IMAGE_PULL_SECRETS`: name of an existing k8s secret that can be used to pull
                images. This is necessary if your flow uses an image that is in a non-public
                container registry, such as Amazon ECR, or in a public registry that requires
                authentication to avoid hitting rate limits. To specify multiple image pull
                secrets, provide a comma-delimited string with no spaces, like
                `"some-secret,other-secret"`.
        - `SERVICE_ACCOUNT_NAME`: name of a service account to run the job as.
                By default, none is specified.
        - `YAML_TEMPLATE`: a path to load the YAML template from. Defaults
                to the embedded `job_spec.yaml`.

        Args:
            - flow_run (GraphQLResult): A flow run object
            - image (str, optional): The full name of an image to use for the job

        Returns:
            - dict: a dictionary representation of a k8s job for flow execution
        """
        identifier = str(uuid.uuid4())[:8]
        yaml_path = os.getenv(
            "YAML_TEMPLATE",
            os.path.join(os.path.dirname(__file__), "job_spec.yaml"))
        with open(yaml_path, "r") as job_file:
            job = yaml.safe_load(job_file)

        job_name = "prefect-job-{}".format(identifier)

        # Populate job metadata for identification
        k8s_labels = {
            "prefect.io/identifier": identifier,
            "prefect.io/flow_run_id": flow_run.id,  # type: ignore
            "prefect.io/flow_id": flow_run.flow.id,  # type: ignore
        }
        job["metadata"]["name"] = job_name
        job["metadata"]["labels"].update(**k8s_labels)
        job["spec"]["template"]["metadata"]["labels"].update(**k8s_labels)

        # Use provided image for job
        if image is None:
            image = get_flow_image(flow_run=flow_run)
        job["spec"]["template"]["spec"]["containers"][0]["image"] = image

        self.logger.debug("Using image {} for job".format(image))

        # Determine flow run command
        job["spec"]["template"]["spec"]["containers"][0]["args"] = [
            get_flow_run_command(flow_run)
        ]

        # Populate environment variables for flow run execution
        env = job["spec"]["template"]["spec"]["containers"][0]["env"]

        env[0]["value"] = config.cloud.api or "https://api.prefect.io"
        env[1]["value"] = config.cloud.agent.auth_token
        env[2]["value"] = flow_run.id  # type: ignore
        env[3]["value"] = flow_run.flow.id  # type: ignore
        env[4]["value"] = self.namespace
        env[5]["value"] = str(self.labels)
        env[6]["value"] = str(self.log_to_cloud).lower()
        env[7]["value"] = self.env_vars.get("PREFECT__LOGGING__LEVEL",
                                            config.logging.level)

        # append all user provided values
        for key, value in self.env_vars.items():
            env.append(dict(name=key, value=value))

        # Use image pull secrets if provided
        if self.image_pull_secrets:
            for idx, secret_name in enumerate(self.image_pull_secrets):
                # this check preserves behavior from previous releases,
                # where prefect would only overwrite the first entry in
                # imagePullSecrets
                if idx == 0:
                    job["spec"]["template"]["spec"]["imagePullSecrets"][0] = {
                        "name": secret_name
                    }
                else:
                    job["spec"]["template"]["spec"]["imagePullSecrets"].append(
                        {"name": secret_name})
        else:
            del job["spec"]["template"]["spec"]["imagePullSecrets"]

        # Set resource requirements if provided
        resources = job["spec"]["template"]["spec"]["containers"][0][
            "resources"]
        if os.getenv("JOB_MEM_REQUEST"):
            resources["requests"]["memory"] = os.getenv("JOB_MEM_REQUEST")
        if os.getenv("JOB_MEM_LIMIT"):
            resources["limits"]["memory"] = os.getenv("JOB_MEM_LIMIT")
        if os.getenv("JOB_CPU_REQUEST"):
            resources["requests"]["cpu"] = os.getenv("JOB_CPU_REQUEST")
        if os.getenv("JOB_CPU_LIMIT"):
            resources["limits"]["cpu"] = os.getenv("JOB_CPU_LIMIT")
        if self.volume_mounts:
            job["spec"]["template"]["spec"]["containers"][0][
                "volumeMounts"] = self.volume_mounts
        else:
            del job["spec"]["template"]["spec"]["containers"][0][
                "volumeMounts"]
        if self.volumes:
            job["spec"]["template"]["spec"]["volumes"] = self.volumes
        else:
            del job["spec"]["template"]["spec"]["volumes"]
        if os.getenv("IMAGE_PULL_POLICY"):
            job["spec"]["template"]["spec"]["containers"][0][
                "imagePullPolicy"] = os.getenv("IMAGE_PULL_POLICY")
        if self.service_account_name:
            job["spec"]["template"]["spec"][
                "serviceAccountName"] = self.service_account_name

        return job
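The knobs listed in the docstring are read with plain os.getenv, so the resources block can be exercised in isolation; a minimal sketch (the variable names come from the docstring above, the rest is illustrative):

import os

os.environ["JOB_MEM_REQUEST"] = "256Mi"
os.environ["JOB_MEM_LIMIT"] = "512Mi"

resources = {"requests": {"cpu": "100m"}, "limits": {"cpu": "100m"}}
if os.getenv("JOB_MEM_REQUEST"):
    resources["requests"]["memory"] = os.getenv("JOB_MEM_REQUEST")
if os.getenv("JOB_MEM_LIMIT"):
    resources["limits"]["memory"] = os.getenv("JOB_MEM_LIMIT")

assert resources["requests"] == {"cpu": "100m", "memory": "256Mi"}
assert resources["limits"] == {"cpu": "100m", "memory": "512Mi"}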
Example #21
    def generate_task_definition(self, flow_run: GraphQLResult,
                                 run_config: ECSRun) -> Dict[str, Any]:
        """Generate an ECS task definition from a flow run

        Args:
            - flow_run (GraphQLResult): A flow run object
            - run_config (ECSRun): The flow's run config

        Returns:
            - dict: a dictionary representation of an ECS task definition
        """
        if run_config.task_definition:
            taskdef = deepcopy(run_config.task_definition)
        elif run_config.task_definition_path:
            self.logger.debug(
                "Loading task definition template from %r",
                run_config.task_definition_path,
            )
            template_bytes = read_bytes_from_path(
                run_config.task_definition_path)
            taskdef = yaml.safe_load(template_bytes)
        else:
            taskdef = deepcopy(self.task_definition)

        slug = slugify.slugify(
            flow_run.flow.name,
            max_length=255 - len("prefect-"),
            word_boundary=True,
            save_order=True,
        )
        family = f"prefect-{slug}"

        tags = self.get_task_definition_tags(flow_run)

        taskdef["family"] = family

        taskdef_tags = [{"key": k, "value": v} for k, v in tags.items()]
        for entry in taskdef.get("tags", []):
            if entry["key"] not in tags:
                taskdef_tags.append(entry)
        taskdef["tags"] = taskdef_tags

        # Get the flow container (creating one if it doesn't already exist)
        containers = taskdef.setdefault("containerDefinitions", [])
        for container in containers:
            if container.get("name") == "flow":
                break
        else:
            container = {"name": "flow"}
            containers.append(container)

        # Set flow image
        container["image"] = image = get_flow_image(flow_run)

        # Set flow run command
        container["command"] = [
            "/bin/sh", "-c", get_flow_run_command(flow_run)
        ]

        # Set taskRoleArn if configured
        if run_config.task_role_arn:
            taskdef["taskRoleArn"] = run_config.task_role_arn

        # Populate static environment variables from the following sources,
        # with precedence:
        # - Static environment variables, hardcoded below
        # - Values in the task definition template
        env = {
            "PREFECT__CLOUD__USE_LOCAL_SECRETS":
            "false",
            "PREFECT__CONTEXT__IMAGE":
            image,
            "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS":
            "prefect.engine.cloud.CloudFlowRunner",
            "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS":
            "prefect.engine.cloud.CloudTaskRunner",
        }
        container_env = [{"name": k, "value": v} for k, v in env.items()]
        for entry in container.get("environment", []):
            if entry["name"] not in env:
                container_env.append(entry)
        container["environment"] = container_env

        # Set resource requirements, if provided
        # Also ensure that cpu/memory are strings not integers
        if run_config.cpu:
            taskdef["cpu"] = str(run_config.cpu)
        elif "cpu" in taskdef:
            taskdef["cpu"] = str(taskdef["cpu"])
        if run_config.memory:
            taskdef["memory"] = str(run_config.memory)
        elif "memory" in taskdef:
            taskdef["memory"] = str(taskdef["memory"])

        return taskdef
Example #22
    def generate_job_spec_from_run_config(self, flow_run: GraphQLResult,
                                          run_config: KubernetesRun) -> dict:
        """Generate a k8s job spec for a flow run.

        Args:
            - flow_run (GraphQLResult): A flow run object
            - run_config (KubernetesRun): The flow run's run_config

        Returns:
            - dict: a dictionary representation of a k8s job for flow execution
        """
        if run_config.job_template:
            job = run_config.job_template
        else:
            job_template_path = run_config.job_template_path or self.job_template_path
            self.logger.debug("Loading job template from %r",
                              job_template_path)
            template_bytes = read_bytes_from_path(job_template_path)
            job = yaml.safe_load(template_bytes)

        identifier = uuid.uuid4().hex[:8]

        job_name = f"prefect-job-{identifier}"

        # Populate job metadata for identification
        k8s_labels = {
            "prefect.io/identifier": identifier,
            "prefect.io/flow_run_id": flow_run.id,  # type: ignore
            "prefect.io/flow_id": flow_run.flow.id,  # type: ignore
        }
        _get_or_create(job, "metadata.labels")
        _get_or_create(job, "spec.template.metadata.labels")
        job["metadata"]["name"] = job_name
        job["metadata"]["labels"].update(**k8s_labels)
        job["spec"]["template"]["metadata"]["labels"].update(**k8s_labels)
        pod_spec = job["spec"]["template"]["spec"]

        # Configure `service_account_name` if specified
        if run_config.service_account_name is not None:
            # On run-config, always override
            service_account_name = (run_config.service_account_name
                                    )  # type: Optional[str]
        elif "serviceAccountName" in pod_spec and (
                run_config.job_template or run_config.job_template_path):
            # On run-config job-template, no override
            service_account_name = None
        else:
            # Use agent value, if provided
            service_account_name = self.service_account_name
        if service_account_name is not None:
            pod_spec["serviceAccountName"] = service_account_name

        # Configure `image_pull_secrets` if specified
        if run_config.image_pull_secrets is not None:
            # On run-config, always override
            image_pull_secrets = (run_config.image_pull_secrets
                                  )  # type: Optional[Iterable[str]]
        elif "imagePullSecrets" in pod_spec and (run_config.job_template or
                                                 run_config.job_template_path):
            # On run-config job template, no override
            image_pull_secrets = None
        else:
            # Use agent, if provided
            image_pull_secrets = self.image_pull_secrets
        if image_pull_secrets is not None:
            pod_spec["imagePullSecrets"] = [{
                "name": s
            } for s in image_pull_secrets]

        # Default restartPolicy to Never
        _get_or_create(job, "spec.template.spec.restartPolicy", "Never")

        # Get the first container, which is used for the prefect job
        containers = _get_or_create(job, "spec.template.spec.containers", [])
        if not containers:
            containers.append({})
        container = containers[0]

        # Set container image
        container["image"] = image = get_flow_image(
            flow_run, default=container.get("image"))

        # Set flow run command
        container["args"] = get_flow_run_command(flow_run).split()

        # Populate environment variables from the following sources,
        # with precedence:
        # - Values required for flow execution, hardcoded below
        # - Values set on the KubernetesRun object
        # - Values set using the `--env` CLI flag on the agent
        # - Values in the job template
        env = {"PREFECT__LOGGING__LEVEL": config.logging.level}
        env.update(self.env_vars)
        if run_config.env:
            env.update(run_config.env)
        env.update({
            "PREFECT__BACKEND":
            config.backend,
            "PREFECT__CLOUD__AGENT__LABELS":
            str(self.labels),
            "PREFECT__CLOUD__API":
            config.cloud.api,
            "PREFECT__CLOUD__AUTH_TOKEN":
            config.cloud.agent.auth_token,
            "PREFECT__CLOUD__USE_LOCAL_SECRETS":
            "false",
            "PREFECT__CONTEXT__FLOW_RUN_ID":
            flow_run.id,
            "PREFECT__CONTEXT__FLOW_ID":
            flow_run.flow.id,
            "PREFECT__CONTEXT__IMAGE":
            image,
            "PREFECT__LOGGING__LOG_TO_CLOUD":
            str(self.log_to_cloud).lower(),
            "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS":
            "prefect.engine.cloud.CloudFlowRunner",
            "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS":
            "prefect.engine.cloud.CloudTaskRunner",
        })
        container_env = [{"name": k, "value": v} for k, v in env.items()]
        for entry in container.get("env", []):
            if entry["name"] not in env:
                container_env.append(entry)
        container["env"] = container_env

        # Set resource requirements if provided
        _get_or_create(container, "resources.requests")
        _get_or_create(container, "resources.limits")
        resources = container["resources"]
        if run_config.memory_request:
            resources["requests"]["memory"] = run_config.memory_request
        if run_config.memory_limit:
            resources["limits"]["memory"] = run_config.memory_limit
        if run_config.cpu_request:
            resources["requests"]["cpu"] = run_config.cpu_request
        if run_config.cpu_limit:
            resources["limits"]["cpu"] = run_config.cpu_limit

        return job
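Several of these job-spec builders lean on a _get_or_create helper that walks a dotted path into the nested job dict. Its implementation isn't shown on this page; a plausible standalone sketch inferred from the call sites:

def _get_or_create(obj: dict, path: str, default=None):
    # Descend the dotted path, creating empty dicts along the way, and
    # return the existing leaf or set it to `default` (a fresh dict if
    # no default is given). Inferred from usage; not the verbatim helper.
    *parents, leaf = path.split(".")
    for key in parents:
        obj = obj.setdefault(key, {})
    return obj.setdefault(leaf, {} if default is None else default)

job = {}
_get_or_create(job, "spec.template.spec.restartPolicy", "Never")
containers = _get_or_create(job, "spec.template.spec.containers", [])
assert job["spec"]["template"]["spec"]["restartPolicy"] == "Never"
assert containers == [] and job["spec"]["template"]["spec"]["containers"] is containers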
Example #23
File: agent.py Project: strojank/prefect
    def generate_job_spec_from_run_config(self,
                                          flow_run: GraphQLResult) -> dict:
        """Generate a k8s job spec for a flow run.

        Args:
            - flow_run (GraphQLResult): A flow run object

        Returns:
            - dict: a dictionary representation of a k8s job for flow execution
        """
        run_config = RunConfigSchema().load(flow_run.flow.run_config)

        if run_config.job_template:
            job = run_config.job_template
        else:
            job_template_path = run_config.job_template_path or self.job_template_path
            self.logger.debug("Loading job template from %r",
                              job_template_path)
            template_bytes = read_bytes_from_path(job_template_path)
            job = yaml.safe_load(template_bytes)

        identifier = uuid.uuid4().hex[:8]

        job_name = f"prefect-job-{identifier}"

        # Populate job metadata for identification
        k8s_labels = {
            "prefect.io/identifier": identifier,
            "prefect.io/flow_run_id": flow_run.id,  # type: ignore
            "prefect.io/flow_id": flow_run.flow.id,  # type: ignore
        }
        _get_or_create(job, "metadata.labels")
        _get_or_create(job, "spec.template.metadata.labels")
        job["metadata"]["name"] = job_name
        job["metadata"]["labels"].update(**k8s_labels)
        job["spec"]["template"]["metadata"]["labels"].update(**k8s_labels)

        # Get the first container, which is used for the prefect job
        containers = _get_or_create(job, "spec.template.spec.containers", [])
        if not containers:
            containers.append({})
        container = containers[0]

        # Set container image
        container["image"] = image = get_flow_image(flow_run)

        # Set flow run command
        container["args"] = [get_flow_run_command(flow_run)]

        # Populate environment variables from the following sources,
        # with precedence:
        # - Values required for flow execution, hardcoded below
        # - Values set on the KubernetesRun object
        # - Values set using the `--env` CLI flag on the agent
        # - Values in the job template
        env = self.env_vars.copy()
        if run_config.env:
            env.update(run_config.env)
        env.update({
            "PREFECT__CLOUD__API":
            config.cloud.api,
            "PREFECT__CLOUD__AUTH_TOKEN":
            config.cloud.agent.auth_token,
            "PREFECT__CLOUD__USE_LOCAL_SECRETS":
            "false",
            "PREFECT__CONTEXT__FLOW_RUN_ID":
            flow_run.id,
            "PREFECT__CONTEXT__FLOW_ID":
            flow_run.flow.id,
            "PREFECT__CONTEXT__IMAGE":
            image,
            "PREFECT__LOGGING__LOG_TO_CLOUD":
            str(self.log_to_cloud).lower(),
            "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS":
            "prefect.engine.cloud.CloudFlowRunner",
            "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS":
            "prefect.engine.cloud.CloudTaskRunner",
        })
        container_env = [{"name": k, "value": v} for k, v in env.items()]
        for entry in container.get("env", []):
            if entry["name"] not in env:
                container_env.append(entry)
        container["env"] = container_env

        # Set resource requirements if provided
        _get_or_create(container, "resources.requests")
        _get_or_create(container, "resources.limits")
        resources = container["resources"]
        if run_config.memory_request:
            resources["requests"]["memory"] = run_config.memory_request
        if run_config.memory_limit:
            resources["limits"]["memory"] = run_config.memory_limit
        if run_config.cpu_request:
            resources["requests"]["cpu"] = run_config.cpu_request
        if run_config.cpu_limit:
            resources["limits"]["cpu"] = run_config.cpu_limit

        return job