def test_get_flow_image_raises_on_missing_info():
    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "storage": Local().serialize(),
                    "environment": LocalEnvironment().serialize(),
                    "id": "id",
                }
            ),
            "id": "id",
        }
    )
    with pytest.raises(ValueError):
        get_flow_image(flow_run=flow_run)
def deploy_flow(self, flow_run: GraphQLResult) -> str:
    """
    Deploy flow runs onto a k8s cluster as jobs

    Args:
        - flow_run (GraphQLResult): A GraphQLResult flow run object

    Returns:
        - str: Information about the deployment
    """
    self.logger.info("Deploying flow run {}".format(flow_run.id))  # type: ignore

    image = get_flow_image(flow_run=flow_run)
    job_spec = self.replace_job_spec_yaml(flow_run=flow_run, image=image)

    self.logger.debug(
        "Creating namespaced job {}".format(job_spec["metadata"]["name"])
    )
    job = self.batch_client.create_namespaced_job(
        namespace=self.namespace, body=job_spec
    )
    self.logger.debug("Job {} created".format(job.metadata.name))

    return "Job {}".format(job.metadata.name)
def generate_task_definition(self, flow_run: GraphQLResult) -> Dict[str, Any]:
    """Generate a Vertex task definition from a flow run

    Args:
        - flow_run (GraphQLResult): A flow run object

    Returns:
        - dict: a dictionary representation of a Vertex task definition
    """
    run_config = self._get_run_config(flow_run, VertexRun)
    assert isinstance(run_config, VertexRun)  # mypy

    image = get_flow_image(flow_run)
    job_name = slugify.slugify(
        flow_run.flow.name + "-" + flow_run.name,
        max_length=255,
        word_boundary=True,
        save_order=True,
    )
    machine_type = run_config.machine_type
    command = get_flow_run_command(flow_run)
    env = self.populate_env_vars(flow_run)
    env_list = self._to_env_list(env)

    # Start with a default taskdef
    taskdef = {
        "display_name": job_name,
        "job_spec": {
            "worker_pool_specs": [
                {"machine_spec": {"machine_type": machine_type}, "replica_count": 1}
            ]
        },
    }  # type: Dict[str, Any]

    if run_config.worker_pool_specs is not None:
        taskdef["job_spec"]["worker_pool_specs"] = run_config.worker_pool_specs

    if run_config.network is not None:
        taskdef["job_spec"]["network"] = run_config.network

    if run_config.service_account is not None:
        taskdef["job_spec"]["service_account"] = run_config.service_account
    else:
        taskdef["job_spec"]["service_account"] = self.service_account

    if run_config.scheduling is not None:
        taskdef["job_spec"]["scheduling"] = run_config.scheduling

    # We always set the container spec on the zeroth pool spec to ensure it will run the flow
    taskdef["job_spec"]["worker_pool_specs"][0]["container_spec"] = {
        "image_uri": image,
        "command": command.split(),
        "args": [],
        "env": env_list,
    }

    return taskdef
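# For orientation, a task definition produced by the method above has roughly
# the following shape. All values here are hypothetical examples, not real
# project identifiers or the agent's actual defaults.
example_vertex_taskdef = {
    "display_name": "my-flow-purple-otter",
    "job_spec": {
        "worker_pool_specs": [
            {
                "machine_spec": {"machine_type": "e2-standard-4"},
                "replica_count": 1,
                "container_spec": {
                    "image_uri": "prefecthq/prefect:latest",
                    "command": ["prefect", "execute", "flow-run"],
                    "args": [],
                    "env": [{"name": "PREFECT__BACKEND", "value": "cloud"}],
                },
            }
        ],
        "service_account": "agent-sa@example-project.iam.gserviceaccount.com",
    },
}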
def test_get_flow_image_run_config_image_on_RunConfig():
    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "storage": Local().serialize(),
                    "id": "id",
                }
            ),
            "run_config": KubernetesRun(image="myfancyimage").serialize(),
            "id": "id",
        }
    )
    image = get_flow_image(flow_run)
    assert image == "myfancyimage"
def test_get_flow_image_run_config_default_value_from_core_version():
    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "core_version": "0.13.0",
                    "storage": Local().serialize(),
                    "run_config": KubernetesRun().serialize(),
                    "id": "id",
                }
            ),
            "id": "id",
        }
    )
    image = get_flow_image(flow_run)
    assert image == "prefecthq/prefect:all_extras-0.13.0"
# NOTE: the parametrize values below are illustrative; the original decorator
# was not captured with this snippet.
@pytest.mark.parametrize("version", ["0.13.0", "0.13.0+134.gabc123", None])
def test_get_flow_image_run_config_default_value_from_core_version(version):
    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "core_version": version,
                    "storage": Local().serialize(),
                    "run_config": KubernetesRun().serialize(),
                    "id": "id",
                }
            ),
            "id": "id",
        }
    )
    image = get_flow_image(flow_run)
    expected_version = version.split("+")[0] if version else "latest"
    assert image == f"prefecthq/prefect:all_extras-{expected_version}"
def test_get_flow_image_env_metadata():
    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "storage": Local().serialize(),
                    "environment": LocalEnvironment(
                        metadata={"image": "repo/name:tag"}
                    ).serialize(),
                    "id": "id",
                }
            ),
            "id": "id",
        }
    )
    image = get_flow_image(flow_run=flow_run)
    assert image == "repo/name:tag"
# NOTE: illustrative parametrization; the original decorator was not captured
# with this snippet. Docker storage should determine the image regardless of
# the run config.
@pytest.mark.parametrize("run_config", [None, KubernetesRun()])
def test_get_flow_image_run_config_docker_storage(run_config):
    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "storage": Docker(
                        registry_url="test", image_name="name", image_tag="tag"
                    ).serialize(),
                    "id": "id",
                }
            ),
            "run_config": run_config.serialize() if run_config else None,
            "id": "id",
        }
    )
    image = get_flow_image(flow_run)
    assert image == "test/name:tag"
def test_get_flow_image_docker_storage():
    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "storage": Docker(
                        registry_url="test", image_name="name", image_tag="tag"
                    ).serialize(),
                    "environment": LocalEnvironment().serialize(),
                    "id": "id",
                }
            ),
            "id": "id",
        }
    )
    image = get_flow_image(flow_run=flow_run)
    assert image == "test/name:tag"
def deploy_flow(self, flow_run: GraphQLResult) -> str:
    """
    Deploy flow runs onto a k8s cluster as jobs

    Args:
        - flow_run (GraphQLResult): A GraphQLResult flow run object

    Returns:
        - str: Information about the deployment
    """
    import urllib3.exceptions

    self.logger.info("Deploying flow run {}".format(flow_run.id))  # type: ignore

    image = get_flow_image(flow_run=flow_run)
    job_spec = self.replace_job_spec_yaml(flow_run=flow_run, image=image)
    job_name = job_spec["metadata"]["name"]

    self.logger.debug("Creating namespaced job {}".format(job_name))
    attempts = 3
    while True:
        try:
            self.batch_client.create_namespaced_job(
                namespace=self.namespace, body=job_spec
            )
            break
        except self.k8s_client.rest.ApiException as exc:
            if exc.status == 409:
                # object already exists, previous submission was successful
                # even though it errored
                break
            raise
        except urllib3.exceptions.HTTPError:
            attempts -= 1
            if attempts == 0:
                raise
            self.logger.warning(
                "Error submitting job %s, retrying...", job_name, exc_info=True
            )
            time.sleep(1)

    self.logger.debug("Job {} created".format(job_name))

    return "Job {}".format(job_name)
# NOTE: illustrative parametrization; the original decorators were not
# captured with this snippet.
@pytest.mark.parametrize("run_config", [None, KubernetesRun()])
@pytest.mark.parametrize("version", ["0.13.0", "0.13.0+134.gabc123", None])
@pytest.mark.parametrize("default", [None, "myimage:tag"])
def test_get_flow_image_run_config_default(run_config, version, default):
    flow_run = GraphQLResult(
        {
            "flow": GraphQLResult(
                {
                    "core_version": version,
                    "storage": Local().serialize(),
                    "id": "id",
                }
            ),
            "run_config": run_config.serialize() if run_config else None,
            "id": "id",
        }
    )
    if default is None:
        expected_version = version.split("+")[0] if version else "latest"
        expected = f"prefecthq/prefect:{expected_version}"
    else:
        expected = default
    image = get_flow_image(flow_run, default=default)
    assert image == expected
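# Taken together, the tests above pin down a resolution order for
# `get_flow_image`. The sketch below is illustrative only: it takes plain
# arguments instead of the real serialized storage/run-config schemas, the
# key names are assumptions, and the default tag prefix ("all_extras-" vs.
# bare) has varied across releases.
from typing import Optional


def resolve_image_sketch(
    storage: dict,
    run_config_image: Optional[str],
    env_metadata_image: Optional[str],
    core_version: Optional[str],
    default: Optional[str] = None,
) -> str:
    # 1) Docker storage fully specifies the image: registry/name:tag.
    if storage.get("type") == "Docker":
        return (
            f"{storage['registry_url']}/{storage['image_name']}"
            f":{storage['image_tag']}"
        )
    # 2) An explicit image on the run config wins next.
    if run_config_image:
        return run_config_image
    # 3) Legacy environments may pin an image in their metadata.
    if env_metadata_image:
        return env_metadata_image
    # 4) An explicit default passed by the caller.
    if default:
        return default
    # 5) Otherwise derive a version-pinned prefect image, stripping local
    #    build suffixes ("+..."). In older releases a flow with neither a
    #    run config nor image metadata raised ValueError instead (see the
    #    first test in this section).
    version = core_version.split("+")[0] if core_version else "latest"
    return f"prefecthq/prefect:{version}"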
def deploy_flow(self, flow_run: GraphQLResult) -> str:
    """
    Deploy flow runs to Fargate

    Args:
        - flow_run (GraphQLResult): A GraphQLResult flow run object

    Returns:
        - str: Information about the deployment
    """
    self.logger.info("Deploying flow run {}".format(flow_run.id))  # type: ignore

    # create copies of kwargs to apply overrides as needed
    flow_task_definition_kwargs = copy.deepcopy(self.task_definition_kwargs)
    flow_task_run_kwargs = copy.deepcopy(self.task_run_kwargs)
    flow_container_definitions_kwargs = copy.deepcopy(
        self.container_definitions_kwargs
    )

    # create task_definition_name dict for passing into verify method
    task_definition_dict = {}

    if self.use_external_kwargs:
        # override from external kwargs
        self._override_kwargs(
            flow_run,
            flow_task_definition_kwargs,
            flow_task_run_kwargs,
            flow_container_definitions_kwargs,
        )

    # set proper task_definition_name and tags based on enable_task_revisions flag
    if self.enable_task_revisions:
        # set task definition name
        task_definition_dict["task_definition_name"] = slugify(flow_run.flow.name)
        self._add_flow_tags(flow_run, flow_task_definition_kwargs)
    else:
        task_definition_dict["task_definition_name"] = "prefect-task-{}".format(  # type: ignore
            flow_run.flow.id[:8]  # type: ignore
        )  # type: ignore

    image = get_flow_image(flow_run=flow_run)
    flow_run_command = get_flow_run_command(flow_run=flow_run)

    # check if task definition exists
    self.logger.debug("Checking for task definition")
    if not self._verify_task_definition_exists(flow_run, task_definition_dict):
        self.logger.debug("No task definition found")
        self._create_task_definition(
            image=image,
            flow_task_definition_kwargs=flow_task_definition_kwargs,
            container_definitions_kwargs=flow_container_definitions_kwargs,
            task_definition_name=task_definition_dict["task_definition_name"],
            flow_run_command=flow_run_command,
        )

    # run task
    task_arn = self._run_task(
        flow_run, flow_task_run_kwargs, task_definition_dict["task_definition_name"]
    )

    self.logger.debug("Run created for task {}".format(task_arn))

    return "Task ARN: {}".format(task_arn)
def deploy_flow(self, flow_run: GraphQLResult) -> str:
    """
    Deploy flow runs on your local machine as Docker containers

    Args:
        - flow_run (GraphQLResult): A GraphQLResult flow run object

    Returns:
        - str: Information about the deployment
    """
    self.logger.info("Deploying flow run {}".format(flow_run.id))  # type: ignore

    # 'import docker' is expensive time-wise, we should do this just-in-time to keep
    # the 'import prefect' time low
    import docker

    image = get_flow_image(flow_run=flow_run)
    env_vars = self.populate_env_vars(flow_run=flow_run)

    if not self.no_pull and len(image.split("/")) > 1:
        self.logger.info("Pulling image {}...".format(image))

        pull_output = self.docker_client.pull(image, stream=True, decode=True)
        for line in pull_output:
            self.logger.debug(line)
        self.logger.info("Successfully pulled image {}...".format(image))

    # Create any named volumes (if they do not already exist)
    for named_volume_name in self.named_volumes:
        try:
            self.docker_client.inspect_volume(name=named_volume_name)
        except docker.errors.APIError:
            self.logger.debug("Creating named volume {}".format(named_volume_name))
            self.docker_client.create_volume(
                name=named_volume_name,
                driver="local",
                labels={"prefect_created": "true"},
            )

    # Create a container
    self.logger.debug("Creating Docker container {}".format(image))

    host_config = {"auto_remove": True}  # type: dict
    container_mount_paths = self.container_mount_paths
    if container_mount_paths:
        host_config.update(binds=self.host_spec)

    if sys.platform.startswith("linux") and self.docker_interface:
        docker_internal_ip = get_docker_ip()
        host_config.update(
            extra_hosts={"host.docker.internal": docker_internal_ip}
        )

    networking_config = None
    if self.network:
        networking_config = self.docker_client.create_networking_config(
            {self.network: self.docker_client.create_endpoint_config()}
        )

    container = self.docker_client.create_container(
        image,
        command="prefect execute cloud-flow",
        environment=env_vars,
        volumes=container_mount_paths,
        host_config=self.docker_client.create_host_config(**host_config),
        networking_config=networking_config,
    )

    # Start the container
    self.logger.debug(
        "Starting Docker container with ID {}".format(container.get("Id"))
    )
    if self.network:
        self.logger.debug(
            "Adding container to docker network: {}".format(self.network)
        )

    self.docker_client.start(container=container.get("Id"))

    if self.show_flow_logs:
        proc = multiprocessing.Process(
            target=self.stream_container_logs,
            kwargs={"container_id": container.get("Id")},
        )
        proc.start()
        self.processes.append(proc)

    self.logger.debug("Docker container {} started".format(container.get("Id")))

    return "Container ID: {}".format(container.get("Id"))
def deploy_flow(self, flow_run: GraphQLResult) -> str:
    """
    Deploy flow runs on your local machine as Docker containers

    Args:
        - flow_run (GraphQLResult): A GraphQLResult flow run object

    Returns:
        - str: Information about the deployment
    """
    self.logger.info("Deploying flow run {}".format(flow_run.id))  # type: ignore

    # 'import docker' is expensive time-wise, we should do this just-in-time to keep
    # the 'import prefect' time low
    import docker

    if getattr(flow_run.flow, "run_config", None) is not None:
        run_config = RunConfigSchema().load(flow_run.flow.run_config)
        if not isinstance(run_config, DockerRun):
            self.logger.error(
                "Flow run %s has a `run_config` of type `%s`, only `DockerRun` is supported",
                flow_run.id,
                type(run_config).__name__,
            )
            raise TypeError(
                "Unsupported RunConfig type: %s" % type(run_config).__name__
            )
    else:
        run_config = None

    image = get_flow_image(flow_run=flow_run)
    env_vars = self.populate_env_vars(flow_run, run_config=run_config)

    if not self.no_pull and len(image.split("/")) > 1:
        self.logger.info("Pulling image {}...".format(image))
        registry = image.split("/")[0]
        if self.reg_allow_list and registry not in self.reg_allow_list:
            self.logger.error(
                "Trying to pull image from a Docker registry '{}' which"
                " is not in the reg_allow_list".format(registry)
            )
            raise ValueError(
                "Trying to pull image from a Docker registry '{}' which"
                " is not in the reg_allow_list".format(registry)
            )
        else:
            pull_output = self.docker_client.pull(image, stream=True, decode=True)
            for line in pull_output:
                self.logger.debug(line)
            self.logger.info("Successfully pulled image {}...".format(image))

    # Create any named volumes (if they do not already exist)
    for named_volume_name in self.named_volumes:
        try:
            self.docker_client.inspect_volume(name=named_volume_name)
        except docker.errors.APIError:
            self.logger.debug("Creating named volume {}".format(named_volume_name))
            self.docker_client.create_volume(
                name=named_volume_name,
                driver="local",
                labels={"prefect_created": "true"},
            )

    # Create a container
    self.logger.debug("Creating Docker container {}".format(image))

    host_config = {"auto_remove": True}  # type: dict
    container_mount_paths = self.container_mount_paths
    if container_mount_paths:
        host_config.update(binds=self.host_spec)

    if sys.platform.startswith("linux") and self.docker_interface:
        docker_internal_ip = get_docker_ip()
        host_config.update(
            extra_hosts={"host.docker.internal": docker_internal_ip}
        )

    networking_config = None
    if self.network:
        networking_config = self.docker_client.create_networking_config(
            {self.network: self.docker_client.create_endpoint_config()}
        )

    container = self.docker_client.create_container(
        image,
        command=get_flow_run_command(flow_run),
        environment=env_vars,
        volumes=container_mount_paths,
        host_config=self.docker_client.create_host_config(**host_config),
        networking_config=networking_config,
    )

    # Start the container
    self.logger.debug(
        "Starting Docker container with ID {}".format(container.get("Id"))
    )
    if self.network:
        self.logger.debug(
            "Adding container to docker network: {}".format(self.network)
        )

    self.docker_client.start(container=container.get("Id"))

    if self.show_flow_logs:
        self.stream_flow_logs(container.get("Id"))

    self.logger.debug("Docker container {} started".format(container.get("Id")))

    return "Container ID: {}".format(container.get("Id"))
def deploy_flow(self, flow_run: GraphQLResult) -> str:
    """
    Deploy flow runs on your local machine as Docker containers

    Args:
        - flow_run (GraphQLResult): A GraphQLResult flow run object

    Returns:
        - str: Information about the deployment
    """
    self.logger.info("Deploying flow run {}".format(flow_run.id))  # type: ignore

    # 'import docker' is expensive time-wise, we should do this just-in-time to keep
    # the 'import prefect' time low
    import docker

    run_config = self._get_run_config(flow_run, DockerRun)
    assert run_config is None or isinstance(run_config, DockerRun)  # mypy

    image = get_flow_image(flow_run=flow_run)
    env_vars = self.populate_env_vars(flow_run, image, run_config=run_config)

    if not self.no_pull and len(image.split("/")) > 1:
        self.logger.info("Pulling image {}...".format(image))
        registry = image.split("/")[0]
        if self.reg_allow_list and registry not in self.reg_allow_list:
            self.logger.error(
                "Trying to pull image from a Docker registry '{}' which"
                " is not in the reg_allow_list".format(registry)
            )
            raise ValueError(
                "Trying to pull image from a Docker registry '{}' which"
                " is not in the reg_allow_list".format(registry)
            )
        else:
            pull_output = self.docker_client.pull(image, stream=True, decode=True)
            for line in pull_output:
                self.logger.debug(line)
            self.logger.info("Successfully pulled image {}...".format(image))

    # Create any named volumes (if they do not already exist)
    for named_volume_name in self.named_volumes:
        try:
            self.docker_client.inspect_volume(name=named_volume_name)
        except docker.errors.APIError:
            self.logger.debug("Creating named volume {}".format(named_volume_name))
            self.docker_client.create_volume(
                name=named_volume_name,
                driver="local",
                labels={"prefect_created": "true"},
            )

    # Create a container
    self.logger.debug("Creating Docker container {}".format(image))

    host_config = {"auto_remove": True}  # type: dict
    container_mount_paths = self.container_mount_paths
    if container_mount_paths:
        host_config.update(binds=self.host_spec)
    if sys.platform.startswith("linux") and self.docker_interface:
        docker_internal_ip = get_docker_ip()
        host_config.update(
            extra_hosts={"host.docker.internal": docker_internal_ip}
        )

    networking_config = None
    # At the time of creation, you can only connect a container to a single network,
    # however you can create more connections after creation.
    # Connect the first network in the creation step. If no network is connected here
    # the container is connected to the default `bridge` network.
    # The rest of the networks are connected after creation.
    if self.networks:
        networking_config = self.docker_client.create_networking_config(
            {self.networks[0]: self.docker_client.create_endpoint_config()}
        )
    # Fall back to the old, deprecated behaviour.
    if self.network:
        networking_config = self.docker_client.create_networking_config(
            {self.network: self.docker_client.create_endpoint_config()}
        )
    labels = {
        "io.prefect.flow-name": flow_run.flow.name,
        "io.prefect.flow-id": flow_run.flow.id,
        "io.prefect.flow-run-id": flow_run.id,
    }
    container = self.docker_client.create_container(
        image,
        command=get_flow_run_command(flow_run),
        environment=env_vars,
        volumes=container_mount_paths,
        host_config=self.docker_client.create_host_config(**host_config),
        networking_config=networking_config,
        labels=labels,
    )
    # Connect the rest of the networks
    if self.networks:
        for network in self.networks[1:]:
            self.docker_client.connect_container_to_network(
                container=container, net_id=network
            )

    # Start the container
    self.logger.debug(
        "Starting Docker container with ID {}".format(container.get("Id"))
    )
    if self.networks:
        self.logger.debug(
            "Adding container with ID {} to docker networks: {}.".format(
                container.get("Id"), self.networks
            )
        )
    if self.network:
        self.logger.debug(
            "Adding container to docker network: {}".format(self.network)
        )

    self.docker_client.start(container=container.get("Id"))

    if self.show_flow_logs:
        self.stream_flow_logs(container.get("Id"))

    self.logger.debug("Docker container {} started".format(container.get("Id")))

    return "Container ID: {}".format(container.get("Id"))
def deploy_flow(self, flow_run: GraphQLResult) -> str:
    """
    Deploy flow runs on your local machine as Docker containers

    Args:
        - flow_run (GraphQLResult): A GraphQLResult flow run object

    Returns:
        - str: Information about the deployment
    """
    # 'import docker' is expensive time-wise, we should do this just-in-time to keep
    # the 'import prefect' time low
    import docker

    run_config = self._get_run_config(flow_run, DockerRun)
    assert run_config is None or isinstance(run_config, DockerRun)  # mypy

    image = get_flow_image(flow_run=flow_run)
    env_vars = self.populate_env_vars(flow_run, image, run_config=run_config)

    if not self.no_pull and len(image.split("/")) > 1:
        self.logger.info("Pulling image {}...".format(image))
        registry = image.split("/")[0]
        if self.reg_allow_list and registry not in self.reg_allow_list:
            self.logger.error(
                "Trying to pull image from a Docker registry '{}' which"
                " is not in the reg_allow_list".format(registry)
            )
            raise ValueError(
                "Trying to pull image from a Docker registry '{}' which"
                " is not in the reg_allow_list".format(registry)
            )
        else:
            pull_output = self.docker_client.pull(image, stream=True, decode=True)
            for line in pull_output:
                self.logger.debug(line)
            self.logger.info("Successfully pulled image {}".format(image))

    # Create any named volumes (if they do not already exist)
    for named_volume_name in self.named_volumes:
        try:
            self.docker_client.inspect_volume(name=named_volume_name)
        except docker.errors.APIError:
            self.logger.debug("Creating named volume {}".format(named_volume_name))
            self.docker_client.create_volume(
                name=named_volume_name,
                driver="local",
                labels={"prefect_created": "true"},
            )

    # Create a container
    self.logger.debug("Creating Docker container {}".format(image))

    # By default, auto-remove containers
    host_config: Dict[str, Any] = {"auto_remove": True}
    # By default, no ports
    ports = None

    # Set up a host gateway for local communication; check the docker version since
    # this is not supported by older versions
    docker_engine_version = parse_version(self.docker_client.version()["Version"])
    host_gateway_version = parse_version("20.10.0")

    if docker_engine_version < host_gateway_version:
        warnings.warn(
            "`host.docker.internal` could not be automatically resolved to your "
            "local host. This feature is not supported on Docker Engine "
            f"v{docker_engine_version}, upgrade to v{host_gateway_version}+ if you "
            "encounter issues."
        )
    else:
        # Compatibility for linux -- https://github.com/docker/cli/issues/2290
        # Only supported by Docker v20.10.0+ which is our minimum recommended version
        host_config["extra_hosts"] = {"host.docker.internal": "host-gateway"}

    container_mount_paths = self.container_mount_paths
    if container_mount_paths:
        host_config.update(binds=self.host_spec)

    if run_config is not None and run_config.host_config:
        # The host_config passed from the run_config will overwrite defaults
        host_config.update(run_config.host_config)

    if run_config is not None and run_config.ports:
        ports = run_config.ports

    networking_config = None
    # At the time of creation, you can only connect a container to a single network,
    # however you can create more connections after creation.
    # Connect the first network in the creation step. If no network is connected here
    # the container is connected to the default `bridge` network.
    # The rest of the networks are connected after creation.
    if self.networks:
        networking_config = self.docker_client.create_networking_config(
            {self.networks[0]: self.docker_client.create_endpoint_config()}
        )
    labels = {
        "io.prefect.flow-name": flow_run.flow.name,
        "io.prefect.flow-id": flow_run.flow.id,
        "io.prefect.flow-run-id": flow_run.id,
    }

    # Generate a container name to match the flow run name, ensuring it is docker
    # compatible and unique. Must match `[a-zA-Z0-9][a-zA-Z0-9_.-]+` in the end
    container_name = slugified_name = (
        slugify(
            flow_run.name,
            lowercase=False,
            # Docker does not limit length but URL limits apply eventually so
            # limit the length for safety
            max_length=250,
            # Docker allows these characters for container names
            regex_pattern=r"[^a-zA-Z0-9_.-]+",
        ).lstrip(
            # Docker does not allow leading underscore, dash, or period
            "_-."
        )
        # Docker does not allow 0 character names so use the flow run id if name
        # would be empty after cleaning
        or flow_run.id
    )

    # Create the container with retries on name conflicts
    index = 0  # will be bumped on name collisions
    while True:
        try:
            container = self.docker_client.create_container(
                image,
                command=get_flow_run_command(flow_run),
                environment=env_vars,
                name=container_name,
                volumes=container_mount_paths,
                host_config=self.docker_client.create_host_config(**host_config),
                networking_config=networking_config,
                labels=labels,
                ports=ports,
            )
        except docker.errors.APIError as exc:
            if "Conflict" in str(exc) and "container name" in str(exc):
                index += 1
                container_name = f"{slugified_name}-{index}"
            else:
                raise
        else:
            break

    # Connect the rest of the networks
    if self.networks:
        for network in self.networks[1:]:
            self.docker_client.connect_container_to_network(
                container=container, net_id=network
            )

    # Start the container
    self.logger.debug(
        f"Starting Docker container with ID {container.get('Id')} and "
        f"name {container_name!r}"
    )
    if self.networks:
        self.logger.debug(
            "Adding container with ID {} to docker networks: {}.".format(
                container.get("Id"), self.networks
            )
        )

    self.docker_client.start(container=container.get("Id"))

    if self.show_flow_logs:
        self.stream_flow_logs(container.get("Id"))

    self.logger.debug("Docker container {} started".format(container.get("Id")))

    return "Container ID: {}".format(container.get("Id"))
def generate_task_definition(
    self, flow_run: GraphQLResult, run_config: ECSRun
) -> Dict[str, Any]:
    """Generate an ECS task definition from a flow run

    Args:
        - flow_run (GraphQLResult): A flow run object
        - run_config (ECSRun): The flow's run config

    Returns:
        - dict: a dictionary representation of an ECS task definition
    """
    if run_config.task_definition:
        taskdef = deepcopy(run_config.task_definition)
    elif run_config.task_definition_path:
        self.logger.debug(
            "Loading task definition template from %r",
            run_config.task_definition_path,
        )
        template_bytes = read_bytes_from_path(run_config.task_definition_path)
        taskdef = yaml.safe_load(template_bytes)
    else:
        taskdef = deepcopy(self.task_definition)

    slug = slugify.slugify(
        f"{flow_run.flow.name}-{flow_run.id}",
        max_length=255 - len("prefect-"),
        word_boundary=True,
        save_order=True,
    )
    taskdef["family"] = f"prefect-{slug}"

    # Add some metadata tags for easier tracking by users
    taskdef.setdefault("tags", []).extend(
        [
            {"key": "prefect:flow-id", "value": flow_run.flow.id},
            {"key": "prefect:flow-version", "value": str(flow_run.flow.version)},
        ]
    )

    # Get the flow container (creating one if it doesn't already exist)
    containers = taskdef.setdefault("containerDefinitions", [])
    for container in containers:
        if container.get("name") == "flow":
            break
    else:
        container = {"name": "flow"}
        containers.append(container)

    # Set flow image
    container["image"] = image = get_flow_image(
        flow_run, default=container.get("image")
    )

    # Add `PREFECT__CONTEXT__IMAGE` environment variable
    env = {"PREFECT__CONTEXT__IMAGE": image}
    container_env = [{"name": k, "value": v} for k, v in env.items()]
    for entry in container.get("environment", []):
        if entry["name"] not in env:
            container_env.append(entry)
    container["environment"] = container_env

    # Ensure that cpu/memory are strings not integers
    if "cpu" in taskdef:
        taskdef["cpu"] = str(taskdef["cpu"])
    if "memory" in taskdef:
        taskdef["memory"] = str(taskdef["memory"])

    # If we're using Fargate, we need to explicitly set an executionRoleArn on the
    # task definition. If one isn't present, then try to load it from the run_config
    # and then the agent's default.
    if "executionRoleArn" not in taskdef:
        if run_config.execution_role_arn:
            taskdef["executionRoleArn"] = run_config.execution_role_arn
        elif self.execution_role_arn:
            taskdef["executionRoleArn"] = self.execution_role_arn

    # Set requiresCompatibilities, if not already present, when a launch type
    # is configured on the agent
    if "requiresCompatibilities" not in taskdef and self.launch_type:
        taskdef["requiresCompatibilities"] = [self.launch_type]

    return taskdef
def deploy_flow(self, flow_run: GraphQLResult) -> str:
    """
    Deploy flow runs on your local machine as Docker containers

    Args:
        - flow_run (GraphQLResult): A GraphQLResult flow run object

    Returns:
        - str: Information about the deployment
    """
    # 'import docker' is expensive time-wise, we should do this just-in-time to keep
    # the 'import prefect' time low
    import docker

    run_config = self._get_run_config(flow_run, DockerRun)
    assert run_config is None or isinstance(run_config, DockerRun)  # mypy

    image = get_flow_image(flow_run=flow_run)
    env_vars = self.populate_env_vars(flow_run, image, run_config=run_config)

    if not self.no_pull and len(image.split("/")) > 1:
        self.logger.info("Pulling image {}...".format(image))
        registry = image.split("/")[0]
        if self.reg_allow_list and registry not in self.reg_allow_list:
            self.logger.error(
                "Trying to pull image from a Docker registry '{}' which"
                " is not in the reg_allow_list".format(registry)
            )
            raise ValueError(
                "Trying to pull image from a Docker registry '{}' which"
                " is not in the reg_allow_list".format(registry)
            )
        else:
            pull_output = self.docker_client.pull(image, stream=True, decode=True)
            for line in pull_output:
                self.logger.debug(line)
            self.logger.info("Successfully pulled image {}".format(image))

    # Create any named volumes (if they do not already exist)
    for named_volume_name in self.named_volumes:
        try:
            self.docker_client.inspect_volume(name=named_volume_name)
        except docker.errors.APIError:
            self.logger.debug("Creating named volume {}".format(named_volume_name))
            self.docker_client.create_volume(
                name=named_volume_name,
                driver="local",
                labels={"prefect_created": "true"},
            )

    # Create a container
    self.logger.debug("Creating Docker container {}".format(image))

    host_config = {"auto_remove": True}  # type: dict
    container_mount_paths = self.container_mount_paths
    if container_mount_paths:
        host_config.update(binds=self.host_spec)

    networking_config = None
    # At the time of creation, you can only connect a container to a single network,
    # however you can create more connections after creation.
    # Connect the first network in the creation step. If no network is connected here
    # the container is connected to the default `bridge` network.
    # The rest of the networks are connected after creation.
    if self.networks:
        networking_config = self.docker_client.create_networking_config(
            {self.networks[0]: self.docker_client.create_endpoint_config()}
        )
    # Fall back to the old, deprecated behaviour.
    if self.network:
        networking_config = self.docker_client.create_networking_config(
            {self.network: self.docker_client.create_endpoint_config()}
        )
    labels = {
        "io.prefect.flow-name": flow_run.flow.name,
        "io.prefect.flow-id": flow_run.flow.id,
        "io.prefect.flow-run-id": flow_run.id,
    }

    # Generate a container name to match the flow run name, ensuring it is docker
    # compatible and unique. Must match `[a-zA-Z0-9][a-zA-Z0-9_.-]+` in the end
    container_name = slugified_name = (
        slugify(
            flow_run.name,
            lowercase=False,
            # Docker does not limit length but URL limits apply eventually so
            # limit the length for safety
            max_length=250,
            # Docker allows these characters for container names
            regex_pattern=r"[^a-zA-Z0-9_.-]+",
        ).lstrip(
            # Docker does not allow leading underscore, dash, or period
            "_-."
        )
        # Docker does not allow 0 character names so use the flow run id if name
        # would be empty after cleaning
        or flow_run.id
    )

    # Create the container with retries on name conflicts
    index = 0  # will be bumped on name collisions
    while True:
        try:
            container = self.docker_client.create_container(
                image,
                command=get_flow_run_command(flow_run),
                environment=env_vars,
                name=container_name,
                volumes=container_mount_paths,
                host_config=self.docker_client.create_host_config(**host_config),
                networking_config=networking_config,
                labels=labels,
            )
        except docker.errors.APIError as exc:
            if "Conflict" in str(exc) and "container name" in str(exc):
                index += 1
                container_name = f"{slugified_name}-{index}"
            else:
                raise
        else:
            break

    # Connect the rest of the networks
    if self.networks:
        for network in self.networks[1:]:
            self.docker_client.connect_container_to_network(
                container=container, net_id=network
            )

    # Start the container
    self.logger.debug(
        f"Starting Docker container with ID {container.get('Id')} and "
        f"name {container_name!r}"
    )
    if self.networks:
        self.logger.debug(
            "Adding container with ID {} to docker networks: {}.".format(
                container.get("Id"), self.networks
            )
        )
    if self.network:
        self.logger.debug(
            "Adding container to docker network: {}".format(self.network)
        )

    self.docker_client.start(container=container.get("Id"))

    if self.show_flow_logs:
        self.stream_flow_logs(container.get("Id"))

    self.logger.debug("Docker container {} started".format(container.get("Id")))

    return "Container ID: {}".format(container.get("Id"))
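# The container-name cleanup above, shown in isolation. The input string and
# expected output are illustrative; the exact result depends on the
# python-slugify version in use.
from slugify import slugify

example_name = slugify(
    "My Flow Run: #2",
    lowercase=False,
    max_length=250,
    regex_pattern=r"[^a-zA-Z0-9_.-]+",
).lstrip("_-.")
# -> "My-Flow-Run-2": runs of characters outside [a-zA-Z0-9_.-] collapse to a
# single "-", and any leading "_", "-", or "." is stripped afterwards.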
def generate_job_spec_from_environment(
    self, flow_run: GraphQLResult, image: str = None
) -> dict:
    """
    Populate a k8s job spec. This spec defines a k8s job that handles
    executing a flow. This method runs each time the agent receives
    a flow to run.

    That job spec can optionally be customized by setting the
    following environment variables on the agent.

    - `NAMESPACE`: the k8s namespace the job will run in, defaults to `"default"`
    - `JOB_MEM_REQUEST`: memory requested, for example, `256Mi` for 256 MB. If this
        environment variable is not set, the cluster's defaults will be used.
    - `JOB_MEM_LIMIT`: memory limit, for example, `512Mi` for 512 MB. If this
        environment variable is not set, the cluster's defaults will be used.
    - `JOB_CPU_REQUEST`: CPU requested, defaults to `"100m"`
    - `JOB_CPU_LIMIT`: CPU limit, defaults to `"100m"`
    - `IMAGE_PULL_POLICY`: policy for pulling images. Defaults to `"IfNotPresent"`.
    - `IMAGE_PULL_SECRETS`: name of an existing k8s secret that can be used to pull
        images. This is necessary if your flow uses an image that is in a non-public
        container registry, such as Amazon ECR, or in a public registry that requires
        authentication to avoid hitting rate limits. To specify multiple image pull
        secrets, provide a comma-delimited string with no spaces, like
        `"some-secret,other-secret"`.
    - `SERVICE_ACCOUNT_NAME`: name of a service account to run the job as.
        By default, none is specified.
    - `YAML_TEMPLATE`: a path to where the YAML template should be loaded from.
        Defaults to the embedded `job_spec.yaml`.

    Args:
        - flow_run (GraphQLResult): A flow run object
        - image (str, optional): The full name of an image to use for the job

    Returns:
        - dict: a dictionary representation of a k8s job for flow execution
    """
    identifier = str(uuid.uuid4())[:8]
    yaml_path = os.getenv(
        "YAML_TEMPLATE", os.path.join(os.path.dirname(__file__), "job_spec.yaml")
    )
    with open(yaml_path, "r") as job_file:
        job = yaml.safe_load(job_file)

    job_name = "prefect-job-{}".format(identifier)

    # Populate job metadata for identification
    k8s_labels = {
        "prefect.io/identifier": identifier,
        "prefect.io/flow_run_id": flow_run.id,  # type: ignore
        "prefect.io/flow_id": flow_run.flow.id,  # type: ignore
    }
    job["metadata"]["name"] = job_name
    job["metadata"]["labels"].update(**k8s_labels)
    job["spec"]["template"]["metadata"]["labels"].update(**k8s_labels)

    # Use provided image for job
    if image is None:
        image = get_flow_image(flow_run=flow_run)
    job["spec"]["template"]["spec"]["containers"][0]["image"] = image

    self.logger.debug("Using image {} for job".format(image))

    # Determine flow run command
    job["spec"]["template"]["spec"]["containers"][0]["args"] = [
        get_flow_run_command(flow_run)
    ]

    # Populate environment variables for flow run execution
    env = job["spec"]["template"]["spec"]["containers"][0]["env"]

    env[0]["value"] = config.cloud.api or "https://api.prefect.io"
    env[1]["value"] = config.cloud.agent.auth_token
    env[2]["value"] = flow_run.id  # type: ignore
    env[3]["value"] = flow_run.flow.id  # type: ignore
    env[4]["value"] = self.namespace
    env[5]["value"] = str(self.labels)
    env[6]["value"] = str(self.log_to_cloud).lower()
    env[7]["value"] = self.env_vars.get(
        "PREFECT__LOGGING__LEVEL", config.logging.level
    )

    # append all user provided values
    for key, value in self.env_vars.items():
        env.append(dict(name=key, value=value))

    # Use image pull secrets if provided
    if self.image_pull_secrets:
        for idx, secret_name in enumerate(self.image_pull_secrets):
            # this check preserves behavior from previous releases,
            # where prefect would only overwrite the first entry in
            # imagePullSecrets
            if idx == 0:
                job["spec"]["template"]["spec"]["imagePullSecrets"][0] = {
                    "name": secret_name
                }
            else:
                job["spec"]["template"]["spec"]["imagePullSecrets"].append(
                    {"name": secret_name}
                )
    else:
        del job["spec"]["template"]["spec"]["imagePullSecrets"]

    # Set resource requirements if provided
    resources = job["spec"]["template"]["spec"]["containers"][0]["resources"]
    if os.getenv("JOB_MEM_REQUEST"):
        resources["requests"]["memory"] = os.getenv("JOB_MEM_REQUEST")
    if os.getenv("JOB_MEM_LIMIT"):
        resources["limits"]["memory"] = os.getenv("JOB_MEM_LIMIT")
    if os.getenv("JOB_CPU_REQUEST"):
        resources["requests"]["cpu"] = os.getenv("JOB_CPU_REQUEST")
    if os.getenv("JOB_CPU_LIMIT"):
        resources["limits"]["cpu"] = os.getenv("JOB_CPU_LIMIT")

    if self.volume_mounts:
        job["spec"]["template"]["spec"]["containers"][0][
            "volumeMounts"
        ] = self.volume_mounts
    else:
        del job["spec"]["template"]["spec"]["containers"][0]["volumeMounts"]

    if self.volumes:
        job["spec"]["template"]["spec"]["volumes"] = self.volumes
    else:
        del job["spec"]["template"]["spec"]["volumes"]

    if os.getenv("IMAGE_PULL_POLICY"):
        job["spec"]["template"]["spec"]["containers"][0][
            "imagePullPolicy"
        ] = os.getenv("IMAGE_PULL_POLICY")

    if self.service_account_name:
        job["spec"]["template"]["spec"][
            "serviceAccountName"
        ] = self.service_account_name

    return job
def generate_task_definition(
    self, flow_run: GraphQLResult, run_config: ECSRun
) -> Dict[str, Any]:
    """Generate an ECS task definition from a flow run

    Args:
        - flow_run (GraphQLResult): A flow run object
        - run_config (ECSRun): The flow's run config

    Returns:
        - dict: a dictionary representation of an ECS task definition
    """
    if run_config.task_definition:
        taskdef = deepcopy(run_config.task_definition)
    elif run_config.task_definition_path:
        self.logger.debug(
            "Loading task definition template from %r",
            run_config.task_definition_path,
        )
        template_bytes = read_bytes_from_path(run_config.task_definition_path)
        taskdef = yaml.safe_load(template_bytes)
    else:
        taskdef = deepcopy(self.task_definition)

    slug = slugify.slugify(
        flow_run.flow.name,
        max_length=255 - len("prefect-"),
        word_boundary=True,
        save_order=True,
    )
    family = f"prefect-{slug}"

    tags = self.get_task_definition_tags(flow_run)

    taskdef["family"] = family

    taskdef_tags = [{"key": k, "value": v} for k, v in tags.items()]
    for entry in taskdef.get("tags", []):
        if entry["key"] not in tags:
            taskdef_tags.append(entry)
    taskdef["tags"] = taskdef_tags

    # Get the flow container (creating one if it doesn't already exist)
    containers = taskdef.setdefault("containerDefinitions", [])
    for container in containers:
        if container.get("name") == "flow":
            break
    else:
        container = {"name": "flow"}
        containers.append(container)

    # Set flow image
    container["image"] = image = get_flow_image(flow_run)

    # Set flow run command
    container["command"] = ["/bin/sh", "-c", get_flow_run_command(flow_run)]

    # Set taskRoleArn if configured
    if run_config.task_role_arn:
        taskdef["taskRoleArn"] = run_config.task_role_arn

    # Populate static environment variables from the following sources,
    # with precedence:
    # - Static environment variables, hardcoded below
    # - Values in the task definition template
    env = {
        "PREFECT__CLOUD__USE_LOCAL_SECRETS": "false",
        "PREFECT__CONTEXT__IMAGE": image,
        "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudFlowRunner",
        "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudTaskRunner",
    }
    container_env = [{"name": k, "value": v} for k, v in env.items()]
    for entry in container.get("environment", []):
        if entry["name"] not in env:
            container_env.append(entry)
    container["environment"] = container_env

    # Set resource requirements, if provided
    # Also ensure that cpu/memory are strings not integers
    if run_config.cpu:
        taskdef["cpu"] = str(run_config.cpu)
    elif "cpu" in taskdef:
        taskdef["cpu"] = str(taskdef["cpu"])
    if run_config.memory:
        taskdef["memory"] = str(run_config.memory)
    elif "memory" in taskdef:
        taskdef["memory"] = str(taskdef["memory"])

    return taskdef
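# Both ECS generators above (and the k8s generators below) use the same merge
# pattern for environment variables: required values take precedence, and
# entries from the template survive only if they are not overridden. The
# helper below shows that pattern in isolation; `merge_env` is a hypothetical
# name, not part of the agent's API.
from typing import Dict, List


def merge_env(required: Dict[str, str], template_env: List[dict]) -> List[dict]:
    # Required values first; they always win.
    merged = [{"name": k, "value": v} for k, v in required.items()]
    # Keep template entries only when they don't collide with a required key.
    merged.extend(e for e in template_env if e["name"] not in required)
    return merged


# merge_env({"A": "1"}, [{"name": "A", "value": "old"}, {"name": "B", "value": "2"}])
# -> [{"name": "A", "value": "1"}, {"name": "B", "value": "2"}]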
def generate_job_spec_from_run_config(
    self, flow_run: GraphQLResult, run_config: KubernetesRun
) -> dict:
    """Generate a k8s job spec for a flow run.

    Args:
        - flow_run (GraphQLResult): A flow run object
        - run_config (KubernetesRun): The flow run's run_config

    Returns:
        - dict: a dictionary representation of a k8s job for flow execution
    """
    if run_config.job_template:
        job = run_config.job_template
    else:
        job_template_path = run_config.job_template_path or self.job_template_path
        self.logger.debug("Loading job template from %r", job_template_path)
        template_bytes = read_bytes_from_path(job_template_path)
        job = yaml.safe_load(template_bytes)

    identifier = uuid.uuid4().hex[:8]
    job_name = f"prefect-job-{identifier}"

    # Populate job metadata for identification
    k8s_labels = {
        "prefect.io/identifier": identifier,
        "prefect.io/flow_run_id": flow_run.id,  # type: ignore
        "prefect.io/flow_id": flow_run.flow.id,  # type: ignore
    }
    _get_or_create(job, "metadata.labels")
    _get_or_create(job, "spec.template.metadata.labels")
    job["metadata"]["name"] = job_name
    job["metadata"]["labels"].update(**k8s_labels)
    job["spec"]["template"]["metadata"]["labels"].update(**k8s_labels)

    pod_spec = job["spec"]["template"]["spec"]

    # Configure `service_account_name` if specified
    if run_config.service_account_name is not None:
        # On run-config, always override
        service_account_name = (
            run_config.service_account_name
        )  # type: Optional[str]
    elif "serviceAccountName" in pod_spec and (
        run_config.job_template or run_config.job_template_path
    ):
        # On run-config job-template, no override
        service_account_name = None
    else:
        # Use agent value, if provided
        service_account_name = self.service_account_name
    if service_account_name is not None:
        pod_spec["serviceAccountName"] = service_account_name

    # Configure `image_pull_secrets` if specified
    if run_config.image_pull_secrets is not None:
        # On run-config, always override
        image_pull_secrets = (
            run_config.image_pull_secrets
        )  # type: Optional[Iterable[str]]
    elif "imagePullSecrets" in pod_spec and (
        run_config.job_template or run_config.job_template_path
    ):
        # On run-config job template, no override
        image_pull_secrets = None
    else:
        # Use agent, if provided
        image_pull_secrets = self.image_pull_secrets
    if image_pull_secrets is not None:
        pod_spec["imagePullSecrets"] = [{"name": s} for s in image_pull_secrets]

    # Default restartPolicy to Never
    _get_or_create(job, "spec.template.spec.restartPolicy", "Never")

    # Get the first container, which is used for the prefect job
    containers = _get_or_create(job, "spec.template.spec.containers", [])
    if not containers:
        containers.append({})
    container = containers[0]

    # Set container image
    container["image"] = image = get_flow_image(
        flow_run, default=container.get("image")
    )

    # Set flow run command
    container["args"] = get_flow_run_command(flow_run).split()

    # Populate environment variables from the following sources,
    # with precedence:
    # - Values required for flow execution, hardcoded below
    # - Values set on the KubernetesRun object
    # - Values set using the `--env` CLI flag on the agent
    # - Values in the job template
    env = {"PREFECT__LOGGING__LEVEL": config.logging.level}
    env.update(self.env_vars)
    if run_config.env:
        env.update(run_config.env)
    env.update(
        {
            "PREFECT__BACKEND": config.backend,
            "PREFECT__CLOUD__AGENT__LABELS": str(self.labels),
            "PREFECT__CLOUD__API": config.cloud.api,
            "PREFECT__CLOUD__AUTH_TOKEN": config.cloud.agent.auth_token,
            "PREFECT__CLOUD__USE_LOCAL_SECRETS": "false",
            "PREFECT__CONTEXT__FLOW_RUN_ID": flow_run.id,
            "PREFECT__CONTEXT__FLOW_ID": flow_run.flow.id,
            "PREFECT__CONTEXT__IMAGE": image,
            "PREFECT__LOGGING__LOG_TO_CLOUD": str(self.log_to_cloud).lower(),
            "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudFlowRunner",
            "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudTaskRunner",
        }
    )
    container_env = [{"name": k, "value": v} for k, v in env.items()]
    for entry in container.get("env", []):
        if entry["name"] not in env:
            container_env.append(entry)
    container["env"] = container_env

    # Set resource requirements if provided
    _get_or_create(container, "resources.requests")
    _get_or_create(container, "resources.limits")
    resources = container["resources"]
    if run_config.memory_request:
        resources["requests"]["memory"] = run_config.memory_request
    if run_config.memory_limit:
        resources["limits"]["memory"] = run_config.memory_limit
    if run_config.cpu_request:
        resources["requests"]["cpu"] = run_config.cpu_request
    if run_config.cpu_limit:
        resources["limits"]["cpu"] = run_config.cpu_limit

    return job
def generate_job_spec_from_run_config(self, flow_run: GraphQLResult) -> dict:
    """Generate a k8s job spec for a flow run.

    Args:
        - flow_run (GraphQLResult): A flow run object

    Returns:
        - dict: a dictionary representation of a k8s job for flow execution
    """
    run_config = RunConfigSchema().load(flow_run.flow.run_config)

    if run_config.job_template:
        job = run_config.job_template
    else:
        job_template_path = run_config.job_template_path or self.job_template_path
        self.logger.debug("Loading job template from %r", job_template_path)
        template_bytes = read_bytes_from_path(job_template_path)
        job = yaml.safe_load(template_bytes)

    identifier = uuid.uuid4().hex[:8]
    job_name = f"prefect-job-{identifier}"

    # Populate job metadata for identification
    k8s_labels = {
        "prefect.io/identifier": identifier,
        "prefect.io/flow_run_id": flow_run.id,  # type: ignore
        "prefect.io/flow_id": flow_run.flow.id,  # type: ignore
    }
    _get_or_create(job, "metadata.labels")
    _get_or_create(job, "spec.template.metadata.labels")
    job["metadata"]["name"] = job_name
    job["metadata"]["labels"].update(**k8s_labels)
    job["spec"]["template"]["metadata"]["labels"].update(**k8s_labels)

    # Get the first container, which is used for the prefect job
    containers = _get_or_create(job, "spec.template.spec.containers", [])
    if not containers:
        containers.append({})
    container = containers[0]

    # Set container image
    container["image"] = image = get_flow_image(flow_run)

    # Set flow run command
    container["args"] = [get_flow_run_command(flow_run)]

    # Populate environment variables from the following sources,
    # with precedence:
    # - Values required for flow execution, hardcoded below
    # - Values set on the KubernetesRun object
    # - Values set using the `--env` CLI flag on the agent
    # - Values in the job template
    env = self.env_vars.copy()
    if run_config.env:
        env.update(run_config.env)
    env.update(
        {
            "PREFECT__CLOUD__API": config.cloud.api,
            "PREFECT__CLOUD__AUTH_TOKEN": config.cloud.agent.auth_token,
            "PREFECT__CLOUD__USE_LOCAL_SECRETS": "false",
            "PREFECT__CONTEXT__FLOW_RUN_ID": flow_run.id,
            "PREFECT__CONTEXT__FLOW_ID": flow_run.flow.id,
            "PREFECT__CONTEXT__IMAGE": image,
            "PREFECT__LOGGING__LOG_TO_CLOUD": str(self.log_to_cloud).lower(),
            "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudFlowRunner",
            "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudTaskRunner",
        }
    )
    container_env = [{"name": k, "value": v} for k, v in env.items()]
    for entry in container.get("env", []):
        if entry["name"] not in env:
            container_env.append(entry)
    container["env"] = container_env

    # Set resource requirements if provided
    _get_or_create(container, "resources.requests")
    _get_or_create(container, "resources.limits")
    resources = container["resources"]
    if run_config.memory_request:
        resources["requests"]["memory"] = run_config.memory_request
    if run_config.memory_limit:
        resources["limits"]["memory"] = run_config.memory_limit
    if run_config.cpu_request:
        resources["requests"]["cpu"] = run_config.cpu_request
    if run_config.cpu_limit:
        resources["limits"]["cpu"] = run_config.cpu_limit

    return job
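# The k8s generators above call a `_get_or_create` helper that isn't shown in
# this section. A minimal sketch consistent with how it is used here -- a
# dotted-path lookup that creates intermediate dicts as needed -- might look
# like the following; this is an assumption, not the actual helper.
def _get_or_create(obj: dict, path: str, default=None):
    # Walk all but the last path segment, creating intermediate dicts.
    *parents, leaf = path.split(".")
    for key in parents:
        obj = obj.setdefault(key, {})
    # Set the leaf to `default` (an empty dict when unspecified) if it is
    # missing, and return whatever now lives there.
    return obj.setdefault(leaf, {} if default is None else default)


# _get_or_create(job, "metadata.labels")                       -> ensures job["metadata"]["labels"] exists
# _get_or_create(job, "spec.template.spec.restartPolicy", "Never")  -> defaults without overriding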