def test_generate_job_spec_image_pull_secrets(self, tmpdir):
    """Verify the precedence used to resolve ``imagePullSecrets`` in a job spec.

    Sources are checked from highest to lowest priority: the explicit value
    on the run config, the run config's job template, the explicit value on
    the agent, and finally the agent's job template.
    """

    def rendered_secrets(config):
        # Generate a fresh job spec and read the secrets off its pod spec.
        spec = self.agent.generate_job_spec(self.build_flow_run(config))
        return spec["spec"]["template"]["spec"]["imagePullSecrets"]

    agent_template_file = str(tmpdir.join("job.yaml"))
    template = self.read_default_template()
    pod_spec = template["spec"]["template"]["spec"]

    # The agent's template (written to disk) carries its own marker value.
    pod_spec["imagePullSecrets"] = [{"name": "on-agent-template"}]
    with open(agent_template_file, "w") as handle:
        yaml.safe_dump(template, handle)
    self.agent.image_pull_secrets = ["on-agent"]
    self.agent.job_template_path = agent_template_file

    # The same dict is re-marked and handed to the run config as its template.
    pod_spec["imagePullSecrets"] = [{"name": "on-run-config-template"}]
    run_config = KubernetesRun(
        job_template=template,
        image_pull_secrets=["on-run-config"],
    )

    # 1. Explicit value on the run config wins.
    assert rendered_secrets(run_config) == [{"name": "on-run-config"}]

    # 2. Next comes the job template attached to the run config.
    run_config.image_pull_secrets = None
    assert rendered_secrets(run_config) == [{"name": "on-run-config-template"}]

    # An explicit None in the run config's template is still honoured.
    run_config.job_template["spec"]["template"]["spec"]["imagePullSecrets"] = None
    assert rendered_secrets(run_config) is None

    # 3. Explicit value on the agent: first with the key absent from the
    #    run config's template ...
    run_config.job_template["spec"]["template"]["spec"].pop("imagePullSecrets")
    assert rendered_secrets(run_config) == [{"name": "on-agent"}]

    #    ... then with no run-config template at all.
    run_config.job_template = None
    assert rendered_secrets(run_config) == [{"name": "on-agent"}]

    # 4. Finally the job template configured on the agent.
    self.agent.image_pull_secrets = None
    assert rendered_secrets(run_config) == [{"name": "on-agent-template"}]
def test_service_account_name_and_image_pull_secrets():
    """Check storage of ``service_account_name`` and ``image_pull_secrets``."""
    populated = KubernetesRun(
        service_account_name="my-account",
        image_pull_secrets=("a", "b", "c"),
    )
    assert populated.service_account_name == "my-account"
    # A tuple passed in is expected to compare equal to a list afterwards.
    assert populated.image_pull_secrets == ["a", "b", "c"]

    # An empty list must stay an empty list rather than collapsing to None.
    empty = KubernetesRun(image_pull_secrets=[])
    assert empty.image_pull_secrets == []
def test_cpu_limit_and_request_acceptable_types():
    """Check the CPU fields for unset, string, and float inputs."""
    unset = KubernetesRun()
    assert unset.cpu_limit is None
    assert unset.cpu_request is None

    # String quantities are expected back unchanged.
    from_strings = KubernetesRun(cpu_limit="200m", cpu_request="100m")
    assert from_strings.cpu_limit == "200m"
    assert from_strings.cpu_request == "100m"

    # Float quantities are expected back as their string form.
    from_floats = KubernetesRun(cpu_limit=0.5, cpu_request=0.1)
    assert from_floats.cpu_limit == "0.5"
    assert from_floats.cpu_request == "0.1"
def test_generate_job_spec_service_account_name(self, tmpdir):
    """Verify the precedence used to resolve ``serviceAccountName`` in a job spec.

    Sources are checked from highest to lowest priority: the explicit value
    on the run config, the run config's job template, the explicit value on
    the agent, and finally the agent's job template.
    """

    def rendered_account(config):
        # Generate a fresh job spec and read the account off its pod spec.
        spec = self.agent.generate_job_spec(self.build_flow_run(config))
        return spec["spec"]["template"]["spec"]["serviceAccountName"]

    agent_template_file = str(tmpdir.join("job.yaml"))
    template = self.read_default_template()
    pod_spec = template["spec"]["template"]["spec"]

    # The agent's template (written to disk) carries its own marker value.
    pod_spec["serviceAccountName"] = "on-agent-template"
    with open(agent_template_file, "w") as handle:
        yaml.safe_dump(template, handle)
    self.agent.service_account_name = "on-agent"
    self.agent.job_template_path = agent_template_file

    # The same dict is re-marked and handed to the run config as its template.
    pod_spec["serviceAccountName"] = "on-run-config-template"
    run_config = KubernetesRun(
        job_template=template,
        service_account_name="on-run-config",
    )

    # 1. Explicit value on the run config wins.
    assert rendered_account(run_config) == "on-run-config"

    # 2. Next comes the job template attached to the run config.
    run_config.service_account_name = None
    assert rendered_account(run_config) == "on-run-config-template"

    # An explicit None in the run config's template is still honoured.
    run_config.job_template["spec"]["template"]["spec"]["serviceAccountName"] = None
    assert rendered_account(run_config) is None

    # 3. Explicit value on the agent: first with the key absent from the
    #    run config's template ...
    run_config.job_template["spec"]["template"]["spec"].pop("serviceAccountName")
    assert rendered_account(run_config) == "on-agent"

    #    ... then with no run-config template at all.
    run_config.job_template = None
    assert rendered_account(run_config) == "on-agent"

    # 4. Finally the job template configured on the agent.
    self.agent.service_account_name = None
    assert rendered_account(run_config) == "on-agent-template"
def test_generate_job_spec_image_pull_secrets_empty_string_in_runconfig(
    self, tmpdir
):
    """Regression test for issue #5001.

    An empty-string ``image_pull_secrets`` on the run config must not
    produce an ``imagePullSecrets`` entry in the generated pod spec.
    """
    flow_run = self.build_flow_run(KubernetesRun(image_pull_secrets=""))
    fresh_agent = KubernetesAgent(namespace="testing")

    pod_spec = fresh_agent.generate_job_spec(flow_run)["spec"]["template"]["spec"]

    assert "imagePullSecrets" not in pod_spec
def test_generate_job_spec_prefect_logging_level_environment_variable(
    self,
    config,
    agent_env_vars,
    run_config_env_vars,
    expected_logging_level,
    tmpdir,
    backend,
):
    """Check that ``PREFECT__LOGGING__LEVEL`` follows the expected precedence.

    The temporary config, agent env vars and run-config env vars are all
    supplied by the test parameters, together with the value the job's
    container environment is expected to end up with.
    """
    with set_temporary_config(config):
        job_yaml = str(tmpdir.join("job.yaml"))
        template = self.read_default_template()
        first_container = template["spec"]["template"]["spec"]["containers"][0]
        # Make sure the template has an "env" list before it is written out.
        first_container.setdefault("env", [])
        with open(job_yaml, "w") as handle:
            yaml.safe_dump(template, handle)

        self.agent.job_template_path = job_yaml
        self.agent.env_vars = agent_env_vars

        flow_run = self.build_flow_run(
            KubernetesRun(image="test-image", env=run_config_env_vars)
        )
        job = self.agent.generate_job_spec(flow_run)

        container_env = job["spec"]["template"]["spec"]["containers"][0]["env"]
        by_name = {entry["name"]: entry["value"] for entry in container_env}
        assert by_name["PREFECT__LOGGING__LEVEL"] == expected_logging_level
def test_local_job_template_path(tmpdir, scheme):
    """Check that a local ``job_template_path`` is loaded into ``job_template``.

    After construction the run config is expected to hold the parsed
    template and to have ``job_template_path`` reset to ``None``.
    """
    expected_template = {
        "apiVersion": "batch/v1",
        "kind": "Job",
        "metadata": {"labels": {"example": "foo"}},
    }
    path = str(tmpdir.join("test.yaml"))
    with open(path, "w") as handle:
        yaml.safe_dump(expected_template, handle)

    if scheme is not None:
        # With a scheme, unix-style slashes are required, so drop any drive
        # prefix and normalise the separators.
        drive_less = os.path.splitdrive(path)[1]
        job_template_path = f"{scheme}://" + drive_less.replace("\\", "/")
    else:
        job_template_path = path

    config = KubernetesRun(job_template_path=job_template_path)

    assert config.job_template_path is None
    assert config.job_template == expected_template
def test_generate_job_spec_uses_job_template_provided_in_run_config(self):
    """Check that a template passed on the run config reaches the job spec."""
    template = self.read_default_template()
    # Tag the template with a label that can be looked for in the output.
    metadata = template.setdefault("metadata", {})
    metadata.setdefault("labels", {})["TEST"] = "VALUE"

    run_config = KubernetesRun(job_template=template)
    job = self.agent.generate_job_spec(self.build_flow_run(run_config))

    assert job["metadata"]["labels"]["TEST"] == "VALUE"
def test_generate_job_spec_metadata(self, tmpdir):
    """Check the name and labels placed on the generated job and its pod.

    Labels already present in the agent's template must be kept, and the
    ``prefect.io/*`` labels must appear on both the job and the pod template.
    """
    job_yaml = str(tmpdir.join("job.yaml"))
    template = self.read_default_template()

    job_meta = template.setdefault("metadata", {})
    job_meta.setdefault("labels", {})["JOB_LABEL"] = "VALUE1"
    pod_meta = template["spec"]["template"].setdefault("metadata", {})
    pod_meta.setdefault("labels", {})["POD_LABEL"] = "VALUE2"

    with open(job_yaml, "w") as handle:
        yaml.safe_dump(template, handle)
    self.agent.job_template_path = job_yaml

    flow_run = self.build_flow_run(KubernetesRun())
    job = self.agent.generate_job_spec(flow_run)

    # The identifier is read back from the job itself, so only its presence
    # on both label sets is being checked, not its value.
    prefect_labels = {
        "prefect.io/identifier": job["metadata"]["labels"]["prefect.io/identifier"],
        "prefect.io/flow_run_id": flow_run.id,
        "prefect.io/flow_id": flow_run.flow.id,
    }

    assert job["metadata"]["name"]
    assert job["metadata"]["labels"] == {"JOB_LABEL": "VALUE1", **prefect_labels}
    assert job["spec"]["template"]["metadata"]["labels"] == {
        "POD_LABEL": "VALUE2",
        **prefect_labels,
    }
def SplitgraphKubernetesRun(
    image: str = None,
    env: dict = None,
    cpu_limit: Union[float, str] = None,
    cpu_request: Union[float, str] = None,
    memory_limit: str = None,
    memory_request: str = None,
    service_account_name: str = None,
    image_pull_secrets: Iterable[str] = None,
    labels: Iterable[str] = None,
) -> "KubernetesRun":
    """Build a ``KubernetesRun`` that uses the packaged job template.

    The template is read from ``job_template.yaml`` inside the
    ``dilib.prefect.run_configs`` package and parsed with ``yaml.safe_load``.
    Every argument is forwarded unchanged to ``KubernetesRun`` under the
    keyword of the same name.

    Returns:
        The configured ``KubernetesRun`` instance. (The annotation previously
        claimed ``None`` even though a run config is returned.)
    """
    job_template = yaml.safe_load(
        pkgutil.get_data("dilib.prefect.run_configs", "job_template.yaml")
    )
    return KubernetesRun(
        job_template=job_template,
        image=image,
        env=env,
        cpu_limit=cpu_limit,
        cpu_request=cpu_request,
        memory_limit=memory_limit,
        memory_request=memory_request,
        service_account_name=service_account_name,
        image_pull_secrets=image_pull_secrets,
        labels=labels,
    )
def test_local_agent_deploy_unsupported_run_config(monkeypatch):
    """Check that ``LocalAgent`` rejects a flow run carrying a ``KubernetesRun``.

    Deployment must raise ``TypeError`` with the expected message, without
    spawning a process or recording one on the agent.
    """
    fake_popen = MagicMock()
    monkeypatch.setattr("prefect.agent.local.agent.Popen", fake_popen)
    agent = LocalAgent()

    expected_message = (
        "`run_config` of type `KubernetesRun`, only `LocalRun` is supported"
    )
    with pytest.raises(TypeError, match=expected_message):
        flow_payload = {
            "storage": Local().serialize(),
            "id": "foo",
            "core_version": "0.13.0",
        }
        flow_run = GraphQLResult(
            {
                "id": "id",
                "flow": flow_payload,
                "run_config": KubernetesRun().serialize(),
            },
        )
        agent.deploy_flow(flow_run=flow_run)

    assert not fake_popen.called
    assert len(agent.processes) == 0
def test_generate_job_spec_image_pull_secrets_empty_string_in_env(
    self, tmpdir, monkeypatch
):
    """Regression test for issue #5001.

    An empty ``IMAGE_PULL_SECRETS`` environment variable must not produce an
    ``imagePullSecrets`` entry in the generated pod spec.
    """
    flow_run_config = KubernetesRun()
    # The variable is set before the agent is constructed.
    monkeypatch.setenv("IMAGE_PULL_SECRETS", "")
    fresh_agent = KubernetesAgent(namespace="testing")

    flow_run = self.build_flow_run(flow_run_config)
    pod_spec = fresh_agent.generate_job_spec(flow_run)["spec"]["template"]["spec"]

    assert "imagePullSecrets" not in pod_spec
def build_standard_config(
    self,
    image_name: str,
    flow_path: str,
    env: dict = None,
    cpu_limit: Union[float, str] = None,
    cpu_request: Union[float, str] = None,
    memory_limit: str = None,
    memory_request: str = None,
    service_account_name: str = None,
    image_pull_secrets: Iterable[str] = None,
    labels: Iterable[str] = None,
) -> Tuple[SplitgraphKubernetesFlowConfig, SplitgraphKubernetesFlowReg]:
    """Build the flow and registration configuration for a Kubernetes flow.

    The image reference is assembled from the ``REGISTRY_BASE`` and
    ``IMAGE_NAME_BASE`` environment variables plus ``image_name``; the tag
    comes from ``IMAGE_VERSION`` and the registry URL from ``REGISTRY``.
    The job template is read from ``job_template.yaml`` in the
    ``dilib.splitgraph`` package.

    Args:
        image_name: Final path component of the image reference.
        flow_path: Path passed to the ``Docker`` storage as ``path``.
        env: Extra environment variables for the run. May be ``None``
            (the default), in which case only the context defaults are used.
            Keys present in ``self.dilib_context.default_env`` take
            precedence over the same keys in ``env``.
        cpu_limit, cpu_request, memory_limit, memory_request,
        service_account_name, image_pull_secrets, labels: Forwarded
            unchanged to ``KubernetesRun``.

    Returns:
        A ``(flow_config, reg_config)`` pair of dicts: the first holds the
        ``run_config`` and ``storage`` objects, the second the
        ``project_name``, ``build`` flag and ``labels`` entries.
    """
    # NOTE(review): unset variables are interpolated as the text "None"
    # in the image reference below; confirm whether that should be an error.
    image_name_base = os.environ.get('IMAGE_NAME_BASE')
    registry = os.environ.get('REGISTRY')
    registry_base = os.environ.get('REGISTRY_BASE')
    image_version = os.environ.get('IMAGE_VERSION')
    image = f'{registry_base}/{image_name_base}/{image_name}'

    storage = Docker(
        image_name=image,
        image_tag=image_version,
        local_image=True,
        stored_as_script=True,
        path=flow_path,
        registry_url=registry,
    )

    job_template = yaml.safe_load(
        pkgutil.get_data("dilib.splitgraph", "job_template.yaml")
    )

    # ``env`` defaults to None; unpacking None with ``**`` raises TypeError,
    # so fall back to an empty mapping before merging in the defaults.
    merged_env = {**(env or {}), **self.dilib_context.default_env}

    run_config = KubernetesRun(
        job_template=job_template,
        image=image,
        env=merged_env,
        cpu_limit=cpu_limit,
        cpu_request=cpu_request,
        memory_limit=memory_limit,
        memory_request=memory_request,
        service_account_name=service_account_name,
        image_pull_secrets=image_pull_secrets,
        labels=labels,
    )

    flow_config = dict(
        run_config=run_config,
        storage=storage,
    )
    reg_config = dict(
        project_name=self.dilib_context.project_name,
        build=False,
        labels=[
            "k8s",
            f"prefect:{self.dilib_context.prefect_env}",
        ],
    )
    return flow_config, reg_config
def configure_run_config(
    cluster: Cluster,
    recipe_bakery: RecipeBakery,
    recipe_name: str,
    secrets: Dict,
):
    """Return the Prefect run config matching the cluster's type.

    A Fargate cluster yields an ``ECSRun`` and an AKS cluster yields a
    ``KubernetesRun``; both use ``cluster.worker_image`` and are labelled
    with ``recipe_bakery.id``.

    Raises:
        UnsupportedClusterType: If ``cluster.type`` is neither
            ``FARGATE_CLUSTER`` nor ``AKS_CLUSTER``.
    """
    if cluster.type == FARGATE_CLUSTER:
        task_definition = {
            "networkMode": "awsvpc",
            "cpu": 2048,
            "memory": 16384,
            "containerDefinitions": [{"name": "flow"}],
            "executionRoleArn": cluster.cluster_options.execution_role_arn,
        }
        task_tags = [
            {"key": "Project", "value": "pangeo-forge"},
            {"key": "Recipe", "value": recipe_name},
        ]
        return ECSRun(
            image=cluster.worker_image,
            labels=[recipe_bakery.id],
            task_definition=task_definition,
            run_task_kwargs={"tags": task_tags},
        )

    if cluster.type == AKS_CLUSTER:
        # Minimal job template: one container named "flow", plus a job
        # annotation marking it as not safe to evict.
        job_template = yaml.safe_load(
            """
            apiVersion: batch/v1
            kind: Job
            metadata:
              annotations:
                "cluster-autoscaler.kubernetes.io/safe-to-evict": "false"
            spec:
              template:
                spec:
                  containers:
                    - name: flow
            """
        )
        # The connection string is looked up in ``secrets`` under the key
        # named by the cluster's flow storage options.
        connection_string = secrets[cluster.flow_storage_options.secret]
        return KubernetesRun(
            job_template=job_template,
            image=cluster.worker_image,
            labels=[recipe_bakery.id],
            memory_request="10000Mi",
            cpu_request="2048m",
            env={"AZURE_STORAGE_CONNECTION_STRING": connection_string},
        )

    raise UnsupportedClusterType
def run_config(self) -> RunConfig:
    """Build a ``KubernetesRun`` from this object's Kubernetes settings.

    CPU and memory requests, the image and the single agent label come from
    the instance's private attributes; the environment comes from
    ``self._generate_env()``.
    """
    return KubernetesRun(
        cpu_request=self._kubernetes_cpu,
        memory_request=self._kubernetes_memory,
        image=self._image,
        labels=[self._agent],
        env=self._generate_env(),
    )
def test_generate_job_spec_environment_variables(self, tmpdir):
    """Check that environment variables are merged in precedence order.

    Each variable is defined at two adjacent levels so overrides are visible:

    - CUSTOM1 and CUSTOM2 come from the job template
    - CUSTOM2 and CUSTOM3 come from the agent
    - CUSTOM3 and CUSTOM4 come from the RunConfig
    """
    job_yaml = str(tmpdir.join("job.yaml"))
    template = self.read_default_template()
    first_container = template["spec"]["template"]["spec"]["containers"][0]
    template_env = first_container.setdefault("env", [])
    template_env.append({"name": "CUSTOM1", "value": "VALUE1"})
    template_env.append({"name": "CUSTOM2", "value": "VALUE2"})
    with open(job_yaml, "w") as handle:
        yaml.safe_dump(template, handle)

    self.agent.job_template_path = job_yaml
    self.agent.env_vars = {"CUSTOM2": "OVERRIDE2", "CUSTOM3": "VALUE3"}

    run_config = KubernetesRun(
        image="test-image",
        env={"CUSTOM3": "OVERRIDE3", "CUSTOM4": "VALUE4"},
    )
    flow_run = self.build_flow_run(run_config)
    job = self.agent.generate_job_spec(flow_run)

    container_env = job["spec"]["template"]["spec"]["containers"][0]["env"]
    actual = {entry["name"]: entry["value"] for entry in container_env}

    expected = {
        "PREFECT__CLOUD__API": prefect.config.cloud.api,
        "PREFECT__CLOUD__AUTH_TOKEN": prefect.config.cloud.agent.auth_token,
        "PREFECT__CLOUD__USE_LOCAL_SECRETS": "false",
        "PREFECT__CONTEXT__FLOW_RUN_ID": flow_run.id,
        "PREFECT__CONTEXT__FLOW_ID": flow_run.flow.id,
        "PREFECT__CONTEXT__IMAGE": "test-image",
        "PREFECT__LOGGING__LOG_TO_CLOUD": str(self.agent.log_to_cloud).lower(),
        "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudFlowRunner",
        "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudTaskRunner",
        # Only the template defines CUSTOM1.
        "CUSTOM1": "VALUE1",
        # The agent's value replaces the template's.
        "CUSTOM2": "OVERRIDE2",
        # The RunConfig's value replaces the agent's.
        "CUSTOM3": "OVERRIDE3",
        # Only the RunConfig defines CUSTOM4.
        "CUSTOM4": "VALUE4",
    }
    assert actual == expected
def test_generate_job_spec_sets_image_pull_policy_from_run_config(
    self, image_pull_policy
):
    """Check that the run config's pull policy lands on the first container."""
    run_config = KubernetesRun(
        job_template=self.read_default_template(),
        image_pull_policy=image_pull_policy,
    )

    job = self.agent.generate_job_spec(self.build_flow_run(run_config))

    first_container = job["spec"]["template"]["spec"]["containers"][0]
    assert first_container["imagePullPolicy"] == image_pull_policy
def test_generate_job_spec_image_pull_secrets_from_env(self, tmpdir, monkeypatch):
    """Check that ``IMAGE_PULL_SECRETS`` from the environment reaches the pod spec."""
    flow_run_config = KubernetesRun()
    # The variable is set before the agent is constructed.
    monkeypatch.setenv("IMAGE_PULL_SECRETS", "in-env")
    fresh_agent = KubernetesAgent(namespace="testing")

    flow_run = self.build_flow_run(flow_run_config)
    pod_spec = fresh_agent.generate_job_spec(flow_run)["spec"]["template"]["spec"]

    assert pod_spec["imagePullSecrets"] == [{"name": "in-env"}]
def test_job_template(kind):
    """Check that ``job_template`` accepts either a dict or its YAML text.

    When ``kind`` is ``dict`` the template is passed as-is; otherwise it is
    dumped to a YAML string first. Either way the run config is expected to
    expose the parsed dict.
    """
    expected_template = {
        "apiVersion": "batch/v1",
        "kind": "Job",
        "metadata": {"labels": {"example": "foo"}},
    }

    if kind is dict:
        template_arg = expected_template
    else:
        template_arg = yaml.safe_dump(expected_template)

    config = KubernetesRun(job_template=template_arg)

    assert config.job_template_path is None
    assert config.job_template == expected_template
def test_hash_flow():
    """Check which flow attributes affect ``PrefectCloudIntegration._hash_flow``.

    Storage, run config / environment and the task list must leave the hash
    unchanged; the flow name and the project name must change it.
    """
    flow = TEST_FLOW.copy()
    integration = prefect_saturn.PrefectCloudIntegration(
        prefect_cloud_project_name=TEST_PREFECT_PROJECT_NAME
    )

    def current_hash():
        # Hash the flow in whatever state the test has put it in so far.
        return integration._hash_flow(flow)

    with patch("prefect_saturn.core.Client", new=MockClient):
        baseline = current_hash()
        assert isinstance(baseline, str)
        assert len(baseline) > 0

        # Hashing the same flow twice gives the same value.
        repeated = current_hash()
        assert baseline == repeated

        # Swapping the storage does not affect the hash.
        flow.storage = Webhook(
            build_request_kwargs={},
            build_request_http_method="POST",
            get_flow_request_kwargs={},
            get_flow_request_http_method="GET",
        )
        assert baseline == current_hash()

        # Neither does setting a run config or an environment, whichever
        # of the two the module-level flags report as available.
        if RUN_CONFIG_AVAILABLE:
            flow.run_config = KubernetesRun()
        elif KUBE_JOB_ENV_AVAILABLE:
            flow.environment = KubernetesJobEnvironment()
        assert baseline == current_hash()

        # Adding a task leaves the hash as it was.
        @task
        def goodbye_task():
            logger = prefect.context.get("logger")
            logger.info("adios")

        flow.tasks = [hello_task, goodbye_task]
        with_extra_task = current_hash()
        assert isinstance(with_extra_task, str)
        assert len(with_extra_task) > 0
        assert with_extra_task == baseline

        # Renaming the flow changes the hash.
        flow.name = str(uuid.uuid4())
        after_rename = current_hash()
        assert after_rename != baseline

        # Changing the project name changes it again.
        integration.prefect_cloud_project_name = str(uuid.uuid4())
        after_project_change = current_hash()
        assert isinstance(after_project_change, str)
        assert len(after_project_change) > 0
        assert after_project_change != after_rename
def test_no_args():
    """Check the attribute values of a ``KubernetesRun`` built without arguments."""
    config = KubernetesRun()

    # Every optional field checked here is expected to be None.
    unset_fields = (
        "job_template_path",
        "job_template",
        "image",
        "env",
        "cpu_limit",
        "cpu_request",
        "memory_limit",
        "memory_request",
    )
    for field_name in unset_fields:
        assert getattr(config, field_name) is None

    # Labels are the exception: they compare equal to an empty set.
    assert config.labels == set()
def test_generate_job_spec_resources(self):
    """Check that CPU/memory requests and limits reach the container resources.

    Integer CPU values are expected to appear as strings in the job spec.
    """
    run_config = KubernetesRun(
        cpu_request=1,
        cpu_limit=2,
        memory_request="4G",
        memory_limit="8G",
    )

    job = self.agent.generate_job_spec(self.build_flow_run(run_config))

    first_container = job["spec"]["template"]["spec"]["containers"][0]
    assert first_container["resources"] == {
        "limits": {"cpu": "2", "memory": "8G"},
        "requests": {"cpu": "1", "memory": "4G"},
    }
def test_no_args():
    """Check the attribute values of a ``KubernetesRun`` built without arguments.

    This variant also covers ``service_account_name`` and
    ``image_pull_secrets``.
    """
    config = KubernetesRun()

    # Every optional field checked here is expected to be None.
    unset_fields = (
        "job_template_path",
        "job_template",
        "image",
        "env",
        "cpu_limit",
        "cpu_request",
        "memory_limit",
        "memory_request",
        "service_account_name",
        "image_pull_secrets",
    )
    for field_name in unset_fields:
        assert getattr(config, field_name) is None

    # Labels are the exception: they compare equal to an empty set.
    assert config.labels == set()
def test_get_flow_image_run_config_default_value_from_core_version():
    """Check the default image chosen for core version ``0.13.0``.

    With local storage and a run config that names no image, the image is
    expected to be the ``all_extras`` Prefect image for the flow's version.
    """
    flow = GraphQLResult(
        {
            "core_version": "0.13.0",
            "storage": Local().serialize(),
            "run_config": KubernetesRun().serialize(),
            "id": "id",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id"})

    assert get_flow_image(flow_run) == "prefecthq/prefect:all_extras-0.13.0"
def test_get_flow_image_run_config_image_on_RunConfig():
    """Check that an image named on the flow run's run config is used as-is."""
    flow = GraphQLResult(
        {
            "storage": Local().serialize(),
            "id": "id",
        }
    )
    # Here the run config sits on the flow run itself, not on the flow.
    serialized_run_config = KubernetesRun(image="myfancyimage").serialize()
    flow_run = GraphQLResult(
        {
            "flow": flow,
            "run_config": serialized_run_config,
            "id": "id",
        }
    )

    assert get_flow_image(flow_run) == "myfancyimage"
def test_environment_has_api_key_from_config(self, config_with_api_key):
    """Check that the configured API key and tenant ID reach the job environment.

    The key is expected under both ``PREFECT__CLOUD__API_KEY`` and
    ``PREFECT__CLOUD__AUTH_TOKEN``.
    """
    flow_run = self.build_flow_run(KubernetesRun())
    fresh_agent = KubernetesAgent(namespace="testing")

    job = fresh_agent.generate_job_spec(flow_run)

    container_env = job["spec"]["template"]["spec"]["containers"][0]["env"]
    by_name = {entry["name"]: entry["value"] for entry in container_env}

    assert by_name["PREFECT__CLOUD__API_KEY"] == "TEST_KEY"
    assert by_name["PREFECT__CLOUD__AUTH_TOKEN"] == "TEST_KEY"
    assert by_name["PREFECT__CLOUD__TENANT_ID"] == config_with_api_key.cloud.tenant_id
def test_get_flow_image_run_config_default_value_from_core_version(version):
    """Check the default image chosen for the parametrized core version.

    Anything after a ``+`` in the version is dropped from the expected tag,
    and a falsy version is expected to map to ``latest``.
    """
    flow = GraphQLResult(
        {
            "core_version": version,
            "storage": Local().serialize(),
            "run_config": KubernetesRun().serialize(),
            "id": "id",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id"})

    image = get_flow_image(flow_run)

    if version:
        expected_version = version.split("+")[0]
    else:
        expected_version = "latest"
    assert image == f"prefecthq/prefect:all_extras-{expected_version}"
def test_environment_has_tenant_id_from_server(self, config_with_api_key):
    """Check that a tenant ID obtained through the client reaches the job environment.

    ``cloud.tenant_id`` is cleared in the config and the agent client's
    ``_get_auth_tenant`` is mocked to return a fresh UUID; that value must
    appear as ``PREFECT__CLOUD__TENANT_ID`` alongside the API key.
    """
    flow_run = self.build_flow_run(KubernetesRun())
    server_tenant_id = uuid.uuid4()

    with set_temporary_config({"cloud.tenant_id": None}):
        fresh_agent = KubernetesAgent(namespace="testing")
        fresh_agent.client._get_auth_tenant = MagicMock(
            return_value=server_tenant_id
        )
        job = fresh_agent.generate_job_spec(flow_run)
        container_env = job["spec"]["template"]["spec"]["containers"][0]["env"]
        by_name = {entry["name"]: entry["value"] for entry in container_env}

    assert by_name["PREFECT__CLOUD__API_KEY"] == "TEST_KEY"
    assert by_name["PREFECT__CLOUD__AUTH_TOKEN"] == "TEST_KEY"
    assert by_name["PREFECT__CLOUD__TENANT_ID"] == server_tenant_id
def test_get_flow_image_run_config_docker_storage():
    """Check that the image is derived from Docker storage when present.

    The expected reference joins the storage's registry URL, image name and
    tag as ``registry/name:tag``.
    """
    docker_storage = Docker(
        registry_url="test",
        image_name="name",
        image_tag="tag",
    )
    flow = GraphQLResult(
        {
            "storage": docker_storage.serialize(),
            "run_config": KubernetesRun().serialize(),
            "id": "id",
        }
    )
    flow_run = GraphQLResult({"flow": flow, "id": "id"})

    assert get_flow_image(flow_run) == "test/name:tag"
def test_generate_job_spec_uses_job_template_path_provided_in_run_config(
    self, tmpdir, monkeypatch
):
    """Check that an ``agent://`` template path on the run config is loaded.

    The agent must read the template through ``read_bytes_from_path`` with
    the exact path given, and the label written into that template must
    show up on the generated job.
    """
    job_yaml = str(tmpdir.join("job.yaml"))
    template = self.read_default_template()
    # Tag the template with a label that can be looked for in the output.
    metadata = template.setdefault("metadata", {})
    metadata.setdefault("labels", {})["TEST"] = "VALUE"
    with open(job_yaml, "w") as handle:
        yaml.safe_dump(template, handle)

    template_path = f"agent://{job_yaml}"
    run_config = KubernetesRun(job_template_path=template_path)
    flow_run = self.build_flow_run(run_config)

    # Wrap the real reader so the call can be inspected while still working.
    spying_reader = MagicMock(wraps=read_bytes_from_path)
    monkeypatch.setattr(
        "prefect.agent.kubernetes.agent.read_bytes_from_path",
        spying_reader,
    )

    job = self.agent.generate_job_spec(flow_run)

    assert job["metadata"]["labels"]["TEST"] == "VALUE"
    assert spying_reader.call_args[0] == (template_path,)