Exemplo n.º 1
0
def test_bad_user_defined_k8s_config_tags():
    """An unknown entry under the user-defined k8s config tag raises a config error."""
    # "other" is the entry this test expects config validation to refuse.
    invalid_tags = {USER_DEFINED_K8S_CONFIG_KEY: {"other": {}}}

    @pipeline(tags=invalid_tags)
    def my_solid():
        pass

    with pytest.raises(DagsterInvalidConfigError):
        get_user_defined_k8s_config(my_solid.tags)
Exemplo n.º 2
0
def test_user_defined_k8s_config_tags():
    """Resources declared in solid tags show up under ``container_config``.

    Also checks that a solid without any k8s tags yields a config equal to a
    default-constructed ``UserDefinedDagsterK8sConfig``.
    """
    tagged_resources = {
        "requests": {"cpu": "250m", "memory": "64Mi"},
        "limits": {"cpu": "500m", "memory": "2560Mi"},
    }

    @solid(
        tags={
            USER_DEFINED_K8S_CONFIG_KEY: {
                "container_config": {"resources": tagged_resources}
            }
        }
    )
    def my_solid(_):
        pass

    tagged_config = get_user_defined_k8s_config(my_solid.tags)

    container_config = tagged_config.container_config
    assert container_config
    resources = container_config["resources"]
    assert resources

    # (section, key, expected value), checked one by one.
    expectations = (
        ("requests", "cpu", "250m"),
        ("requests", "memory", "64Mi"),
        ("limits", "cpu", "500m"),
        ("limits", "memory", "2560Mi"),
    )
    for section, key, expected in expectations:
        assert resources[section][key] == expected

    @solid
    def no_resource_tags_solid(_):
        pass

    untagged_config = get_user_defined_k8s_config(no_resource_tags_solid.tags)
    assert untagged_config == UserDefinedDagsterK8sConfig()
Exemplo n.º 3
0
def test_bad_user_defined_k8s_config_tags():
    """An unknown root entry in the k8s config tag is rejected with a specific message."""
    invalid_tags = {USER_DEFINED_K8S_CONFIG_KEY: {"other": {}}}

    @pipeline(tags=invalid_tags)
    def my_pipeline():
        pass

    # The error text must name the offending entry.
    expected_message = 'Received unexpected config entry "other" at the root'
    with pytest.raises(DagsterInvalidConfigError, match=expected_message):
        get_user_defined_k8s_config(my_pipeline.tags)
Exemplo n.º 4
0
def test_bad_deprecated_resource_tags():
    """An unknown section under the resource-requirements tag raises a config error."""
    # "other" is the section name this test expects validation to refuse.
    invalid_resource_tags = {
        K8S_RESOURCE_REQUIREMENTS_KEY: {
            "other": {"cpu": "250m", "memory": "64Mi"},
        }
    }

    @pipeline(tags=invalid_resource_tags)
    def resource_tags_pipeline():
        pass

    with pytest.raises(DagsterInvalidConfigError):
        get_user_defined_k8s_config(resource_tags_pipeline.tags)
Exemplo n.º 5
0
def _submit_task_k8s_job(app, pipeline_context, step, queue, priority,
                         known_state):
    """Submit ``step`` for execution as a Kubernetes job through a queued task.

    The step's tags are converted into a user-defined k8s config and the
    step's execution arguments are packed. Both are passed, together with the
    executor's job settings, to the task built by ``create_k8s_job_task(app)``.

    Returns:
        Whatever ``apply_async`` returns for the task signature.
    """
    user_defined_k8s_config = get_user_defined_k8s_config(step.tags)

    executor = pipeline_context.executor
    pipeline_origin = (
        pipeline_context.reconstructable_pipeline.get_python_origin())

    # Only this one step is executed by the submitted task.
    execute_step_args = ExecuteStepArgs(
        pipeline_origin=pipeline_origin,
        pipeline_run_id=pipeline_context.pipeline_run.run_id,
        step_keys_to_execute=[step.key],
        instance_ref=pipeline_context.instance.get_ref(),
        retry_mode=executor.retries.for_inner_plan(),
        known_state=known_state,
        should_verify_step=True,
    )

    task = create_k8s_job_task(app)
    signature_kwargs = {
        "execute_step_args_packed": pack_value(execute_step_args),
        "job_config_dict": executor.job_config.to_dict(),
        "job_namespace": executor.job_namespace,
        "user_defined_k8s_config_dict": user_defined_k8s_config.to_dict(),
        "load_incluster_config": executor.load_incluster_config,
        "kubeconfig_file": executor.kubeconfig_file,
    }
    task_signature = task.si(**signature_kwargs)

    # The routing key is derived from the target queue name.
    routing_key = "{}.execute_step_k8s_job".format(queue)
    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key=routing_key,
    )
Exemplo n.º 6
0
def test_tags_to_plan():
    """Tags attached with ``.tag(...)`` inside a pipeline reach the execution plan's step."""
    resource_tags = {
        USER_DEFINED_K8S_CONFIG_KEY: {
            "container_config": {
                "resources": {
                    "requests": {"cpu": "250m", "memory": "64Mi"},
                    "limits": {"cpu": "500m", "memory": "2560Mi"},
                }
            }
        }
    }

    @solid
    def blank(_):
        pass

    @pipeline
    def k8s_ready():
        blank.tag(resource_tags)()

    plan = create_execution_plan(k8s_ready)
    # The pipeline invokes one solid; the first step of the plan is inspected.
    steps = list(plan.step_dict.values())
    step = steps[0]

    user_defined_k8s_config = get_user_defined_k8s_config(step.tags)

    container_config = user_defined_k8s_config.container_config
    assert container_config
    resources = container_config["resources"]
    assert resources

    expectations = (
        ("requests", "cpu", "250m"),
        ("requests", "memory", "64Mi"),
        ("limits", "cpu", "500m"),
        ("limits", "memory", "2560Mi"),
    )
    for section, key, expected in expectations:
        assert resources[section][key] == expected
Exemplo n.º 7
0
def test_construct_dagster_k8s_job_with_user_defined_service_account_name():
    """A service account name from the job's k8s config tag overrides the job config's."""

    @graph
    def user_defined_k8s_service_account_name_tags_graph():
        pass

    tagged_job = user_defined_k8s_service_account_name_tags_graph.to_job(
        tags={
            USER_DEFINED_K8S_CONFIG_KEY: {
                "pod_spec_config": {
                    "service_account_name": "this-should-take-precedence",
                },
            },
        },
    )
    user_defined_k8s_config = get_user_defined_k8s_config(tagged_job.tags)

    # The job-level config carries a different name, which must lose.
    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="some-instance-configmap",
        service_account_name="this-should-be-overriden",
    )

    k8s_job = construct_dagster_k8s_job(
        cfg,
        ["foo", "bar"],
        "job",
        user_defined_k8s_config=user_defined_k8s_config,
    )
    job = k8s_job.to_dict()

    pod_spec = job["spec"]["template"]["spec"]
    service_account_name = pod_spec["service_account_name"]
    assert service_account_name == "this-should-take-precedence"
Exemplo n.º 8
0
def test_construct_dagster_k8s_job_with_user_defined_env_from():
    """User-defined ``envFrom`` entries are added alongside the job config's own sources."""

    @graph
    def user_defined_k8s_env_from_tags_graph():
        pass

    # The tag below uses camelCase keys (envFrom, configMapRef, secretRef);
    # the rendered job dict is read back with snake_case keys further down.
    user_env_from = [
        {
            "configMapRef": {
                "name": "user_config_map_ref",
                "optional": "True",
            }
        },
        {
            "secretRef": {
                "name": "user_secret_ref_one",
                "optional": "True"
            }
        },
        {
            "secretRef": {
                "name": "user_secret_ref_two",
                "optional": "False",
            },
            "prefix": "with_prefix",
        },
    ]
    tagged_job = user_defined_k8s_env_from_tags_graph.to_job(
        tags={
            USER_DEFINED_K8S_CONFIG_KEY: {
                "container_config": {"envFrom": user_env_from}
            }
        })
    user_defined_k8s_config = get_user_defined_k8s_config(tagged_job.tags)

    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="some-instance-configmap",
        env_config_maps=["config_map"],
        env_secrets=["secret"],
    )

    k8s_job = construct_dagster_k8s_job(
        cfg,
        ["foo", "bar"],
        "job",
        user_defined_k8s_config=user_defined_k8s_config,
    )
    job = k8s_job.to_dict()

    env_from = job["spec"]["template"]["spec"]["containers"][0]["env_from"]

    # Index every source by the name of its config map or secret reference.
    env_from_mapping = {}
    for env_var in env_from:
        ref = env_var.get("config_map_ref") or env_var.get("secret_ref")
        env_from_mapping[ref.get("name")] = env_var

    assert len(env_from_mapping) == 5
    expected_names = (
        "config_map",
        "user_config_map_ref",
        "secret",
        "user_secret_ref_one",
        "user_secret_ref_two",
    )
    for name in expected_names:
        assert env_from_mapping[name]
Exemplo n.º 9
0
def _submit_task_k8s_job(app, pipeline_context, step, queue, priority):
    """Submit ``step`` for execution as a Kubernetes job through a queued task.

    Gathers the run, repository, and executor settings the task needs, plus
    the user-defined k8s config derived from the step's tags, and dispatches
    the task built by ``create_k8s_job_task(app)`` asynchronously.

    Returns:
        Whatever ``apply_async`` returns for the task signature.
    """
    user_defined_k8s_config = get_user_defined_k8s_config(step.tags)

    task = create_k8s_job_task(app)

    pipeline = pipeline_context.pipeline
    recon_repo = pipeline.get_reconstructable_repository()
    run = pipeline_context.pipeline_run
    executor = pipeline_context.executor

    signature_kwargs = {
        "instance_ref_dict": pipeline_context.instance.get_ref().to_dict(),
        "step_keys": [step.key],
        "run_config": run.run_config,
        "mode": run.mode,
        "repo_name": recon_repo.get_definition().name,
        "repo_location_name": executor.repo_location_name,
        "run_id": run.run_id,
        "job_config_dict": executor.job_config.to_dict(),
        "job_namespace": executor.job_namespace,
        "user_defined_k8s_config_dict": user_defined_k8s_config.to_dict(),
        "retries_dict": executor.retries.for_inner_plan().to_config(),
        "pipeline_origin_packed": pack_value(pipeline.get_origin()),
        "load_incluster_config": executor.load_incluster_config,
        "kubeconfig_file": executor.kubeconfig_file,
    }
    task_signature = task.si(**signature_kwargs)

    # The routing key is derived from the target queue name.
    routing_key = "{}.execute_step_k8s_job".format(queue)
    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key=routing_key,
    )
Exemplo n.º 10
0
def test_backcompat_resource_tags():
    """Resources given under the resource-requirements tag key land in ``container_config``."""
    legacy_resources = {
        "requests": {"cpu": "250m", "memory": "64Mi"},
        "limits": {"cpu": "500m", "memory": "2560Mi"},
    }

    @solid(tags={K8S_RESOURCE_REQUIREMENTS_KEY: legacy_resources})
    def resource_tags_solid(_):
        pass

    user_defined_k8s_config = get_user_defined_k8s_config(
        resource_tags_solid.tags)

    container_config = user_defined_k8s_config.container_config
    assert container_config
    resources = container_config["resources"]
    assert resources

    expectations = (
        ("requests", "cpu", "250m"),
        ("requests", "memory", "64Mi"),
        ("limits", "cpu", "500m"),
        ("limits", "memory", "2560Mi"),
    )
    for section, key, expected in expectations:
        assert resources[section][key] == expected
Exemplo n.º 11
0
def test_construct_dagster_k8s_job_with_user_defined_env():
    """User-defined ``env`` entries from the job tags appear in the container's env."""

    @graph
    def user_defined_k8s_env_tags_graph():
        pass

    user_env = [
        {"name": "ENV_VAR_1", "value": "one"},
        {"name": "ENV_VAR_2", "value": "two"},
        {
            "name": "DD_AGENT_HOST",
            "valueFrom": {"fieldRef": {"fieldPath": "status.hostIP"}},
        },
    ]
    tagged_job = user_defined_k8s_env_tags_graph.to_job(
        tags={
            USER_DEFINED_K8S_CONFIG_KEY: {
                "container_config": {"env": user_env}
            }
        })
    user_defined_k8s_config = get_user_defined_k8s_config(tagged_job.tags)

    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="some-instance-configmap",
    )

    k8s_job = construct_dagster_k8s_job(
        cfg,
        ["foo", "bar"],
        "job",
        user_defined_k8s_config=user_defined_k8s_config,
    )
    job = k8s_job.to_dict()

    env = job["spec"]["template"]["spec"]["containers"][0]["env"]

    # Index by variable name, then strip None-valued fields before comparing.
    env_by_name = {}
    for env_var in env:
        env_by_name[env_var["name"]] = env_var
    env_mapping = remove_none_recursively(env_by_name)

    # Has DAGSTER_HOME and three additional env vars
    assert len(env_mapping) == 4
    assert env_mapping["ENV_VAR_1"]["value"] == "one"
    assert env_mapping["ENV_VAR_2"]["value"] == "two"

    expected_value_from = {"field_ref": {"field_path": "status.hostIP"}}
    assert env_mapping["DD_AGENT_HOST"]["value_from"] == expected_value_from
Exemplo n.º 12
0
def test_valid_job_format_with_backcompat_resources(run_launcher):
    """The rendered job YAML matches ``EXPECTED_JOB_SPEC`` when resources come from the resource-requirements tag."""
    docker_image = test_project_docker_image()

    env_path = os.path.join(test_project_environments_path(), 'env.yaml')
    run_config = load_yaml_from_path(env_path)
    pipeline_name = 'demo_pipeline'
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    resource_requirements = {
        'requests': {'cpu': '250m', 'memory': '64Mi'},
        'limits': {'cpu': '500m', 'memory': '2560Mi'},
    }
    tags = validate_tags({K8S_RESOURCE_REQUIREMENTS_KEY: resource_requirements})
    user_defined_k8s_config = get_user_defined_k8s_config(tags)

    # The job and its pod share the same run-derived name.
    job_name = pod_name = 'dagster-run-%s' % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=['dagster-graphql'],
        args=[
            '-p',
            'executeRunInProcess',
            '-v',
            seven.json.dumps({'runId': run.run_id}),
        ],
        job_name=job_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=pod_name,
        component='run_coordinator',
    )

    actual_spec = yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False,
    ).strip()

    # Indentation inside this literal is part of the expected YAML.
    resources_yaml = '''
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi'''
    expected_spec = EXPECTED_JOB_SPEC.format(
        run_id=run.run_id,
        job_image=docker_image,
        image_pull_policy=image_pull_policy(),
        dagster_version=dagster_version,
        resources=resources_yaml,
    ).strip()

    assert (actual_spec == expected_spec)
Exemplo n.º 13
0
def test_valid_job_format_with_backcompat_resources(run_launcher):
    """The rendered job YAML matches ``EXPECTED_JOB_SPEC`` when resources come from the resource-requirements tag."""
    docker_image = test_project_docker_image()

    env_path = os.path.join(test_project_environments_path(), "env.yaml")
    run_config = load_yaml_from_path(env_path)
    pipeline_name = "demo_pipeline"
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    resource_requirements = {
        "requests": {"cpu": "250m", "memory": "64Mi"},
        "limits": {"cpu": "500m", "memory": "2560Mi"},
    }
    tags = validate_tags({K8S_RESOURCE_REQUIREMENTS_KEY: resource_requirements})
    user_defined_k8s_config = get_user_defined_k8s_config(tags)

    # The job and its pod share the same run-derived name.
    job_name = pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster-graphql"],
        args=[
            "-p",
            "executeRunInProcess",
            "-v",
            seven.json.dumps({"runId": run.run_id}),
        ],
        job_name=job_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=pod_name,
        component="run_coordinator",
    )

    actual_spec = yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False,
    ).strip()

    # Indentation inside this literal is part of the expected YAML.
    resources_yaml = """
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi"""
    expected_spec = EXPECTED_JOB_SPEC.format(
        run_id=run.run_id,
        job_image=docker_image,
        image_pull_policy=image_pull_policy(),
        dagster_version=dagster_version,
        resources=resources_yaml,
    ).strip()

    assert (actual_spec == expected_spec)
Exemplo n.º 14
0
def test_construct_dagster_k8s_job_with_user_defined_volume_mounts_snake_case():
    """User-defined volume mounts given under a snake_case key are added to the container."""

    @graph
    def user_defined_k8s_volume_mounts_tags_graph():
        pass

    # The top-level key is snake_case ("volume_mounts") while each mount's
    # own fields remain camelCase.
    user_volume_mounts = [
        {
            "mountPath": "mount_path",
            "mountPropagation": "mount_propagation",
            "name": "a_volume_mount_one",
            "readOnly": "False",
            "subPath": "path/",
        },
        {
            "mountPath": "mount_path",
            "mountPropagation": "mount_propagation",
            "name": "a_volume_mount_two",
            "readOnly": "False",
            "subPathExpr": "path/",
        },
    ]
    tagged_job = user_defined_k8s_volume_mounts_tags_graph.to_job(
        tags={
            USER_DEFINED_K8S_CONFIG_KEY: {
                "container_config": {"volume_mounts": user_volume_mounts}
            }
        })
    user_defined_k8s_config = get_user_defined_k8s_config(tagged_job.tags)

    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="some-instance-configmap",
    )

    k8s_job = construct_dagster_k8s_job(
        cfg,
        ["foo", "bar"],
        "job",
        user_defined_k8s_config=user_defined_k8s_config,
    )
    job = k8s_job.to_dict()

    container = job["spec"]["template"]["spec"]["containers"][0]
    volume_mounts = container["volume_mounts"]

    # Index the mounts by name so each one can be looked up directly.
    volume_mounts_mapping = {}
    for volume_mount in volume_mounts:
        volume_mounts_mapping[volume_mount["name"]] = volume_mount

    assert len(volume_mounts_mapping) == 3
    for name in ("dagster-instance", "a_volume_mount_one", "a_volume_mount_two"):
        assert volume_mounts_mapping[name]
Exemplo n.º 15
0
    def launch_steps(
        self,
        step_contexts: List[IStepContext],
        known_state: KnownExecutionState,
    ):
        """Create a Kubernetes job that executes the single given step.

        Args:
            step_contexts: Must contain exactly one step context.
            known_state: Passed through to the step's ``ExecuteStepArgs``.

        Raises:
            AssertionError: If ``step_contexts`` does not hold exactly one item.
            Exception: If neither the job config nor the pipeline's repository
                origin supplies a container image.
        """
        assert len(step_contexts) == 1, "Launching multiple steps is not currently supported"
        step_context = step_contexts[0]
        step_key = step_context.step.key

        k8s_name_key = get_k8s_job_name(
            self.pipeline_context.plan_data.pipeline_run.run_id,
            step_key,
        )
        # The job and its pod share the same derived name.
        job_name = pod_name = "dagster-job-%s" % (k8s_name_key)

        pipeline_origin = (
            self.pipeline_context.reconstructable_pipeline.get_python_origin()
        )

        execute_step_args = ExecuteStepArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=self.pipeline_context.pipeline_run.run_id,
            step_keys_to_execute=[step_key],
            instance_ref=self.pipeline_context.instance.get_ref(),
            retry_mode=self.retries.for_inner_plan(),
            known_state=known_state,
            should_verify_step=True,
        )
        args = [
            "dagster",
            "api",
            "execute_step",
            serialize_dagster_namedtuple(execute_step_args),
        ]

        job_config = self._job_config
        if not job_config.job_image:
            # Fall back to the image recorded on the pipeline's repository origin.
            job_config = job_config.with_image(
                pipeline_origin.repository_origin.container_image)
            if not job_config.job_image:
                raise Exception(
                    "No image included in either executor config or the pipeline")

        # NOTE(review): the k8s config is built from empty tags, so tags on the
        # step are not consulted here -- confirm this is intended.
        user_defined_k8s_config = get_user_defined_k8s_config(frozentags())
        job = construct_dagster_k8s_job(
            job_config,
            args,
            job_name,
            user_defined_k8s_config,
            pod_name,
        )

        kubernetes.config.load_incluster_config()
        batch_api = kubernetes.client.BatchV1Api()
        batch_api.create_namespaced_job(body=job, namespace=self._job_namespace)
Exemplo n.º 16
0
def test_k8s_tag_op():
    """The k8s config read from ``my_op``'s tags sets the built job's container resources."""
    assert my_op
    user_defined_cfg = get_user_defined_k8s_config(my_op.tags)

    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="test",
    )
    job = construct_dagster_k8s_job(
        cfg,
        [],
        "job123",
        user_defined_k8s_config=user_defined_cfg,
    )

    container = job.to_dict()["spec"]["template"]["spec"]["containers"][0]
    # Only requests are expected to be set; limits come back as None.
    expected_resources = {
        "requests": {"cpu": "200m", "memory": "32Mi"},
        "limits": None,
    }
    assert container["resources"] == expected_resources
Exemplo n.º 17
0
def _submit_task_k8s_job(app, plan_context, step, queue, priority, known_state):
    """Submit ``step`` for execution as a Kubernetes job through a queued task.

    The executor's job config is used as-is when it names an image; otherwise
    the container image from the pipeline's repository origin is applied.

    Raises:
        Exception: If no image is available from either source.

    Returns:
        Whatever ``apply_async`` returns for the task signature.
    """
    user_defined_k8s_config = get_user_defined_k8s_config(step.tags)

    pipeline_origin = plan_context.reconstructable_pipeline.get_python_origin()
    executor = plan_context.executor

    # Only this one step is executed by the submitted task.
    execute_step_args = ExecuteStepArgs(
        pipeline_origin=pipeline_origin,
        pipeline_run_id=plan_context.pipeline_run.run_id,
        step_keys_to_execute=[step.key],
        instance_ref=plan_context.instance.get_ref(),
        retry_mode=executor.retries.for_inner_plan(),
        known_state=known_state,
        should_verify_step=True,
    )

    job_config = executor.job_config
    if not job_config.job_image:
        # Fall back to the image recorded on the pipeline's repository origin.
        job_config = job_config.with_image(pipeline_origin.repository_origin.container_image)
        if not job_config.job_image:
            raise Exception("No image included in either executor config or the dagster job")

    task = create_k8s_job_task(app)
    signature_kwargs = {
        "execute_step_args_packed": pack_value(execute_step_args),
        "job_config_dict": job_config.to_dict(),
        "job_namespace": executor.job_namespace,
        "user_defined_k8s_config_dict": user_defined_k8s_config.to_dict(),
        "load_incluster_config": executor.load_incluster_config,
        "job_wait_timeout": executor.job_wait_timeout,
        "kubeconfig_file": executor.kubeconfig_file,
    }
    task_signature = task.si(**signature_kwargs)

    # The routing key is derived from the target queue name.
    routing_key = "{}.execute_step_k8s_job".format(queue)
    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key=routing_key,
    )
Exemplo n.º 18
0
def test_user_defined_config_from_tags():
    """Every section of the k8s config tag is carried into the resulting config object."""
    config_args = {
        "container_config": {
            "resources": {
                "requests": {"cpu": "500m", "memory": "128Mi"},
                "limits": {"cpu": "1000m", "memory": "1Gi"},
            }
        },
        "pod_template_spec_metadata": {"pod_template_spec_key": "pod_template_spec_value"},
        "pod_spec_config": {"pod_spec_config_key": "pod_spec_config_value"},
        "job_config": {"job_config_key": "job_config_value"},
        "job_metadata": {"job_metadata_key": "job_metadata_value"},
        "job_spec_config": {"job_spec_config_key": "job_spec_config_value"},
    }

    @pipeline(tags={USER_DEFINED_K8S_CONFIG_KEY: config_args})
    def my_pipeline():
        pass

    # The parsed config must equal one built directly from the same arguments.
    parsed_config = get_user_defined_k8s_config(my_pipeline.tags)
    expected_config = UserDefinedDagsterK8sConfig(**config_args)
    assert parsed_config == expected_config
Exemplo n.º 19
0
def test_user_defined_config_from_tags():
    """Every section of the k8s config tag is carried into the resulting config object."""
    container_config = {
        "resources": {
            "requests": {"cpu": "500m", "memory": "128Mi"},
            "limits": {"cpu": "1000m", "memory": "1Gi"},
        }
    }
    config_args = {
        "container_config": container_config,
        "pod_template_spec_metadata": {
            "namespace": "pod_template_spec_value",
        },
        "pod_spec_config": {
            "dns_policy": "pod_spec_config_value",
        },
        "job_config": {
            "status": {"completed_indexes": "job_config_value"},
        },
        "job_metadata": {
            "namespace": "job_metadata_value",
        },
        "job_spec_config": {
            "backoff_limit": 120,
        },
    }

    @pipeline(tags={USER_DEFINED_K8S_CONFIG_KEY: config_args})
    def my_pipeline():
        pass

    # The parsed config must equal one built directly from the same arguments.
    parsed_config = get_user_defined_k8s_config(my_pipeline.tags)
    expected_config = UserDefinedDagsterK8sConfig(**config_args)
    assert parsed_config == expected_config
Exemplo n.º 20
0
    def launch_run(self, instance, run, external_pipeline):
        """Launch ``run`` as a Kubernetes "run_coordinator" job and report an engine event.

        When the pipeline's origin is a ``PipelineGrpcServerOrigin``, the job
        image and the pipeline origin are taken from the gRPC repository
        location handle, and the image is also exported to the job as
        ``DAGSTER_CURRENT_IMAGE``. Otherwise the image comes from the executor
        config's ``job_image`` and the origin from the pipeline itself.

        Raises:
            DagsterInvariantViolationError: If ``job_image`` is set in the
                executor config for a gRPC-loaded pipeline, if the repository
                location handle is not a ``GrpcServerRepositoryLocationHandle``,
                or if ``job_image`` is missing for any other pipeline.

        Returns:
            The ``run`` that was passed in.
        """
        check.inst_param(instance, "instance", DagsterInstance)
        check.inst_param(run, "run", PipelineRun)
        check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

        job_name = get_job_name_from_run_id(run.run_id)
        pod_name = job_name
        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

        # Extra environment is only supplied for gRPC-loaded pipelines.
        env_vars = None
        if not isinstance(external_pipeline.get_origin(), PipelineGrpcServerOrigin):
            job_image = exc_config.get("job_image")
            if not job_image:
                raise DagsterInvariantViolationError(
                    "Cannot find job_image in celery-k8s executor config."
                )
            pipeline_origin = external_pipeline.get_origin()
        else:
            if exc_config.get("job_image"):
                raise DagsterInvariantViolationError(
                    "Cannot specify job_image in executor config when loading pipeline "
                    "from GRPC server."
                )

            repository_location_handle = (
                external_pipeline.repository_handle.repository_location_handle
            )
            if not isinstance(repository_location_handle, GrpcServerRepositoryLocationHandle):
                found_type = type(repository_location_handle)
                raise DagsterInvariantViolationError(
                    "Expected RepositoryLocationHandle to be of type "
                    "GrpcServerRepositoryLocationHandle but found type {}".format(found_type)
                )

            job_image = repository_location_handle.get_current_image()
            env_vars = {"DAGSTER_CURRENT_IMAGE": job_image}

            repository_name = external_pipeline.repository_handle.repository_name
            repository_origin = repository_location_handle.get_repository_python_origin(
                repository_name
            )
            pipeline_origin = PipelinePythonOrigin(
                pipeline_name=external_pipeline.name,
                repository_origin=repository_origin,
            )

        job_config = DagsterK8sJobConfig(
            dagster_home=self.dagster_home,
            instance_config_map=self.instance_config_map,
            postgres_password_secret=self.postgres_password_secret,
            job_image=check.str_param(job_image, "job_image"),
            image_pull_policy=exc_config.get("image_pull_policy"),
            image_pull_secrets=exc_config.get("image_pull_secrets"),
            service_account_name=exc_config.get("service_account_name"),
            env_config_maps=exc_config.get("env_config_maps"),
            env_secrets=exc_config.get("env_secrets"),
        )

        user_defined_k8s_config = get_user_defined_k8s_config(frozentags(external_pipeline.tags))

        from dagster.cli.api import ExecuteRunArgs

        # instance_ref=None depends on DagsterInstance.get() returning the same instance
        # https://github.com/dagster-io/dagster/issues/2757
        execute_run_args = ExecuteRunArgs(
            pipeline_origin=pipeline_origin, pipeline_run_id=run.run_id, instance_ref=None,
        )
        input_json = serialize_dagster_namedtuple(execute_run_args)

        job = construct_dagster_k8s_job(
            job_config,
            command=["dagster"],
            args=["api", "execute_run_with_structured_logs", input_json],
            job_name=job_name,
            pod_name=pod_name,
            component="run_coordinator",
            user_defined_k8s_config=user_defined_k8s_config,
            env_vars=env_vars,
        )

        job_namespace = exc_config.get("job_namespace")

        kubernetes.client.BatchV1Api().create_namespaced_job(body=job, namespace=job_namespace)

        # Record where the job was launched so it can be located from the run's events.
        launch_metadata = [
            EventMetadataEntry.text(job_name, "Kubernetes Job name"),
            EventMetadataEntry.text(pod_name, "Kubernetes Pod name"),
            EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
            EventMetadataEntry.text(run.run_id, "Run ID"),
        ]
        self._instance.report_engine_event(
            "Kubernetes run_coordinator job launched",
            run,
            EngineEventData(launch_metadata),
            cls=self.__class__,
        )
        return run
Exemplo n.º 21
0
def test_valid_job_format_with_user_defined_k8s_config(run_launcher):
    """Check the Job YAML produced for a run carrying user-defined k8s config.

    The tags set container resources, pod template metadata (one annotation
    and one label) and a node affinity. The job built from them is dumped to
    YAML and compared against ``EXPECTED_CONFIGURED_JOB_SPEC`` rendered with
    the matching values.
    """
    docker_image = test_project_docker_image()

    env_yaml = os.path.join(test_project_environments_path(), "env.yaml")
    run = PipelineRun(
        pipeline_name="demo_pipeline",
        run_config=load_yaml_from_path(env_yaml),
    )

    # Each section of the user-defined config is spelled out on its own so it
    # can be read side by side with the expected YAML fragments further down.
    container_config = {
        "resources": {
            "requests": {"cpu": "250m", "memory": "64Mi"},
            "limits": {"cpu": "500m", "memory": "2560Mi"},
        }
    }
    pod_template_spec_metadata = {
        "annotations": {"cluster-autoscaler.kubernetes.io/safe-to-evict": "true"},
        "labels": {"spotinst.io/restrict-scale-down": "true"},
    }
    node_selector_term = {
        "matchExpressions": [
            {
                "key": "kubernetes.io/e2e-az-name",
                "operator": "In",
                "values": ["e2e-az1", "e2e-az2"],
            }
        ]
    }
    pod_spec_config = {
        "affinity": {
            "nodeAffinity": {
                "requiredDuringSchedulingIgnoredDuringExecution": {
                    "nodeSelectorTerms": [node_selector_term]
                }
            }
        }
    }

    tags = validate_tags(
        {
            USER_DEFINED_K8S_CONFIG_KEY: {
                "container_config": container_config,
                "pod_template_spec_metadata": pod_template_spec_metadata,
                "pod_spec_config": pod_spec_config,
            }
        }
    )
    user_defined_k8s_config = get_user_defined_k8s_config(tags)

    # The job and its pod share one run-derived name.
    run_name = "dagster-run-{}".format(run.run_id)
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster"],
        args=["api", "execute_run_with_structured_logs"],
        job_name=run_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=run_name,
        component="run_coordinator",
    )

    actual_spec = yaml.dump(
        remove_none_recursively(job.to_dict()), default_flow_style=False
    ).strip()

    # YAML fragments substituted into the template; the whitespace inside
    # them is part of the compared text.
    expected_resources = """
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi"""
    expected_annotations = """annotations:
        cluster-autoscaler.kubernetes.io/safe-to-evict: \'true\'"""
    expected_affinity = """affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/e2e-az-name
                operator: In
                values:
                - e2e-az1
                - e2e-az2"""

    expected_spec = EXPECTED_CONFIGURED_JOB_SPEC.format(
        run_id=run.run_id,
        job_image=docker_image,
        image_pull_policy=image_pull_policy(),
        dagster_version=dagster_version,
        labels="spotinst.io/restrict-scale-down: 'true'",
        resources=expected_resources,
        annotations=expected_annotations,
        affinity=expected_affinity,
    ).strip()

    assert actual_spec == expected_spec
Exemplo n.º 22
0
    def launch_run(self, instance, run, external_pipeline):
        """Create the Kubernetes Job that executes ``run`` and return the run.

        The job image comes from the gRPC user-code deployment when the
        pipeline's repository location origin is a
        ``GrpcServerRepositoryLocationOrigin``; otherwise it must be given as
        ``job_image`` in the executor config of the run config. The remaining
        job settings are read from the validated celery-k8s executor config
        and from attributes of this launcher.

        Args:
            instance (DagsterInstance): Type-checked only; the engine event is
                reported through ``self._instance``.
            run (PipelineRun): The run to launch. Its run config supplies the
                executor config and its tags supply the user-defined k8s
                config.
            external_pipeline (ExternalPipeline): The pipeline to execute;
                used to resolve the pipeline origin and, for user-code
                deployments, the container image.

        Returns:
            PipelineRun: The ``run`` that was passed in.

        Raises:
            DagsterInvariantViolationError: If the location origin is a gRPC
                server origin but the location handle is not a
                ``GrpcServerRepositoryLocationHandle``; if ``job_image`` is set
                in the executor config while a user-code deployment is in use;
                or if no ``job_image`` is configured when no user-code
                deployment is in use.
        """
        check.inst_param(instance, "instance", DagsterInstance)
        check.inst_param(run, "run", PipelineRun)
        check.inst_param(external_pipeline, "external_pipeline",
                         ExternalPipeline)

        job_name = get_job_name_from_run_id(run.run_id)
        # The pod is given the same name as the job.
        pod_name = job_name
        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

        job_image = None
        pipeline_origin = None
        env_vars = None

        job_image_from_executor_config = exc_config.get("job_image")

        # If the user is using user-code deployments, we grab the image from the gRPC server.
        if isinstance(
                external_pipeline.get_external_origin().
                external_repository_origin.repository_location_origin,
                GrpcServerRepositoryLocationOrigin,
        ):

            repository_location_handle = (
                external_pipeline.repository_handle.repository_location_handle)

            if not isinstance(repository_location_handle,
                              GrpcServerRepositoryLocationHandle):
                raise DagsterInvariantViolationError(
                    "Expected RepositoryLocationHandle to be of type "
                    "GrpcServerRepositoryLocationHandle but found type {}".
                    format(type(repository_location_handle)))

            repository_name = external_pipeline.repository_handle.repository_name
            repository_origin = repository_location_handle.reload_repository_python_origin(
                repository_name)
            pipeline_origin = PipelinePythonOrigin(
                pipeline_name=external_pipeline.name,
                repository_origin=repository_origin)

            job_image = repository_origin.container_image
            env_vars = {"DAGSTER_CURRENT_IMAGE": job_image}

            if job_image_from_executor_config:
                # The placeholders must be filled in explicitly: without the
                # .format() call the braces would be shown to the user as-is.
                raise DagsterInvariantViolationError((
                    "You have specified a job_image {job_image_from_executor_config} in your executor configuration, "
                    "but also {job_image} in your user-code deployment. You cannot specify a job_image "
                    "in your executor config when using user-code deployments because the job image is "
                    "pulled from the deployment. To resolve this error, remove the job_image "
                    "configuration from your executor configuration (which is a part of your run configuration)"
                ).format(
                    job_image_from_executor_config=job_image_from_executor_config,
                    job_image=job_image,
                ))

        else:
            if not job_image_from_executor_config:
                raise DagsterInvariantViolationError(
                    "You have not specified a job_image in your executor configuration. "
                    "To resolve this error, specify the job_image configuration in the executor "
                    "config section in your run config. \n"
                    "Note: You may also be seeing this error because you are using the configured API. "
                    "Using configured with the celery-k8s executor is not supported at this time, "
                    "and the job_image must be configured at the top-level executor config without "
                    "using configured.")

            job_image = job_image_from_executor_config
            pipeline_origin = external_pipeline.get_python_origin()

        job_config = DagsterK8sJobConfig(
            dagster_home=self.dagster_home,
            instance_config_map=self.instance_config_map,
            postgres_password_secret=self.postgres_password_secret,
            job_image=check.str_param(job_image, "job_image"),
            image_pull_policy=exc_config.get("image_pull_policy"),
            image_pull_secrets=exc_config.get("image_pull_secrets"),
            service_account_name=exc_config.get("service_account_name"),
            env_config_maps=exc_config.get("env_config_maps"),
            env_secrets=exc_config.get("env_secrets"),
        )

        user_defined_k8s_config = get_user_defined_k8s_config(
            frozentags(run.tags))

        # Imported at call time rather than at module import time.
        from dagster.cli.api import ExecuteRunArgs

        input_json = serialize_dagster_namedtuple(
            # depends on DagsterInstance.get() returning the same instance
            # https://github.com/dagster-io/dagster/issues/2757
            ExecuteRunArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run.run_id,
                instance_ref=None,
            ))

        job = construct_dagster_k8s_job(
            job_config,
            args=["dagster", "api", "execute_run", input_json],
            job_name=job_name,
            pod_name=pod_name,
            component="run_coordinator",
            user_defined_k8s_config=user_defined_k8s_config,
            env_vars=env_vars,
        )

        job_namespace = exc_config.get("job_namespace")

        self._batch_api.create_namespaced_job(body=job,
                                              namespace=job_namespace)
        self._instance.report_engine_event(
            "Kubernetes run_coordinator job launched",
            run,
            EngineEventData([
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]),
            cls=self.__class__,
        )
        return run
Exemplo n.º 23
0
    def launch_run(self, run, external_pipeline):
        """Create the Kubernetes Job that executes ``run`` and return the run.

        The job image is taken from the ``job_image`` entry of the executor
        config when present, otherwise from the container image recorded on
        the pipeline's repository origin. When both are set, the executor
        config wins and an engine event naming both images is reported. The
        chosen image is also stored on the run under ``DOCKER_IMAGE_TAG``.

        Args:
            run (PipelineRun): The run to launch. Its run config supplies the
                executor config and its tags supply the user-defined k8s
                config.
            external_pipeline (ExternalPipeline): The pipeline to execute;
                provides the pipeline origin.

        Returns:
            PipelineRun: The ``run`` that was passed in.

        Raises:
            DagsterInvariantViolationError: If neither the repository origin
                nor the executor config provides a job image.
        """
        check.inst_param(run, "run", PipelineRun)
        check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

        job_name = get_job_name_from_run_id(run.run_id)
        # The pod is given the same name as the job.
        pod_name = job_name
        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)
        env_vars = None

        job_image_from_executor_config = exc_config.get("job_image")

        pipeline_origin = external_pipeline.get_python_origin()
        repository_origin = pipeline_origin.repository_origin

        job_image = repository_origin.container_image

        if job_image:
            if job_image_from_executor_config:
                # Report before overriding: the message has to name the
                # user-code deployment image, which is lost once job_image is
                # replaced by the executor-config value.
                self._instance.report_engine_event(
                    f"You have specified a job_image {job_image_from_executor_config} in your executor configuration, "
                    f"but also {job_image} in your user-code deployment. Using the job image {job_image_from_executor_config} "
                    f"from executor configuration as it takes precedence.",
                    run,
                    cls=self.__class__,
                )
                job_image = job_image_from_executor_config
        else:
            if not job_image_from_executor_config:
                raise DagsterInvariantViolationError(
                    "You have not specified a job_image in your executor configuration. "
                    "To resolve this error, specify the job_image configuration in the executor "
                    "config section in your run config. \n"
                    "Note: You may also be seeing this error because you are using the configured API. "
                    "Using configured with the celery-k8s executor is not supported at this time, "
                    "and the job_image must be configured at the top-level executor config without "
                    "using configured."
                )

            job_image = job_image_from_executor_config

        job_config = DagsterK8sJobConfig(
            dagster_home=self.dagster_home,
            instance_config_map=self.instance_config_map,
            postgres_password_secret=self.postgres_password_secret,
            job_image=check.str_param(job_image, "job_image"),
            image_pull_policy=exc_config.get("image_pull_policy"),
            image_pull_secrets=exc_config.get("image_pull_secrets"),
            service_account_name=exc_config.get("service_account_name"),
            env_config_maps=exc_config.get("env_config_maps"),
            env_secrets=exc_config.get("env_secrets"),
        )

        # Record on the run which image the job will use.
        self._instance.add_run_tags(
            run.run_id,
            {DOCKER_IMAGE_TAG: job_config.job_image},
        )

        user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

        # Imported at call time rather than at module import time.
        from dagster.cli.api import ExecuteRunArgs

        input_json = serialize_dagster_namedtuple(
            # depends on DagsterInstance.get() returning the same instance
            # https://github.com/dagster-io/dagster/issues/2757
            ExecuteRunArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run.run_id,
                instance_ref=None,
            )
        )

        job = construct_dagster_k8s_job(
            job_config,
            args=["dagster", "api", "execute_run", input_json],
            job_name=job_name,
            pod_name=pod_name,
            component="run_coordinator",
            user_defined_k8s_config=user_defined_k8s_config,
            env_vars=env_vars,
        )

        job_namespace = exc_config.get("job_namespace")

        # One event is reported before the API call and one after it.
        self._instance.report_engine_event(
            "Creating Kubernetes run worker job",
            run,
            EngineEventData(
                [
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                    EventMetadataEntry.text(run.run_id, "Run ID"),
                ]
            ),
            cls=self.__class__,
        )

        self._batch_api.create_namespaced_job(body=job, namespace=job_namespace)
        self._instance.report_engine_event(
            "Kubernetes run worker job created",
            run,
            EngineEventData(
                [
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                    EventMetadataEntry.text(run.run_id, "Run ID"),
                ]
            ),
            cls=self.__class__,
        )
        return run
Exemplo n.º 24
0
def test_valid_job_format_with_user_defined_k8s_config(run_launcher):
    '''Check the Job YAML produced for a run carrying user-defined k8s config.

    The tags set container resources, a pod template annotation and a node
    affinity. The job built from them is dumped to YAML and compared against
    ``EXPECTED_CONFIGURED_JOB_SPEC`` rendered with the matching values.
    '''
    docker_image = test_project_docker_image()

    env_yaml = os.path.join(test_project_environments_path(), 'env.yaml')
    run = PipelineRun(
        pipeline_name='demo_pipeline',
        run_config=load_yaml_from_path(env_yaml),
    )

    # Each section of the user-defined config is spelled out on its own so it
    # can be read side by side with the expected YAML fragments further down.
    container_config = {
        'resources': {
            'requests': {'cpu': '250m', 'memory': '64Mi'},
            'limits': {'cpu': '500m', 'memory': '2560Mi'},
        }
    }
    pod_template_spec_metadata = {
        'annotations': {'cluster-autoscaler.kubernetes.io/safe-to-evict': 'true'},
    }
    node_selector_term = {
        'matchExpressions': [
            {
                'key': 'kubernetes.io/e2e-az-name',
                'operator': 'In',
                'values': ['e2e-az1', 'e2e-az2'],
            }
        ]
    }
    pod_spec_config = {
        'affinity': {
            'nodeAffinity': {
                'requiredDuringSchedulingIgnoredDuringExecution': {
                    'nodeSelectorTerms': [node_selector_term]
                }
            }
        }
    }

    tags = validate_tags(
        {
            USER_DEFINED_K8S_CONFIG_KEY: {
                'container_config': container_config,
                'pod_template_spec_metadata': pod_template_spec_metadata,
                'pod_spec_config': pod_spec_config,
            }
        }
    )
    user_defined_k8s_config = get_user_defined_k8s_config(tags)

    # The job and its pod share one run-derived name.
    run_name = 'dagster-run-{}'.format(run.run_id)
    graphql_args = [
        '-p',
        'executeRunInProcess',
        '-v',
        seven.json.dumps({'runId': run.run_id}),
    ]
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=['dagster-graphql'],
        args=graphql_args,
        job_name=run_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=run_name,
        component='run_coordinator',
    )

    actual_spec = yaml.dump(
        remove_none_recursively(job.to_dict()), default_flow_style=False
    ).strip()

    # YAML fragments substituted into the template; the whitespace inside
    # them is part of the compared text.
    expected_resources = '''
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi'''
    expected_annotations = '''annotations:
        cluster-autoscaler.kubernetes.io/safe-to-evict: \'true\''''
    expected_affinity = '''affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/e2e-az-name
                operator: In
                values:
                - e2e-az1
                - e2e-az2'''

    expected_spec = EXPECTED_CONFIGURED_JOB_SPEC.format(
        run_id=run.run_id,
        job_image=docker_image,
        image_pull_policy=image_pull_policy(),
        dagster_version=dagster_version,
        resources=expected_resources,
        annotations=expected_annotations,
        affinity=expected_affinity,
    ).strip()

    assert actual_spec == expected_spec
Exemplo n.º 25
0
    def launch_run(self, context: LaunchRunContext) -> None:
        """Create the Kubernetes Job that executes the run held by ``context``.

        The job image is taken from the ``job_image`` entry of the executor
        config when present, otherwise from the container image recorded on
        the pipeline's repository origin. When both are set, the executor
        config wins and an engine event naming both images is reported. The
        chosen image is also stored on the run under ``DOCKER_IMAGE_TAG``.

        Args:
            context: Launch context; ``context.pipeline_run`` is the run to
                launch and ``context.pipeline_code_origin`` is the origin of
                the pipeline to execute.

        Raises:
            DagsterInvariantViolationError: If neither the repository origin
                nor the executor config provides a job image.
        """
        run = context.pipeline_run

        job_name = get_job_name_from_run_id(run.run_id)
        # The pod is given the same name as the job.
        pod_name = job_name
        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)
        env_vars = None

        job_image_from_executor_config = exc_config.get("job_image")

        pipeline_origin = context.pipeline_code_origin
        repository_origin = pipeline_origin.repository_origin

        job_image = repository_origin.container_image

        if job_image:
            if job_image_from_executor_config:
                # Report before overriding: the message has to name the
                # user-code deployment image, which is lost once job_image is
                # replaced by the executor-config value.
                self._instance.report_engine_event(
                    f"You have specified a job_image {job_image_from_executor_config} in your executor configuration, "
                    f"but also {job_image} in your user-code deployment. Using the job image {job_image_from_executor_config} "
                    f"from executor configuration as it takes precedence.",
                    run,
                    cls=self.__class__,
                )
                job_image = job_image_from_executor_config
        else:
            if not job_image_from_executor_config:
                raise DagsterInvariantViolationError(
                    "You have not specified a job_image in your executor configuration. "
                    "To resolve this error, specify the job_image configuration in the executor "
                    "config section in your run config. \n"
                    "Note: You may also be seeing this error because you are using the configured API. "
                    "Using configured with the celery-k8s executor is not supported at this time, "
                    "and the job_image must be configured at the top-level executor config without "
                    "using configured.")

            job_image = job_image_from_executor_config

        job_config = self.get_k8s_job_config(job_image, exc_config)

        # Record on the run which image the job will use.
        self._instance.add_run_tags(
            run.run_id,
            {DOCKER_IMAGE_TAG: job_config.job_image},
        )

        user_defined_k8s_config = get_user_defined_k8s_config(
            frozentags(run.tags))

        # Imported at call time rather than at module import time.
        from dagster.cli.api import ExecuteRunArgs

        run_args = ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=self._instance.get_ref(),
        )

        job = construct_dagster_k8s_job(
            job_config,
            args=run_args.get_command_args(),
            job_name=job_name,
            pod_name=pod_name,
            component="run_worker",
            user_defined_k8s_config=user_defined_k8s_config,
            env_vars=env_vars,
            labels={
                "dagster/job": pipeline_origin.pipeline_name,
            },
        )

        job_namespace = exc_config.get("job_namespace")

        # One event is reported before the API call and one after it.
        self._instance.report_engine_event(
            "Creating Kubernetes run worker job",
            run,
            EngineEventData([
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]),
            cls=self.__class__,
        )

        self._batch_api.create_namespaced_job(body=job,
                                              namespace=job_namespace)
        self._instance.report_engine_event(
            "Kubernetes run worker job created",
            run,
            EngineEventData([
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]),
            cls=self.__class__,
        )
Exemplo n.º 26
0
def test_tags_to_dynamic_plan():
    """Check that per-solid k8s config tags reach the steps of a dynamic plan.

    ``emit`` yields three dynamic outputs that are mapped over
    ``multiply_inputs``. The plan is built with a known state listing the
    mapping keys, and the user-defined k8s config is then read from the
    ``emit`` step and from each mapped ``multiply_inputs`` step.
    """

    def _resource_tags(requests, limits):
        # Tag payload carrying container resource requests and limits.
        return {
            USER_DEFINED_K8S_CONFIG_KEY: {
                "container_config": {
                    "resources": {"requests": requests, "limits": limits}
                }
            }
        }

    def _assert_step_resources(step, requests_cpu, requests_memory, limits_cpu, limits_memory):
        # Read the config from the step's tags and compare its resources.
        k8s_config = get_user_defined_k8s_config(step.tags)

        assert k8s_config.container_config
        assert k8s_config.container_config["resources"]

        step_resources = k8s_config.container_config["resources"]

        assert step_resources["requests"]["cpu"] == requests_cpu
        assert step_resources["requests"]["memory"] == requests_memory
        assert step_resources["limits"]["cpu"] == limits_cpu
        assert step_resources["limits"]["memory"] == limits_memory

    @solid(
        tags=_resource_tags(
            {"cpu": "500m", "memory": "128Mi"},
            {"cpu": "1000m", "memory": "1Gi"},
        )
    )
    def multiply_inputs(_, x):
        return 2 * x

    @solid(
        tags=_resource_tags(
            {"cpu": "250m", "memory": "64Mi"},
            {"cpu": "500m", "memory": "2560Mi"},
        ),
        output_defs=[DynamicOutputDefinition()],
    )
    def emit(_):
        for index in range(3):
            yield DynamicOutput(value=index, mapping_key=str(index))

    @pipeline
    def k8s_ready():
        return emit().map(multiply_inputs)

    # The mapping keys are supplied up front via the known state passed to
    # create_execution_plan.
    resolved_mapping_keys = {emit.name: {"result": ["0", "1", "2"]}}
    known_state = KnownExecutionState({}, resolved_mapping_keys)
    plan = create_execution_plan(k8s_ready, known_state=known_state)

    _assert_step_resources(
        plan.get_step_by_key(emit.name), "250m", "64Mi", "500m", "2560Mi"
    )

    for mapping_key in range(3):
        mapped_step_key = f"{multiply_inputs.name}[{mapping_key}]"
        _assert_step_resources(
            plan.get_step_by_key(mapped_step_key), "500m", "128Mi", "1000m", "1Gi"
        )