Example #1
    def launch_step(self, step_handler_context: StepHandlerContext):
        events = []

        assert (
            len(step_handler_context.execute_step_args.step_keys_to_execute) == 1
        ), "Launching multiple steps is not currently supported"
        step_key = step_handler_context.execute_step_args.step_keys_to_execute[0]

        k8s_name_key = get_k8s_job_name(
            step_handler_context.execute_step_args.pipeline_run_id,
            step_key,
        )
        job_name = "dagster-job-%s" % (k8s_name_key)
        pod_name = "dagster-job-%s" % (k8s_name_key)

        input_json = serialize_dagster_namedtuple(
            step_handler_context.execute_step_args)
        args = ["dagster", "api", "execute_step", input_json]

        job_config = self._job_config
        if not job_config.job_image:
            job_config = job_config.with_image(
                step_handler_context.execute_step_args.pipeline_origin.repository_origin.container_image
            )

        if not job_config.job_image:
            raise Exception(
                "No image included in either executor config or the pipeline")

        user_defined_k8s_config = get_user_defined_k8s_config(
            frozentags(step_handler_context.step_tags[step_key]))

        job = construct_dagster_k8s_job(
            job_config=job_config,
            args=args,
            job_name=job_name,
            pod_name=pod_name,
            component="step_worker",
            user_defined_k8s_config=user_defined_k8s_config,
        )

        events.append(
            DagsterEvent(
                event_type_value=DagsterEventType.ENGINE_EVENT.value,
                pipeline_name=step_handler_context.execute_step_args.pipeline_origin.pipeline_name,
                step_key=step_key,
                message=f"Executing step {step_key} in Kubernetes job {job_name}",
                event_specific_data=EngineEventData(
                    [
                        EventMetadataEntry.text(step_key, "Step key"),
                        EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    ]
                ),
            )
        )

        self._batch_api.create_namespaced_job(body=job,
                                              namespace=self._job_namespace)

        return events
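
The input_json built above travels as an argument to the dagster api execute_step CLI inside the Kubernetes job. A minimal sketch of the receiving side, assuming only the serdes round trip (the helper name parse_execute_step_input is illustrative, not Dagster's actual entry point):

from dagster.serdes import deserialize_json_to_dagster_namedtuple

def parse_execute_step_input(input_json):
    # Rehydrate the ExecuteStepArgs that launch_step serialized above.
    return deserialize_json_to_dagster_namedtuple(input_json)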
Example #2
def test_pipeline_run_dagster_run():
    # serialize in current code
    test_run = DagsterRun(pipeline_name="test")
    test_str = serialize_dagster_namedtuple(test_run)

    # deserialize in "legacy" code
    legacy_env = WhitelistMap.create()

    @_whitelist_for_serdes(legacy_env)
    class PipelineRun(
        namedtuple(
            "_PipelineRun",
            "pipeline_name run_id run_config mode solid_selection solids_to_execute "
            "step_keys_to_execute status tags root_run_id parent_run_id "
            "pipeline_snapshot_id execution_plan_snapshot_id external_pipeline_origin "
            "pipeline_code_origin",
        )
    ):
        pass

    @_whitelist_for_serdes(legacy_env)  # pylint: disable=unused-variable
    class PipelineRunStatus(Enum):
        QUEUED = "QUEUED"
        NOT_STARTED = "NOT_STARTED"

    result = _deserialize_json(test_str, legacy_env)
    assert isinstance(result, PipelineRun)
    assert result.pipeline_name == test_run.pipeline_name
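
The round trip works because DagsterRun is registered under its legacy storage name, so the payload still identifies itself as PipelineRun. A quick check of the payload, assuming serdes stores the class name under the "__class__" key:

import json

assert json.loads(test_str)["__class__"] == "PipelineRun"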
Example #3
def test_skip_when_empty():
    test_map = WhitelistMap()

    @_whitelist_for_serdes(whitelist_map=test_map)
    class SameSnapshotTuple(namedtuple("_Tuple", "foo")):
        def __new__(cls, foo):
            return super(SameSnapshotTuple, cls).__new__(cls, foo)  # pylint: disable=bad-super-call

    old_tuple = SameSnapshotTuple(foo="A")
    old_serialized = serialize_dagster_namedtuple(old_tuple)
    old_snapshot = create_snapshot_id(old_tuple)

    # Without setting skip_when_empty, the ID changes

    @_whitelist_for_serdes(whitelist_map=test_map)
    class SameSnapshotTuple(namedtuple("_Tuple", "foo bar")):  # pylint: disable=function-redefined
        def __new__(cls, foo, bar=None):
            return super(SameSnapshotTuple, cls).__new__(cls, foo, bar)  # pylint: disable=bad-super-call

    new_tuple_without_serializer = SameSnapshotTuple(foo="A")
    new_snapshot_without_serializer = create_snapshot_id(new_tuple_without_serializer)

    assert new_snapshot_without_serializer != old_snapshot

    # By setting a custom serializer and skip_when_empty, the snapshot stays the same
    # as long as the new field is None

    class SkipWhenEmptySerializer(DefaultNamedTupleSerializer):
        @classmethod
        def skip_when_empty(cls) -> Set[str]:
            return {"bar"}

    @_whitelist_for_serdes(whitelist_map=test_map, serializer=SkipWhenEmptySerializer)
    class SameSnapshotTuple(namedtuple("_Tuple", "foo bar")):  # pylint: disable=function-redefined
        def __new__(cls, foo, bar=None):
            return super(SameSnapshotTuple, cls).__new__(cls, foo, bar)  # pylint: disable=bad-super-call

    new_tuple = SameSnapshotTuple(foo="A")
    new_snapshot = create_snapshot_id(new_tuple)

    assert old_snapshot == new_snapshot

    rehydrated_tuple = deserialize_json_to_dagster_namedtuple(old_serialized)
    assert rehydrated_tuple.foo == "A"
    assert rehydrated_tuple.bar is None

    new_tuple_with_bar = SameSnapshotTuple(foo="A", bar="B")
    assert new_tuple_with_bar.foo == "A"
    assert new_tuple_with_bar.bar == "B"
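
Conceptually, skip_when_empty drops the listed fields from the packed dict when their value is None, which is why adding an optional field leaves old snapshot IDs unchanged. A rough standalone sketch of that rule, not Dagster's actual implementation:

def apply_skip_when_empty(packed, skip):
    # Drop any whitelisted field whose value is None, so a newly added
    # optional field does not change snapshots of pre-existing values.
    return {k: v for k, v in packed.items() if not (k in skip and v is None)}

assert apply_skip_when_empty({"foo": "A", "bar": None}, {"bar"}) == {"foo": "A"}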
Example #4
    def launch_steps(
        self,
        step_contexts: List[IStepContext],
        known_state: KnownExecutionState,
    ):
        assert (
            len(step_contexts) == 1
        ), "Launching multiple steps is not currently supported"
        step_context = step_contexts[0]

        k8s_name_key = get_k8s_job_name(
            self.pipeline_context.plan_data.pipeline_run.run_id,
            step_context.step.key,
        )
        job_name = "dagster-job-%s" % (k8s_name_key)
        pod_name = "dagster-job-%s" % (k8s_name_key)
        pipeline_origin = self.pipeline_context.reconstructable_pipeline.get_python_origin()

        execute_step_args = ExecuteStepArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=self.pipeline_context.pipeline_run.run_id,
            step_keys_to_execute=[step_context.step.key],
            instance_ref=self.pipeline_context.instance.get_ref(),
            retry_mode=self.retries.for_inner_plan(),
            known_state=known_state,
            should_verify_step=True,
        )

        input_json = serialize_dagster_namedtuple(execute_step_args)
        args = ["dagster", "api", "execute_step", input_json]

        job_config = self._job_config
        if not job_config.job_image:
            job_config = job_config.with_image(
                pipeline_origin.repository_origin.container_image)

        if not job_config.job_image:
            raise Exception(
                "No image included in either executor config or the pipeline")

        job = construct_dagster_k8s_job(
            job_config,
            args,
            job_name,
            get_user_defined_k8s_config(frozentags()),
            pod_name,
        )

        kubernetes.config.load_incluster_config()
        kubernetes.client.BatchV1Api().create_namespaced_job(
            body=job, namespace=self._job_namespace)
Example #5
def test_solid_handle_node_handle():
    # serialize in current code
    test_handle = NodeHandle("test", None)
    test_str = serialize_dagster_namedtuple(test_handle)

    # deserialize in "legacy" code
    legacy_env = WhitelistMap.create()

    @_whitelist_for_serdes(legacy_env)
    class SolidHandle(namedtuple("_SolidHandle", "name parent")):
        pass

    result = _deserialize_json(test_str, legacy_env)
    assert isinstance(result, SolidHandle)
    assert result.name == test_handle.name
Example #6
    def to_json(self):
        return serialize_dagster_namedtuple(self)
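
For completeness, a minimal sketch of the round trip behind to_json, assuming the public whitelist_for_serdes decorator and helpers exported by dagster.serdes in Dagster 0.x (MyRecord is a made-up example class):

from collections import namedtuple

from dagster.serdes import (
    deserialize_json_to_dagster_namedtuple,
    serialize_dagster_namedtuple,
    whitelist_for_serdes,
)

@whitelist_for_serdes
class MyRecord(namedtuple("_MyRecord", "name count")):
    pass

record = MyRecord(name="example", count=3)
json_str = serialize_dagster_namedtuple(record)
assert deserialize_json_to_dagster_namedtuple(json_str) == record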
Example #7
def test_external_job_origin_instigator_origin():
    def build_legacy_whitelist_map():
        legacy_env = WhitelistMap.create()

        @_whitelist_for_serdes(legacy_env)
        class ExternalJobOrigin(
                namedtuple("_ExternalJobOrigin",
                           "external_repository_origin job_name")):
            def get_id(self):
                return create_snapshot_id(self)

        @_whitelist_for_serdes(legacy_env)
        class ExternalRepositoryOrigin(
                namedtuple("_ExternalRepositoryOrigin",
                           "repository_location_origin repository_name")):
            def get_id(self):
                return create_snapshot_id(self)

        class GrpcServerOriginSerializer(DefaultNamedTupleSerializer):
            @classmethod
            def skip_when_empty(cls):
                return {"use_ssl"}

        @_whitelist_for_serdes(whitelist_map=legacy_env,
                               serializer=GrpcServerOriginSerializer)
        class GrpcServerRepositoryLocationOrigin(
                namedtuple(
                    "_GrpcServerRepositoryLocationOrigin",
                    "host port socket location_name use_ssl",
                )):
            def __new__(cls,
                        host,
                        port=None,
                        socket=None,
                        location_name=None,
                        use_ssl=None):
                return super(GrpcServerRepositoryLocationOrigin,
                             cls).__new__(cls, host, port, socket,
                                          location_name, use_ssl)

        return (
            legacy_env,
            ExternalJobOrigin,
            ExternalRepositoryOrigin,
            GrpcServerRepositoryLocationOrigin,
        )

    legacy_env, klass, repo_klass, location_klass = build_legacy_whitelist_map()

    from dagster.core.host_representation.origin import (
        ExternalInstigatorOrigin,
        ExternalRepositoryOrigin,
        GrpcServerRepositoryLocationOrigin,
    )

    # serialize from current code, compare against old code
    instigator_origin = ExternalInstigatorOrigin(
        external_repository_origin=ExternalRepositoryOrigin(
            repository_location_origin=GrpcServerRepositoryLocationOrigin(
                host="localhost", port=1234, location_name="test_location"),
            repository_name="the_repo",
        ),
        instigator_name="simple_schedule",
    )
    instigator_origin_str = serialize_dagster_namedtuple(instigator_origin)
    instigator_to_job = _deserialize_json(instigator_origin_str, legacy_env)
    assert isinstance(instigator_to_job, klass)
    # ensure that the origin id is stable
    assert instigator_to_job.get_id() == instigator_origin.get_id()

    # serialize from old code, compare against current code
    job_origin = klass(
        external_repository_origin=repo_klass(
            repository_location_origin=location_klass(
                host="localhost", port=1234, location_name="test_location"),
            repository_name="the_repo",
        ),
        job_name="simple_schedule",
    )
    job_origin_str = serialize_value(job_origin, legacy_env)

    job_to_instigator = deserialize_json_to_dagster_namedtuple(job_origin_str)
    assert isinstance(job_to_instigator, ExternalInstigatorOrigin)
    # ensure that the origin id is stable
    assert job_to_instigator.get_id() == job_origin.get_id()
Example #8
def test_legacy_event_log_load():
    # ensure EventLogEntry 0.14.3+ can still be loaded by older dagster versions
    # to avoid downgrades etc from creating operational issues
    legacy_env = WhitelistMap.create()

    # snapshot of EventLogEntry pre commit ea19544
    @_whitelist_for_serdes(
        whitelist_map=legacy_env,
        storage_name="EventLogEntry",  # use this to avoid collision with current EventLogEntry
    )
    class OldEventLogEntry(  # pylint: disable=unused-variable
        NamedTuple(
            "_OldEventLogEntry",
            [
                ("error_info", Optional[SerializableErrorInfo]),
                ("message", str),
                ("level", Union[str, int]),
                ("user_message", str),
                ("run_id", str),
                ("timestamp", float),
                ("step_key", Optional[str]),
                ("pipeline_name", Optional[str]),
                ("dagster_event", Optional[DagsterEvent]),
            ],
        )
    ):
        def __new__(
            cls,
            error_info,
            message,
            level,
            user_message,
            run_id,
            timestamp,
            step_key=None,
            pipeline_name=None,
            dagster_event=None,
            job_name=None,
        ):
            pipeline_name = pipeline_name or job_name
            return super().__new__(
                cls,
                check.opt_inst_param(error_info, "error_info", SerializableErrorInfo),
                check.str_param(message, "message"),
                level,  # coerce_valid_log_level call omitted
                check.str_param(user_message, "user_message"),
                check.str_param(run_id, "run_id"),
                check.float_param(timestamp, "timestamp"),
                check.opt_str_param(step_key, "step_key"),
                check.opt_str_param(pipeline_name, "pipeline_name"),
                check.opt_inst_param(dagster_event, "dagster_event", DagsterEvent),
            )

    # current event log entry
    new_event = EventLogEntry(
        user_message="test 1 2 3",
        error_info=None,
        level="debug",
        run_id="fake_run_id",
        timestamp=time.time(),
    )

    storage_str = serialize_dagster_namedtuple(new_event)

    result = _deserialize_json(storage_str, legacy_env)
    assert result.message is not None
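
Here storage_name="EventLogEntry" lets the legacy class claim the same name the current class serializes under. Assuming serdes keys the class name as "__class__", the payload can be checked directly:

import json

assert json.loads(storage_str)["__class__"] == "EventLogEntry"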