Example #1
def whitelist_for_serdes(
    __cls: Optional[Type] = None,
    *,
    serializer: Optional[Type["Serializer"]] = None,
    storage_name: Optional[str] = None,
):
    """
    Decorator to whitelist a NamedTuple or enum to be serializable. If a `storage_name` is provided
    for a NamedTuple, then serialized instances of the NamedTuple will be stored under the
    `storage_name` instead of the class name. This is primarily useful for maintaining backwards
    compatibility. If a serialized object undergoes a name change, then setting `storage_name` to
    the old name will (a) allow the object to be deserialized by versions of Dagster prior to the
    name change; (b) allow Dagster to load objects stored using the old name.

    Example:

        @whitelist_for_serdes
        class MyNamedTuple(NamedTuple):
            ...

    """
    check.invariant(
        not storage_name
        or (serializer is None or issubclass(serializer, DefaultNamedTupleSerializer)),
        "storage_name can only be used with DefaultNamedTupleSerializer",
    )
    if __cls is not None:  # decorator invoked directly on class
        check.class_param(__cls, "__cls")
        return _whitelist_for_serdes(whitelist_map=_WHITELIST_MAP)(__cls)
    else:  # decorator passed params
        check.opt_class_param(serializer, "serializer", superclass=Serializer)
        serializer = cast(Type[Serializer], serializer)
        return _whitelist_for_serdes(
            whitelist_map=_WHITELIST_MAP, serializer=serializer, storage_name=storage_name
        )
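A minimal usage sketch of the `storage_name` path (the class and field below are hypothetical):

from typing import NamedTuple

# Hypothetical: RunRecord used to be serialized as "ExecutionRecord".
# storage_name lets old payloads load into the renamed class and keeps
# new payloads readable by Dagster versions that predate the rename.
@whitelist_for_serdes(storage_name="ExecutionRecord")
class RunRecord(NamedTuple("_RunRecord", [("run_id", str)])):
    pass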
Example #2
    def report_engine_event(
        self,
        message,
        pipeline_run,
        engine_event_data=None,
        cls=None,
        step_key=None,
    ):
        '''
        Report an EngineEvent that occurred outside of a pipeline execution context.
        '''
        from dagster.core.events import EngineEventData, DagsterEvent, DagsterEventType
        from dagster.core.events.log import DagsterEventRecord

        check.opt_class_param(cls, 'cls')
        check.str_param(message, 'message')
        check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
        engine_event_data = check.opt_inst_param(
            engine_event_data,
            'engine_event_data',
            EngineEventData,
            EngineEventData([]),
        )

        if cls:
            message = "[{}] {}".format(cls.__name__, message)

        log_level = logging.INFO
        if engine_event_data and engine_event_data.error:
            log_level = logging.ERROR

        dagster_event = DagsterEvent(
            event_type_value=DagsterEventType.ENGINE_EVENT.value,
            pipeline_name=pipeline_run.pipeline_name,
            message=message,
            event_specific_data=engine_event_data,
        )
        event_record = DagsterEventRecord(
            message=message,
            user_message=message,
            level=log_level,
            pipeline_name=pipeline_run.pipeline_name,
            run_id=pipeline_run.run_id,
            error_info=None,
            timestamp=time.time(),
            step_key=step_key,
            dagster_event=dagster_event,
        )

        self.handle_new_event(event_record)
        return dagster_event
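A sketch of a call site, assuming `instance` is the DagsterInstance this method belongs to and `run` is a PipelineRun obtained elsewhere; `MyExecutor` is a hypothetical class used only to prefix the message:

class MyExecutor:
    pass

# Logs "[MyExecutor] Process exited" at INFO level (no error data) and
# returns the DagsterEvent that was written to the event log.
event = instance.report_engine_event(
    message="Process exited",
    pipeline_run=run,
    cls=MyExecutor,
)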
Example #3
    def __init__(
        self,
        python_type: t.Union[t.Type, t.Tuple[t.Type, ...]],
        key: t.Optional[str] = None,
        name: t.Optional[str] = None,
        **kwargs,
    ):
        if isinstance(python_type, tuple):
            # Every element of the tuple must itself be a class.
            self.python_type = check.tuple_param(
                python_type, "python_type", of_shape=tuple(type for _ in python_type)
            )
            self.type_str = "Union[{}]".format(
                ", ".join(member.__name__ for member in python_type)
            )
            typing_type = t.Union[python_type]  # type: ignore

        else:
            self.python_type = check.class_param(python_type, "python_type")  # type: ignore
            self.type_str = cast(str, python_type.__name__)
            typing_type = self.python_type  # type: ignore
        name = check.opt_str_param(name, "name", self.type_str)
        key = check.opt_str_param(key, "key", name)
        super(PythonObjectDagsterType, self).__init__(
            key=key,
            name=name,
            type_check_fn=isinstance_type_check_fn(python_type, name, self.type_str),
            typing_type=typing_type,
            **kwargs,
        )
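For illustration, a hypothetical type built from a tuple of Python types; the tuple branch above renders `type_str` as "Union[int, float]" and type-checks values with isinstance against the tuple:

# Accepts either int or float values.
NumberType = PythonObjectDagsterType((int, float), name="Number")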
Example #4
@contextmanager
def user_code_error_boundary(error_cls, msg_fn, log_manager=None, **kwargs):
    """
    Wraps the execution of user-space code in an error boundary. This places a uniform
    policy around any user code invoked by the framework. This ensures that all user
    errors are wrapped in an exception derived from DagsterUserCodeExecutionError,
    and that the original stack trace of the user error is preserved, so that it
    can be reported without confusing framework code in the stack trace, if a
    tool author wishes to do so.

    Examples:

    .. code-block:: python

        with user_code_error_boundary(
            # Pass a class that inherits from DagsterUserCodeExecutionError
            DagsterExecutionStepExecutionError,
            # Pass a function that produces a message
            lambda: "Error occurred during step execution",
        ):
            call_user_provided_function()

    """
    check.callable_param(msg_fn, "msg_fn")
    check.class_param(error_cls,
                      "error_cls",
                      superclass=DagsterUserCodeExecutionError)

    with raise_execution_interrupts():
        if log_manager:
            log_manager.begin_python_log_capture()
        try:
            yield
        except DagsterError as de:
            # The system has thrown an error that is part of the user-framework contract
            raise de
        except Exception as e:  # pylint: disable=W0703
            # An exception has been thrown by user code and computation should cease
            # with the error reported further up the stack
            raise error_cls(msg_fn(),
                            user_exception=e,
                            original_exc_info=sys.exc_info(),
                            **kwargs) from e
        finally:
            if log_manager:
                log_manager.end_python_log_capture()
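A sketch of the boundary at a call site; the error class and user function below are hypothetical. Because the boundary raises with `from e`, the original exception survives as `__cause__`:

# Hypothetical error class and user function for illustration.
class MyUserCodeError(DagsterUserCodeExecutionError):
    pass

def call_user_provided_function():
    raise ValueError("boom")  # stand-in for failing user code

try:
    with user_code_error_boundary(MyUserCodeError, lambda: "User code failed"):
        call_user_provided_function()
except MyUserCodeError as err:
    # `raise ... from e` preserves the original error for tooling to report.
    assert isinstance(err.__cause__, ValueError)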
Example #5
def whitelist_for_serdes(__cls: Optional[Type] = None,
                         *,
                         serializer: Optional[Type["Serializer"]] = None):
    """
    Decorator to whitelist a named tuple or enum to be serializable.

    Example:

        @whitelist_for_serdes
        class MyNamedTuple(NamedTuple):
            ...

    """

    if __cls is not None:  # decorator invoked directly on class
        check.class_param(__cls, "__cls")
        return _whitelist_for_serdes(whitelist_map=_WHITELIST_MAP,
                                     serializer=None)(__cls)
    else:  # decorator passed params
        check.subclass_param(serializer, "serializer", Serializer)
        serializer = cast(Type[Serializer], serializer)
        return _whitelist_for_serdes(whitelist_map=_WHITELIST_MAP,
                                     serializer=serializer)
Example #6
    def __init__(
        self,
        generator: Generator[Union["DagsterEvent", GeneratedContext], None, None],
        object_cls: Type[GeneratedContext],
        require_object: Optional[bool] = True,
    ):
        self.generator = check.generator(generator)
        self.object_cls: Type[GeneratedContext] = check.class_param(object_cls, "object_cls")
        self.require_object = check.bool_param(require_object, "require_object")
        self.object: Optional[GeneratedContext] = None
        self.did_setup = False
        self.did_teardown = False
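The enclosing class header is not shown in this snippet; assuming it is Dagster's EventGenerationManager, construction looks roughly like this (the generator and context class are hypothetical):

class MyContext:
    pass

def events_then_context():
    # Yield zero or more DagsterEvents, then the context object itself.
    yield MyContext()

manager = EventGenerationManager(events_then_context(), MyContext)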
Example #7
def make_airflow_dag_for_operator(
    recon_repo,
    job_name,
    operator,
    run_config=None,
    mode=None,
    dag_id=None,
    dag_description=None,
    dag_kwargs=None,
    op_kwargs=None,
    pipeline_name=None,
):
    """Construct an Airflow DAG corresponding to a given Dagster job/pipeline and custom operator.

    `Custom operator template <https://github.com/dagster-io/dagster/blob/master/python_modules/dagster-test/dagster_test/dagster_airflow/custom_operator.py>`_

    Tasks in the resulting DAG will execute the Dagster logic they encapsulate, run by the given
    :py:class:`BaseOperator <airflow.models.BaseOperator>` subclass. If you
    are looking for a containerized solution to provide better isolation, see instead
    :py:func:`make_airflow_dag_containerized`.

    This function should be invoked in an Airflow DAG definition file, such as that created by an
    invocation of the dagster-airflow scaffold CLI tool.

    Args:
        recon_repo (:class:`dagster.ReconstructableRepository`): reference to a Dagster RepositoryDefinition
            that can be reconstructed in another process
        job_name (str): The name of the job definition.
        operator (type): The operator to use. Must be a class that inherits from
            :py:class:`BaseOperator <airflow.models.BaseOperator>`
        run_config (Optional[dict]): The config, if any, with which to compile
            the pipeline to an execution plan, as a Python dict.
        mode (Optional[str]): The mode in which to execute the pipeline.
        dag_id (Optional[str]): The id to use for the compiled Airflow DAG (passed through to
            :py:class:`DAG <airflow:airflow.models.DAG>`).
        dag_description (Optional[str]): The description to use for the compiled Airflow DAG
            (passed through to :py:class:`DAG <airflow:airflow.models.DAG>`)
        dag_kwargs (Optional[dict]): Any additional kwargs to pass to the Airflow
            :py:class:`DAG <airflow:airflow.models.DAG>` constructor, including ``default_args``.
        op_kwargs (Optional[dict]): Any additional kwargs to pass to the underlying Airflow
            operator.
        pipeline_name (Optional[str]): (legacy) The name of the pipeline definition.

    Returns:
        (airflow.models.DAG, List[airflow.models.BaseOperator]): The generated Airflow DAG, and a
        list of its constituent tasks.
    """
    check.class_param(operator, "operator", superclass=BaseOperator)

    job_name = canonicalize_backcompat_args(
        new_val=job_name,
        new_arg="job_name",
        old_val=pipeline_name,
        old_arg="pipeline_name",
        breaking_version="future versions",
        coerce_old_to_new=lambda val: val,
    )

    return _make_airflow_dag(
        recon_repo=recon_repo,
        job_name=job_name,
        run_config=run_config,
        mode=mode,
        dag_id=dag_id,
        dag_description=dag_description,
        dag_kwargs=dag_kwargs,
        op_kwargs=op_kwargs,
        operator=operator,
    )
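A sketch of a DAG definition file invoking this function; the module paths, repository entry point, and operator are hypothetical, and the `ReconstructableRepository.for_module` import path is assumed for this Dagster version:

from dagster.core.definitions.reconstructable import ReconstructableRepository

from my_package.custom_operator import MyCustomOperator  # hypothetical BaseOperator subclass

recon_repo = ReconstructableRepository.for_module("my_package.repo", "my_repository")

dag, tasks = make_airflow_dag_for_operator(
    recon_repo=recon_repo,
    job_name="my_job",
    operator=MyCustomOperator,
)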
Example #8
def _make_airflow_dag(
    recon_repo,
    job_name,
    run_config=None,
    mode=None,
    instance=None,
    dag_id=None,
    dag_description=None,
    dag_kwargs=None,
    op_kwargs=None,
    operator=DagsterPythonOperator,
):
    check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)
    check.str_param(job_name, "job_name")
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    mode = check.opt_str_param(mode, "mode")
    # Default to the (persistent) system temp directory rather than a TemporaryDirectory,
    # which would not be consistent between Airflow task invocations.

    if instance is None:
        if is_dagster_home_set():
            instance = DagsterInstance.get()
        else:
            instance = DagsterInstance.local_temp(
                tempdir=seven.get_system_temp_directory())

    check.inst_param(instance, "instance", DagsterInstance)

    # Only used for Airflow; internally we continue to use pipeline.name
    dag_id = check.opt_str_param(dag_id, "dag_id",
                                 _rename_for_airflow(job_name))

    dag_description = check.opt_str_param(dag_description, "dag_description",
                                          _make_dag_description(job_name))
    check.class_param(operator, "operator", superclass=BaseOperator)

    dag_kwargs = dict(
        {"default_args": DEFAULT_ARGS},
        **check.opt_dict_param(dag_kwargs, "dag_kwargs", key_type=str),
    )

    op_kwargs = check.opt_dict_param(op_kwargs, "op_kwargs", key_type=str)

    dag = DAG(dag_id=dag_id, description=dag_description, **dag_kwargs)
    pipeline = recon_repo.get_definition().get_pipeline(job_name)

    if mode is None:
        mode = pipeline.get_default_mode_name()

    execution_plan = create_execution_plan(pipeline, run_config, mode=mode)

    tasks = {}

    coalesced_plan = coalesce_execution_steps(execution_plan)

    for solid_handle, solid_steps in coalesced_plan.items():
        step_keys = [step.key for step in solid_steps]

        operator_parameters = DagsterOperatorParameters(
            recon_repo=recon_repo,
            pipeline_name=job_name,
            run_config=run_config,
            mode=mode,
            task_id=solid_handle,
            step_keys=step_keys,
            dag=dag,
            instance_ref=instance.get_ref(),
            op_kwargs=op_kwargs,
            pipeline_snapshot=pipeline.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                execution_plan,
                pipeline_snapshot_id=pipeline.get_pipeline_snapshot_id()),
        )
        task = operator(operator_parameters)

        tasks[solid_handle] = task

        for solid_step in solid_steps:
            for step_input in solid_step.step_inputs:
                for key in step_input.dependency_keys:
                    prev_solid_handle = execution_plan.get_step_by_key(
                        key).solid_handle.to_string()
                    if solid_handle != prev_solid_handle:
                        tasks[prev_solid_handle].set_downstream(task)

    return (dag,
            [tasks[solid_handle] for solid_handle in coalesced_plan.keys()])
Example #9
def test_class_param():
    class Bar:
        pass

    assert check.class_param(int, "foo")
    assert check.class_param(Bar, "foo")

    with pytest.raises(CheckError):
        check.class_param(None, "foo")

    with pytest.raises(CheckError):
        check.class_param(check, "foo")

    with pytest.raises(CheckError):
        check.class_param(234, "foo")

    with pytest.raises(CheckError):
        check.class_param("bar", "foo")

    with pytest.raises(CheckError):
        check.class_param(Bar(), "foo")

    class Super:
        pass

    class Sub(Super):
        pass

    class Alone:
        pass

    assert check.class_param(Sub, "foo", superclass=Super)

    with pytest.raises(CheckError):
        assert check.class_param(Alone, "foo", superclass=Super)

    with pytest.raises(CheckError):
        assert check.class_param("value", "foo", superclass=Super)

    assert check.opt_class_param(Sub, "foo", superclass=Super)
    assert check.opt_class_param(None, "foo", superclass=Super) is None

    with pytest.raises(CheckError):
        assert check.opt_class_param(Alone, "foo", superclass=Super)

    with pytest.raises(CheckError):
        assert check.opt_class_param("value", "foo", superclass=Super)
Example #10
def whitelist_for_persistence(klass):
    check.class_param(klass, "klass")
    return compose(
        _whitelist_for_persistence(whitelist_map=_WHITELIST_MAP),
        _whitelist_for_serdes(whitelist_map=_WHITELIST_MAP),
    )(klass)
Example #11
def whitelist_for_serdes(klass):
    check.class_param(klass, "klass")
    return _whitelist_for_serdes(whitelist_map=_WHITELIST_MAP)(klass)
Example #12
@contextmanager
def solid_execution_error_boundary(error_cls, msg_fn, step_context, **kwargs):
    """
    A specialization of user_code_error_boundary for the steps involved in executing a solid.
    This variant supports the control-flow exceptions RetryRequested and Failure, and respects
    the solid's RetryPolicy if one is present.
    """
    from dagster.core.execution.context.system import StepExecutionContext

    check.callable_param(msg_fn, "msg_fn")
    check.class_param(error_cls,
                      "error_cls",
                      superclass=DagsterUserCodeExecutionError)
    check.inst_param(step_context, "step_context", StepExecutionContext)

    with raise_execution_interrupts():

        step_context.log.begin_python_log_capture()
        retry_policy = step_context.solid_retry_policy

        try:
            yield
        except DagsterError as de:
            # The system has thrown an error that is part of the user-framework contract
            raise de

        except Exception as e:  # pylint: disable=W0703
            # An exception has been thrown by user code and computation should cease
            # with the error reported further up the stack

            # A directly raised RetryRequested escalates before the retry policy is evaluated.
            if isinstance(e, RetryRequested):
                raise e

            if retry_policy:
                raise RetryRequested(
                    max_retries=retry_policy.max_retries,
                    seconds_to_wait=retry_policy.calculate_delay(
                        step_context.previous_attempt_count + 1),
                ) from e

            # Failure exceptions are re-raised without wrapping
            if isinstance(e, Failure):
                raise e

            # Otherwise wrap the user exception with context
            raise error_cls(
                msg_fn(),
                user_exception=e,
                original_exc_info=sys.exc_info(),
                **kwargs,
            ) from e

        except (DagsterExecutionInterruptedError, KeyboardInterrupt) as ie:
            # respect retry policy when interrupts occur
            if retry_policy:
                raise RetryRequested(
                    max_retries=retry_policy.max_retries,
                    seconds_to_wait=retry_policy.calculate_delay(
                        step_context.previous_attempt_count + 1),
                ) from ie
            else:
                raise ie

        finally:
            step_context.log.end_python_log_capture()
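To illustrate the retry arithmetic above, assuming Dagster's RetryPolicy and RetryRequested as used in the snippet (constant backoff, so `calculate_delay` returns the configured delay):

from dagster import RetryPolicy, RetryRequested

policy = RetryPolicy(max_retries=3, delay=2)

# What the boundary raises for a step failing its first attempt
# (previous_attempt_count == 0).
requested = RetryRequested(
    max_retries=policy.max_retries,
    seconds_to_wait=policy.calculate_delay(0 + 1),
)
assert requested.seconds_to_wait == 2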