Exemplo n.º 1
0
def _type_check_output(
    step_context: StepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Any,
    version: Optional[str],
) -> Iterator[DagsterEvent]:
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.inst_param(output, "output", (Output, DynamicOutput))

    step_output = step_context.step.step_output_named(output.output_name)
    step_output_def = step_context.solid_def.output_def_named(step_output.name)

    dagster_type = step_output_def.dagster_type
    type_check_context = step_context.for_type(dagster_type)
    op_label = step_context.describe_op()
    output_type = type(output.value)

    with user_code_error_boundary(
            DagsterTypeCheckError,
            lambda:
        (f'Error occurred while type-checking output "{output.output_name}" of {op_label}, with '
         f"Python type {output_type} and Dagster type {dagster_type.display_name}"
         ),
            log_manager=type_check_context.log,
    ):
        type_check = do_type_check(type_check_context, dagster_type,
                                   output.value)

    yield DagsterEvent.step_output_event(
        step_context=step_context,
        step_output_data=StepOutputData(
            step_output_handle=step_output_handle,
            type_check_data=TypeCheckData(
                success=type_check.success,
                label=step_output_handle.output_name,
                description=type_check.description if type_check else None,
                metadata_entries=type_check.metadata_entries
                if type_check else [],
            ),
            version=version,
            metadata_entries=[
                entry for entry in output.metadata_entries
                if isinstance(entry, MetadataEntry)
            ],
        ),
    )

    if not type_check.success:
        raise DagsterTypeCheckDidNotPass(
            description=(
                f'Type check failed for step output "{output.output_name}" - '
                f'expected type "{dagster_type.display_name}". '
                f"Description: {type_check.description}"),
            metadata_entries=type_check.metadata_entries,
            dagster_type=dagster_type,
        )
Exemplo n.º 2
0
def _trigger_hook(
    step_context: StepExecutionContext, step_event_list: List[DagsterEvent]
) -> Iterator[DagsterEvent]:
    """Trigger hooks and record hook's operatonal events"""
    hook_defs = step_context.pipeline_def.get_all_hooks_for_handle(step_context.solid_handle)
    # when the solid doesn't have a hook configured
    if hook_defs is None:
        return

    op_label = step_context.describe_op()

    # when there are multiple hooks set on a solid, the hooks will run sequentially for the solid.
    # * we will not able to execute hooks asynchronously until we drop python 2.
    for hook_def in hook_defs:
        hook_context = step_context.for_hook(hook_def)

        try:
            with user_code_error_boundary(
                HookExecutionError,
                lambda: f"Error occurred during the execution of hook_fn triggered for {op_label}",
                log_manager=hook_context.log,
            ):
                hook_execution_result = hook_def.hook_fn(hook_context, step_event_list)

        except HookExecutionError as hook_execution_error:
            # catch hook execution error and field a failure event instead of failing the pipeline run
            yield DagsterEvent.hook_errored(step_context, hook_execution_error)
            continue

        check.invariant(
            isinstance(hook_execution_result, HookExecutionResult),
            (
                "Error in hook {hook_name}: hook unexpectedly returned result {result} of "
                "type {type_}. Should be a HookExecutionResult"
            ).format(
                hook_name=hook_def.name,
                result=hook_execution_result,
                type_=type(hook_execution_result),
            ),
        )
        if hook_execution_result and hook_execution_result.is_skipped:
            # when the triggering condition didn't meet in the hook_fn, for instance,
            # a @success_hook decorated user-defined function won't run on a failed solid
            # but internally the hook_fn still runs, so we yield HOOK_SKIPPED event instead
            yield DagsterEvent.hook_skipped(step_context, hook_def)
        else:
            # hook_fn finishes successfully
            yield DagsterEvent.hook_completed(step_context, hook_def)
Exemplo n.º 3
0
def _validate_event(event: Any,
                    step_context: StepExecutionContext) -> SolidOutputUnion:
    if not isinstance(
            event,
        (
            DynamicOutput,
            Output,
            AssetMaterialization,
            Materialization,
            ExpectationResult,
            AssetObservation,
            DagsterEvent,
        ),
    ):
        raise DagsterInvariantViolationError((
            "Compute function for {described_node} yielded a value of type {type_} "
            "rather than an instance of Output, AssetMaterialization, or ExpectationResult."
            " Values yielded by {node_type}s must be wrapped in one of these types. If your "
            "{node_type} has a single output and yields no other events, you may want to use "
            "`return` instead of `yield` in the body of your {node_type} compute function. If "
            "you are already using `return`, and you expected to return a value of type "
            "{type_}, you may be inadvertently returning a generator rather than the value "
            "you expected.").format(
                described_node=step_context.describe_op(),
                type_=type(event),
                node_type=step_context.solid_def.node_type_str,
            ))

    return event
Exemplo n.º 4
0
def _type_checked_event_sequence_for_input(
        step_context: StepExecutionContext, input_name: str,
        input_value: Any) -> Iterator[DagsterEvent]:
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.str_param(input_name, "input_name")

    step_input = step_context.step.step_input_named(input_name)
    input_def = step_input.source.get_input_def(step_context.pipeline_def)
    dagster_type = input_def.dagster_type
    with user_code_error_boundary(
            DagsterTypeCheckError,
            lambda:
        (f'Error occurred while type-checking input "{input_name}" of solid '
         f'"{str(step_context.step.solid_handle)}", with Python type {type(input_value)} and '
         f"Dagster type {dagster_type.display_name}"),
    ):
        type_check = do_type_check(step_context.for_type(dagster_type),
                                   dagster_type, input_value)

    yield _create_step_input_event(step_context,
                                   input_name,
                                   type_check=type_check,
                                   success=type_check.success)

    if not type_check.success:
        raise DagsterTypeCheckDidNotPass(
            description=(f'Type check failed for step input "{input_name}" - '
                         f'expected type "{dagster_type.display_name}". '
                         f"Description: {type_check.description}."),
            metadata_entries=type_check.metadata_entries,
            dagster_type=dagster_type,
        )
Exemplo n.º 5
0
def _type_checked_event_sequence_for_input(
    step_context: StepExecutionContext,
    input_name: str,
    input_value: Any,
) -> Iterator[DagsterEvent]:
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.str_param(input_name, "input_name")

    step_input = step_context.step.step_input_named(input_name)
    input_def = step_context.solid_def.input_def_named(step_input.name)

    check.invariant(
        input_def.name == input_name,
        f"InputDefinition name does not match, expected {input_name} got {input_def.name}",
    )

    dagster_type = input_def.dagster_type
    type_check_context = step_context.for_type(dagster_type)
    input_type = type(input_value)
    op_label = step_context.describe_op()

    with user_code_error_boundary(
            DagsterTypeCheckError,
            lambda:
        (f'Error occurred while type-checking input "{input_name}" of {op_label}, with Python '
         f"type {input_type} and Dagster type {dagster_type.display_name}"),
            log_manager=type_check_context.log,
    ):
        type_check = do_type_check(type_check_context, dagster_type,
                                   input_value)

    yield _create_step_input_event(step_context,
                                   input_name,
                                   type_check=type_check,
                                   success=type_check.success)

    if not type_check.success:
        raise DagsterTypeCheckDidNotPass(
            description=(f'Type check failed for step input "{input_name}" - '
                         f'expected type "{dagster_type.display_name}". '
                         f"Description: {type_check.description}"),
            metadata_entries=type_check.metadata_entries,
            dagster_type=dagster_type,
        )
Exemplo n.º 6
0
def _yield_compute_results(step_context: StepExecutionContext,
                           inputs: Dict[str, Any],
                           compute_fn: Callable) -> Iterator[SolidOutputUnion]:
    check.inst_param(step_context, "step_context", StepExecutionContext)

    context = SolidExecutionContext(step_context)
    user_event_generator = compute_fn(context, inputs)

    if isinstance(user_event_generator, Output):
        raise DagsterInvariantViolationError((
            "Compute function for {described_node} returned an Output rather than "
            "yielding it. The compute_fn of the {node_type} must yield "
            "its results").format(
                described_node=step_context.describe_op(),
                node_type=step_context.solid_def.node_type_str,
            ))

    if user_event_generator is None:
        return

    if inspect.isasyncgen(user_event_generator):
        user_event_generator = gen_from_async_gen(user_event_generator)

    op_label = step_context.describe_op()

    for event in iterate_with_context(
            lambda: solid_execution_error_boundary(
                DagsterExecutionStepExecutionError,
                msg_fn=lambda: f"Error occurred while executing {op_label}:",
                step_context=step_context,
                step_key=step_context.step.key,
                op_def_name=step_context.solid_def.name,
                op_name=step_context.solid.name,
            ),
            user_event_generator,
    ):
        if context.has_events():
            yield from context.consume_events()
        yield _validate_event(event, step_context)

    if context.has_events():
        yield from context.consume_events()
Exemplo n.º 7
0
def _type_check_and_store_output(
    step_context: StepExecutionContext,
    output: Union[DynamicOutput, Output],
    input_lineage: List[AssetLineageInfo],
) -> Iterator[DagsterEvent]:

    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.inst_param(output, "output", (Output, DynamicOutput))
    check.list_param(input_lineage, "input_lineage", AssetLineageInfo)

    mapping_key = output.mapping_key if isinstance(output,
                                                   DynamicOutput) else None

    step_output_handle = StepOutputHandle(step_key=step_context.step.key,
                                          output_name=output.output_name,
                                          mapping_key=mapping_key)

    # If we are executing using the execute_in_process API, then we allow for the outputs of solids
    # to be directly captured to a dictionary after they are computed.
    if step_context.output_capture is not None:
        step_context.output_capture[step_output_handle] = output.value
    # capture output at the step level for threading the computed output values to hook context
    if step_context.step_output_capture is not None:
        step_context.step_output_capture[step_output_handle] = output.value

    version = (resolve_step_output_versions(
        step_context.pipeline_def, step_context.execution_plan,
        step_context.resolved_run_config).get(step_output_handle)
               if MEMOIZED_RUN_TAG
               in step_context.pipeline.get_definition().tags else None)

    for output_event in _type_check_output(step_context, step_output_handle,
                                           output, version):
        yield output_event

    for evt in _store_output(step_context, step_output_handle, output,
                             input_lineage):
        yield evt

    for evt in _create_type_materializations(step_context, output.output_name,
                                             output.value):
        yield evt
Exemplo n.º 8
0
def _dagster_event_sequence_for_step(
        step_context: StepExecutionContext) -> Iterator[DagsterEvent]:
    """
    Yield a sequence of dagster events for the given step with the step context.

    This function also processes errors. It handles a few error cases:

        (1) User code requests to be retried:
            A RetryRequested has been raised. We will either put the step in to
            up_for_retry state or a failure state depending on the number of previous attempts
            and the max_retries on the received RetryRequested.

        (2) User code fails successfully:
            The user-space code has raised a Failure which may have
            explicit metadata attached.

        (3) User code fails unexpectedly:
            The user-space code has raised an Exception. It has been
            wrapped in an exception derived from DagsterUserCodeException. In that
            case the original user exc_info is stashed on the exception
            as the original_exc_info property.

        (4) Execution interrupted:
            The run was interrupted in the middle of execution (typically by a
            termination request).

        (5) User error:
            The framework raised a DagsterError that indicates a usage error
            or some other error not communicated by a user-thrown exception. For example,
            if the user yields an object out of a compute function that is not a
            proper event (not an Output, ExpectationResult, etc).

        (6) Framework failure:
            An unexpected error occurred. This is a framework error. Either there
            has been an internal error in the framework OR we have forgotten to put a
            user code error boundary around invoked user-space code. These terminate
            the computation immediately (by re-raising).


    The "raised_dagster_errors" context manager can be used to force these errors to be
    re-raised and surfaced to the user. This is mostly to get sensible errors in test and
    ad-hoc contexts, rather than forcing the user to wade through the
    PipelineExecutionResult API in order to find the step that failed.

    For tools, however, this option should be false, and a sensible error message
    signaled to the user within that tool.
    """

    check.inst_param(step_context, "step_context", StepExecutionContext)

    try:
        if step_context.step_launcher:
            # info all on step_context - should deprecate second arg
            step_events = step_context.step_launcher.launch_step(
                step_context, step_context.previous_attempt_count)
        else:
            step_events = core_dagster_event_sequence_for_step(step_context)

        for step_event in check.generator(step_events):
            yield step_event

    # case (1) in top comment
    except RetryRequested as retry_request:
        retry_err_info = serializable_error_info_from_exc_info(sys.exc_info())

        if step_context.retry_mode.disabled:
            fail_err = SerializableErrorInfo(
                message="RetryRequested but retries are disabled",
                stack=retry_err_info.stack,
                cls_name=retry_err_info.cls_name,
                cause=retry_err_info.cause,
            )
            step_context.capture_step_exception(retry_request)
            yield DagsterEvent.step_failure_event(
                step_context=step_context,
                step_failure_data=StepFailureData(error=fail_err,
                                                  user_failure_data=None),
            )
        else:  # retries.enabled or retries.deferred
            prev_attempts = step_context.previous_attempt_count
            if prev_attempts >= retry_request.max_retries:
                fail_err = SerializableErrorInfo(
                    message="Exceeded max_retries of {}".format(
                        retry_request.max_retries),
                    stack=retry_err_info.stack,
                    cls_name=retry_err_info.cls_name,
                    cause=retry_err_info.cause,
                )
                step_context.capture_step_exception(retry_request)
                yield DagsterEvent.step_failure_event(
                    step_context=step_context,
                    step_failure_data=StepFailureData(error=fail_err,
                                                      user_failure_data=None),
                )
            else:
                yield DagsterEvent.step_retry_event(
                    step_context,
                    StepRetryData(
                        error=retry_err_info,
                        seconds_to_wait=retry_request.seconds_to_wait,
                    ),
                )

    # case (2) in top comment
    except Failure as failure:
        step_context.capture_step_exception(failure)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            UserFailureData(
                label="intentional-failure",
                description=failure.description,
                metadata_entries=failure.metadata_entries,
            ),
        )
        if step_context.raise_on_error:
            raise failure

    # case (3) in top comment
    except DagsterUserCodeExecutionError as dagster_user_error:
        step_context.capture_step_exception(dagster_user_error.user_exception)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.USER_CODE_ERROR,
        )

        if step_context.raise_on_error:
            raise dagster_user_error.user_exception

    # case (4) in top comment
    except (KeyboardInterrupt,
            DagsterExecutionInterruptedError) as interrupt_error:
        step_context.capture_step_exception(interrupt_error)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.INTERRUPT,
        )
        raise interrupt_error

    # case (5) in top comment
    except DagsterError as dagster_error:
        step_context.capture_step_exception(dagster_error)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.FRAMEWORK_ERROR,
        )

        if step_context.raise_on_error:
            raise dagster_error

    # case (6) in top comment
    except Exception as unexpected_exception:  # pylint: disable=broad-except
        step_context.capture_step_exception(unexpected_exception)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.UNEXPECTED_ERROR,
        )
        raise unexpected_exception
Exemplo n.º 9
0
def _store_output(
    step_context: StepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Union[Output, DynamicOutput],
    input_lineage: List[AssetLineageInfo],
) -> Iterator[DagsterEvent]:

    output_def = step_context.solid_def.output_def_named(
        step_output_handle.output_name)
    output_manager = step_context.get_io_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)

    with solid_execution_error_boundary(
            DagsterExecutionHandleOutputError,
            msg_fn=lambda:
        (f'Error occurred while handling output "{output_context.name}" of '
         f'step "{step_context.step.key}":'),
            step_context=step_context,
            step_key=step_context.step.key,
            output_name=output_context.name,
    ):
        handle_output_res = output_manager.handle_output(
            output_context, output.value)

    manager_materializations = []
    manager_metadata_entries = []
    if handle_output_res is not None:
        for elt in ensure_gen(handle_output_res):
            if isinstance(elt, AssetMaterialization):
                manager_materializations.append(elt)
            elif isinstance(elt, (EventMetadataEntry, PartitionMetadataEntry)):
                experimental_functionality_warning(
                    "Yielding metadata from an IOManager's handle_output() function"
                )
                manager_metadata_entries.append(elt)
            else:
                raise DagsterInvariantViolationError(
                    f"IO manager on output {output_def.name} has returned "
                    f"value {elt} of type {type(elt).__name__}. The return type can only be "
                    "one of AssetMaterialization, EventMetadataEntry, PartitionMetadataEntry."
                )

    # do not alter explicitly created AssetMaterializations
    for materialization in manager_materializations:
        yield DagsterEvent.asset_materialization(step_context, materialization,
                                                 input_lineage)

    asset_key, partitions = _asset_key_and_partitions_for_output(
        output_context, output_def, output_manager)
    if asset_key:
        for materialization in _get_output_asset_materializations(
                asset_key,
                partitions,
                output,
                output_def,
                manager_metadata_entries,
        ):
            yield DagsterEvent.asset_materialization(step_context,
                                                     materialization,
                                                     input_lineage)

    yield DagsterEvent.handled_output(
        step_context,
        output_name=step_output_handle.output_name,
        manager_key=output_def.io_manager_key,
        message_override=
        f'Handled input "{step_output_handle.output_name}" using intermediate storage'
        if isinstance(output_manager, IntermediateStorageAdapter) else None,
        metadata_entries=[
            entry for entry in manager_metadata_entries
            if isinstance(entry, EventMetadataEntry)
        ],
    )
Exemplo n.º 10
0
def _step_output_error_checked_user_event_sequence(
    step_context: StepExecutionContext, user_event_sequence: Iterator[SolidOutputUnion]
) -> Iterator[SolidOutputUnion]:
    """
    Process the event sequence to check for invariant violations in the event
    sequence related to Output events emitted from the compute_fn.

    This consumes and emits an event sequence.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.generator_param(user_event_sequence, "user_event_sequence")

    step = step_context.step
    op_label = step_context.describe_op()
    output_names = list([output_def.name for output_def in step.step_outputs])

    for user_event in user_event_sequence:
        if not isinstance(user_event, (Output, DynamicOutput)):
            yield user_event
            continue

        # do additional processing on Outputs
        output = user_event
        if not step.has_step_output(cast(str, output.output_name)):
            raise DagsterInvariantViolationError(
                f'Core compute for {op_label} returned an output "{output.output_name}" that does '
                f"not exist. The available outputs are {output_names}"
            )

        step_output = step.step_output_named(cast(str, output.output_name))
        output_def = step_context.pipeline_def.get_solid(step_output.solid_handle).output_def_named(
            step_output.name
        )

        if isinstance(output, Output):
            if step_context.has_seen_output(output.output_name):
                raise DagsterInvariantViolationError(
                    f'Compute for {op_label} returned an output "{output.output_name}" multiple '
                    "times"
                )

            if output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f'Compute for {op_label} for output "{output.output_name}" defined as dynamic '
                    "must yield DynamicOutput, got Output."
                )

            step_context.observe_output(output.output_name)

            metadata = step_context.get_output_metadata(output.output_name)
            output = Output(
                value=output.value,
                output_name=output.output_name,
                metadata_entries=output.metadata_entries
                + normalize_metadata(cast(Dict[str, Any], metadata), []),
            )
        else:
            if not output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f"Compute for {op_label} yielded a DynamicOutput, but did not use "
                    "DynamicOutputDefinition."
                )
            if step_context.has_seen_output(output.output_name, output.mapping_key):
                raise DagsterInvariantViolationError(
                    f"Compute for {op_label} yielded a DynamicOutput with mapping_key "
                    f'"{output.mapping_key}" multiple times.'
                )
            step_context.observe_output(output.output_name, output.mapping_key)
            metadata = step_context.get_output_metadata(
                output.output_name, mapping_key=output.mapping_key
            )
            output = DynamicOutput(
                value=output.value,
                output_name=output.output_name,
                metadata_entries=output.metadata_entries
                + normalize_metadata(cast(Dict[str, Any], metadata), []),
                mapping_key=output.mapping_key,
            )

        yield output

    for step_output in step.step_outputs:
        step_output_def = step_context.solid_def.output_def_named(step_output.name)
        if not step_context.has_seen_output(step_output_def.name) and not step_output_def.optional:
            if step_output_def.dagster_type.kind == DagsterTypeKind.NOTHING:
                step_context.log.info(
                    f'Emitting implicit Nothing for output "{step_output_def.name}" on {op_label}'
                )
                yield Output(output_name=step_output_def.name, value=None)
            elif not step_output_def.is_dynamic:
                raise DagsterStepOutputNotFoundError(
                    (
                        f"Core compute for {op_label} did not return an output for non-optional "
                        f'output "{step_output_def.name}"'
                    ),
                    step_key=step.key,
                    output_name=step_output_def.name,
                )
Exemplo n.º 11
0
def _store_output(
    step_context: StepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Union[Output, DynamicOutput],
    input_lineage: List[AssetLineageInfo],
) -> Iterator[DagsterEvent]:

    output_def = step_context.solid_def.output_def_named(step_output_handle.output_name)
    output_manager = step_context.get_io_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)

    manager_materializations = []
    manager_metadata_entries: List[Union[PartitionMetadataEntry, MetadataEntry]] = []

    # output_manager.handle_output is either a generator function, or a normal function with or
    # without a return value. In the case that handle_output is a normal function, we need to
    # catch errors should they be raised before a return value. We can do this by wrapping
    # handle_output in a generator so that errors will be caught within iterate_with_context.

    if not inspect.isgeneratorfunction(output_manager.handle_output):

        def _gen_fn():
            gen_output = output_manager.handle_output(output_context, output.value)
            for event in output_context.consume_events():
                yield event
            if gen_output:
                yield gen_output

        handle_output_gen = _gen_fn()
    else:
        handle_output_gen = output_manager.handle_output(output_context, output.value)

    for elt in iterate_with_context(
        lambda: solid_execution_error_boundary(
            DagsterExecutionHandleOutputError,
            msg_fn=lambda: (
                f'Error occurred while handling output "{output_context.name}" of '
                f'step "{step_context.step.key}":'
            ),
            step_context=step_context,
            step_key=step_context.step.key,
            output_name=output_context.name,
        ),
        handle_output_gen,
    ):
        for event in output_context.consume_events():
            yield event

        manager_metadata_entries.extend(output_context.consume_logged_metadata_entries())
        if isinstance(elt, DagsterEvent):
            yield elt
        elif isinstance(elt, AssetMaterialization):
            manager_materializations.append(elt)
        elif isinstance(elt, (MetadataEntry, PartitionMetadataEntry)):
            experimental_functionality_warning(
                "Yielding metadata from an IOManager's handle_output() function"
            )
            manager_metadata_entries.append(elt)
        else:
            raise DagsterInvariantViolationError(
                f"IO manager on output {output_def.name} has returned "
                f"value {elt} of type {type(elt).__name__}. The return type can only be "
                "one of AssetMaterialization, MetadataEntry, PartitionMetadataEntry."
            )

    for event in output_context.consume_events():
        yield event

    manager_metadata_entries.extend(output_context.consume_logged_metadata_entries())
    # do not alter explicitly created AssetMaterializations
    for materialization in manager_materializations:
        if materialization.metadata_entries and manager_metadata_entries:
            raise DagsterInvariantViolationError(
                f"When handling output '{output_context.name}' of {output_context.solid_def.node_type_str} '{output_context.solid_def.name}', received a materialization with metadata, while context.add_output_metadata was used within the same call to handle_output. Due to potential conflicts, this is not allowed. Please specify metadata in one place within the `handle_output` function."
            )
        if manager_metadata_entries:
            materialization = AssetMaterialization(
                asset_key=materialization.asset_key,
                description=materialization.description,
                metadata_entries=manager_metadata_entries,
                partition=materialization.partition,
                tags=materialization.tags,
                metadata=None,
            )
        yield DagsterEvent.asset_materialization(step_context, materialization, input_lineage)

    asset_key, partitions = _asset_key_and_partitions_for_output(
        output_context, output_def, output_manager
    )
    if asset_key:
        for materialization in _get_output_asset_materializations(
            asset_key,
            partitions,
            output,
            output_def,
            manager_metadata_entries,
        ):
            yield DagsterEvent.asset_materialization(step_context, materialization, input_lineage)

    yield DagsterEvent.handled_output(
        step_context,
        output_name=step_output_handle.output_name,
        manager_key=output_def.io_manager_key,
        metadata_entries=[
            entry for entry in manager_metadata_entries if isinstance(entry, MetadataEntry)
        ],
    )
Exemplo n.º 12
0
def core_dagster_event_sequence_for_step(
    step_context: StepExecutionContext,
) -> Iterator[DagsterEvent]:
    """
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)

    if step_context.previous_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context, step_context.previous_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}

    for step_input in step_context.step.step_inputs:
        input_def = step_input.source.get_input_def(step_context.pipeline_def)
        dagster_type = input_def.dagster_type

        if dagster_type.kind == DagsterTypeKind.NOTHING:
            continue
        for event_or_input_value in ensure_gen(step_input.source.load_input_object(step_context)):
            if isinstance(event_or_input_value, DagsterEvent):
                yield event_or_input_value
            else:
                check.invariant(step_input.name not in inputs)
                inputs[step_input.name] = event_or_input_value

    for input_name, input_value in inputs.items():
        for evt in check.generator(
            _type_checked_event_sequence_for_input(step_context, input_name, input_value)
        ):
            yield evt

    input_lineage = step_context.get_input_lineage()

    # The core execution loop expects a compute generator in a specific format: a generator that
    # takes a context and dictionary of inputs as input, yields output events. If a solid definition
    # was generated from the @solid or @lambda_solid decorator, then compute_fn needs to be coerced
    # into this format. If the solid definition was created directly, then it is expected that the
    # compute_fn is already in this format.
    if isinstance(step_context.solid_def.compute_fn, DecoratedSolidFunction):
        core_gen = create_solid_compute_wrapper(step_context.solid_def)
    else:
        core_gen = step_context.solid_def.compute_fn

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            execute_core_compute(
                step_context,
                inputs,
                core_gen,
            )
        )

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
            _step_output_error_checked_user_event_sequence(step_context, user_event_sequence)
        ):
            if isinstance(user_event, DagsterEvent):
                yield user_event
            elif isinstance(user_event, (Output, DynamicOutput)):
                for evt in _type_check_and_store_output(step_context, user_event, input_lineage):
                    yield evt
            # for now, I'm ignoring AssetMaterializations yielded manually, but we might want
            # to do something with these in the above path eventually
            elif isinstance(user_event, (AssetMaterialization, Materialization)):
                yield DagsterEvent.asset_materialization(step_context, user_event, input_lineage)
            elif isinstance(user_event, AssetObservation):
                yield DagsterEvent.asset_observation(step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier".format(
                        event=user_event
                    )
                )

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis)
    )
Exemplo n.º 13
0
def _store_output(
    step_context: StepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Union[Output, DynamicOutput],
    input_lineage: List[AssetLineageInfo],
) -> Iterator[DagsterEvent]:

    output_def = step_context.solid_def.output_def_named(step_output_handle.output_name)
    output_manager = step_context.get_io_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)

    manager_materializations = []
    manager_metadata_entries = []

    # output_manager.handle_output is either a generator function, or a normal function with or
    # without a return value. In the case that handle_output is a normal function, we need to
    # catch errors should they be raised before a return value. We can do this by wrapping
    # handle_output in a generator so that errors will be caught within iterate_with_context.

    if not inspect.isgeneratorfunction(output_manager.handle_output):

        def _gen_fn():
            gen_output = output_manager.handle_output(output_context, output.value)
            if gen_output:
                yield gen_output

        handle_output_gen = _gen_fn()
    else:
        handle_output_gen = output_manager.handle_output(output_context, output.value)

    for elt in iterate_with_context(
        lambda: solid_execution_error_boundary(
            DagsterExecutionHandleOutputError,
            msg_fn=lambda: (
                f'Error occurred while handling output "{output_context.name}" of '
                f'step "{step_context.step.key}":'
            ),
            step_context=step_context,
            step_key=step_context.step.key,
            output_name=output_context.name,
        ),
        handle_output_gen,
    ):
        if isinstance(elt, AssetMaterialization):
            manager_materializations.append(elt)
        elif isinstance(elt, (EventMetadataEntry, PartitionMetadataEntry)):
            experimental_functionality_warning(
                "Yielding metadata from an IOManager's handle_output() function"
            )
            manager_metadata_entries.append(elt)
        else:
            raise DagsterInvariantViolationError(
                f"IO manager on output {output_def.name} has returned "
                f"value {elt} of type {type(elt).__name__}. The return type can only be "
                "one of AssetMaterialization, EventMetadataEntry, PartitionMetadataEntry."
            )

    # do not alter explicitly created AssetMaterializations
    for materialization in manager_materializations:
        yield DagsterEvent.asset_materialization(step_context, materialization, input_lineage)

    asset_key, partitions = _asset_key_and_partitions_for_output(
        output_context, output_def, output_manager
    )
    if asset_key:
        for materialization in _get_output_asset_materializations(
            asset_key,
            partitions,
            output,
            output_def,
            manager_metadata_entries,
        ):
            yield DagsterEvent.asset_materialization(step_context, materialization, input_lineage)

    yield DagsterEvent.handled_output(
        step_context,
        output_name=step_output_handle.output_name,
        manager_key=output_def.io_manager_key,
        metadata_entries=[
            entry for entry in manager_metadata_entries if isinstance(entry, EventMetadataEntry)
        ],
    )