Пример #1
0
def _execute_steps_core_loop(step_context, inputs, intermediates_manager):
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.dict_param(inputs, 'inputs', key_type=str)
    check.inst_param(intermediates_manager, 'intermediates_manager',
                     IntermediatesManager)

    evaluated_inputs = {}
    # do runtime type checks of inputs versus step inputs
    for input_name, input_value in inputs.items():
        evaluated_inputs[input_name] = _get_evaluated_input(
            step_context.step, input_name, input_value)
    yield DagsterEvent.step_start_event(step_context)

    with time_execution_scope() as timer_result:
        step_output_iterator = check.generator(
            _iterate_step_outputs_within_boundary(step_context,
                                                  evaluated_inputs))
    for step_output in check.generator(
            _error_check_step_outputs(step_context, step_output_iterator)):

        if isinstance(step_output, StepOutputValue):
            yield _create_step_output_event(step_context, step_output,
                                            intermediates_manager)
        elif isinstance(step_output, Materialization):
            yield DagsterEvent.step_materialization(step_context, step_output)
        elif isinstance(step_output, ExpectationResult):
            yield DagsterEvent.step_expectation_result(step_context,
                                                       step_output)
        else:
            check.failed(
                'Unexpected step_output {step_output}, should have been caught earlier'
                .format(step_output=step_output))

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))
Пример #2
0
def _core_dagster_event_sequence_for_step(step_context):
    '''
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)

    yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    for input_name, input_value in _input_values_from_intermediates_manager(
            step_context).items():
        if isinstance(input_value, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(input_value,
                                                  value_name=input_name))
            inputs[input_name] = input_value.obj
        else:
            inputs[input_name] = input_value

    for input_name, input_value in inputs.items():
        for evt in check.generator(
                _type_checked_event_sequence_for_input(step_context,
                                                       input_name,
                                                       input_value)):
            yield evt

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
                _step_output_error_checked_user_event_sequence(
                    step_context, user_event_sequence)):

            if isinstance(user_event, Output):
                for evt in _create_step_events_for_output(
                        step_context, user_event):
                    yield evt
            elif isinstance(user_event, Materialization):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    'Unexpected event {event}, should have been caught earlier'
                    .format(event=user_event))

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))
Пример #3
0
def _core_dagster_event_sequence_for_step(step_context, inputs, intermediates_manager):
    '''
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.dict_param(inputs, 'inputs', key_type=str)
    check.inst_param(intermediates_manager, 'intermediates_manager', IntermediatesManager)

    evaluated_inputs = {}
    # do runtime type checks of inputs versus step inputs
    for input_name, input_value in inputs.items():
        evaluated_inputs[input_name] = _get_evaluated_input(
            step_context.step, input_name, input_value
        )
    yield DagsterEvent.step_start_event(step_context)

    with time_execution_scope() as timer_result:
        event_sequence = check.generator(
            _event_sequence_for_step_compute_fn(step_context, evaluated_inputs)
        )

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for event in check.generator(
            _step_output_error_checked_event_sequence(step_context, event_sequence)
        ):

            if isinstance(event, Result):
                yield _create_step_output_event(step_context, event, intermediates_manager)
            elif isinstance(event, Materialization):
                yield DagsterEvent.step_materialization(step_context, event)
            elif isinstance(event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, event)
            else:
                check.failed(
                    'Unexpected event {event}, should have been caught earlier'.format(event=event)
                )

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis)
    )
Пример #4
0
    def log_event(self, event: UserEvent) -> None:
        """Log an AssetMaterialization, AssetObservation, or ExpectationResult from within the body of an op.

        Events logged with this method will appear in the list of DagsterEvents, as well as the event log.

        Args:
            event (Union[AssetMaterialization, Materialization, AssetObservation, ExpectationResult]): The event to log.

        **Examples:**

        .. code-block:: python

            from dagster import op, AssetMaterialization

            @op
            def log_materialization(context):
                context.log_event(AssetMaterialization("foo"))
        """

        if isinstance(event, (AssetMaterialization, Materialization)):
            self._events.append(
                DagsterEvent.asset_materialization(
                    self._step_execution_context,
                    event,
                    self._step_execution_context.get_input_lineage(),
                ))
        elif isinstance(event, AssetObservation):
            self._events.append(
                DagsterEvent.asset_observation(self._step_execution_context,
                                               event))
        elif isinstance(event, ExpectationResult):
            self._events.append(
                DagsterEvent.step_expectation_result(
                    self._step_execution_context, event))
        else:
            check.failed("Unexpected event {event}".format(event=event))
Пример #5
0
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")
    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context,
                                                prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    for input_name, input_value in _input_values_from_intermediate_storage(
            step_context):
        if isinstance(input_value, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(input_value,
                                                  value_name=input_name))
            inputs[input_name] = input_value.obj
        elif isinstance(input_value, MultipleStepOutputsListWrapper):
            for op in input_value:
                yield DagsterEvent.object_store_operation(
                    step_context,
                    ObjectStoreOperation.serializable(op,
                                                      value_name=input_name))
            inputs[input_name] = [op.obj for op in input_value]
        else:
            inputs[input_name] = input_value

    for input_name, input_value in inputs.items():
        for evt in check.generator(
                _type_checked_event_sequence_for_input(step_context,
                                                       input_name,
                                                       input_value)):
            yield evt

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
                _step_output_error_checked_user_event_sequence(
                    step_context, user_event_sequence)):

            if isinstance(user_event, Output):
                for evt in _create_step_events_for_output(
                        step_context, user_event):
                    yield evt
            elif isinstance(user_event,
                            (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    # We only want to log exactly one step success event or failure event if possible,
    # so wait to handle any interrupts (that normally log a failure event) until the success
    # event has finished
    with delay_interrupts():
        yield DagsterEvent.step_success_event(
            step_context, StepSuccessData(duration_ms=timer_result.millis))
Пример #6
0
def core_dagster_event_sequence_for_step(
        step_context: SystemStepExecutionContext,
        prior_attempt_count: int) -> Iterator[DagsterEvent]:
    """
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")
    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context,
                                                prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}

    for step_input in step_context.step.step_inputs:
        input_def = step_input.source.get_input_def(step_context.pipeline_def)
        dagster_type = input_def.dagster_type

        if dagster_type.kind == DagsterTypeKind.NOTHING:
            continue

        for event_or_input_value in ensure_gen(
                step_input.source.load_input_object(step_context)):
            if isinstance(event_or_input_value, DagsterEvent):
                yield event_or_input_value
            else:
                check.invariant(step_input.name not in inputs)
                inputs[step_input.name] = event_or_input_value

    for input_name, input_value in inputs.items():
        for evt in check.generator(
                _type_checked_event_sequence_for_input(step_context,
                                                       input_name,
                                                       input_value)):
            yield evt

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
                _step_output_error_checked_user_event_sequence(
                    step_context, user_event_sequence)):

            if isinstance(user_event, (Output, DynamicOutput)):
                for evt in _type_check_and_store_output(
                        step_context, user_event):
                    yield evt
            elif isinstance(user_event,
                            (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))
Пример #7
0
def core_dagster_event_sequence_for_step(
    step_context: StepExecutionContext,
) -> Iterator[DagsterEvent]:
    """
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)

    if step_context.previous_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context, step_context.previous_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}

    for step_input in step_context.step.step_inputs:
        input_def = step_input.source.get_input_def(step_context.pipeline_def)
        dagster_type = input_def.dagster_type

        if dagster_type.kind == DagsterTypeKind.NOTHING:
            continue
        for event_or_input_value in ensure_gen(step_input.source.load_input_object(step_context)):
            if isinstance(event_or_input_value, DagsterEvent):
                yield event_or_input_value
            else:
                check.invariant(step_input.name not in inputs)
                inputs[step_input.name] = event_or_input_value

    for input_name, input_value in inputs.items():
        for evt in check.generator(
            _type_checked_event_sequence_for_input(step_context, input_name, input_value)
        ):
            yield evt

    input_lineage = step_context.get_input_lineage()

    # The core execution loop expects a compute generator in a specific format: a generator that
    # takes a context and dictionary of inputs as input, yields output events. If a solid definition
    # was generated from the @solid or @lambda_solid decorator, then compute_fn needs to be coerced
    # into this format. If the solid definition was created directly, then it is expected that the
    # compute_fn is already in this format.
    if isinstance(step_context.solid_def.compute_fn, DecoratedSolidFunction):
        core_gen = create_solid_compute_wrapper(step_context.solid_def)
    else:
        core_gen = step_context.solid_def.compute_fn

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            execute_core_compute(
                step_context,
                inputs,
                core_gen,
            )
        )

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
            _step_output_error_checked_user_event_sequence(step_context, user_event_sequence)
        ):
            if isinstance(user_event, DagsterEvent):
                yield user_event
            elif isinstance(user_event, (Output, DynamicOutput)):
                for evt in _type_check_and_store_output(step_context, user_event, input_lineage):
                    yield evt
            # for now, I'm ignoring AssetMaterializations yielded manually, but we might want
            # to do something with these in the above path eventually
            elif isinstance(user_event, (AssetMaterialization, Materialization)):
                yield DagsterEvent.asset_materialization(step_context, user_event, input_lineage)
            elif isinstance(user_event, AssetObservation):
                yield DagsterEvent.asset_observation(step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier".format(
                        event=user_event
                    )
                )

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis)
    )
Пример #8
0
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")
    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context,
                                                prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    for input_name, input_value in _load_input_values(step_context):
        # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
        if isinstance(input_value, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(input_value,
                                                  value_name=input_name))
            inputs[input_name] = input_value.obj
        elif isinstance(input_value, FanInStepInputValuesWrapper):
            final_values = []
            for inner_value in input_value:
                # inner value is either a store interaction
                # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
                if isinstance(inner_value, ObjectStoreOperation):
                    yield DagsterEvent.object_store_operation(
                        step_context,
                        ObjectStoreOperation.serializable(
                            inner_value, value_name=input_name),
                    )
                    final_values.append(inner_value.obj)
                elif isinstance(inner_value, AssetStoreOperation):
                    yield DagsterEvent.asset_store_operation(
                        step_context,
                        AssetStoreOperation.serializable(inner_value))
                    final_values.append(inner_value.obj)
                # or the value directly
                else:
                    final_values.append(inner_value)

            inputs[input_name] = final_values
        elif isinstance(input_value, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(
                step_context, AssetStoreOperation.serializable(input_value))
            inputs[input_name] = input_value.obj
        else:
            inputs[input_name] = input_value

    for input_name, input_value in inputs.items():
        for evt in check.generator(
                _type_checked_event_sequence_for_input(step_context,
                                                       input_name,
                                                       input_value)):
            yield evt

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
                _step_output_error_checked_user_event_sequence(
                    step_context, user_event_sequence)):

            if isinstance(user_event, (Output, DynamicOutput)):
                for evt in _create_step_events_for_output(
                        step_context, user_event):
                    yield evt
            elif isinstance(user_event,
                            (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))