Пример #1
0
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Execute the step described by ``step_context`` and yield its DagsterEvents.

    The emitted sequence is, in order: a start (or restarted) event, one
    store-operation event per input that was loaded through an object/asset
    store, the events produced by ``_type_checked_event_sequence_for_input``
    for every input, the events derived from what the user compute function
    yields, and finally a step-success event carrying the measured duration.

    Exceptions raised anywhere along the way are not caught here; they
    propagate to the caller.

    Args:
        step_context (SystemStepExecutionContext): Context of the step to run.
        prior_attempt_count (int): Number of earlier attempts of this step.
            Any value above zero yields a "restarted" event instead of a
            "start" event.

    Yields:
        DagsterEvent: The events described above.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")
    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context,
                                                prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    for input_name, input_value in _input_values_from_intermediate_storage(
            step_context):
        if isinstance(input_value, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(input_value,
                                                  value_name=input_name))
            inputs[input_name] = input_value.obj
        elif isinstance(input_value, MultipleStepOutputsListWrapper):
            # Classify each wrapped operation individually. The checks must
            # look at ``op``: in this branch ``input_value`` is the wrapper
            # and cannot be an ObjectStoreOperation, so testing it would
            # silently drop the store-operation events for fan-in inputs.
            for op in input_value:
                if isinstance(op, ObjectStoreOperation):
                    yield DagsterEvent.object_store_operation(
                        step_context,
                        ObjectStoreOperation.serializable(
                            op, value_name=input_name))
                elif isinstance(op, AssetStoreOperation):
                    yield DagsterEvent.asset_store_operation(
                        step_context, op)
            # The compute function receives the unwrapped objects as a list.
            inputs[input_name] = [op.obj for op in input_value]
        elif isinstance(input_value, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(step_context, input_value)
            inputs[input_name] = input_value.obj
        else:
            # Anything else is already the plain input value.
            inputs[input_name] = input_value

    for input_name, input_value in inputs.items():
        for evt in check.generator(
                _type_checked_event_sequence_for_input(step_context,
                                                       input_name,
                                                       input_value)):
            yield evt

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
                _step_output_error_checked_user_event_sequence(
                    step_context, user_event_sequence)):

            if isinstance(user_event, Output):
                for evt in _create_step_events_for_output(
                        step_context, user_event):
                    yield evt
            elif isinstance(user_event,
                            (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))
Пример #2
0
def _dagster_event_sequence_for_step(step_context, retries):
    '''
    Run one step and yield its dagster events, turning errors into events.

    Events come from the step launcher when the step context has one, and
    from core_dagster_event_sequence_for_step otherwise. Exceptions raised
    while producing them are handled by the clauses below:

        (1) RetryRequested - user code asks to be retried:
            Yields a step-retry event, unless retries are disabled or the
            attempt count recorded in ``retries`` has reached the request's
            max_retries; in those two situations a step-failure event is
            yielded instead.

        (2) Failure - user code fails on purpose:
            Yields a step-failure event that carries the Failure's
            description and metadata entries.

        (3) DagsterUserCodeExecutionError - user code fails unexpectedly:
            The failure event is built from the wrapped exception's
            original_exc_info.

        (4) DagsterError - usage error reported by the framework:
            The failure event is built from the current exc_info.

        (5) Any other Exception, or KeyboardInterrupt:
            A failure event is yielded and the exception is always re-raised.

    For cases (2)-(4) the exception is re-raised after the failure event only
    when ``step_context.raise_on_error`` is set; case (3) re-raises the
    original user exception rather than the wrapper.
    '''

    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.inst_param(retries, 'retries', Retries)
    try:
        attempts_so_far = retries.get_attempt_count(step_context.step.key)
        if not step_context.step_launcher:
            event_source = core_dagster_event_sequence_for_step(
                step_context, attempts_so_far)
        else:
            event_source = step_context.step_launcher.launch_step(
                step_context, attempts_so_far)

        for evt in check.generator(event_source):
            yield evt

    # case (1) in top comment
    except RetryRequested as retry_request:
        retry_err_info = serializable_error_info_from_exc_info(sys.exc_info())

        # A message is only set when the retry request must become a failure.
        failure_message = None
        if retries.disabled:
            failure_message = 'RetryRequested but retries are disabled'
        # retries.enabled or retries.deferred
        elif retries.get_attempt_count(
                step_context.step.key) >= retry_request.max_retries:
            failure_message = 'Exceeded max_retries of {}'.format(
                retry_request.max_retries)

        if failure_message is None:
            yield DagsterEvent.step_retry_event(
                step_context,
                StepRetryData(
                    error=retry_err_info,
                    seconds_to_wait=retry_request.seconds_to_wait,
                ),
            )
        else:
            # Same stack/class/cause as the retry request, different message.
            failure_info = SerializableErrorInfo(
                message=failure_message,
                stack=retry_err_info.stack,
                cls_name=retry_err_info.cls_name,
                cause=retry_err_info.cause,
            )
            yield DagsterEvent.step_failure_event(
                step_context=step_context,
                step_failure_data=StepFailureData(error=failure_info,
                                                  user_failure_data=None),
            )

    # case (2) in top comment
    except Failure as user_failure:
        failure_details = UserFailureData(
            label='intentional-failure',
            description=user_failure.description,
            metadata_entries=user_failure.metadata_entries,
        )
        yield _step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            failure_details,
        )
        if step_context.raise_on_error:
            raise user_failure

    # case (3) in top comment
    except DagsterUserCodeExecutionError as wrapped_error:
        yield _step_failure_event_from_exc_info(
            step_context,
            wrapped_error.original_exc_info,
        )

        if step_context.raise_on_error:
            raise wrapped_error.user_exception

    # case (4) in top comment
    except DagsterError as framework_usage_error:
        yield _step_failure_event_from_exc_info(step_context, sys.exc_info())

        if step_context.raise_on_error:
            raise framework_usage_error

    # case (5) in top comment
    except (Exception, KeyboardInterrupt) as unexpected:  # pylint: disable=broad-except
        yield _step_failure_event_from_exc_info(step_context, sys.exc_info())

        raise unexpected
Пример #3
0
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Run the step for ``step_context`` and yield the resulting DagsterEvents.

    Emits a start (or restarted) event, store-operation events for inputs
    loaded through an object/asset store, the events from
    ``_type_checked_event_sequence_for_input`` for every input, the events
    derived from what the compute function yields, and a closing step-success
    event with the measured duration. Exceptions are not caught here.

    Args:
        step_context (SystemStepExecutionContext): Context of the step to run.
        prior_attempt_count (int): Number of earlier attempts; a positive
            value yields a "restarted" event instead of a "start" event.

    Yields:
        DagsterEvent: The events described above, in order.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    if prior_attempt_count <= 0:
        yield DagsterEvent.step_start_event(step_context)
    else:
        yield DagsterEvent.step_restarted_event(step_context,
                                                prior_attempt_count)

    resolved_inputs = {}
    for name, loaded in _load_input_values(step_context):
        # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
        if isinstance(loaded, ObjectStoreOperation):
            serialized_op = ObjectStoreOperation.serializable(loaded,
                                                              value_name=name)
            yield DagsterEvent.object_store_operation(step_context,
                                                      serialized_op)
            resolved_inputs[name] = loaded.obj
            continue

        if isinstance(loaded, FanInStepInputValuesWrapper):
            gathered = []
            for member in loaded:
                # Each member is either a store interaction ...
                # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
                if isinstance(member, ObjectStoreOperation):
                    serialized_member = ObjectStoreOperation.serializable(
                        member, value_name=name)
                    yield DagsterEvent.object_store_operation(
                        step_context, serialized_member)
                    gathered.append(member.obj)
                    continue
                if isinstance(member, AssetStoreOperation):
                    yield DagsterEvent.asset_store_operation(
                        step_context, member)
                    gathered.append(member.obj)
                    continue
                # ... or the value itself.
                gathered.append(member)

            resolved_inputs[name] = gathered
            continue

        if isinstance(loaded, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(step_context, loaded)
            resolved_inputs[name] = loaded.obj
            continue

        # Plain value: hand it to the compute function unchanged.
        resolved_inputs[name] = loaded

    for name, value in resolved_inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, name, value))
        for type_check_event in type_check_events:
            yield type_check_event

    with time_execution_scope() as timer_result:
        raw_user_events = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context,
                                                     resolved_inputs))
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(
                step_context, raw_user_events))

        # This loop has to stay inside the timer block so that the time spent
        # in user code is included in the recorded duration.
        for emitted in checked_user_events:
            if isinstance(emitted, (Output, DynamicOutput)):
                for output_event in _create_step_events_for_output(
                        step_context, emitted):
                    yield output_event
                continue

            if isinstance(emitted, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(step_context, emitted)
                continue

            if isinstance(emitted, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, emitted)
                continue

            check.failed(
                "Unexpected event {event}, should have been caught earlier"
                .format(event=emitted))

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))
Пример #4
0
def core_dagster_event_sequence_for_step(
        step_context: SystemStepExecutionContext,
        prior_attempt_count: int) -> Iterator[DagsterEvent]:
    """
    Run the step for ``step_context`` and yield the resulting DagsterEvents.

    Emits a start (or restarted) event, any DagsterEvents produced while
    loading the step inputs, the events from
    ``_type_checked_event_sequence_for_input`` for every loaded input, the
    events derived from what the compute function yields, and a closing
    step-success event with the measured duration. Exceptions are not caught
    here; they propagate to the caller.

    Args:
        step_context: Context of the step to run.
        prior_attempt_count: Number of earlier attempts; a positive value
            yields a "restarted" event instead of a "start" event.

    Yields:
        The events described above, in order.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    if prior_attempt_count <= 0:
        yield DagsterEvent.step_start_event(step_context)
    else:
        yield DagsterEvent.step_restarted_event(step_context,
                                                prior_attempt_count)

    loaded_inputs = {}

    for step_input in step_context.step.step_inputs:
        # Inputs of the NOTHING kind are not loaded at all.
        if step_input.dagster_type.kind == DagsterTypeKind.NOTHING:
            continue

        load_results = ensure_gen(
            step_input.source.load_input_object(step_context))
        for item in load_results:
            if not isinstance(item, DagsterEvent):
                # A non-event item is the input value; it may be set only once.
                check.invariant(step_input.name not in loaded_inputs)
                loaded_inputs[step_input.name] = item
            else:
                yield item

    for name, value in loaded_inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, name, value))
        for type_check_event in type_check_events:
            yield type_check_event

    with time_execution_scope() as timer_result:
        raw_user_events = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context,
                                                     loaded_inputs))
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(
                step_context, raw_user_events))

        # This loop has to stay inside the timer block so that the time spent
        # in user code is included in the recorded duration.
        for emitted in checked_user_events:
            if isinstance(emitted, (Output, DynamicOutput)):
                for output_event in _create_step_events_for_output(
                        step_context, emitted):
                    yield output_event
                continue

            if isinstance(emitted, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(step_context, emitted)
                continue

            if isinstance(emitted, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, emitted)
                continue

            check.failed(
                "Unexpected event {event}, should have been caught earlier"
                .format(event=emitted))

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))
Пример #5
0
def inner_plan_execution_iterator(
    pipeline_context: PlanExecutionContext, execution_plan: ExecutionPlan
) -> Iterator[DagsterEvent]:
    """
    Execute the steps of ``execution_plan`` one at a time, yielding events.

    For each step handed out by the active execution this generator:
    verifies that every required resource is present on the step context,
    tries to start compute-log capture (a failure there is logged, not
    raised), yields the step's events while reporting each one back to the
    active execution, yields the plan-level events from
    ``plan_events_iterator``, and finally yields whatever ``_trigger_hook``
    produces for the collected events.

    Args:
        pipeline_context: Plan-level execution context; it also supplies the
            retry mode and the instance whose compute log manager is used.
        execution_plan: The plan whose steps are executed.

    Yields:
        Every DagsterEvent produced while running the plan.
    """
    check.inst_param(pipeline_context, "pipeline_context", PlanExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    with execution_plan.start(retry_mode=pipeline_context.retry_mode) as active_execution:

        # It would be good to implement a reference tracking algorithm here to
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        while not active_execution.is_complete:
            step = active_execution.get_next_step()
            attempt_count = active_execution.retry_state.get_attempt_count(step.key)
            step_context = cast(
                StepExecutionContext,
                pipeline_context.for_step(step, attempt_count),
            )
            collected_events = []

            absent_resource_keys = [
                key
                for key in step_context.required_resource_keys
                if not hasattr(step_context.resources, key)
            ]
            check.invariant(
                not absent_resource_keys,
                (
                    "Expected step context for solid {solid_name} to have all required resources, but "
                    "missing {missing_resources}."
                ).format(
                    solid_name=step_context.solid.name,
                    missing_resources=absent_resource_keys,
                ),
            )

            # Everything the step logs is captured while this stack is open.
            with ExitStack() as stack:
                capture_setup_error = None
                try:
                    log_watcher = pipeline_context.instance.compute_log_manager.watch(
                        step_context.pipeline_run, step_context.step.key
                    )
                    stack.enter_context(log_watcher)
                except Exception as setup_exc:
                    capture_setup_error = setup_exc
                    logging.exception(
                        "Exception while setting up compute log capture for step %s in run %s: %s",
                        step_context.step.key,
                        step_context.pipeline_run.run_id,
                        setup_exc,
                    )

                # Only announce log capture when it was actually set up.
                if not capture_setup_error:
                    yield DagsterEvent.capture_logs(
                        step_context, log_key=step_context.step.key, steps=[step_context.step]
                    )

                step_events = check.generator(_dagster_event_sequence_for_step(step_context))
                for emitted in step_events:
                    check.inst(emitted, DagsterEvent)
                    collected_events.append(emitted)
                    yield emitted
                    # The active execution learns of the event after it was yielded.
                    active_execution.handle_event(emitted)

                active_execution.verify_complete(pipeline_context, step.key)

                # Close explicitly so that a cleanup failure is logged here
                # rather than raised when the with-block exits.
                try:
                    stack.close()
                except Exception as cleanup_exc:
                    logging.exception(
                        "Exception while cleaning up compute log capture for step %s in run %s: %s",
                        step_context.step.key,
                        step_context.pipeline_run.run_id,
                        cleanup_exc,
                    )

            # process skips from failures or uncovered inputs
            for plan_event in active_execution.plan_events_iterator(pipeline_context):
                collected_events.append(plan_event)
                yield plan_event

            # hooks receive every event collected for this step
            for hook_event in _trigger_hook(step_context, collected_events):
                yield hook_event
Пример #6
0
def _dagster_event_sequence_for_step(step_context: StepExecutionContext) -> Iterator[DagsterEvent]:
    """
    Run one step and yield its dagster events, turning errors into events.

    Events come from the step launcher when the step context has one, and
    from core_dagster_event_sequence_for_step otherwise. Exceptions raised
    while producing them are handled by the clauses below; every clause that
    ends in a failure first records the exception through
    ``step_context.capture_step_exception``:

        (1) RetryRequested - user code asks to be retried:
            Yields a step-retry event, unless the retry mode is disabled or
            the previous attempt count has reached the request's
            max_retries; in those two situations a step-failure event is
            yielded instead.

        (2) Failure - user code fails on purpose:
            Yields a step-failure event that carries the Failure's
            description and metadata entries.

        (3) DagsterUserCodeExecutionError - user code fails unexpectedly:
            Yields a failure event tagged as a user code error.

        (4) KeyboardInterrupt / DagsterExecutionInterruptedError:
            Yields a failure event tagged as an interrupt and always
            re-raises.

        (5) DagsterError - usage error reported by the framework:
            Yields a failure event tagged as a framework error.

        (6) Any other BaseException:
            Yields a failure event tagged as unexpected and always re-raises.

    For cases (2), (3) and (5) the exception is re-raised after the failure
    event only when ``step_context.raise_on_error`` is set; case (3)
    re-raises the original user exception rather than the wrapper.
    """

    check.inst_param(step_context, "step_context", StepExecutionContext)

    try:
        if not step_context.step_launcher:
            event_source = core_dagster_event_sequence_for_step(step_context)
        else:
            # info all on step_context - should deprecate second arg
            event_source = step_context.step_launcher.launch_step(
                step_context, step_context.previous_attempt_count
            )

        for evt in check.generator(event_source):
            yield evt

    # case (1) in top comment
    except RetryRequested as retry_request:
        retry_err_info = serializable_error_info_from_exc_info(sys.exc_info())

        # A message is only set when the retry request must become a failure.
        failure_message = None
        if step_context.retry_mode.disabled:
            failure_message = "RetryRequested but retries are disabled"
        # retries.enabled or retries.deferred
        elif step_context.previous_attempt_count >= retry_request.max_retries:
            failure_message = "Exceeded max_retries of {}".format(retry_request.max_retries)

        if failure_message is None:
            retry_data = StepRetryData(
                error=retry_err_info,
                seconds_to_wait=retry_request.seconds_to_wait,
            )
            yield DagsterEvent.step_retry_event(step_context, retry_data)
        else:
            # Same stack/class/cause as the retry request, different message.
            failure_info = SerializableErrorInfo(
                message=failure_message,
                stack=retry_err_info.stack,
                cls_name=retry_err_info.cls_name,
                cause=retry_err_info.cause,
            )
            step_context.capture_step_exception(retry_request)
            yield DagsterEvent.step_failure_event(
                step_context=step_context,
                step_failure_data=StepFailureData(error=failure_info, user_failure_data=None),
            )

    # case (2) in top comment
    except Failure as user_failure:
        step_context.capture_step_exception(user_failure)
        failure_details = UserFailureData(
            label="intentional-failure",
            description=user_failure.description,
            metadata_entries=user_failure.metadata_entries,
        )
        yield step_failure_event_from_exc_info(step_context, sys.exc_info(), failure_details)
        if step_context.raise_on_error:
            raise user_failure

    # case (3) in top comment
    except DagsterUserCodeExecutionError as wrapped_error:
        step_context.capture_step_exception(wrapped_error.user_exception)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.USER_CODE_ERROR,
        )

        if step_context.raise_on_error:
            raise wrapped_error.user_exception

    # case (4) in top comment
    except (KeyboardInterrupt, DagsterExecutionInterruptedError) as interruption:
        step_context.capture_step_exception(interruption)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.INTERRUPT,
        )
        raise interruption

    # case (5) in top comment
    except DagsterError as framework_usage_error:
        step_context.capture_step_exception(framework_usage_error)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.FRAMEWORK_ERROR,
        )

        if step_context.raise_on_error:
            raise framework_usage_error

    # case (6) in top comment
    except BaseException as unexpected:
        step_context.capture_step_exception(unexpected)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.UNEXPECTED_ERROR,
        )
        raise unexpected
Пример #7
0
    def execute(pipeline_context, execution_plan, step_keys_to_execute=None):
        '''
        Run the steps of an execution plan in topological order, yielding events.

        A step is skipped (a step-skipped event is yielded and its key is
        remembered) when one of its upstream steps failed or was skipped, or
        when the intermediates manager reports uncovered inputs for it. Every
        other selected step is executed and its events are yielded as they
        are produced.

        Args:
            pipeline_context (SystemPipelineExecutionContext): Pipeline-level
                context; its executor_config must be an ExecutorConfig.
            execution_plan (ExecutionPlan): The plan to execute.
            step_keys_to_execute (Optional[List[str]]): When given and
                non-empty, only steps with these keys are considered. None
                and an empty list both leave all steps selected.

        Yields:
            DagsterEvent: Skip events and the events of each executed step.
        '''
        check.inst_param(pipeline_context, 'pipeline_context',
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
        check.opt_list_param(step_keys_to_execute,
                             'step_keys_to_execute',
                             of_type=str)

        if step_keys_to_execute is None:
            selected_keys = None
        else:
            selected_keys = set(step_keys_to_execute)

        check.param_invariant(
            isinstance(pipeline_context.executor_config, ExecutorConfig),
            'pipeline_context',
            'Expected executor_config to be ExecutorConfig got {}'.format(
                pipeline_context.executor_config),
        )

        # Keys of steps that failed or were skipped; their dependents are skipped too.
        unrunnable_keys = set()

        # It would be good to implement a reference tracking algorithm here so we could
        # garbage collection results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        steps_in_order = (
            planned_step
            for level in execution_plan.topological_step_levels()
            for planned_step in level
        )
        for step in steps_in_order:
            if selected_keys and step.key not in selected_keys:
                continue

            step_context = pipeline_context.for_step(step)

            blocked_upstream_keys = [
                step_input.prev_output_handle.step_key
                for step_input in step.step_inputs
                if step_input.is_from_output
                and step_input.prev_output_handle.step_key in unrunnable_keys
            ]
            if blocked_upstream_keys:
                step_context.log.info((
                    'Dependencies for step {step} failed: {failed_inputs}. Not executing.'
                ).format(step=step.key, failed_inputs=blocked_upstream_keys))
                unrunnable_keys.add(step.key)
                yield DagsterEvent.step_skipped_event(step_context)
                continue

            inputs_without_output = pipeline_context.intermediates_manager.uncovered_inputs(
                step_context, step)
            if inputs_without_output:
                # In partial pipeline execution, we may end up here without having validated the
                # missing dependent outputs were optional
                _assert_missing_inputs_optional(inputs_without_output,
                                                execution_plan, step.key)

                step_context.log.info((
                    'Not all inputs covered for {step}. Not executing. Output missing for '
                    'inputs: {uncovered_inputs}').format(
                        uncovered_inputs=inputs_without_output, step=step.key))
                unrunnable_keys.add(step.key)
                yield DagsterEvent.step_skipped_event(step_context)
                continue

            step_events = check.generator(
                dagster_event_sequence_for_step(step_context))
            for emitted in step_events:
                check.inst(emitted, DagsterEvent)
                # Remember failures before yielding so dependents get skipped.
                if emitted.is_step_failure:
                    unrunnable_keys.add(step.key)

                yield emitted
Пример #8
0
def dagster_event_sequence_for_step(step_context):
    '''
    Yield the dagster events produced by executing the step bound to step_context,
    turning errors raised during execution into step failure events.

    Three error cases are handled:
        (1) A DagsterUserCodeExecutionError was raised: user-space code (for
        example a compute function or a type check) raised, and the framework
        wrapped the error. The failure event is built from the original user
        exc_info stashed on the exception as original_exc_info. When the
        exception is flagged as a user specified failure, the description and
        metadata entries of that failure are attached to the event as
        UserFailureData.
        (2) Any other DagsterError was raised: the framework signalled a usage
        error or some other error that is not a wrapped user exception, for
        example the user yielded an object that is not a proper event out of a
        compute function.
        (3) Any other Exception was raised: an unexpected error. Either the
        framework has an internal error, or user-space code was invoked
        without a user code error boundary around it. After the failure event
        is yielded the exception is always re-raised, whatever the
        raise_on_error setting is.

    In cases (1) and (2) the exception is re-raised after the failure event only
    when step_context.executor_config.raise_on_error is set. Re-raising gives
    sensible errors in test and ad-hoc contexts; tools are expected to leave the
    option off and present the failure to the user themselves.
    '''

    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)

    try:
        event_stream = check.generator(
            _core_dagster_event_sequence_for_step(step_context))
        for event in event_stream:
            yield event

    except DagsterUserCodeExecutionError as user_code_error:
        # Case (1): report the user's own traceback rather than the wrapper's.
        original_exc_info = user_code_error.original_exc_info

        # Explicit failure details exist only for user specified failures.
        if user_code_error.is_user_specified_failure:
            specified_failure = user_code_error.user_specified_failure
            failure_data = UserFailureData(
                label='intentional-failure',
                description=specified_failure.description,
                metadata_entries=specified_failure.metadata_entries,
            )
        else:
            failure_data = None

        yield _step_failure_event_from_exc_info(step_context,
                                                original_exc_info,
                                                failure_data)

        if step_context.executor_config.raise_on_error:
            raise user_code_error

    except DagsterError as framework_error:
        # Case (2): framework-level error that is not a wrapped user exception.
        yield _step_failure_event_from_exc_info(step_context, sys.exc_info())

        if step_context.executor_config.raise_on_error:
            raise framework_error

    except Exception as unexpected_error:  # pylint: disable=broad-except
        # Case (3): unexpected error; always propagate after recording the failure.
        yield _step_failure_event_from_exc_info(step_context, sys.exc_info())

        raise unexpected_error
Пример #9
0
def inner_plan_execution_iterator(pipeline_context, execution_plan):
    """
    Execute the steps of execution_plan one at a time, yielding every event produced.

    The events from copy_required_intermediates_for_execution are yielded first.
    Then, until the active execution reports it is complete, each step returned by
    get_next_step is processed as follows:
        - a step context is created and checked for every required resource key;
        - while the compute log manager watches the step, the step is either
          skipped (when intermediate storage reports uncovered inputs, after
          asserting those missing inputs are optional) or executed, with each
          emitted event handed to the active execution after it is yielded;
        - events from the active execution's plan_events_iterator are yielded;
        - the events collected for the step are passed to _trigger_hook and the
          resulting hook events are yielded.

    Args:
        pipeline_context (SystemExecutionContext): context of the pipeline run.
        execution_plan (ExecutionPlan): the plan whose steps are executed.

    Yields:
        The events described above, in the order they are produced.
    """
    check.inst_param(pipeline_context, "pipeline_context",
                     SystemExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    retries = pipeline_context.retries

    for copy_event in copy_required_intermediates_for_execution(
            pipeline_context, execution_plan):
        yield copy_event

    with execution_plan.start(retries=retries) as active_execution:

        # A reference tracking algorithm could be implemented here to garbage
        # collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        while not active_execution.is_complete:
            step = active_execution.get_next_step()
            step_context = pipeline_context.for_step(step)
            # Everything yielded for this step, handed to the hooks at the end.
            events_for_hooks = []

            missing_resources = [
                key for key in step_context.required_resource_keys
                if not hasattr(step_context.resources, key)
            ]
            check.invariant(
                not missing_resources,
                ("Expected step context for solid {solid_name} to have all required resources, but "
                 "missing {missing_resources}.").format(
                     solid_name=step_context.solid.name,
                     missing_resources=missing_resources),
            )

            # capture all of the logs for this step
            with pipeline_context.instance.compute_log_manager.watch(
                    step_context.pipeline_run, step_context.step.key):
                uncovered_inputs = pipeline_context.intermediate_storage.uncovered_inputs(
                    step_context, step)
                if not uncovered_inputs:
                    step_events = check.generator(
                        _dagster_event_sequence_for_step(
                            step_context, retries))
                    for emitted_event in step_events:
                        check.inst(emitted_event, DagsterEvent)
                        events_for_hooks.append(emitted_event)
                        yield emitted_event
                        # Update execution state only after the event is yielded.
                        active_execution.handle_event(emitted_event)
                else:
                    # In partial pipeline execution, we may end up here without having
                    # validated that the missing dependent outputs were optional
                    _assert_missing_inputs_optional(uncovered_inputs,
                                                    execution_plan, step.key)

                    step_context.log.info((
                        "Not all inputs covered for {step}. Not executing. Output missing for "
                        "inputs: {uncovered_inputs}").format(
                            step=step.key, uncovered_inputs=uncovered_inputs))
                    skipped_event = DagsterEvent.step_skipped_event(
                        step_context)
                    events_for_hooks.append(skipped_event)
                    yield skipped_event
                    active_execution.mark_skipped(step.key)

                active_execution.verify_complete(pipeline_context, step.key)

            # process skips from failures or uncovered inputs
            for plan_event in active_execution.plan_events_iterator(
                    pipeline_context):
                events_for_hooks.append(plan_event)
                yield plan_event

            # pass a list of step events to hooks
            for hook_event in _trigger_hook(step_context, events_for_hooks):
                yield hook_event