Python DagsterEvent примеры использования

Язык программирования: Python

Пространство имен/Пакет: dagster.core.events

Класс/Тип: DagsterEvent

Примеров на hotexamples.com: 30

Python DagsterEvent — найдено 30 примеров. Это лучшие примеры кода на Python для dagster.core.events.DagsterEvent, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

DagsterEvent(30)

engine_event(17)

step_materialization(13)

pipeline_failure(12)

pipeline_success(12)

step_failure_event(11)

pipeline_init_failure(11)

pipeline_start(10)

step_expectation_result(8)

asset_materialization(8)

step_output_event(8)

step_success_event(7)

step_start_event(7)

loaded_input(6)

asset_store_operation(6)

step_restarted_event(5)

handled_output(5)

resource_init_failure(5)

resource_init_start(5)

step_skipped_event(4)

asset_observation(4)

resource_init_success(4)

resource_teardown_failure(4)

object_store_operation(3)

step_retry_event(2)

step_input_event(1)

hook_skipped(1)

capture_logs(1)

hook_completed(1)

pipeline_canceled(1)

hook_errored(1)

Пример #1

Показать файл

    def execute(pipeline_context, execution_plan):
        """Run the execution plan by submitting each step as a Celery task.

        This is a generator. It yields the events deserialized from finished
        task results, the skip events reported by the active execution, and an
        engine event announcing every task submission.

        Args:
            pipeline_context: A SystemPipelineExecutionContext whose
                executor_config is a CeleryConfig.
            execution_plan: The ExecutionPlan whose steps are submitted.

        Raises:
            DagsterSubprocessError: Once all outstanding results have been
                collected, if fetching any task result raised.
            Exception: Any error hit while submitting a task is re-raised
                after an engine event describing it has been yielded.
        """
        from .tasks import make_app

        check.inst_param(pipeline_context, 'pipeline_context',
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

        check.param_invariant(
            isinstance(pipeline_context.executor_config, CeleryConfig),
            'pipeline_context',
            'Expected executor_config to be CeleryConfig got {}'.format(
                pipeline_context.executor_config),
        )

        celery_config = pipeline_context.executor_config

        # NOTE(review): storage is dereferenced below without a None check;
        # presumably the environment dict always has a 'storage' entry by the
        # time this runs -- confirm against the caller.
        storage = pipeline_context.environment_dict.get('storage')

        if (celery_config.broker and not is_local_uri(celery_config.broker)
            ) or (celery_config.backend
                  and not is_local_uri(celery_config.backend)):
            check.invariant(
                storage.get('s3') or storage.get('gcs'),
                'Must use S3 or GCS storage with non-local Celery broker: {broker} '
                'and backend: {backend}'.format(broker=celery_config.broker,
                                                backend=celery_config.backend),
            )
        else:
            check.invariant(
                not storage.get('in_memory'),
                'Cannot use in-memory storage with Celery, use filesystem, S3, or GCS',
            )

        app = make_app(celery_config)

        def priority_for_step(step):
            # Negated so that an ascending sort puts higher priorities first.
            return (-1 * int(
                step.tags.get('dagster-celery/priority', task_default_priority)
            ) + -1 * _get_run_priority(pipeline_context))

        def priority_for_key(step_key):
            return priority_for_step(execution_plan.get_step_by_key(step_key))

        _warn_on_priority_misuse(pipeline_context, execution_plan)

        step_results = {}  # step key -> pending result returned by _submit_task
        step_errors = {}  # step key -> serialized error raised by result.get()
        active_execution = execution_plan.start(
            retries=pipeline_context.executor_config.retries,
            sort_key_fn=priority_for_step)
        stopping = False

        # Keep looping until the plan is complete (or we are stopping) and
        # every submitted task has been collected.
        while (not active_execution.is_complete
               and not stopping) or step_results:

            results_to_pop = []
            for step_key, result in sorted(
                    step_results.items(),
                    key=lambda x: priority_for_key(x[0])):
                if result.ready():
                    try:
                        step_events = result.get()
                    except Exception:  # pylint: disable=broad-except
                        # We will want to do more to handle the exception here.. maybe subclass Task
                        # Certainly yield an engine or pipeline event
                        step_events = []
                        step_errors[
                            step_key] = serializable_error_info_from_exc_info(
                                sys.exc_info())
                        stopping = True
                    for step_event in step_events:
                        event = deserialize_json_to_dagster_namedtuple(
                            step_event)
                        yield event
                        active_execution.handle_event(event)

                    results_to_pop.append(step_key)

            for step_key in results_to_pop:
                if step_key in step_results:
                    del step_results[step_key]
                    active_execution.verify_complete(pipeline_context,
                                                     step_key)

            # process skips from failures or uncovered inputs
            for event in active_execution.skipped_step_events_iterator(
                    pipeline_context):
                yield event

            # don't add any new steps if we are stopping
            if stopping:
                # Still pause between polls while outstanding results drain,
                # rather than spinning on result.ready().
                if step_results:
                    time.sleep(TICK_SECONDS)
                continue

            # This is a slight refinement. If we have n workers idle and schedule m > n steps for
            # execution, the first n steps will be picked up by the idle workers in the order in
            # which they are scheduled (and the following m-n steps will be executed in priority
            # order, provided that it takes longer to execute a step than to schedule it). The test
            # case has m >> n to exhibit this behavior in the absence of this sort step.
            for step in active_execution.get_steps_to_execute():
                try:
                    queue = step.tags.get('dagster-celery/queue',
                                          task_default_queue)
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        'Submitting celery task for step "{step_key}" to queue "{queue}".'
                        .format(step_key=step.key, queue=queue),
                        EngineEventData(marker_start=DELEGATE_MARKER),
                        step_key=step.key,
                    )
                    step_results[step.key] = _submit_task(
                        app, pipeline_context, step, queue)
                except Exception:
                    # Surface the submission failure as an engine event, then
                    # let the original exception propagate.
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        'Encountered error during celery task submission.',
                        event_specific_data=EngineEventData.engine_error(
                            serializable_error_info_from_exc_info(
                                sys.exc_info()), ),
                    )
                    raise

            time.sleep(TICK_SECONDS)

        if step_errors:
            raise DagsterSubprocessError(
                'During celery execution errors occurred in workers:\n{error_list}'
                .format(error_list='\n'.join([
                    '[{step}]: {err}'.format(step=key, err=err.to_string())
                    for key, err in step_errors.items()
                ])),
                subprocess_error_infos=list(step_errors.values()),
            )

Пример #2

Показать файл

Файл: execute_plan.py Проект: spencer-zepelin/dagster

def inner_plan_execution_iterator(pipeline_context, execution_plan, retries):
    """Execute the plan one step at a time, yielding events as they occur.

    Args:
        pipeline_context: A SystemPipelineExecutionContext.
        execution_plan: The ExecutionPlan to run.
        retries: A Retries instance, passed to the plan and to each step.

    Yields:
        The events from copying required intermediates, then for each step
        either a skip event (when some inputs are uncovered) or the step's own
        events, followed by any skip events the active execution reports.
    """
    check.inst_param(pipeline_context, 'pipeline_context',
                     SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.inst_param(retries, 'retries', Retries)

    for copy_event in copy_required_intermediates_for_execution(
            pipeline_context, execution_plan):
        yield copy_event

    # It would be good to implement a reference tracking algorithm here to
    # garbage collect results that are no longer needed by any steps
    # https://github.com/dagster-io/dagster/issues/811
    plan_state = execution_plan.start(retries=retries)
    while not plan_state.is_complete:
        current_step = plan_state.get_next_step()
        step_context = pipeline_context.for_step(current_step)

        # Every resource key the step requires must be an attribute of the
        # step context's resources object.
        absent_resources = []
        for resource_key in step_context.required_resource_keys:
            if not hasattr(step_context.resources, resource_key):
                absent_resources.append(resource_key)
        resources_message = (
            'Expected step context for solid {solid_name} to have all required resources, but '
            'missing {missing_resources}.').format(
                solid_name=step_context.solid.name,
                missing_resources=absent_resources)
        check.invariant(not absent_resources, resources_message)

        # Everything inside this block runs under the compute log watch for
        # the step.
        log_watch = pipeline_context.instance.compute_log_manager.watch(
            step_context.pipeline_run, step_context.step.key)
        with log_watch:
            unmet_inputs = pipeline_context.intermediates_manager.uncovered_inputs(
                step_context, current_step)
            if not unmet_inputs:
                step_events = check.generator(
                    _dagster_event_sequence_for_step(step_context, retries))
                for step_event in step_events:
                    check.inst(step_event, DagsterEvent)
                    yield step_event
                    plan_state.handle_event(step_event)
            else:
                # In partial pipeline execution, we may end up here without
                # having validated the missing dependent outputs were optional
                _assert_missing_inputs_optional(unmet_inputs, execution_plan,
                                                current_step.key)

                skip_message = (
                    'Not all inputs covered for {step}. Not executing. Output missing for '
                    'inputs: {uncovered_inputs}').format(
                        uncovered_inputs=unmet_inputs, step=current_step.key)
                step_context.log.info(skip_message)
                yield DagsterEvent.step_skipped_event(step_context)
                plan_state.mark_skipped(current_step.key)

            plan_state.verify_complete(pipeline_context, current_step.key)

        # process skips from failures or uncovered inputs
        for skip_event in plan_state.skipped_step_events_iterator(
                pipeline_context):
            yield skip_event

Пример #3

Показать файл

def dagster_event_from_dict(event_dict, pipeline_name):
    """Rebuild a DagsterEvent from a dict describing a step event.

    Args:
        event_dict: Dict with string keys. It must contain '__typename' and a
            'step' dict with 'key', 'kind' and 'solidHandleID'; the other keys
            read depend on the event type.
        pipeline_name: Name of the pipeline the event belongs to.

    Returns:
        A DagsterEvent whose event_specific_data is populated for the step
        output, input, success, materialization, expectation result and
        failure event types, and is None for any other handled type.

    Raises:
        Exception: If '__typename' has no entry in _handled_events().
    """
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    def _metadata_entries():
        # Top-level 'metadataEntries' converted to a list.
        return list(
            event_metadata_entries(event_dict.get('metadataEntries')) or [])

    def _type_check_data():
        # Shared by the STEP_OUTPUT and STEP_INPUT branches.
        return TypeCheckData(
            success=event_dict['typeCheck']['success'],
            label=event_dict['typeCheck']['label'],
            description=event_dict.get('description'),
            metadata_entries=_metadata_entries(),
        )

    # Resolve the event type from the typename.
    handled = _handled_events()
    event_type = handled.get(event_dict['__typename'])
    if not event_type:
        raise Exception('unhandled event type %s' % event_dict['__typename'])

    # Build the type-specific payload, if this type carries one.
    event_specific_data = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        output_handle = StepOutputHandle(event_dict['step']['key'],
                                         event_dict['outputName'])
        event_specific_data = StepOutputData(
            step_output_handle=output_handle,
            type_check_data=_type_check_data(),
        )
    elif event_type == DagsterEventType.STEP_INPUT:
        event_specific_data = StepInputData(
            input_name=event_dict['inputName'],
            type_check_data=_type_check_data(),
        )
    elif event_type == DagsterEventType.STEP_SUCCESS:
        event_specific_data = StepSuccessData(0.0)
    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        event_specific_data = StepMaterializationData(
            materialization=materialization_from_data(
                event_dict['materialization']))
    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        event_specific_data = StepExpectationResultData(
            expectation_result_from_data(event_dict['expectationResult']))
    elif event_type == DagsterEventType.STEP_FAILURE:
        error_info = SerializableErrorInfo(event_dict['error']['message'],
                                           stack=None,
                                           cls_name=None)
        # User failure data is attached only when 'failureMetadata' is present.
        if event_dict.get('failureMetadata'):
            user_failure_data = UserFailureData(
                label=event_dict['failureMetadata']['label'],
                description=event_dict['failureMetadata']['description'],
                metadata_entries=_metadata_entries(),
            )
        else:
            user_failure_data = None
        event_specific_data = StepFailureData(error_info, user_failure_data)

    # We should update the GraphQL response so that clients don't need to do this handle parsing.
    # See: https://github.com/dagster-io/dagster/issues/1559
    # Each dot-separated segment becomes a SolidHandle whose parent is the
    # handle built from the preceding segment.
    handle = None
    for solid_name in event_dict['step']['solidHandleID'].split('.'):
        handle = SolidHandle(solid_name, definition_name=None, parent=handle)

    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=event_dict['step']['key'],
        solid_handle=handle,
        step_kind_value=event_dict['step']['kind'],
        logging_tags=None,
        event_specific_data=event_specific_data,
    )

Пример #4

Показать файл

Файл: execute_plan.py Проект: sarahmk125/dagster

def _dagster_event_sequence_for_step(
        step_context: StepExecutionContext) -> Iterator[DagsterEvent]:
    """
    Yield the dagster events for one step, converting errors into events.

    The step runs through the step launcher when the context has one, and
    through core_dagster_event_sequence_for_step otherwise. Exceptions raised
    while the events are produced are handled in this order:

        (1) RetryRequested:
            Yields a step failure event when retries are disabled or the
            previous attempt count has reached the request's max_retries;
            otherwise yields a step retry event. Never re-raised.

        (2) Failure:
            Yields a step failure event carrying the failure's description
            and metadata entries. Re-raised if raise_on_error is set.

        (3) DagsterUserCodeExecutionError:
            Yields a step failure event with the USER_CODE_ERROR source. If
            raise_on_error is set, the wrapped user_exception is raised.

        (4) KeyboardInterrupt or DagsterExecutionInterruptedError:
            Yields a step failure event with the INTERRUPT source and always
            re-raises.

        (5) DagsterError:
            Yields a step failure event with the FRAMEWORK_ERROR source.
            Re-raised if raise_on_error is set.

        (6) Any other Exception:
            Yields a step failure event with the UNEXPECTED_ERROR source and
            always re-raises.

    In every case that yields a step failure event, the exception is first
    recorded on the step context via capture_step_exception.
    """

    check.inst_param(step_context, "step_context", StepExecutionContext)

    try:
        launcher = step_context.step_launcher
        if launcher:
            # info all on step_context - should deprecate second arg
            step_events = launcher.launch_step(
                step_context, step_context.previous_attempt_count)
        else:
            step_events = core_dagster_event_sequence_for_step(step_context)

        for step_event in check.generator(step_events):
            yield step_event

    # case (1) in the docstring
    except RetryRequested as retry_request:
        retry_err_info = serializable_error_info_from_exc_info(sys.exc_info())

        # Decide whether this request ends in failure, and with what message.
        if step_context.retry_mode.disabled:
            failure_message = "RetryRequested but retries are disabled"
        elif step_context.previous_attempt_count >= retry_request.max_retries:
            failure_message = "Exceeded max_retries of {}".format(
                retry_request.max_retries)
        else:
            failure_message = None

        if failure_message is None:
            retry_data = StepRetryData(
                error=retry_err_info,
                seconds_to_wait=retry_request.seconds_to_wait,
            )
            yield DagsterEvent.step_retry_event(step_context, retry_data)
        else:
            # Keep the stack, class name and cause of the original request,
            # but replace the message.
            fail_err = SerializableErrorInfo(
                message=failure_message,
                stack=retry_err_info.stack,
                cls_name=retry_err_info.cls_name,
                cause=retry_err_info.cause,
            )
            step_context.capture_step_exception(retry_request)
            yield DagsterEvent.step_failure_event(
                step_context=step_context,
                step_failure_data=StepFailureData(error=fail_err,
                                                  user_failure_data=None),
            )

    # case (2) in the docstring
    except Failure as failure:
        step_context.capture_step_exception(failure)
        user_failure_data = UserFailureData(
            label="intentional-failure",
            description=failure.description,
            metadata_entries=failure.metadata_entries,
        )
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            user_failure_data,
        )
        if step_context.raise_on_error:
            raise failure

    # case (3) in the docstring
    except DagsterUserCodeExecutionError as dagster_user_error:
        original_exception = dagster_user_error.user_exception
        step_context.capture_step_exception(original_exception)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.USER_CODE_ERROR,
        )
        if step_context.raise_on_error:
            raise dagster_user_error.user_exception

    # case (4) in the docstring
    except (KeyboardInterrupt,
            DagsterExecutionInterruptedError) as interrupt_error:
        step_context.capture_step_exception(interrupt_error)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.INTERRUPT,
        )
        raise interrupt_error

    # case (5) in the docstring
    except DagsterError as dagster_error:
        step_context.capture_step_exception(dagster_error)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.FRAMEWORK_ERROR,
        )
        if step_context.raise_on_error:
            raise dagster_error

    # case (6) in the docstring
    except Exception as unexpected_exception:  # pylint: disable=broad-except
        step_context.capture_step_exception(unexpected_exception)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.UNEXPECTED_ERROR,
        )
        raise unexpected_exception

Пример #5

Показать файл

    def launch_step(self, step_handler_context: StepHandlerContext):
        """Create a Kubernetes job for a single step.

        Args:
            step_handler_context: Context whose execute_step_args names
                exactly one step key to execute.

        Returns:
            A one-element list holding the engine event that announces the
            step's Kubernetes job.

        Raises:
            AssertionError: If more than one (or no) step key is given.
            Exception: If neither the job config nor the repository origin
                supplies a container image.
        """
        step_args = step_handler_context.execute_step_args
        step_keys = step_args.step_keys_to_execute
        assert (len(step_keys) == 1
                ), "Launching multiple steps is not currently supported"
        step_key = step_keys[0]

        # The same name is used for both the job and its pod.
        job_name = self._get_k8s_step_job_name(step_handler_context)
        command_args = step_args.get_command_args()

        job_config = self._job_config
        if not job_config.job_image:
            # Fall back to the image recorded on the repository origin.
            job_config = job_config.with_image(
                step_args.pipeline_origin.repository_origin.container_image)
            if not job_config.job_image:
                raise Exception(
                    "No image included in either executor config or the job")

        user_defined_k8s_config = get_user_defined_k8s_config(
            frozentags(step_handler_context.step_tags[step_key]))

        pipeline_name = step_args.pipeline_origin.pipeline_name
        job = construct_dagster_k8s_job(
            job_config=job_config,
            args=command_args,
            job_name=job_name,
            pod_name=job_name,
            component="step_worker",
            user_defined_k8s_config=user_defined_k8s_config,
            labels={
                "dagster/job": pipeline_name,
                "dagster/op": step_key,
            },
        )

        # The event is built before the job is submitted.
        launch_event = DagsterEvent(
            event_type_value=DagsterEventType.ENGINE_EVENT.value,
            pipeline_name=pipeline_name,
            step_key=step_key,
            message=f"Executing step {step_key} in Kubernetes job {job_name}",
            event_specific_data=EngineEventData([
                EventMetadataEntry.text(step_key, "Step key"),
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
            ], ),
        )

        self._batch_api.create_namespaced_job(body=job,
                                              namespace=self._job_namespace)

        return [launch_event]

Пример #6

Показать файл

Файл: engine_multiprocess.py Проект: xhochy/dagster

    def execute(pipeline_context, execution_plan):
        """Run the execution plan level by level with the multiprocess engine.

        This is a generator. It yields an engine event when the parent process
        starts, the events from copying required intermediates, the events
        produced by bounded_parallel_executor for each step level, and a final
        engine event reporting the elapsed time.

        Steps whose inputs are not all covered are logged as errors and left
        out of their level; no event is yielded for them here.

        Args:
            pipeline_context: A SystemPipelineExecutionContext; its
                executor_config.max_concurrent is passed to the executor as
                the limit.
            execution_plan: The ExecutionPlan to run.
        """
        check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

        step_levels = execution_plan.execution_step_levels()

        intermediates_manager = pipeline_context.intermediates_manager

        limit = pipeline_context.executor_config.max_concurrent

        step_key_set = {step.key for step in execution_plan.execution_steps()}

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Executing steps using multiprocess engine: parent process (pid: {pid})'.format(
                pid=os.getpid()
            ),
            event_specific_data=EngineEventData.multiprocess(
                os.getpid(), step_keys_to_execute=step_key_set
            ),
        )

        # It would be good to implement a reference tracking algorithm here so we could
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        with time_execution_scope() as timer_result:
            for event in copy_required_intermediates_for_execution(
                pipeline_context, execution_plan
            ):
                yield event

            for step_level in step_levels:
                step_contexts_to_execute = []
                for step in step_level:
                    step_context = pipeline_context.for_step(step)

                    if not intermediates_manager.all_inputs_covered(step_context, step):
                        uncovered_inputs = intermediates_manager.uncovered_inputs(
                            step_context, step
                        )
                        # The first fragment ends with a space so the two
                        # sentences do not run together once concatenated.
                        step_context.log.error(
                            (
                                'Not all inputs covered for {step}. Not executing. '
                                'Output missing for inputs: {uncovered_inputs}'
                            ).format(uncovered_inputs=uncovered_inputs, step=step.key)
                        )
                        continue

                    step_contexts_to_execute.append(step_context)

                # Run this level's runnable steps, at most `limit` at a time.
                for step_event in bounded_parallel_executor(
                    pipeline_context, step_contexts_to_execute, limit
                ):
                    yield step_event

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Multiprocess engine: parent process exiting after {duration} (pid: {pid})'.format(
                duration=format_duration(timer_result.millis), pid=os.getpid()
            ),
            event_specific_data=EngineEventData.multiprocess(os.getpid()),
        )

Пример #7

Показать файл

Файл: execute_step.py Проект: ggservice007/dagster

def core_dagster_event_sequence_for_step(
        step_context: SystemStepExecutionContext,
        prior_attempt_count: int) -> Iterator[DagsterEvent]:
    """
    Run the step described by ``step_context`` and yield its DagsterEvents.

    Exceptions raised while the step is being computed are not caught here;
    they bubble up to the caller.

    Args:
        step_context: Execution context of the step to run.
        prior_attempt_count: Number of earlier attempts. When positive, the
            first event is a step-restarted event instead of a step-start
            event.

    Yields:
        The start (or restart) event, any events produced while loading and
        type checking inputs, one or more events per user-emitted output,
        materialization or expectation result, and finally a step-success
        event carrying the measured duration in milliseconds.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    if prior_attempt_count <= 0:
        yield DagsterEvent.step_start_event(step_context)
    else:
        yield DagsterEvent.step_restarted_event(step_context,
                                                prior_attempt_count)

    loaded_inputs = {}
    for step_input in step_context.step.step_inputs:
        input_def = step_input.source.get_input_def(step_context.pipeline_def)

        # Inputs whose type kind is NOTHING are not loaded at all.
        if input_def.dagster_type.kind == DagsterTypeKind.NOTHING:
            continue

        # Loading may interleave DagsterEvents with the actual input value.
        for loaded in ensure_gen(
                step_input.source.load_input_object(step_context)):
            if not isinstance(loaded, DagsterEvent):
                check.invariant(step_input.name not in loaded_inputs)
                loaded_inputs[step_input.name] = loaded
            else:
                yield loaded

    for name, value in loaded_inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, name, value))
        for type_check_event in type_check_events:
            yield type_check_event

    with time_execution_scope() as timer_result:
        raw_user_events = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context,
                                                     loaded_inputs))
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(
                step_context, raw_user_events))

        # The events must be consumed inside the timer block, otherwise the
        # recorded duration would not cover the actual computation.
        for user_event in checked_user_events:
            if isinstance(user_event, (Output, DynamicOutput)):
                yield from _type_check_and_store_output(
                    step_context, user_event)
            elif isinstance(user_event,
                            (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    success_data = StepSuccessData(duration_ms=timer_result.millis)
    yield DagsterEvent.step_success_event(step_context, success_data)

Пример #8

Показать файл

Файл: engine_inprocess.py Проект: Step2Web/dagster

    def execute(pipeline_context, execution_plan, step_keys_to_execute=None):
        '''Run the steps of an execution plan one by one in the current process.

        Steps are visited level by level, in the order returned by
        ``execution_plan.topological_step_levels()``. A step is skipped, and a
        step-skipped event is yielded, when one of its dependencies failed or
        was skipped, or when the intermediates manager reports uncovered inputs.

        Args:
            pipeline_context (SystemPipelineExecutionContext): Context of the run.
            execution_plan (ExecutionPlan): Plan whose steps are executed.
            step_keys_to_execute (Optional[List[str]]): When given and non-empty,
                only steps whose key is in this list are considered.

        Yields:
            DagsterEvent: An opening engine event, the events of every executed
            or skipped step, and a closing engine event with the elapsed time.
        '''
        check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
        check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

        if step_keys_to_execute is None:
            step_key_set = None
        else:
            step_key_set = set(step_keys_to_execute)

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Executing steps in process (pid: {pid})'.format(pid=os.getpid()),
            event_specific_data=EngineEventData.in_process(os.getpid(), step_key_set),
        )

        with time_execution_scope() as timer_result:
            check.param_invariant(
                isinstance(pipeline_context.executor_config, ExecutorConfig),
                'pipeline_context',
                'Expected executor_config to be ExecutorConfig got {}'.format(
                    pipeline_context.executor_config
                ),
            )

            # Keys of steps that failed or were skipped so far; any step that
            # depends on one of them is skipped as well.
            failed_or_skipped_steps = set()

            # It would be good to implement a reference tracking algorithm here to
            # garbage collect results that are no longer needed by any steps
            # https://github.com/dagster-io/dagster/issues/811
            for level in execution_plan.topological_step_levels():
                for step in level:
                    if step_key_set and step.key not in step_key_set:
                        continue

                    step_context = pipeline_context.for_step(step)

                    # capture all of the logs for this step
                    with mirror_step_io(step_context):
                        failed_inputs = [
                            dependency_key
                            for step_input in step.step_inputs
                            for dependency_key in failed_or_skipped_steps.intersection(
                                step_input.dependency_keys
                            )
                        ]

                        if failed_inputs:
                            step_context.log.info(
                                (
                                    'Dependencies for step {step} failed: {failed_inputs}. Not executing.'
                                ).format(step=step.key, failed_inputs=failed_inputs)
                            )
                            failed_or_skipped_steps.add(step.key)
                            yield DagsterEvent.step_skipped_event(step_context)
                            continue

                        uncovered_inputs = pipeline_context.intermediates_manager.uncovered_inputs(
                            step_context, step
                        )
                        if uncovered_inputs:
                            # In partial pipeline execution, we may end up here without having
                            # validated the missing dependent outputs were optional
                            _assert_missing_inputs_optional(
                                uncovered_inputs, execution_plan, step.key
                            )

                            step_context.log.info(
                                (
                                    'Not all inputs covered for {step}. Not executing. Output missing for '
                                    'inputs: {uncovered_inputs}'
                                ).format(uncovered_inputs=uncovered_inputs, step=step.key)
                            )
                            failed_or_skipped_steps.add(step.key)
                            yield DagsterEvent.step_skipped_event(step_context)
                            continue

                        step_events = check.generator(dagster_event_sequence_for_step(step_context))
                        for step_event in step_events:
                            check.inst(step_event, DagsterEvent)
                            if step_event.is_step_failure:
                                failed_or_skipped_steps.add(step.key)

                            yield step_event

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Finished steps in process (pid: {pid}) in {duration_ms}'.format(
                pid=os.getpid(), duration_ms=format_duration(timer_result.millis)
            ),
            event_specific_data=EngineEventData.in_process(os.getpid(), step_key_set),
        )

Пример #9

Показать файл

    def test_fetch_records_by_update_timestamp(self, storage):
        """Run records can be filtered by ``updated_after`` and ordered by update time."""
        assert storage
        self._skip_in_memory(storage)

        one, two, three = make_new_run_id(), make_new_run_id(), make_new_run_id()

        initial_statuses = (
            (one, PipelineRunStatus.STARTED),
            (two, PipelineRunStatus.FAILURE),
            (three, PipelineRunStatus.STARTED),
        )
        for run_id, status in initial_statuses:
            storage.add_run(
                TestRunStorage.build_run(
                    run_id=run_id, pipeline_name="some_pipeline", status=status
                )
            )

        # Order matters: three succeeds first, then one fails, so both are
        # updated after run two was stored as failed.
        terminal_events = (
            (three, DagsterEventType.PIPELINE_SUCCESS),
            (one, DagsterEventType.PIPELINE_FAILURE),
        )
        for run_id, event_type in terminal_events:
            storage.handle_run_event(
                run_id,
                DagsterEvent(
                    message="a message",
                    event_type_value=event_type.value,
                    pipeline_name="some_pipeline",
                ),
            )

        run_two_filter = PipelineRunsFilter(run_ids=[two], updated_after=datetime(2020, 1, 1))
        run_two_update_timestamp = storage.get_run_records(filters=run_two_filter)[
            0
        ].update_timestamp

        updated_since_two = storage.get_run_records(
            filters=PipelineRunsFilter(updated_after=run_two_update_timestamp),
            order_by="update_timestamp",
            ascending=True,
        )
        assert [record.pipeline_run.run_id for record in updated_since_two] == [three, one]

        failed_since_two = storage.get_run_records(
            filters=PipelineRunsFilter(
                statuses=[PipelineRunStatus.FAILURE], updated_after=run_two_update_timestamp
            ),
        )
        assert [record.pipeline_run.run_id for record in failed_since_two] == [one]

Пример #10

Показать файл

    def event_generator(
        self,
        execution_plan,
        run_config,
        pipeline_run,
        instance,
        scoped_resources_builder_cm,
        intermediate_storage=None,
        raise_on_error=False,
        resource_instances_to_override=None,
        output_capture=None,
    ):
        """Build the execution context, yielding resource events around it.

        The generator yields, in order: the resource setup events, the
        constructed execution context itself, and (once resumed) the resource
        teardown events.

        If a ``DagsterError`` is raised before the context exists, a
        pipeline-init-failure event is yielded, followed by the teardown events
        of the resources manager when one was already created; the error is
        re-raised only when ``raise_on_error`` is true. If the error is raised
        after the context was constructed, it is always re-raised.

        Args:
            execution_plan (ExecutionPlan): Plan the context is built for.
            run_config (dict): Run configuration with string keys.
            pipeline_run (PipelineRun): The run being executed.
            instance (DagsterInstance): The Dagster instance.
            scoped_resources_builder_cm (callable): Called with keyword
                arguments to obtain the resources manager, which must provide
                ``generate_setup_events()``, ``get_object()`` and
                ``generate_teardown_events()``.
            intermediate_storage (Optional[IntermediateStorage]): Passed on to
                ``create_intermediate_storage``.
            raise_on_error (bool): Whether initialization errors are re-raised
                after the failure event has been yielded.
            resource_instances_to_override (Optional[dict]): Forwarded to
                ``scoped_resources_builder_cm``.
            output_capture: Forwarded to ``self.construct_context``.
        """
        execution_plan = check.inst_param(execution_plan, "execution_plan",
                                          ExecutionPlan)
        pipeline_def = execution_plan.pipeline.get_definition()

        run_config = check.dict_param(run_config, "run_config", key_type=str)
        pipeline_run = check.inst_param(pipeline_run, "pipeline_run",
                                        PipelineRun)
        instance = check.inst_param(instance, "instance", DagsterInstance)

        scoped_resources_builder_cm = check.callable_param(
            scoped_resources_builder_cm, "scoped_resources_builder_cm")
        # The reported name must match the actual parameter so a type-check
        # failure points at the right argument.
        intermediate_storage = check.opt_inst_param(
            intermediate_storage, "intermediate_storage",
            IntermediateStorage)
        raise_on_error = check.bool_param(raise_on_error, "raise_on_error")
        resource_instances_to_override = check.opt_dict_param(
            resource_instances_to_override, "resource_instances_to_override")

        # Both stay None until successfully created; the except branch below
        # uses them to tell how far initialization got.
        execution_context = None
        resources_manager = None

        try:
            context_creation_data = create_context_creation_data(
                execution_plan,
                run_config,
                pipeline_run,
                instance,
            )

            log_manager = create_log_manager(context_creation_data)
            resource_defs = execution_plan.pipeline_def.get_mode_definition(
                context_creation_data.environment_config.mode).resource_defs
            resources_manager = scoped_resources_builder_cm(
                resource_defs=resource_defs,
                resource_configs=context_creation_data.environment_config.
                resources,
                log_manager=log_manager,
                execution_plan=execution_plan,
                pipeline_run=context_creation_data.pipeline_run,
                resource_keys_to_init=context_creation_data.
                resource_keys_to_init,
                instance=instance,
                resource_instances_to_override=resource_instances_to_override,
                emit_persistent_events=True,
            )
            yield from resources_manager.generate_setup_events()
            scoped_resources_builder = check.inst(
                resources_manager.get_object(), ScopedResourcesBuilder)

            intermediate_storage = create_intermediate_storage(
                context_creation_data,
                intermediate_storage,
                scoped_resources_builder,
            )

            execution_context = self.construct_context(
                context_creation_data=context_creation_data,
                scoped_resources_builder=scoped_resources_builder,
                log_manager=log_manager,
                intermediate_storage=intermediate_storage,
                raise_on_error=raise_on_error,
                output_capture=output_capture,
            )

            _validate_plan_with_context(execution_context, execution_plan)

            # Hand the context to the consumer; teardown runs once it resumes us.
            yield execution_context
            yield from resources_manager.generate_teardown_events()
        except DagsterError as dagster_error:
            if execution_context is None:
                user_facing_exc_info = (
                    # pylint does not know original_exc_info exists if is_user_code_error is true
                    # pylint: disable=no-member
                    dagster_error.original_exc_info
                    if dagster_error.is_user_code_error else sys.exc_info())
                error_info = serializable_error_info_from_exc_info(
                    user_facing_exc_info)

                yield DagsterEvent.pipeline_init_failure(
                    pipeline_name=pipeline_def.name,
                    failure_data=PipelineInitFailureData(error=error_info),
                    log_manager=_create_context_free_log_manager(
                        instance, pipeline_run, pipeline_def),
                )
                if resources_manager:
                    yield from resources_manager.generate_teardown_events()
            else:
                # pipeline teardown failure
                raise dagster_error

            if raise_on_error:
                raise dagster_error

Пример #11

Показать файл

def inner_plan_execution_iterator(pipeline_context, execution_plan):
    """Execute the steps of ``execution_plan`` in sequence and yield their events.

    The required intermediates are copied first. Then, until the active
    execution reports completion, the next step is fetched and either skipped
    (when some of its input sources are missing) or executed. After each step
    the pending plan events and the hook events for that step are yielded.

    Args:
        pipeline_context (SystemExecutionContext): Context of the run.
        execution_plan (ExecutionPlan): Plan to execute.

    Yields:
        The events produced while copying intermediates, the step events, the
        plan events and the hook events.
    """
    check.inst_param(pipeline_context, "pipeline_context", SystemExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    retries = pipeline_context.retries

    yield from copy_required_intermediates_for_execution(pipeline_context, execution_plan)

    with execution_plan.start(retries=retries) as active_execution:

        # It would be good to implement a reference tracking algorithm here to
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        while not active_execution.is_complete:
            step = active_execution.get_next_step()
            step_context = pipeline_context.for_step(step)
            # Everything emitted for this step; handed to the hooks at the end.
            events_for_hooks = []

            missing_resources = [
                key
                for key in step_context.required_resource_keys
                if not hasattr(step_context.resources, key)
            ]
            check.invariant(
                not missing_resources,
                (
                    "Expected step context for solid {solid_name} to have all required resources, but "
                    "missing {missing_resources}."
                ).format(solid_name=step_context.solid.name, missing_resources=missing_resources),
            )

            # capture all of the logs for this step
            with pipeline_context.instance.compute_log_manager.watch(
                step_context.pipeline_run, step_context.step.key
            ):
                missing_input_sources = pipeline_context.intermediate_storage.get_missing_input_sources(
                    step_context, step
                )
                if not missing_input_sources:
                    step_events = check.generator(
                        _dagster_event_sequence_for_step(step_context, retries)
                    )
                    for step_event in step_events:
                        check.inst(step_event, DagsterEvent)
                        events_for_hooks.append(step_event)
                        yield step_event
                        active_execution.handle_event(step_event)
                else:
                    # In partial pipeline execution, we may end up here without having validated the
                    # missing dependent outputs were optional
                    _assert_missing_sources_from_optional_outputs(
                        missing_input_sources, execution_plan, step.key
                    )

                    step_context.log.info(
                        (
                            "Not all inputs covered for {step}. Not executing. Sources missing: {missing_input_sources}"
                        ).format(missing_input_sources=missing_input_sources, step=step.key)
                    )
                    skipped_event = DagsterEvent.step_skipped_event(step_context)
                    events_for_hooks.append(skipped_event)
                    yield skipped_event
                    active_execution.mark_skipped(step.key)

                active_execution.verify_complete(pipeline_context, step.key)

            # process skips from failures or uncovered inputs
            for plan_event in active_execution.plan_events_iterator(pipeline_context):
                events_for_hooks.append(plan_event)
                yield plan_event

            # pass a list of step events to hooks
            yield from _trigger_hook(step_context, events_for_hooks)

Пример #12

Показать файл

Файл: execute_plan.py Проект: prezi/dagster

def inner_plan_execution_iterator(
        pipeline_context: PlanExecutionContext,
        execution_plan: ExecutionPlan) -> Iterator[DagsterEvent]:
    """Execute the steps of ``execution_plan`` in sequence and yield their events.

    Until the active execution reports completion, the next step is fetched,
    a capture-logs event is emitted for it, and its event sequence is run.
    After each step the pending plan events and the hook events for that step
    are yielded.

    Args:
        pipeline_context: Plan-level execution context of the run.
        execution_plan: Plan to execute.

    Yields:
        The capture-logs, step, plan and hook events, in that order per step.
    """
    check.inst_param(pipeline_context, "pipeline_context",
                     PlanExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    with execution_plan.start(
            retry_mode=pipeline_context.retry_mode) as active_execution:

        # It would be good to implement a reference tracking algorithm here to
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        while not active_execution.is_complete:
            step = active_execution.get_next_step()
            attempt_count = active_execution.retry_state.get_attempt_count(
                step.key)
            step_context = cast(
                StepExecutionContext,
                pipeline_context.for_step(step, attempt_count),
            )
            # Everything emitted for this step; handed to the hooks at the end.
            events_for_hooks = []

            missing_resources = [
                key for key in step_context.required_resource_keys
                if not hasattr(step_context.resources, key)
            ]
            check.invariant(
                not missing_resources,
                ("Expected step context for solid {solid_name} to have all required resources, but "
                 "missing {missing_resources}.").format(
                     solid_name=step_context.solid.name,
                     missing_resources=missing_resources),
            )

            # capture all of the logs for this step
            with pipeline_context.instance.compute_log_manager.watch(
                    step_context.pipeline_run, step_context.step.key):
                yield DagsterEvent.capture_logs(step_context,
                                                log_key=step_context.step.key,
                                                steps=[step_context.step])

                step_events = check.generator(
                    _dagster_event_sequence_for_step(step_context))
                for step_event in step_events:
                    check.inst(step_event, DagsterEvent)
                    events_for_hooks.append(step_event)
                    yield step_event
                    active_execution.handle_event(step_event)

                active_execution.verify_complete(pipeline_context, step.key)

            # process skips from failures or uncovered inputs
            for plan_event in active_execution.plan_events_iterator(
                    pipeline_context):
                events_for_hooks.append(plan_event)
                yield plan_event

            # pass a list of step events to hooks
            yield from _trigger_hook(step_context, events_for_hooks)

Пример #13

Показать файл

def pipeline_initialization_event_generator(
    pipeline_def,
    environment_dict,
    pipeline_run,
    instance,
    execution_plan,
    scoped_resources_builder_cm,
    system_storage_data=None,
    raise_on_error=False,
):
    '''Build the pipeline execution context, yielding resource events around it.

    The generator yields, in order: the resource setup events, the constructed
    pipeline context itself, and (once resumed) the resource teardown events.

    If a ``DagsterError`` is raised before the context exists, a
    pipeline-init-failure event is yielded, followed by the teardown events of
    the resources manager when one was already created; the error is re-raised
    only when ``raise_on_error`` is true. If the error is raised after the
    context was constructed, it is always re-raised.

    Args:
        pipeline_def (PipelineDefinition): Definition of the pipeline to run.
        environment_dict (dict): Environment configuration with string keys.
        pipeline_run (PipelineRun): The run being executed.
        instance (DagsterInstance): The Dagster instance.
        execution_plan (ExecutionPlan): Plan the context is validated against.
        scoped_resources_builder_cm (callable): Called to obtain the resources
            manager, which must provide ``generate_setup_events()``,
            ``get_object()`` and ``generate_teardown_events()``.
        system_storage_data (Optional[SystemStorageData]): Passed on to
            ``create_system_storage_data``.
        raise_on_error (bool): Whether initialization errors are re-raised after
            the failure event has been yielded.
    '''
    pipeline_def = check.inst_param(pipeline_def, 'pipeline_def',
                                    PipelineDefinition)
    environment_dict = check.dict_param(environment_dict,
                                        'environment_dict',
                                        key_type=str)
    pipeline_run = check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    instance = check.inst_param(instance, 'instance', DagsterInstance)
    execution_plan = check.inst_param(execution_plan, 'execution_plan',
                                      ExecutionPlan)
    scoped_resources_builder_cm = check.callable_param(
        scoped_resources_builder_cm, 'scoped_resources_builder_cm')
    system_storage_data = check.opt_inst_param(system_storage_data,
                                               'system_storage_data',
                                               SystemStorageData)
    raise_on_error = check.bool_param(raise_on_error, 'raise_on_error')

    # Both stay None until successfully created; the except branch below uses
    # them to tell how far initialization got.
    pipeline_context = None
    resources_manager = None

    try:
        context_creation_data = create_context_creation_data(
            pipeline_def,
            environment_dict,
            pipeline_run,
            instance,
            execution_plan,
        )
        executor_config = create_executor_config(context_creation_data)
        log_manager = create_log_manager(context_creation_data)
        resources_manager = scoped_resources_builder_cm(
            execution_plan,
            context_creation_data.environment_config,
            context_creation_data.pipeline_run,
            log_manager,
            context_creation_data.resource_keys_to_init,
        )
        for event in resources_manager.generate_setup_events():
            yield event
        scoped_resources_builder = check.inst(resources_manager.get_object(),
                                              ScopedResourcesBuilder)
        system_storage_data = create_system_storage_data(
            context_creation_data, system_storage_data,
            scoped_resources_builder)
        pipeline_context = construct_pipeline_execution_context(
            context_creation_data=context_creation_data,
            scoped_resources_builder=scoped_resources_builder,
            system_storage_data=system_storage_data,
            log_manager=log_manager,
            executor_config=executor_config,
            raise_on_error=raise_on_error,
        )

        _validate_plan_with_context(pipeline_context, execution_plan)

        # Hand the context to the consumer; teardown runs once it resumes us.
        yield pipeline_context
        for event in resources_manager.generate_teardown_events():
            yield event
    except DagsterError as dagster_error:
        if pipeline_context is None:
            # Initialization failed before a context existed: report it as an
            # init failure event rather than raising straight away.
            user_facing_exc_info = (
                # pylint does not know original_exc_info exists if is_user_code_error is true
                # pylint: disable=no-member
                dagster_error.original_exc_info
                if dagster_error.is_user_code_error else sys.exc_info())
            error_info = serializable_error_info_from_exc_info(
                user_facing_exc_info)

            yield DagsterEvent.pipeline_init_failure(
                pipeline_name=pipeline_def.name,
                failure_data=PipelineInitFailureData(error=error_info),
                log_manager=_create_context_free_log_manager(
                    instance, pipeline_run, pipeline_def),
            )
            # Resources that were already set up still get their teardown events.
            if resources_manager:
                for event in resources_manager.generate_teardown_events():
                    yield event
        else:
            # pipeline teardown failure
            raise dagster_error

        if raise_on_error:
            raise dagster_error

Пример #14

Показать файл

Файл: engine.py Проект: cy56/dagster

def _core_celery_execution_loop(pipeline_context, execution_plan, step_execution_fn):
    '''Submit the steps of an execution plan as Celery tasks and relay their events.

    The loop polls the outstanding task results, yields the deserialized
    events of every finished task, and submits the steps that became ready for
    execution. If fetching a task result raises, the error is recorded, no
    further steps are submitted, and the loop continues until the remaining
    outstanding results have been collected.

    Args:
        pipeline_context (SystemPipelineExecutionContext): Context of the run;
            its ``executor_config`` must be a ``CeleryConfig`` or
            ``CeleryK8sJobConfig``.
        execution_plan (ExecutionPlan): Plan whose steps are submitted.
        step_execution_fn (callable): Called as
            ``step_execution_fn(app, pipeline_context, step, queue, priority)``;
            its return value must offer ``ready()`` and ``get()``.

    Yields:
        DagsterEvent: Engine events for each submission, the events returned by
        the tasks, and the skipped-step events of the active execution.

    Raises:
        DagsterSubprocessError: After the loop ends, if fetching any task
            result raised.
    '''
    from .tasks import make_app

    check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.callable_param(step_execution_fn, 'step_execution_fn')

    check.param_invariant(
        isinstance(pipeline_context.executor_config, (CeleryConfig, CeleryK8sJobConfig)),
        'pipeline_context',
        'Expected executor_config to be Celery config got {}'.format(
            pipeline_context.executor_config
        ),
    )

    celery_config = pipeline_context.executor_config

    # https://github.com/dagster-io/dagster/issues/2440
    check.invariant(
        pipeline_context.system_storage_def.is_persistent,
        'Cannot use in-memory storage with Celery, use filesystem (on top of NFS or '
        'similar system that allows files to be available to all nodes), S3, or GCS',
    )

    app = make_app(celery_config)

    def priority_for_step(step):
        # Negated so that a higher step/run priority yields a smaller sort key.
        return (
            -1 * int(step.tags.get(DAGSTER_CELERY_STEP_PRIORITY_TAG, task_default_priority))
            + -1 * _get_run_priority(pipeline_context)
        )

    def priority_for_key(step_key):
        return priority_for_step(execution_plan.get_step_by_key(step_key))

    _warn_on_priority_misuse(pipeline_context, execution_plan)

    step_results = {}  # Dict[step_key, celery.AsyncResult]
    step_errors = {}  # Dict[step_key, SerializableErrorInfo]
    active_execution = execution_plan.start(
        retries=pipeline_context.executor_config.retries, sort_key_fn=priority_for_step
    )
    stopping = False

    # Keep looping while results are outstanding, even once we are stopping.
    while (not active_execution.is_complete and not stopping) or step_results:

        results_to_pop = []
        for step_key, result in sorted(step_results.items(), key=lambda x: priority_for_key(x[0])):
            if result.ready():
                try:
                    step_events = result.get()
                except Exception:  # pylint: disable=broad-except
                    # We will want to do more to handle the exception here.. maybe subclass Task
                    # Certainly yield an engine or pipeline event
                    step_events = []
                    step_errors[step_key] = serializable_error_info_from_exc_info(sys.exc_info())
                    stopping = True
                for step_event in step_events:
                    event = deserialize_json_to_dagster_namedtuple(step_event)
                    yield event
                    active_execution.handle_event(event)

                results_to_pop.append(step_key)

        # Removal is deferred so step_results is not mutated while iterating it.
        for step_key in results_to_pop:
            if step_key in step_results:
                del step_results[step_key]
                active_execution.verify_complete(pipeline_context, step_key)

        # process skips from failures or uncovered inputs
        for event in active_execution.skipped_step_events_iterator(pipeline_context):
            yield event

        # don't add any new steps if we are stopping
        if stopping:
            continue

        # This is a slight refinement. If we have n workers idle and schedule m > n steps for
        # execution, the first n steps will be picked up by the idle workers in the order in
        # which they are scheduled (and the following m-n steps will be executed in priority
        # order, provided that it takes longer to execute a step than to schedule it). The test
        # case has m >> n to exhibit this behavior in the absence of this sort step.
        for step in active_execution.get_steps_to_execute():
            try:
                queue = step.tags.get(DAGSTER_CELERY_QUEUE_TAG, task_default_queue)
                yield DagsterEvent.engine_event(
                    pipeline_context,
                    'Submitting celery task for step "{step_key}" to queue "{queue}".'.format(
                        step_key=step.key, queue=queue
                    ),
                    EngineEventData(marker_start=DELEGATE_MARKER),
                    step_key=step.key,
                )

                # Get the Celery priority for this step
                priority = _get_step_priority(pipeline_context, step)

                # Submit the Celery tasks
                step_results[step.key] = step_execution_fn(
                    app, pipeline_context, step, queue, priority
                )

            except Exception:
                # Report the submission failure as an engine event, then let it propagate.
                yield DagsterEvent.engine_event(
                    pipeline_context,
                    'Encountered error during celery task submission.',
                    event_specific_data=EngineEventData.engine_error(
                        serializable_error_info_from_exc_info(sys.exc_info()),
                    ),
                )
                raise

        time.sleep(TICK_SECONDS)

    if step_errors:
        raise DagsterSubprocessError(
            'During celery execution errors occurred in workers:\n{error_list}'.format(
                error_list='\n'.join(
                    [
                        '[{step}]: {err}'.format(step=key, err=err.to_string())
                        for key, err in step_errors.items()
                    ]
                )
            ),
            subprocess_error_infos=list(step_errors.values()),
        )

Пример #15

Показать файл

def test_multiline_logging_complex():
    """Check how DagsterLogManager renders a log call carrying a STEP_FAILURE DagsterEvent.

    The captured output is split on newlines and every line after the first is
    treated as one ``key = value`` pair. Plain tags must appear verbatim, the
    generated message id and timestamp must match their regexes, and the
    ``dagster_event`` line must contain JSON equal to the expected serialized
    form of the event.
    """
    msg = 'DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0'

    # Defined once and shared between the event under test and the expected
    # serialized form, so the two cannot drift apart.
    error_message = "FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'\n"
    error_stack = [
        '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/errors.py", line 186, in user_code_error_boundary\n    yield\n',
        '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/execution_plan/simple_engine.py", line 365, in _event_sequence_for_step_compute_fn\n    for step_output in gen:\n',
        '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/execution_plan/materialization_thunk.py", line 28, in _fn\n    runtime_type.output_materialization_config.materialize_runtime_value(config_spec, runtime_value)\n',
        '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/types/config_schema.py", line 93, in materialize_runtime_value\n    return func(config_value, runtime_value)\n',
        '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/types/config_schema.py", line 110, in _selector\n    return func(selector_key, selector_value, runtime_value)\n',
        '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/types/builtin_config_schemas.py", line 59, in _builtin_output_schema\n    with open(json_file_path, \'w\') as ff:\n',
    ]

    kwargs = {
        'pipeline': 'example',
        'pipeline_name': 'example',
        'step_key': 'start.materialization.output.result.0',
        'solid': 'start',
        'solid_definition': 'emit_num',
        'dagster_event': DagsterEvent(
            event_type_value='STEP_FAILURE',
            pipeline_name='error_monster',
            step_key='start.materialization.output.result.0',
            solid_handle=SolidHandle('start', 'emit_num', None),
            step_kind_value='MATERIALIZATION_THUNK',
            logging_tags={
                'pipeline': 'error_monster',
                'step_key': 'start.materialization.output.result.0',
                'solid': 'start',
                'solid_definition': 'emit_num',
            },
            event_specific_data=StepFailureData(
                error=SerializableErrorInfo(
                    message=error_message,
                    stack=error_stack,
                    cls_name='FileNotFoundError',
                ),
                user_failure_data=None,
            ),
        ),
    }

    with _setup_logger(DAGSTER_DEFAULT_LOGGER) as (captured_results, logger):

        dl = DagsterLogManager('123', {}, [logger])
        dl.info(msg, **kwargs)

        # The first line of the captured record is dropped; the rest are the pairs.
        kv_pairs = set(captured_results[0].split('\n')[1:])

    expected_pairs = [
        '        orig_message = "DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0"',
        '              run_id = "123"',
        '            pipeline = "example"',
        '    solid_definition = "emit_num"',
        '       pipeline_name = "example"',
        '               solid = "start"',
        '            step_key = "start.materialization.output.result.0"',
    ]
    for e in expected_pairs:
        assert e in kv_pairs

    assert _regex_match_kv_pair(
        r'      log_message_id = "{0}"'.format(REGEX_UUID), kv_pairs)
    assert _regex_match_kv_pair(
        r'       log_timestamp = "{0}"'.format(REGEX_TS), kv_pairs)

    expected_dagster_event = {
        'event_specific_data': [
            [error_message, error_stack, 'FileNotFoundError'],
            None,  # user_failure_data
        ],
        'event_type_value': 'STEP_FAILURE',
        'message': None,
        'pipeline_name': 'error_monster',
        'solid_handle': ['start', 'emit_num', None],
        'step_key': 'start.materialization.output.result.0',
        'step_kind_value': 'MATERIALIZATION_THUNK',
        'logging_tags': {
            'pipeline': 'error_monster',
            'solid': 'start',
            'solid_definition': 'emit_num',
            'step_key': 'start.materialization.output.result.0',
        },
    }

    dagster_event_lines = [pair for pair in kv_pairs if 'dagster_event' in pair]
    assert dagster_event_lines, 'no dagster_event line was logged'

    # str.strip(chars) removes any of the given *characters* from both ends; it
    # is not a prefix remover and would eat into a payload that starts or ends
    # with one of those characters. Split on the key/value separator instead.
    serialized_event = dagster_event_lines[0].partition('dagster_event = ')[2]
    dagster_event = json.loads(serialized_event)
    assert dagster_event == expected_dagster_event

Пример #16

Показать файл

Файл: step_delegating_executor.py Проект: keyz/dagster

    def execute(self, plan_context: PlanOrchestrationContext,
                execution_plan: ExecutionPlan):
        """Orchestrate a run by delegating every step to ``self._step_handler``.

        This is a generator: it yields ``DagsterEvent`` objects (engine events
        created here, events obtained from ``self._pop_events`` and plan events
        produced by the active execution) until the plan is complete or an
        interrupt is detected.

        Args:
            plan_context: Orchestration context for the run; supplies the
                instance, the run id and the ``resume_from_failure`` flag.
            execution_plan: The plan whose steps are launched through the
                step handler.

        Yields:
            DagsterEvent: events in the order they are produced or observed.

        NOTE(review): ``_pop_events``, ``_log_new_events`` and
        ``_get_step_handler_context`` are defined outside this block; the
        comments below describe only how they are used here.
        """
        check.inst_param(plan_context, "plan_context",
                         PlanOrchestrationContext)
        check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

        # Reset the cursor before any events are popped.
        # NOTE(review): presumably consumed by _pop_events -- confirm.
        self._event_cursor = -1  # pylint: disable=attribute-defined-outside-init

        yield DagsterEvent.engine_event(
            plan_context,
            f"Starting execution with step handler {self._step_handler.name}",
            EngineEventData(),
        )

        with execution_plan.start(retry_mode=self.retries) as active_execution:
            # Steps that have been launched and have not yet reported
            # success or failure, keyed by step key.
            running_steps: Dict[str, ExecutionStep] = {}

            if plan_context.resume_from_failure:
                yield DagsterEvent.engine_event(
                    plan_context,
                    "Resuming execution from failure",
                    EngineEventData(),
                )

                # Re-yield the events popped for this run, then let the active
                # execution rebuild its state from them.
                prior_events = self._pop_events(
                    plan_context.instance,
                    plan_context.run_id,
                )
                for dagster_event in prior_events:
                    yield dagster_event

                possibly_in_flight_steps = active_execution.rebuild_from_events(
                    prior_events)
                for step in possibly_in_flight_steps:

                    yield DagsterEvent.engine_event(
                        plan_context,
                        "Checking on status of possibly launched steps",
                        EngineEventData(),
                        step.handle,
                    )

                    # TODO: check if failure event included. For now, hacky assumption that
                    # we don't log anything on successful check
                    if self._step_handler.check_step_health(
                            self._get_step_handler_context(
                                plan_context, [step], active_execution)):
                        # health check failed, launch the step
                        self._log_new_events(
                            self._step_handler.launch_step(
                                self._get_step_handler_context(
                                    plan_context, [step], active_execution)),
                            plan_context,
                            {
                                step.key: step
                                for step in possibly_in_flight_steps
                            },
                        )

                    # The step is tracked as running whether or not it had to
                    # be relaunched above.
                    running_steps[step.key] = step

            last_check_step_health_time = pendulum.now("UTC")

            # Order of events is important here. During an iteration, we call handle_event, then get_steps_to_execute,
            # then is_complete. get_steps_to_execute updates the state of ActiveExecution, and without it
            # is_complete can return true when we're just between steps.
            while not active_execution.is_complete:

                if active_execution.check_for_interrupts():
                    if not plan_context.instance.run_will_resume(
                            plan_context.run_id):
                        # The run will not be resumed: ask the step handler to
                        # terminate every step still tracked as running.
                        yield DagsterEvent.engine_event(
                            plan_context,
                            "Executor received termination signal, forwarding to steps",
                            EngineEventData.interrupted(
                                list(running_steps.keys())),
                        )
                        active_execution.mark_interrupted()
                        for _, step in running_steps.items():
                            self._log_new_events(
                                self._step_handler.terminate_step(
                                    self._get_step_handler_context(
                                        plan_context, [step],
                                        active_execution)),
                                plan_context,
                                running_steps,
                            )

                    else:
                        # The run will be resumed: leave the steps alone and
                        # only record which ones were in flight.
                        yield DagsterEvent.engine_event(
                            plan_context,
                            "Executor received termination signal, not forwarding to steps because "
                            "run will be resumed",
                            EngineEventData(metadata_entries=[
                                EventMetadataEntry.text(
                                    str(running_steps.keys()),
                                    "steps_in_flight")
                            ]),
                        )
                        active_execution.mark_interrupted()

                    # Either way the orchestration loop stops after an interrupt.
                    return

                for dagster_event in self._pop_events(
                        plan_context.instance,
                        plan_context.run_id,
                ):  # type: ignore

                    # STEP_SKIPPED events are only emitted by ActiveExecution, which already handles
                    # and yields them.
                    if dagster_event.is_step_skipped:
                        assert isinstance(dagster_event.step_key, str)
                        active_execution.verify_complete(
                            plan_context, dagster_event.step_key)

                    else:
                        yield dagster_event
                        active_execution.handle_event(dagster_event)

                        # A terminal step event means the step is no longer
                        # running; stop tracking it.
                        if dagster_event.is_step_success or dagster_event.is_step_failure:
                            assert isinstance(dagster_event.step_key, str)
                            del running_steps[dagster_event.step_key]
                            active_execution.verify_complete(
                                plan_context, dagster_event.step_key)

                # process skips from failures or uncovered inputs
                for event in active_execution.plan_events_iterator(
                        plan_context):
                    yield event

                # Run the health check only once per configured interval
                # rather than on every loop iteration.
                curr_time = pendulum.now("UTC")
                if (curr_time - last_check_step_health_time).total_seconds(
                ) >= self._check_step_health_interval_seconds:
                    last_check_step_health_time = curr_time
                    for _, step in running_steps.items():
                        self._log_new_events(
                            self._step_handler.check_step_health(
                                self._get_step_handler_context(
                                    plan_context, [step], active_execution)),
                            plan_context,
                            running_steps,
                        )

                # Launch every step that is ready; each one is recorded as
                # running before the launch is requested.
                for step in active_execution.get_steps_to_execute():
                    running_steps[step.key] = step
                    self._log_new_events(
                        self._step_handler.launch_step(
                            self._get_step_handler_context(
                                plan_context, [step], active_execution)),
                        plan_context,
                        running_steps,
                    )

                # Pause between polling iterations.
                time.sleep(self._sleep_seconds)

Пример #17

Показать файл

def host_mode_execution_context_event_generator(
    pipeline,
    execution_plan,
    run_config,
    pipeline_run,
    instance,
    raise_on_error,
    executor_defs,
    output_capture,
    resume_from_failure: bool = False,
):
    """Build the host-mode orchestration context and yield it.

    After validating its arguments, this generator creates a log manager from
    the default system loggers, resolves the executor and yields a single
    ``PlanOrchestrationContext``.

    If a ``DagsterError`` is raised before the context exists, a
    pipeline-failure event is logged and yielded, and the error is re-raised
    only when ``raise_on_error`` is true. If the error arrives after the
    context was created, it is always re-raised.
    """
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
    check.inst_param(pipeline, "pipeline", ReconstructablePipeline)

    check.dict_param(run_config, "run_config", key_type=str)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    executor_defs = check.list_param(
        executor_defs, "executor_defs", of_type=ExecutorDefinition
    )
    check.bool_param(raise_on_error, "raise_on_error")
    check.invariant(output_capture is None)

    # One logger instance per default system logger definition.
    system_loggers = [
        logger_def.logger_fn(
            InitLoggerContext(
                logger_config,
                pipeline_def=None,
                logger_def=logger_def,
                run_id=pipeline_run.run_id,
            )
        )
        for logger_def, logger_config in default_system_loggers()
    ]
    log_manager = DagsterLogManager.create(
        loggers=system_loggers,
        pipeline_run=pipeline_run,
        instance=instance,
    )

    # Stays None until the context is built; the error handler uses this to
    # tell an initialization failure from a later one.
    execution_context = None

    try:
        executor = _get_host_mode_executor(
            pipeline, run_config, executor_defs, instance
        )
        plan_data = PlanData(
            pipeline=pipeline,
            pipeline_run=pipeline_run,
            instance=instance,
            execution_plan=execution_plan,
            raise_on_error=raise_on_error,
            retry_mode=executor.retries,
        )
        execution_context = PlanOrchestrationContext(
            plan_data=plan_data,
            log_manager=log_manager,
            executor=executor,
            output_capture=None,
            resume_from_failure=resume_from_failure,
        )

        yield execution_context

    except DagsterError as dagster_error:
        if execution_context is not None:
            # pipeline teardown failure
            raise dagster_error

        # Initialization failure: report the user's original exception when
        # the error wraps user code, otherwise the exception being handled.
        if dagster_error.is_user_code_error:
            # pylint does not know original_exc_info exists if is_user_code_error is true
            # pylint: disable=no-member
            user_facing_exc_info = dagster_error.original_exc_info  # type: ignore
        else:
            user_facing_exc_info = sys.exc_info()
        error_info = serializable_error_info_from_exc_info(user_facing_exc_info)

        failure_message = (
            f'Pipeline failure during initialization for pipeline "{pipeline_run.pipeline_name}". '
            "This may be due to a failure in initializing the executor or one of the loggers."
        )
        event = DagsterEvent.pipeline_failure(
            pipeline_context_or_name=pipeline_run.pipeline_name,
            context_msg=failure_message,
            error_info=error_info,
        )
        log_manager.log_dagster_event(
            level=logging.ERROR,
            msg=event.message,
            dagster_event=event,  # type: ignore
        )
        yield event

        if raise_on_error:
            raise dagster_error

Пример #18

Показать файл

Файл: core_execution_loop.py Проект: uttasarga9067/dagster

def core_celery_execution_loop(pipeline_context, execution_plan,
                               step_execution_fn):
    """Run an execution plan by submitting each step as a Celery task.

    This is a generator. Each polling iteration it:

    1. checks for interrupts and, if one is seen, revokes all submitted tasks;
    2. collects finished task results, yielding and handling their events;
    3. yields plan events (skips from failures or uncovered inputs);
    4. submits newly executable steps, unless stopping or a worker errored;
    5. sleeps ``TICK_SECONDS``.

    Args:
        pipeline_context: ``SystemPipelineExecutionContext`` for the run.
        execution_plan: ``ExecutionPlan`` to execute; its artifacts must be
            persisted (in-memory storage is rejected).
        step_execution_fn: callable invoked as
            ``step_execution_fn(app, pipeline_context, step, queue, priority)``;
            its return value is polled with ``ready()``/``get()`` and may be
            ``revoke()``d.

    Yields:
        DagsterEvent: engine events created here plus the deserialized events
        returned by the workers.

    Raises:
        DagsterSubprocessError: after the loop, if fetching any task result
            raised an exception other than ``TaskRevokedError``.
        Exception: any error raised while submitting a task is re-raised after
            an engine-error event has been yielded.
    """
    check.inst_param(pipeline_context, "pipeline_context",
                     SystemPipelineExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
    check.callable_param(step_execution_fn, "step_execution_fn")

    executor = pipeline_context.executor

    # https://github.com/dagster-io/dagster/issues/2440
    check.invariant(
        execution_plan.artifacts_persisted,
        "Cannot use in-memory storage with Celery, use filesystem (on top of NFS or "
        "similar system that allows files to be available to all nodes), S3, or GCS",
    )

    app = make_app(executor.app_args())

    def priority_for_step(step):
        # Sort key: the step's priority tag plus the run priority, negated so
        # that an ascending sort puts the highest priority first.
        return (-1 * int(
            step.tags.get(DAGSTER_CELERY_STEP_PRIORITY_TAG,
                          task_default_priority)
        ) + -1 * _get_run_priority(pipeline_context))

    def priority_for_key(step_key):
        # Same sort key, looked up from a step key.
        return priority_for_step(execution_plan.get_step_by_key(step_key))

    _warn_on_priority_misuse(pipeline_context, execution_plan)

    step_results = {}  # step key -> result handle returned by step_execution_fn
    step_errors = {}  # step key -> serialized error raised while fetching a result

    with execution_plan.start(
            retries=pipeline_context.executor.retries,
            sort_key_fn=priority_for_step,
    ) as active_execution:

        stopping = False

        # Keep looping while there is work left to schedule, and in any case
        # until every submitted task has been collected.
        while (not active_execution.is_complete
               and not stopping) or step_results:
            if active_execution.check_for_interrupts():
                yield DagsterEvent.engine_event(
                    pipeline_context,
                    "Celery executor: received termination signal - revoking active tasks from workers",
                    EngineEventData.interrupted(list(step_results.keys())),
                )
                stopping = True
                active_execution.mark_interrupted()
                for result in step_results.values():
                    result.revoke()

            # Keys are collected first and removed afterwards so that
            # step_results is not mutated while it is being iterated.
            results_to_pop = []
            for step_key, result in sorted(
                    step_results.items(),
                    key=lambda x: priority_for_key(x[0])):
                if result.ready():
                    try:
                        step_events = result.get()
                    except TaskRevokedError:
                        step_events = []
                        yield DagsterEvent.engine_event(
                            pipeline_context,
                            'celery task for running step "{step_key}" was revoked.'
                            .format(step_key=step_key, ),
                            EngineEventData(marker_end=DELEGATE_MARKER),
                            step_handle=active_execution.get_step_by_key(
                                step_key).handle,
                        )
                    except Exception:  # pylint: disable=broad-except
                        # We will want to do more to handle the exception here.. maybe subclass Task
                        # Certainly yield an engine or pipeline event
                        step_events = []
                        step_errors[
                            step_key] = serializable_error_info_from_exc_info(
                                sys.exc_info())
                    for step_event in step_events:
                        event = deserialize_json_to_dagster_namedtuple(
                            step_event)
                        yield event
                        active_execution.handle_event(event)

                    results_to_pop.append(step_key)

            for step_key in results_to_pop:
                if step_key in step_results:
                    del step_results[step_key]
                    active_execution.verify_complete(pipeline_context,
                                                     step_key)

            # process skips from failures or uncovered inputs
            for event in active_execution.plan_events_iterator(
                    pipeline_context):
                yield event

            # don't add any new steps if we are stopping
            if not (stopping or step_errors):
                # This is a slight refinement. If we have n workers idle and schedule m > n steps for
                # execution, the first n steps will be picked up by the idle workers in the order in
                # which they are scheduled (and the following m-n steps will be executed in priority
                # order, provided that it takes longer to execute a step than to schedule it). The test
                # case has m >> n to exhibit this behavior in the absence of this sort step.
                for step in active_execution.get_steps_to_execute():
                    try:
                        queue = step.tags.get(DAGSTER_CELERY_QUEUE_TAG,
                                              task_default_queue)
                        yield DagsterEvent.engine_event(
                            pipeline_context,
                            'Submitting celery task for step "{step_key}" to queue "{queue}".'
                            .format(step_key=step.key, queue=queue),
                            EngineEventData(marker_start=DELEGATE_MARKER),
                            step_handle=step.handle,
                        )

                        # Get the Celery priority for this step
                        priority = _get_step_priority(pipeline_context, step)

                        # Submit the Celery tasks
                        step_results[step.key] = step_execution_fn(
                            app, pipeline_context, step, queue, priority)

                    except Exception:
                        yield DagsterEvent.engine_event(
                            pipeline_context,
                            "Encountered error during celery task submission.",
                            event_specific_data=EngineEventData.engine_error(
                                serializable_error_info_from_exc_info(
                                    sys.exc_info()), ),
                        )
                        raise

            # Sleep on every iteration, including while draining in-flight
            # tasks after an interrupt or worker error. Skipping the sleep on
            # that path made the loop poll the result backend in a tight spin.
            time.sleep(TICK_SECONDS)

        if step_errors:
            raise DagsterSubprocessError(
                "During celery execution errors occurred in workers:\n{error_list}"
                .format(error_list="\n".join([
                    "[{step}]: {err}".format(step=key, err=err.to_string())
                    for key, err in step_errors.items()
                ])),
                subprocess_error_infos=list(step_errors.values()),
            )

Пример #19

Показать файл

    def execute(pipeline_context, execution_plan):
        """Execute the plan's steps in child processes, at most ``max_concurrent`` at a time.

        This is a generator. It yields an engine event on start, then forwards
        every event produced by the per-step iterators returned by
        ``execute_step_out_of_process``, then yields skip events from the
        active execution, and finally yields an engine event with the total
        duration.

        Args:
            pipeline_context: ``SystemPipelineExecutionContext`` for the run;
                ``executor_config.max_concurrent`` bounds the number of active
                step iterators and ``executor_config.retries`` is passed to
                ``execution_plan.start``.
            execution_plan: ``ExecutionPlan`` to execute.

        Yields:
            DagsterEvent: engine, step and skip events.

        Raises:
            DagsterSubprocessError: if any entry recorded in the shared
                ``errors`` mapping is truthy once all iterators are exhausted.
        """
        check.inst_param(pipeline_context, 'pipeline_context',
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

        limit = pipeline_context.executor_config.max_concurrent

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Executing steps using multiprocess engine: parent process (pid: {pid})'
            .format(pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(
                os.getpid(),
                step_keys_to_execute=execution_plan.step_keys_to_execute),
        )

        # It would be good to implement a reference tracking algorithm here so we could
        # garbage collection results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        with time_execution_scope() as timer_result:

            active_execution = execution_plan.start(
                retries=pipeline_context.executor_config.retries)
            active_iters = {}  # step key -> event iterator for that step
            errors = {}  # filled in by execute_step_out_of_process
            term_events = {}  # step key -> multiprocessing Event used to request termination
            stopping = False

            # Once stopping, no new steps are started, but iterators that are
            # already active are still drained.
            while (not stopping
                   and not active_execution.is_complete) or active_iters:
                try:
                    # start iterators
                    while len(active_iters) < limit and not stopping:
                        steps = active_execution.get_steps_to_execute(
                            limit=(limit - len(active_iters)))

                        if not steps:
                            break

                        for step in steps:
                            step_context = pipeline_context.for_step(step)
                            term_events[
                                step.key] = get_multiprocessing_context(
                                ).Event()
                            active_iters[
                                step.key] = execute_step_out_of_process(
                                    step_context, step, errors, term_events)

                    # process active iterators
                    empty_iters = []
                    for key, step_iter in active_iters.items():
                        try:
                            event_or_none = next(step_iter)
                            # None means the iterator had nothing to report on
                            # this poll.
                            if event_or_none is not None:
                                yield event_or_none
                                active_execution.handle_event(event_or_none)

                        except StopIteration:
                            empty_iters.append(key)

                    # clear and mark complete finished iterators
                    for key in empty_iters:
                        del active_iters[key]
                        # A set termination event on a finished step switches
                        # the whole engine into stopping mode.
                        if term_events[key].is_set():
                            stopping = True
                        del term_events[key]
                        active_execution.verify_complete(pipeline_context, key)

                    # process skips from failures or uncovered inputs
                    for event in active_execution.skipped_step_events_iterator(
                            pipeline_context):
                        yield event

                # In the very small chance that we get interrupted in this coordination section and not
                # polling the subprocesses for events - try to clean up gracefully
                except KeyboardInterrupt:
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        'Multiprocess engine: received KeyboardInterrupt - forwarding to active child processes',
                        EngineEventData.interrupted(list(term_events.keys())),
                    )
                    stopping = True
                    for event in term_events.values():
                        event.set()

            # Keep only the entries that actually carry an error.
            errs = {pid: err for pid, err in errors.items() if err}
            if errs:
                raise DagsterSubprocessError(
                    'During multiprocess execution errors occurred in child processes:\n{error_list}'
                    .format(error_list='\n'.join([
                        'In process {pid}: {err}'.format(pid=pid,
                                                         err=err.to_string())
                        for pid, err in errs.items()
                    ])),
                    subprocess_error_infos=list(errs.values()),
                )

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Multiprocess engine: parent process exiting after {duration} (pid: {pid})'
            .format(duration=format_duration(timer_result.millis),
                    pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(os.getpid()),
        )

Пример #20

Показать файл

Файл: execute_step.py Проект: G9999/dagster

def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Execute the step described by ``step_context`` and yield its DagsterEvents.

    The sequence is: a start (or restart) event, one store-operation event per
    input that was loaded through an object or asset store, the events of the
    input type checks, the events derived from what the compute function
    emits, and finally a success event carrying the compute duration.
    Exceptions raised during the computation are not caught here.

    Args:
        step_context: ``SystemStepExecutionContext`` for the step.
        prior_attempt_count: number of earlier attempts; a value greater than
            zero yields a restart event instead of a start event.

    Yields:
        DagsterEvent: the events described above, in order.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")
    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context,
                                                prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    for input_name, input_value in _input_values_from_intermediate_storage(
            step_context):
        if isinstance(input_value, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(input_value,
                                                  value_name=input_name))
            inputs[input_name] = input_value.obj
        elif isinstance(input_value, MultipleStepOutputsListWrapper):
            # Fan-in input: classify each element, not the wrapper. Checking
            # the wrapper here meant neither branch ever matched, so no store
            # event was emitted for the elements.
            for op in input_value:
                if isinstance(op, ObjectStoreOperation):
                    yield DagsterEvent.object_store_operation(
                        step_context,
                        ObjectStoreOperation.serializable(
                            op, value_name=input_name))
                elif isinstance(op, AssetStoreOperation):
                    yield DagsterEvent.asset_store_operation(
                        step_context, op)
            inputs[input_name] = [op.obj for op in input_value]
        elif isinstance(input_value, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(step_context, input_value)
            inputs[input_name] = input_value.obj
        else:
            # Already a plain value; use it as is.
            inputs[input_name] = input_value

    for input_name, input_value in inputs.items():
        for evt in check.generator(
                _type_checked_event_sequence_for_input(step_context,
                                                       input_name,
                                                       input_value)):
            yield evt

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
                _step_output_error_checked_user_event_sequence(
                    step_context, user_event_sequence)):

            if isinstance(user_event, Output):
                for evt in _create_step_events_for_output(
                        step_context, user_event):
                    yield evt
            elif isinstance(user_event,
                            (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))

Пример #21

Показать файл

Файл: engine_inprocess.py Проект: kgtdbx/dagster

    def execute(pipeline_context, execution_plan, step_keys_to_execute=None):
        """Run the plan's steps one after another in the current process.

        Steps are visited level by level in topological order. A step is
        skipped (a step-skipped event is yielded) when one of its upstream
        steps failed or was skipped, or when some of its inputs are not
        covered by the intermediates manager. Otherwise every event of the
        step is yielded as it is produced.

        Args:
            pipeline_context: ``SystemPipelineExecutionContext`` whose
                ``executor_config`` must be an ``ExecutorConfig``.
            execution_plan: ``ExecutionPlan`` to run.
            step_keys_to_execute: optional list of step keys; when it is
                non-empty, only those steps are considered.

        Yields:
            DagsterEvent: skip events and the events of each executed step.
        """
        check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
        check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

        if step_keys_to_execute is None:
            selected_keys = None
        else:
            selected_keys = set(step_keys_to_execute)

        executor_config = pipeline_context.executor_config
        check.param_invariant(
            isinstance(executor_config, ExecutorConfig),
            'pipeline_context',
            'Expected executor_config to be ExecutorConfig got {}'.format(executor_config),
        )

        # Keys of steps that failed or were skipped; anything downstream of
        # them is skipped as well.
        not_completed_keys = set()

        # It would be good to implement a reference tracking algorithm here so we could
        # garbage collection results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        for level in execution_plan.topological_step_levels():
            for step in level:
                if selected_keys and step.key not in selected_keys:
                    continue

                step_context = pipeline_context.for_step(step)

                # Upstream steps feeding this one that did not complete.
                broken_upstream_keys = []
                for step_input in step.step_inputs:
                    if not step_input.is_from_output:
                        continue
                    upstream_key = step_input.prev_output_handle.step_key
                    if upstream_key in not_completed_keys:
                        broken_upstream_keys.append(upstream_key)

                if broken_upstream_keys:
                    skip_message = (
                        'Dependencies for step {step} failed: {failed_inputs}. Not executing.'
                    ).format(step=step.key, failed_inputs=broken_upstream_keys)
                    step_context.log.info(skip_message)
                    not_completed_keys.add(step.key)
                    yield DagsterEvent.step_skipped_event(step_context)
                    continue

                missing_inputs = pipeline_context.intermediates_manager.uncovered_inputs(
                    step_context, step
                )
                if missing_inputs:
                    # In partial pipeline execution, we may end up here without having validated the
                    # missing dependent outputs were optional
                    _assert_missing_inputs_optional(missing_inputs, execution_plan, step.key)

                    skip_message = (
                        'Not all inputs covered for {step}. Not executing. Output missing for '
                        'inputs: {uncovered_inputs}'
                    ).format(uncovered_inputs=missing_inputs, step=step.key)
                    step_context.log.info(skip_message)
                    not_completed_keys.add(step.key)
                    yield DagsterEvent.step_skipped_event(step_context)
                    continue

                step_events = check.generator(dagster_event_sequence_for_step(step_context))
                for step_event in step_events:
                    check.inst(step_event, DagsterEvent)
                    # Record the failure before handing the event on, so that
                    # later steps see this step as failed.
                    if step_event.is_step_failure:
                        not_completed_keys.add(step.key)

                    yield step_event

Пример #22

Показать файл

def resource_initialization_event_generator(
    resource_defs: Dict[str, ResourceDefinition],
    resource_configs: Dict[str, ResourceConfig],
    log_manager: DagsterLogManager,
    execution_plan: Optional[ExecutionPlan],
    pipeline_run: Optional[PipelineRun],
    resource_keys_to_init: Optional[Set[str]],
    instance: Optional[DagsterInstance],
    resource_instances_to_override: Optional[Dict[str, "InitializedResource"]],
    emit_persistent_events: Optional[bool],
):
    """Yield resource initialization events followed by resource teardown events.

    Everything produced by ``_core_resource_initialization_event_generator`` is
    forwarded first. Once that generator finishes (or raises), each manager it
    registered in the shared deque is popped from the right end and its
    teardown events are forwarded. If any teardown raises
    ``DagsterUserCodeExecutionError``, the most recent such error is reported
    through a single ``resource_teardown_failure`` event after all managers
    have been processed.

    If this generator is closed (``GeneratorExit``), teardown is skipped
    entirely and the ``GeneratorExit`` is re-raised.
    """
    check.inst_param(log_manager, "log_manager", DagsterLogManager)
    resource_keys_to_init = check.opt_set_param(
        resource_keys_to_init, "resource_keys_to_init", of_type=str
    )
    check.opt_inst_param(execution_plan, "execution_plan", ExecutionPlan)
    check.opt_inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.opt_inst_param(instance, "instance", DagsterInstance)
    check.opt_dict_param(
        resource_instances_to_override, "resource_instances_to_override"
    )

    # Default to the untagged manager; a single-step plan gets the step's
    # logging tags attached instead.
    resource_log_manager = log_manager
    if execution_plan and execution_plan.step_handle_for_single_step_plans():
        single_step_handle = cast(
            ExecutionPlan, execution_plan
        ).step_handle_for_single_step_plans()
        step = execution_plan.get_step(cast(StepHandleUnion, single_step_handle))
        resource_log_manager = log_manager.with_tags(
            **cast(ExecutionStep, step).logging_tags
        )

    closed_early = False
    # Filled in by the core generator as each resource is set up.
    resource_managers: Deque[EventGenerationManager] = deque()

    try:
        yield from _core_resource_initialization_event_generator(
            resource_defs=resource_defs,
            resource_configs=resource_configs,
            resource_log_manager=resource_log_manager,
            resource_managers=resource_managers,
            execution_plan=execution_plan,
            pipeline_run=pipeline_run,
            resource_keys_to_init=resource_keys_to_init,
            instance=instance,
            resource_instances_to_override=resource_instances_to_override,
            emit_persistent_events=emit_persistent_events,
        )
    except GeneratorExit:
        # Shouldn't happen, but avoid runtime-exception in case this generator gets GC-ed
        # (see https://amir.rachum.com/blog/2017/03/03/generator-cleanup/).
        # Yielding from the finally block after a close would be an error,
        # so remember that we were closed and skip teardown below.
        closed_early = True
        raise
    finally:
        if not closed_early:
            teardown_error = None
            # Tear down in reverse order of registration.
            while resource_managers:
                manager = resource_managers.pop()
                try:
                    yield from manager.generate_teardown_events()
                except DagsterUserCodeExecutionError as user_error:
                    # Keep going so the remaining resources still get torn down;
                    # only the last failure is reported.
                    teardown_error = user_error
            if teardown_error:
                yield DagsterEvent.resource_teardown_failure(
                    execution_plan,
                    resource_log_manager,
                    resource_keys_to_init,
                    serializable_error_info_from_exc_info(
                        teardown_error.original_exc_info
                    ),
                )

Пример #23

Показать файл

def _core_resource_initialization_event_generator(
    execution_plan,
    environment_config,
    pipeline_run,
    resource_keys_to_init,
    resource_log_manager,
    resource_managers,
):
    """Initialize the requested resources of the run's mode, yielding events.

    Resources defined on the mode selected by ``pipeline_run.mode`` are visited
    in sorted order; those not listed in ``resource_keys_to_init`` are skipped.
    For each initialized resource the setup events are forwarded and its
    manager is appended to ``resource_managers``. The last item yielded on
    success is a ``ScopedResourcesBuilder`` over the initialized resources.

    A ``DagsterUserCodeExecutionError`` raised along the way is reported via a
    ``resource_init_failure`` event and then re-raised.
    """
    pipeline_def = execution_plan.pipeline_def
    instances_by_name = {}
    mode_definition = pipeline_def.get_mode_definition(pipeline_run.mode)
    durations_by_name = {}
    try:
        if resource_keys_to_init:
            yield DagsterEvent.resource_init_start(
                execution_plan,
                resource_log_manager,
                resource_keys_to_init,
            )

        ordered_defs = sorted(mode_definition.resource_defs.items())
        for resource_name, resource_def in ordered_defs:
            if resource_name not in resource_keys_to_init:
                continue

            resource_context = InitResourceContext(
                pipeline_def=pipeline_def,
                resource_def=resource_def,
                resource_config=environment_config.resources.get(
                    resource_name, {}
                ).get("config"),
                run_id=pipeline_run.run_id,
                # Add tags with information about the resource
                log_manager=resource_log_manager.with_tags(
                    resource_name=resource_name,
                    resource_fn_name=str(resource_def.resource_fn.__name__),
                ),
            )
            manager = single_resource_generation_manager(
                resource_context, resource_name, resource_def
            )
            # Only truthy items coming out of the setup generator are forwarded.
            for setup_event in manager.generate_setup_events():
                if setup_event:
                    yield setup_event

            initialized = check.inst(manager.get_object(), InitializedResource)
            instances_by_name[resource_name] = initialized.resource
            durations_by_name[resource_name] = initialized.duration
            resource_managers.append(manager)

        if resource_keys_to_init:
            yield DagsterEvent.resource_init_success(
                execution_plan,
                resource_log_manager,
                instances_by_name,
                durations_by_name,
            )
        yield ScopedResourcesBuilder(instances_by_name)
    except DagsterUserCodeExecutionError as dagster_user_error:
        yield DagsterEvent.resource_init_failure(
            execution_plan,
            resource_log_manager,
            resource_keys_to_init,
            serializable_error_info_from_exc_info(
                dagster_user_error.original_exc_info
            ),
        )
        raise dagster_user_error

Пример #24

Показать файл

def _core_resource_initialization_event_generator(
    resource_defs: Dict[str, ResourceDefinition],
    resource_configs: Dict[str, ResourceConfig],
    resource_log_manager: DagsterLogManager,
    resource_managers: Deque[EventGenerationManager],
    execution_plan: Optional[ExecutionPlan],
    pipeline_run: Optional[PipelineRun],
    resource_keys_to_init: Optional[Set[str]],
    instance: Optional[DagsterInstance],
    resource_instances_to_override: Optional[Dict[str, "InitializedResource"]],
    emit_persistent_events: Optional[bool],
):
    """Initialize resources in dependency order, yielding events along the way.

    Resource names are visited level by level in the topological order of the
    dependencies computed by ``_resolve_resource_dependencies``. A name present
    in ``resource_instances_to_override`` is wrapped in a hardcoded resource
    definition instead of using the entry from ``resource_defs``; names missing
    from ``resource_keys_to_init`` are skipped. Each initialized resource's
    manager is appended to ``resource_managers``.

    The start / success / failure events are only emitted when
    ``emit_persistent_events`` is truthy (which requires ``execution_plan``).
    The last item yielded on success is a ``ScopedResourcesBuilder``. A
    ``DagsterUserCodeExecutionError`` is always re-raised.
    """
    if emit_persistent_events:
        check.invariant(
            execution_plan,
            "If emit_persistent_events is enabled, then execution_plan must be provided",
        )
    resource_instances_to_override = check.opt_dict_param(
        resource_instances_to_override, "resource_instances_to_override"
    )
    resource_keys_to_init = check.opt_set_param(
        resource_keys_to_init, "resource_keys_to_init"
    )
    resource_instances: Dict[str, "InitializedResource"] = {}
    resource_init_times = {}
    try:
        if emit_persistent_events and resource_keys_to_init:
            yield DagsterEvent.resource_init_start(
                execution_plan,
                resource_log_manager,
                resource_keys_to_init,
            )

        resource_dependencies = _resolve_resource_dependencies(resource_defs)

        for level in toposort(resource_dependencies):
            for resource_name in level:
                if resource_name not in resource_instances_to_override:
                    resource_def = resource_defs[resource_name]
                else:
                    # use the given resource instances instead of re-initiating it from resource def
                    resource_def = ResourceDefinition.hardcoded_resource(
                        resource_instances_to_override[resource_name]
                    )

                if resource_name not in resource_keys_to_init:
                    continue

                resource_context = InitResourceContext(
                    resource_def=resource_def,
                    resource_config=resource_configs[resource_name].config,
                    pipeline_run=pipeline_run,
                    # Add tags with information about the resource
                    log_manager=resource_log_manager.with_tags(
                        resource_name=resource_name,
                        resource_fn_name=str(resource_def.resource_fn.__name__),
                    ),
                    # Resources initialized so far are visible to later ones.
                    resource_instance_dict=resource_instances,
                    required_resource_keys=resource_def.required_resource_keys,
                    instance=instance,
                    pipeline_def_for_backwards_compat=(
                        execution_plan.pipeline_def if execution_plan else None
                    ),
                )
                manager = single_resource_generation_manager(
                    resource_context, resource_name, resource_def
                )
                # Only truthy items coming out of the setup generator are forwarded.
                for setup_event in manager.generate_setup_events():
                    if setup_event:
                        yield setup_event

                initialized = check.inst(manager.get_object(), InitializedResource)
                resource_instances[resource_name] = initialized.resource
                resource_init_times[resource_name] = initialized.duration
                resource_managers.append(manager)

        if emit_persistent_events and resource_keys_to_init:
            yield DagsterEvent.resource_init_success(
                execution_plan,
                resource_log_manager,
                resource_instances,
                resource_init_times,
            )
        yield ScopedResourcesBuilder(resource_instances)
    except DagsterUserCodeExecutionError as dagster_user_error:
        # Can only end up in this state if we attempt to initialize a resource, so
        # resource_keys_to_init cannot be empty
        if emit_persistent_events:
            yield DagsterEvent.resource_init_failure(
                execution_plan,
                resource_log_manager,
                resource_keys_to_init,
                serializable_error_info_from_exc_info(
                    dagster_user_error.original_exc_info
                ),
            )
        raise dagster_user_error

Пример #25

Показать файл

Файл: execute_plan.py Проект: spencer-zepelin/dagster

def _dagster_event_sequence_for_step(step_context, retries):
    '''
    Yield a sequence of dagster events for the given step with the step context.

    This function also processes errors. It handles a few error cases:

        (1) User code requests to be retried:
            A RetryRequested has been raised. If retries are disabled, or the number of
            previous attempts has reached the max_retries on the received RetryRequested,
            a step failure event is yielded. Otherwise a step retry event is yielded.

        (2) User code fails successfully:
            The user-space code has raised a Failure which may have
            explicit metadata attached.

        (3) User code fails unexpectedly:
            The user-space code has raised an Exception. It has been
            wrapped in a DagsterUserCodeExecutionError. In that
            case the original user exc_info is stashed on the exception
            as the original_exc_info property.

        (4) User error:
            The framework raised a DagsterError that indicates a usage error
            or some other error not communicated by a user-thrown exception. For example,
            if the user yields an object out of a compute function that is not a
            proper event (not an Output, ExpectationResult, etc).

        (5) Framework failure or interrupt:
            An unexpected error occurred. This is a framework error. Either there
            has been an internal error in the framework OR we have forgotten to put a
            user code error boundary around invoked user-space code. These terminate
            the computation immediately (by re-raising).

    In cases (2), (3) and (4) the error is re-raised after the failure event has been
    yielded only when step_context.raise_on_error is set; in case (3) it is the wrapped
    user exception that is raised. This is mostly to get sensible errors in test and
    ad-hoc contexts, rather than forcing the user to wade through the
    PipelineExecutionResult API in order to find the step that failed.

    For tools, however, this option should be false, and a sensible error message
    signaled to the user within that tool.

    Args:
        step_context (SystemStepExecutionContext): Context of the step to execute.
        retries (Retries): Retry state, queried for the step's attempt count and for
            whether retries are disabled.
    '''

    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.inst_param(retries, 'retries', Retries)

    try:
        prior_attempt_count = retries.get_attempt_count(step_context.step.key)
        # Delegate to the step launcher when one is configured on the context,
        # otherwise run the core event sequence directly.
        if step_context.step_launcher:
            step_events = step_context.step_launcher.launch_step(
                step_context, prior_attempt_count)
        else:
            step_events = core_dagster_event_sequence_for_step(
                step_context, prior_attempt_count)

        for step_event in check.generator(step_events):
            yield step_event

    # case (1) in top comment
    except RetryRequested as retry_request:
        retry_err_info = serializable_error_info_from_exc_info(sys.exc_info())

        if retries.disabled:
            # Reuse the original stack/cause but replace the message so the
            # failure explains why no retry happened.
            fail_err = SerializableErrorInfo(
                message='RetryRequested but retries are disabled',
                stack=retry_err_info.stack,
                cls_name=retry_err_info.cls_name,
                cause=retry_err_info.cause,
            )
            yield DagsterEvent.step_failure_event(
                step_context=step_context,
                step_failure_data=StepFailureData(error=fail_err,
                                                  user_failure_data=None),
            )
        else:  # retries.enabled or retries.deferred
            prev_attempts = retries.get_attempt_count(step_context.step.key)
            if prev_attempts >= retry_request.max_retries:
                fail_err = SerializableErrorInfo(
                    message='Exceeded max_retries of {}'.format(
                        retry_request.max_retries),
                    stack=retry_err_info.stack,
                    cls_name=retry_err_info.cls_name,
                    cause=retry_err_info.cause,
                )
                yield DagsterEvent.step_failure_event(
                    step_context=step_context,
                    step_failure_data=StepFailureData(error=fail_err,
                                                      user_failure_data=None),
                )
            else:
                yield DagsterEvent.step_retry_event(
                    step_context,
                    StepRetryData(
                        error=retry_err_info,
                        seconds_to_wait=retry_request.seconds_to_wait,
                    ),
                )

    # case (2) in top comment
    except Failure as failure:
        yield _step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            UserFailureData(
                label='intentional-failure',
                description=failure.description,
                metadata_entries=failure.metadata_entries,
            ),
        )
        if step_context.raise_on_error:
            raise failure

    # case (3) in top comment
    except DagsterUserCodeExecutionError as dagster_user_error:
        # Report the user's original exception rather than the framework wrapper.
        yield _step_failure_event_from_exc_info(
            step_context,
            dagster_user_error.original_exc_info,
        )

        if step_context.raise_on_error:
            raise dagster_user_error.user_exception

    # case (4) in top comment
    except DagsterError as dagster_error:
        yield _step_failure_event_from_exc_info(step_context, sys.exc_info())

        if step_context.raise_on_error:
            raise dagster_error

    # case (5) in top comment
    except (Exception, KeyboardInterrupt) as unexpected_exception:  # pylint: disable=broad-except
        yield _step_failure_event_from_exc_info(step_context, sys.exc_info())

        # Always re-raised, regardless of raise_on_error.
        raise unexpected_exception

Пример #26

Показать файл

    def execute(self, pipeline_context, execution_plan):
        """Run the plan's steps through per-step iterators, yielding events.

        At most ``self.max_concurrent`` step iterators are active at a time.
        Each pass of the main loop checks for interrupts, starts new step
        iterators while capacity remains, pulls one item from every active
        iterator, retires finished iterators, and forwards the plan events
        reported by the active execution.

        Args:
            pipeline_context (SystemPipelineExecutionContext): Context of the
                running pipeline.
            execution_plan (ExecutionPlan): Plan whose steps are executed.

        Yields:
            Engine events emitted by this method, the events produced by the
            step iterators, and the events from
            ``active_execution.plan_events_iterator``.

        Raises:
            DagsterExecutionInterruptedError: After an interrupt, once no step
                iterators remain and every collected error is itself a
                ``DagsterExecutionInterruptedError``.
            DagsterSubprocessError: If any errors were collected otherwise.
        """
        check.inst_param(pipeline_context, "pipeline_context",
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

        limit = self.max_concurrent

        yield DagsterEvent.engine_event(
            pipeline_context,
            "Executing steps using multiprocess executor: parent process (pid: {pid})"
            .format(pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(
                os.getpid(),
                step_keys_to_execute=execution_plan.step_keys_to_execute),
        )

        # It would be good to implement a reference tracking algorithm here so we could
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        with time_execution_scope() as timer_result:
            with execution_plan.start(
                    retry_mode=self.retries) as active_execution:
                # step key -> iterator returned by execute_step_out_of_process
                active_iters = {}
                # Handed to execute_step_out_of_process; read back after the loop.
                # NOTE(review): presumably populated with per-process error info
                # by that helper -- confirm.
                errors = {}
                # step key -> multiprocessing.Event set when an interrupt is seen
                term_events = {}
                stopping = False

                # Keep looping while there is still work to start, or while
                # already-started iterators have not been drained.
                while (not stopping
                       and not active_execution.is_complete) or active_iters:
                    if active_execution.check_for_interrupts():
                        yield DagsterEvent.engine_event(
                            pipeline_context,
                            "Multiprocess executor: received termination signal - "
                            "forwarding to active child processes",
                            EngineEventData.interrupted(
                                list(term_events.keys())),
                        )
                        stopping = True
                        active_execution.mark_interrupted()
                        for event in term_events.values():
                            event.set()

                    # start iterators
                    while len(active_iters) < limit and not stopping:
                        steps = active_execution.get_steps_to_execute(
                            limit=(limit - len(active_iters)))

                        if not steps:
                            break

                        for step in steps:
                            step_context = pipeline_context.for_step(step)
                            term_events[step.key] = multiprocessing.Event()
                            active_iters[
                                step.key] = self.execute_step_out_of_process(
                                    step_context,
                                    step,
                                    errors,
                                    term_events,
                                    active_execution.get_known_state(),
                                )

                    # process active iterators
                    # Finished keys are collected here and removed after the
                    # loop, so the dict is not mutated while being iterated.
                    empty_iters = []
                    for key, step_iter in active_iters.items():
                        try:
                            event_or_none = next(step_iter)
                            if event_or_none is None:
                                continue
                            else:
                                yield event_or_none
                                active_execution.handle_event(event_or_none)

                        except ChildProcessCrashException as crash:
                            serializable_error = serializable_error_info_from_exc_info(
                                sys.exc_info())
                            yield DagsterEvent.engine_event(
                                pipeline_context,
                                ("Multiprocess executor: child process for step {step_key} "
                                 "unexpectedly exited with code {exit_code}"
                                 ).format(step_key=key,
                                          exit_code=crash.exit_code),
                                EngineEventData.engine_error(
                                    serializable_error),
                                step_handle=active_execution.get_step_by_key(
                                    key).handle,
                            )
                            # Synthesize a step failure for the crashed step and
                            # record it with the active execution before yielding.
                            step_failure_event = DagsterEvent.step_failure_event(
                                step_context=pipeline_context.for_step(
                                    active_execution.get_step_by_key(key)),
                                step_failure_data=StepFailureData(
                                    error=serializable_error,
                                    user_failure_data=None),
                            )
                            active_execution.handle_event(step_failure_event)
                            yield step_failure_event
                            empty_iters.append(key)
                        except StopIteration:
                            empty_iters.append(key)

                    # clear and mark complete finished iterators
                    for key in empty_iters:
                        del active_iters[key]
                        del term_events[key]
                        active_execution.verify_complete(pipeline_context, key)

                    # process skipped and abandoned steps
                    yield from active_execution.plan_events_iterator(
                        pipeline_context)

                # Drop falsy entries so only real errors are considered below.
                errs = {pid: err for pid, err in errors.items() if err}

                # After termination starts, raise an interrupted exception once all subprocesses
                # have finished cleaning up (and the only errors were from being interrupted)
                if (stopping and (not active_iters) and all(
                        err_info.cls_name == "DagsterExecutionInterruptedError"
                        for err_info in errs.values())):
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        "Multiprocess executor: interrupted all active child processes",
                        event_specific_data=EngineEventData(),
                    )
                    raise DagsterExecutionInterruptedError()
                elif errs:
                    raise DagsterSubprocessError(
                        "During multiprocess execution errors occurred in child processes:\n{error_list}"
                        .format(error_list="\n".join([
                            "In process {pid}: {err}".format(
                                pid=pid, err=err.to_string())
                            for pid, err in errs.items()
                        ])),
                        subprocess_error_infos=list(errs.values()),
                    )

        yield DagsterEvent.engine_event(
            pipeline_context,
            "Multiprocess executor: parent process exiting after {duration} (pid: {pid})"
            .format(duration=format_duration(timer_result.millis),
                    pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(os.getpid()),
        )

Пример #27

Показать файл

Файл: context_creation_pipeline.py Проект: yangchenghuang/dagster

def scoped_pipeline_context(
    pipeline_def,
    environment_dict,
    run_config,
    instance,
    system_storage_data=None,
    scoped_resources_builder_cm=create_resource_builder,
    raise_on_error=False,
):
    '''Build the pipeline execution context and yield it, or report init failure.

    On success the constructed pipeline context is yielded while the scoped
    resources builder context manager is still open. If a DagsterError is
    raised before the context has been constructed, a pipeline init failure
    event is yielded instead (and the error is re-raised only when
    raise_on_error is set). A DagsterError raised after the context has been
    constructed is always re-raised.
    '''
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    check.dict_param(environment_dict, 'environment_dict', key_type=str)
    check.inst_param(run_config, 'run_config', RunConfig)
    check.inst_param(instance, 'instance', DagsterInstance)
    check.opt_inst_param(
        system_storage_data, 'system_storage_data', SystemStorageData
    )

    context_creation_data = create_context_creation_data(
        pipeline_def, environment_dict, run_config, instance
    )
    executor_config = create_executor_config(context_creation_data)

    # After this try block, a Dagster exception thrown will result in a pipeline init failure event.
    pipeline_context = None
    try:
        executor_config.check_requirements(
            instance, context_creation_data.system_storage_def
        )
        log_manager = create_log_manager(context_creation_data)

        with scoped_resources_builder_cm(
            context_creation_data.pipeline_def,
            context_creation_data.environment_config,
            context_creation_data.run_config,
            log_manager,
        ) as scoped_resources_builder:
            system_storage_data = create_system_storage_data(
                context_creation_data,
                system_storage_data,
                scoped_resources_builder,
            )
            pipeline_context = construct_pipeline_execution_context(
                context_creation_data=context_creation_data,
                scoped_resources_builder=scoped_resources_builder,
                system_storage_data=system_storage_data,
                log_manager=log_manager,
                executor_config=executor_config,
                raise_on_error=raise_on_error,
            )
            yield pipeline_context

    except DagsterError as dagster_error:
        # if we've caught an error after context init we're in a problematic state and should just raise
        if pipeline_context is not None:
            raise dagster_error

        # only yield an init failure event if we haven't already yielded context
        if dagster_error.is_user_code_error:
            # pylint does not know original_exc_info exists if is_user_code_error is true
            # pylint: disable=no-member
            user_facing_exc_info = dagster_error.original_exc_info
        else:
            user_facing_exc_info = sys.exc_info()

        error_info = serializable_error_info_from_exc_info(user_facing_exc_info)
        yield DagsterEvent.pipeline_init_failure(
            pipeline_name=pipeline_def.name,
            failure_data=PipelineInitFailureData(error=error_info),
            log_manager=_create_context_free_log_manager(
                instance, run_config, pipeline_def
            ),
        )

        if raise_on_error:
            raise dagster_error

Пример #28

Показать файл

def orchestration_context_event_generator(
    pipeline: IPipeline,
    execution_plan: ExecutionPlan,
    run_config: Dict[str, Any],
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    raise_on_error: bool,
    executor_defs: Optional[List[ExecutorDefinition]],
    output_capture: Optional[Dict["StepOutputHandle", Any]],
) -> Generator[Union[DagsterEvent, PlanOrchestrationContext], None, None]:
    """Yield a ``PlanOrchestrationContext``, or a pipeline failure event.

    The executor is created, the orchestration context is built around it and
    validated against ``execution_plan``, and the context is yielded. If a
    ``DagsterError`` is raised in that process, a pipeline failure event is
    built, logged through the log manager, and yielded; the error is then
    re-raised only when ``raise_on_error`` is true.

    ``executor_defs`` must be ``None``.
    """
    check.invariant(executor_defs is None)
    context_creation_data = create_context_creation_data(
        pipeline, execution_plan, run_config, pipeline_run, instance
    )

    # Created outside the try block so it is available for reporting failures.
    log_manager = create_log_manager(context_creation_data)

    try:
        executor = create_executor(context_creation_data)
        plan_data = create_plan_data(
            context_creation_data, raise_on_error, executor.retries
        )
        execution_context = PlanOrchestrationContext(
            plan_data=plan_data,
            log_manager=log_manager,
            executor=executor,
            output_capture=output_capture,
        )

        _validate_plan_with_context(execution_context, execution_plan)

        yield execution_context
    except DagsterError as dagster_error:
        dagster_error = cast(DagsterUserCodeExecutionError, dagster_error)
        if dagster_error.is_user_code_error:
            # pylint does not know original_exc_info exists if is_user_code_error is true
            # pylint: disable=no-member
            user_facing_exc_info = dagster_error.original_exc_info
        else:
            user_facing_exc_info = sys.exc_info()
        error_info = serializable_error_info_from_exc_info(user_facing_exc_info)

        failure_event = DagsterEvent.pipeline_failure(
            pipeline_context_or_name=pipeline_run.pipeline_name,
            context_msg=(
                f'Pipeline failure during initialization for pipeline "{pipeline_run.pipeline_name}". '
                "This may be due to a failure in initializing the executor or one of the loggers."
            ),
            error_info=error_info,
        )
        log_manager.error(
            failure_event.message,
            dagster_event=failure_event,
            pipeline_name=pipeline_run.pipeline_name,
        )
        yield failure_event

        if raise_on_error:
            raise dagster_error

Пример #29

Показать файл

Файл: api.py Проект: prezi/dagster

def pipeline_execution_iterator(
    pipeline_context: PlanOrchestrationContext, execution_plan: ExecutionPlan
) -> Iterator[DagsterEvent]:
    """Run a pipeline end to end and stream the events it produces.

    Emits a pipeline start event, relays every event coming out of
    ``pipeline_context.executor.execute`` and, once execution is over,
    emits exactly one terminal event (success, failure or canceled). The
    terminal event is built but not yielded when the generator itself was
    closed by its consumer.

    Args:
        pipeline_context (PlanOrchestrationContext): Provides the executor,
            the instance, the run id and the ``raise_on_error`` flag.
        execution_plan (ExecutionPlan): The plan passed to the executor.

    Yields:
        DagsterEvent: The start event, the executor's events, then the
        terminal event.
    """

    yield DagsterEvent.pipeline_start(pipeline_context)

    failure_info = None
    interruption_info = None
    failed_step_keys = []
    closed_by_consumer = False

    def _terminal_event():
        # Precedence: interruption, then a raised exception, then failed
        # steps; success only when none of those were recorded.
        if interruption_info:
            current_run = pipeline_context.instance.get_run_by_id(
                pipeline_context.run_id
            )
            if current_run and current_run.status == PipelineRunStatus.CANCELING:
                return DagsterEvent.pipeline_canceled(
                    pipeline_context, interruption_info
                )
            return DagsterEvent.pipeline_failure(
                pipeline_context,
                "Execution was interrupted unexpectedly. "
                "No user initiated termination request was found, treating as failure.",
                interruption_info,
            )

        if failure_info:
            return DagsterEvent.pipeline_failure(
                pipeline_context,
                "An exception was thrown during execution.",
                failure_info,
            )

        if failed_step_keys:
            return DagsterEvent.pipeline_failure(
                pipeline_context,
                "Steps failed: {}.".format(failed_step_keys),
            )

        return DagsterEvent.pipeline_success(pipeline_context)

    try:
        # The executor call stays inline in the loop header so the iterator
        # is only referenced by the loop itself.
        for step_event in pipeline_context.executor.execute(
            pipeline_context, execution_plan
        ):
            if step_event.is_step_failure:
                failed_step_keys.append(step_event.step_key)

            yield step_event
    except GeneratorExit:
        # Not expected, but guards against a runtime error if this generator
        # is garbage-collected mid-iteration
        # (see https://amir.rachum.com/blog/2017/03/03/generator-cleanup/).
        closed_by_consumer = True
        failure_info = serializable_error_info_from_exc_info(sys.exc_info())
        if pipeline_context.raise_on_error:
            raise
    except (KeyboardInterrupt, DagsterExecutionInterruptedError):
        interruption_info = serializable_error_info_from_exc_info(sys.exc_info())
        if pipeline_context.raise_on_error:
            raise
    except Exception:  # pylint: disable=broad-except
        failure_info = serializable_error_info_from_exc_info(sys.exc_info())
        if pipeline_context.raise_on_error:
            raise  # the finally clause below still runs before propagation
    finally:
        # Always build the terminal event; it is only withheld from the
        # consumer when the generator has already been closed.
        final_event = _terminal_event()
        if not closed_by_consumer:
            yield final_event

Пример #30

Показать файл

    def check_step_health(
        self, step_handler_context: StepHandlerContext
    ) -> List[DagsterEvent]:
        """Inspect the container of a step and report a failure if one is found.

        The step key is the first entry of
        ``execute_step_args.step_keys_to_execute``; the container is looked
        up by the name from ``self._get_container_name`` using the client
        from ``self._get_client``.

        Args:
            step_handler_context (StepHandlerContext): Carries the
                ``execute_step_args`` of the step being checked.

        Returns:
            List[DagsterEvent]: An empty list when the container status is
            ``"running"`` or its wait result has ``StatusCode`` 0; otherwise
            a single STEP_FAILURE event describing what went wrong.
        """
        step_key = step_handler_context.execute_step_args.step_keys_to_execute[0]

        def _step_failure(message: str):
            # Every unhealthy outcome yields the same event shape; only the
            # message differs.
            return [
                DagsterEvent(
                    event_type_value=DagsterEventType.STEP_FAILURE.value,
                    pipeline_name=step_handler_context.execute_step_args.pipeline_origin.pipeline_name,
                    step_key=step_key,
                    message=message,
                    event_specific_data=StepFailureData(
                        error=None,
                        user_failure_data=None,
                    ),
                )
            ]

        docker_client = self._get_client()
        name = self._get_container_name(
            step_handler_context.execute_step_args.pipeline_run_id,
            step_key,
        )

        try:
            container = docker_client.containers.get(name)
        except Exception as exc:
            return _step_failure(
                f"Error when checking on step container health: {exc}"
            )

        if container.status == "running":
            return []

        try:
            wait_result = container.wait(timeout=0.1)
        except Exception as exc:
            return _step_failure(
                f"Container status is {container.status}. Hit exception attempting to get its return code: {exc}"
            )

        exit_code = wait_result.get("StatusCode")
        if exit_code == 0:
            return []

        return _step_failure(
            f"Container status is {container.status}. Return code is {str(exit_code)}."
        )