Exemplo n.º 1
0
    def execute(pipeline_context, execution_plan):
        from .tasks import make_app

        check.inst_param(pipeline_context, 'pipeline_context',
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

        check.param_invariant(
            isinstance(pipeline_context.executor_config, CeleryConfig),
            'pipeline_context',
            'Expected executor_config to be CeleryConfig got {}'.format(
                pipeline_context.executor_config),
        )

        celery_config = pipeline_context.executor_config

        storage = pipeline_context.environment_dict.get('storage')

        if (celery_config.broker and not is_local_uri(celery_config.broker)
            ) or (celery_config.backend
                  and not is_local_uri(celery_config.backend)):
            check.invariant(
                storage.get('s3') or storage.get('gcs'),
                'Must use S3 or GCS storage with non-local Celery broker: {broker} '
                'and backend: {backend}'.format(broker=celery_config.broker,
                                                backend=celery_config.backend),
            )
        else:
            check.invariant(
                not storage.get('in_memory'),
                'Cannot use in-memory storage with Celery, use filesystem, S3, or GCS',
            )

        app = make_app(celery_config)

        priority_for_step = lambda step: (-1 * int(
            step.tags.get('dagster-celery/priority', task_default_priority)
        ) + -1 * _get_run_priority(pipeline_context))
        priority_for_key = lambda step_key: (priority_for_step(
            execution_plan.get_step_by_key(step_key)))
        _warn_on_priority_misuse(pipeline_context, execution_plan)

        step_results = {}  # Dict[ExecutionStep, celery.AsyncResult]
        step_errors = {}
        completed_steps = set({})  # Set[step_key]
        active_execution = execution_plan.start(
            retries=pipeline_context.executor_config.retries,
            sort_key_fn=priority_for_step)
        stopping = False

        while (not active_execution.is_complete
               and not stopping) or step_results:

            results_to_pop = []
            for step_key, result in sorted(
                    step_results.items(),
                    key=lambda x: priority_for_key(x[0])):
                if result.ready():
                    try:
                        step_events = result.get()
                    except Exception as e:  # pylint: disable=broad-except
                        # We will want to do more to handle the exception here.. maybe subclass Task
                        # Certainly yield an engine or pipeline event
                        step_events = []
                        step_errors[
                            step_key] = serializable_error_info_from_exc_info(
                                sys.exc_info())
                        stopping = True
                    for step_event in step_events:
                        event = deserialize_json_to_dagster_namedtuple(
                            step_event)
                        yield event
                        active_execution.handle_event(event)

                    results_to_pop.append(step_key)
                    completed_steps.add(step_key)

            for step_key in results_to_pop:
                if step_key in step_results:
                    del step_results[step_key]
                    active_execution.verify_complete(pipeline_context,
                                                     step_key)

            # process skips from failures or uncovered inputs
            for event in active_execution.skipped_step_events_iterator(
                    pipeline_context):
                yield event

            # don't add any new steps if we are stopping
            if stopping:
                continue

            # This is a slight refinement. If we have n workers idle and schedule m > n steps for
            # execution, the first n steps will be picked up by the idle workers in the order in
            # which they are scheduled (and the following m-n steps will be executed in priority
            # order, provided that it takes longer to execute a step than to schedule it). The test
            # case has m >> n to exhibit this behavior in the absence of this sort step.
            for step in active_execution.get_steps_to_execute():
                try:
                    queue = step.tags.get('dagster-celery/queue',
                                          task_default_queue)
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        'Submitting celery task for step "{step_key}" to queue "{queue}".'
                        .format(step_key=step.key, queue=queue),
                        EngineEventData(marker_start=DELEGATE_MARKER),
                        step_key=step.key,
                    )
                    step_results[step.key] = _submit_task(
                        app, pipeline_context, step, queue)
                except Exception:
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        'Encountered error during celery task submission.'.
                        format(),
                        event_specific_data=EngineEventData.engine_error(
                            serializable_error_info_from_exc_info(
                                sys.exc_info()), ),
                    )
                    raise

            time.sleep(TICK_SECONDS)

        if step_errors:
            raise DagsterSubprocessError(
                'During celery execution errors occurred in workers:\n{error_list}'
                .format(error_list='\n'.join([
                    '[{step}]: {err}'.format(step=key, err=err.to_string())
                    for key, err in step_errors.items()
                ])),
                subprocess_error_infos=list(step_errors.values()),
            )
Exemplo n.º 2
0
def inner_plan_execution_iterator(pipeline_context, execution_plan, retries):
    check.inst_param(pipeline_context, 'pipeline_context',
                     SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.inst_param(retries, 'retries', Retries)

    for event in copy_required_intermediates_for_execution(
            pipeline_context, execution_plan):
        yield event

    # It would be good to implement a reference tracking algorithm here to
    # garbage collect results that are no longer needed by any steps
    # https://github.com/dagster-io/dagster/issues/811
    active_execution = execution_plan.start(retries=retries)
    while not active_execution.is_complete:
        step = active_execution.get_next_step()

        step_context = pipeline_context.for_step(step)

        missing_resources = [
            resource_key
            for resource_key in step_context.required_resource_keys
            if not hasattr(step_context.resources, resource_key)
        ]
        check.invariant(
            len(missing_resources) == 0,
            ('Expected step context for solid {solid_name} to have all required resources, but '
             'missing {missing_resources}.').format(
                 solid_name=step_context.solid.name,
                 missing_resources=missing_resources),
        )

        with pipeline_context.instance.compute_log_manager.watch(
                step_context.pipeline_run, step_context.step.key):
            # capture all of the logs for this step
            uncovered_inputs = pipeline_context.intermediates_manager.uncovered_inputs(
                step_context, step)
            if uncovered_inputs:
                # In partial pipeline execution, we may end up here without having validated the
                # missing dependent outputs were optional
                _assert_missing_inputs_optional(uncovered_inputs,
                                                execution_plan, step.key)

                step_context.log.info((
                    'Not all inputs covered for {step}. Not executing. Output missing for '
                    'inputs: {uncovered_inputs}').format(
                        uncovered_inputs=uncovered_inputs, step=step.key))
                yield DagsterEvent.step_skipped_event(step_context)
                active_execution.mark_skipped(step.key)
            else:
                for step_event in check.generator(
                        _dagster_event_sequence_for_step(
                            step_context, retries)):
                    check.inst(step_event, DagsterEvent)
                    yield step_event
                    active_execution.handle_event(step_event)

            active_execution.verify_complete(pipeline_context, step.key)

        # process skips from failures or uncovered inputs
        for event in active_execution.skipped_step_events_iterator(
                pipeline_context):
            yield event
Exemplo n.º 3
0
def dagster_event_from_dict(event_dict, pipeline_name):
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    # Get event_type
    event_type = _handled_events().get(event_dict['__typename'])
    if not event_type:
        raise Exception('unhandled event type %s' % event_dict['__typename'])

    # Get event_specific_data
    event_specific_data = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        event_specific_data = StepOutputData(
            step_output_handle=StepOutputHandle(event_dict['step']['key'],
                                                event_dict['outputName']),
            type_check_data=TypeCheckData(
                success=event_dict['typeCheck']['success'],
                label=event_dict['typeCheck']['label'],
                description=event_dict.get('description'),
                metadata_entries=list(
                    event_metadata_entries(event_dict.get('metadataEntries'))
                    or []),
            ),
        )

    elif event_type == DagsterEventType.STEP_INPUT:
        event_specific_data = StepInputData(
            input_name=event_dict['inputName'],
            type_check_data=TypeCheckData(
                success=event_dict['typeCheck']['success'],
                label=event_dict['typeCheck']['label'],
                description=event_dict.get('description'),
                metadata_entries=list(
                    event_metadata_entries(event_dict.get('metadataEntries'))
                    or []),
            ),
        )
    elif event_type == DagsterEventType.STEP_SUCCESS:
        event_specific_data = StepSuccessData(0.0)

    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        materialization = event_dict['materialization']
        event_specific_data = StepMaterializationData(
            materialization=materialization_from_data(materialization))
    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        expectation_result = expectation_result_from_data(
            event_dict['expectationResult'])
        event_specific_data = StepExpectationResultData(expectation_result)

    elif event_type == DagsterEventType.STEP_FAILURE:
        error_info = SerializableErrorInfo(event_dict['error']['message'],
                                           stack=None,
                                           cls_name=None)
        event_specific_data = StepFailureData(
            error_info,
            UserFailureData(
                label=event_dict['failureMetadata']['label'],
                description=event_dict['failureMetadata']['description'],
                metadata_entries=list(
                    event_metadata_entries(event_dict.get('metadataEntries'))
                    or []),
            ) if event_dict.get('failureMetadata') else None,
        )

    # We should update the GraphQL response so that clients don't need to do this handle parsing.
    # See: https://github.com/dagster-io/dagster/issues/1559
    keys = event_dict['step']['solidHandleID'].split('.')
    handle = None
    while keys:
        handle = SolidHandle(keys.pop(0), definition_name=None, parent=handle)

    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=event_dict['step']['key'],
        solid_handle=handle,
        step_kind_value=event_dict['step']['kind'],
        logging_tags=None,
        event_specific_data=event_specific_data,
    )
Exemplo n.º 4
0
def _dagster_event_sequence_for_step(
        step_context: StepExecutionContext) -> Iterator[DagsterEvent]:
    """
    Yield a sequence of dagster events for the given step with the step context.

    This function also processes errors. It handles a few error cases:

        (1) User code requests to be retried:
            A RetryRequested has been raised. We will either put the step in to
            up_for_retry state or a failure state depending on the number of previous attempts
            and the max_retries on the received RetryRequested.

        (2) User code fails successfully:
            The user-space code has raised a Failure which may have
            explicit metadata attached.

        (3) User code fails unexpectedly:
            The user-space code has raised an Exception. It has been
            wrapped in an exception derived from DagsterUserCodeException. In that
            case the original user exc_info is stashed on the exception
            as the original_exc_info property.

        (4) Execution interrupted:
            The run was interrupted in the middle of execution (typically by a
            termination request).

        (5) User error:
            The framework raised a DagsterError that indicates a usage error
            or some other error not communicated by a user-thrown exception. For example,
            if the user yields an object out of a compute function that is not a
            proper event (not an Output, ExpectationResult, etc).

        (6) Framework failure:
            An unexpected error occurred. This is a framework error. Either there
            has been an internal error in the framework OR we have forgotten to put a
            user code error boundary around invoked user-space code. These terminate
            the computation immediately (by re-raising).


    The "raised_dagster_errors" context manager can be used to force these errors to be
    re-raised and surfaced to the user. This is mostly to get sensible errors in test and
    ad-hoc contexts, rather than forcing the user to wade through the
    PipelineExecutionResult API in order to find the step that failed.

    For tools, however, this option should be false, and a sensible error message
    signaled to the user within that tool.
    """

    check.inst_param(step_context, "step_context", StepExecutionContext)

    try:
        if step_context.step_launcher:
            # info all on step_context - should deprecate second arg
            step_events = step_context.step_launcher.launch_step(
                step_context, step_context.previous_attempt_count)
        else:
            step_events = core_dagster_event_sequence_for_step(step_context)

        for step_event in check.generator(step_events):
            yield step_event

    # case (1) in top comment
    except RetryRequested as retry_request:
        retry_err_info = serializable_error_info_from_exc_info(sys.exc_info())

        if step_context.retry_mode.disabled:
            fail_err = SerializableErrorInfo(
                message="RetryRequested but retries are disabled",
                stack=retry_err_info.stack,
                cls_name=retry_err_info.cls_name,
                cause=retry_err_info.cause,
            )
            step_context.capture_step_exception(retry_request)
            yield DagsterEvent.step_failure_event(
                step_context=step_context,
                step_failure_data=StepFailureData(error=fail_err,
                                                  user_failure_data=None),
            )
        else:  # retries.enabled or retries.deferred
            prev_attempts = step_context.previous_attempt_count
            if prev_attempts >= retry_request.max_retries:
                fail_err = SerializableErrorInfo(
                    message="Exceeded max_retries of {}".format(
                        retry_request.max_retries),
                    stack=retry_err_info.stack,
                    cls_name=retry_err_info.cls_name,
                    cause=retry_err_info.cause,
                )
                step_context.capture_step_exception(retry_request)
                yield DagsterEvent.step_failure_event(
                    step_context=step_context,
                    step_failure_data=StepFailureData(error=fail_err,
                                                      user_failure_data=None),
                )
            else:
                yield DagsterEvent.step_retry_event(
                    step_context,
                    StepRetryData(
                        error=retry_err_info,
                        seconds_to_wait=retry_request.seconds_to_wait,
                    ),
                )

    # case (2) in top comment
    except Failure as failure:
        step_context.capture_step_exception(failure)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            UserFailureData(
                label="intentional-failure",
                description=failure.description,
                metadata_entries=failure.metadata_entries,
            ),
        )
        if step_context.raise_on_error:
            raise failure

    # case (3) in top comment
    except DagsterUserCodeExecutionError as dagster_user_error:
        step_context.capture_step_exception(dagster_user_error.user_exception)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.USER_CODE_ERROR,
        )

        if step_context.raise_on_error:
            raise dagster_user_error.user_exception

    # case (4) in top comment
    except (KeyboardInterrupt,
            DagsterExecutionInterruptedError) as interrupt_error:
        step_context.capture_step_exception(interrupt_error)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.INTERRUPT,
        )
        raise interrupt_error

    # case (5) in top comment
    except DagsterError as dagster_error:
        step_context.capture_step_exception(dagster_error)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.FRAMEWORK_ERROR,
        )

        if step_context.raise_on_error:
            raise dagster_error

    # case (6) in top comment
    except Exception as unexpected_exception:  # pylint: disable=broad-except
        step_context.capture_step_exception(unexpected_exception)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.UNEXPECTED_ERROR,
        )
        raise unexpected_exception
Exemplo n.º 5
0
    def launch_step(self, step_handler_context: StepHandlerContext):
        events = []

        assert (len(
            step_handler_context.execute_step_args.step_keys_to_execute) == 1
                ), "Launching multiple steps is not currently supported"
        step_key = step_handler_context.execute_step_args.step_keys_to_execute[
            0]

        job_name = self._get_k8s_step_job_name(step_handler_context)
        pod_name = job_name

        args = step_handler_context.execute_step_args.get_command_args()

        job_config = self._job_config
        if not job_config.job_image:
            job_config = job_config.with_image(
                step_handler_context.execute_step_args.pipeline_origin.
                repository_origin.container_image)

        if not job_config.job_image:
            raise Exception(
                "No image included in either executor config or the job")

        user_defined_k8s_config = get_user_defined_k8s_config(
            frozentags(step_handler_context.step_tags[step_key]))

        job = construct_dagster_k8s_job(
            job_config=job_config,
            args=args,
            job_name=job_name,
            pod_name=pod_name,
            component="step_worker",
            user_defined_k8s_config=user_defined_k8s_config,
            labels={
                "dagster/job":
                step_handler_context.execute_step_args.pipeline_origin.
                pipeline_name,
                "dagster/op":
                step_key,
            },
        )

        events.append(
            DagsterEvent(
                event_type_value=DagsterEventType.ENGINE_EVENT.value,
                pipeline_name=step_handler_context.execute_step_args.
                pipeline_origin.pipeline_name,
                step_key=step_key,
                message=
                f"Executing step {step_key} in Kubernetes job {job_name}",
                event_specific_data=EngineEventData([
                    EventMetadataEntry.text(step_key, "Step key"),
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                ], ),
            ))

        self._batch_api.create_namespaced_job(body=job,
                                              namespace=self._job_namespace)

        return events
Exemplo n.º 6
0
    def execute(pipeline_context, execution_plan):
        check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

        step_levels = execution_plan.execution_step_levels()

        intermediates_manager = pipeline_context.intermediates_manager

        limit = pipeline_context.executor_config.max_concurrent

        step_key_set = set(step.key for step in execution_plan.execution_steps())

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Executing steps using multiprocess engine: parent process (pid: {pid})'.format(
                pid=os.getpid()
            ),
            event_specific_data=EngineEventData.multiprocess(
                os.getpid(), step_keys_to_execute=step_key_set
            ),
        )

        # It would be good to implement a reference tracking algorithm here so we could
        # garbage collection results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        with time_execution_scope() as timer_result:
            for event in copy_required_intermediates_for_execution(
                pipeline_context, execution_plan
            ):
                yield event

            for step_level in step_levels:
                step_contexts_to_execute = []
                for step in step_level:
                    step_context = pipeline_context.for_step(step)

                    if not intermediates_manager.all_inputs_covered(step_context, step):
                        uncovered_inputs = intermediates_manager.uncovered_inputs(
                            step_context, step
                        )
                        step_context.log.error(
                            (
                                'Not all inputs covered for {step}. Not executing.'
                                'Output missing for inputs: {uncovered_inputs}'
                            ).format(uncovered_inputs=uncovered_inputs, step=step.key)
                        )
                        continue

                    step_contexts_to_execute.append(step_context)
                for step_event in bounded_parallel_executor(
                    pipeline_context, step_contexts_to_execute, limit
                ):
                    yield step_event

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Multiprocess engine: parent process exiting after {duration} (pid: {pid})'.format(
                duration=format_duration(timer_result.millis), pid=os.getpid()
            ),
            event_specific_data=EngineEventData.multiprocess(os.getpid()),
        )
Exemplo n.º 7
0
def core_dagster_event_sequence_for_step(
        step_context: SystemStepExecutionContext,
        prior_attempt_count: int) -> Iterator[DagsterEvent]:
    """
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")
    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context,
                                                prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}

    for step_input in step_context.step.step_inputs:
        input_def = step_input.source.get_input_def(step_context.pipeline_def)
        dagster_type = input_def.dagster_type

        if dagster_type.kind == DagsterTypeKind.NOTHING:
            continue

        for event_or_input_value in ensure_gen(
                step_input.source.load_input_object(step_context)):
            if isinstance(event_or_input_value, DagsterEvent):
                yield event_or_input_value
            else:
                check.invariant(step_input.name not in inputs)
                inputs[step_input.name] = event_or_input_value

    for input_name, input_value in inputs.items():
        for evt in check.generator(
                _type_checked_event_sequence_for_input(step_context,
                                                       input_name,
                                                       input_value)):
            yield evt

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
                _step_output_error_checked_user_event_sequence(
                    step_context, user_event_sequence)):

            if isinstance(user_event, (Output, DynamicOutput)):
                for evt in _type_check_and_store_output(
                        step_context, user_event):
                    yield evt
            elif isinstance(user_event,
                            (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))
Exemplo n.º 8
0
    def execute(pipeline_context, execution_plan, step_keys_to_execute=None):
        check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
        check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

        step_key_set = None if step_keys_to_execute is None else set(step_keys_to_execute)

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Executing steps in process (pid: {pid})'.format(pid=os.getpid()),
            event_specific_data=EngineEventData.in_process(os.getpid(), step_key_set),
        )

        with time_execution_scope() as timer_result:
            check.param_invariant(
                isinstance(pipeline_context.executor_config, ExecutorConfig),
                'pipeline_context',
                'Expected executor_config to be ExecutorConfig got {}'.format(
                    pipeline_context.executor_config
                ),
            )

            failed_or_skipped_steps = set()

            step_levels = execution_plan.topological_step_levels()

            # It would be good to implement a reference tracking algorithm here to
            # garbage collect results that are no longer needed by any steps
            # https://github.com/dagster-io/dagster/issues/811
            for step_level in step_levels:
                for step in step_level:
                    if step_key_set and step.key not in step_key_set:
                        continue

                    step_context = pipeline_context.for_step(step)

                    with mirror_step_io(step_context):
                        # capture all of the logs for this step

                        failed_inputs = []
                        for step_input in step.step_inputs:
                            failed_inputs.extend(
                                failed_or_skipped_steps.intersection(step_input.dependency_keys)
                            )

                        if failed_inputs:
                            step_context.log.info(
                                (
                                    'Dependencies for step {step} failed: {failed_inputs}. Not executing.'
                                ).format(step=step.key, failed_inputs=failed_inputs)
                            )
                            failed_or_skipped_steps.add(step.key)
                            yield DagsterEvent.step_skipped_event(step_context)
                            continue

                        uncovered_inputs = pipeline_context.intermediates_manager.uncovered_inputs(
                            step_context, step
                        )
                        if uncovered_inputs:
                            # In partial pipeline execution, we may end up here without having validated the
                            # missing dependent outputs were optional
                            _assert_missing_inputs_optional(
                                uncovered_inputs, execution_plan, step.key
                            )

                            step_context.log.info(
                                (
                                    'Not all inputs covered for {step}. Not executing. Output missing for '
                                    'inputs: {uncovered_inputs}'
                                ).format(uncovered_inputs=uncovered_inputs, step=step.key)
                            )
                            failed_or_skipped_steps.add(step.key)
                            yield DagsterEvent.step_skipped_event(step_context)
                            continue

                        for step_event in check.generator(
                            dagster_event_sequence_for_step(step_context)
                        ):
                            check.inst(step_event, DagsterEvent)
                            if step_event.is_step_failure:
                                failed_or_skipped_steps.add(step.key)

                            yield step_event

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Finished steps in process (pid: {pid}) in {duration_ms}'.format(
                pid=os.getpid(), duration_ms=format_duration(timer_result.millis)
            ),
            event_specific_data=EngineEventData.in_process(os.getpid(), step_key_set),
        )
Exemplo n.º 9
0
    def test_fetch_records_by_update_timestamp(self, storage):
        assert storage
        self._skip_in_memory(storage)

        one = make_new_run_id()
        two = make_new_run_id()
        three = make_new_run_id()
        storage.add_run(
            TestRunStorage.build_run(
                run_id=one, pipeline_name="some_pipeline", status=PipelineRunStatus.STARTED
            )
        )
        storage.add_run(
            TestRunStorage.build_run(
                run_id=two, pipeline_name="some_pipeline", status=PipelineRunStatus.FAILURE
            )
        )
        storage.add_run(
            TestRunStorage.build_run(
                run_id=three, pipeline_name="some_pipeline", status=PipelineRunStatus.STARTED
            )
        )
        storage.handle_run_event(
            three,  # three succeeds
            DagsterEvent(
                message="a message",
                event_type_value=DagsterEventType.PIPELINE_SUCCESS.value,
                pipeline_name="some_pipeline",
            ),
        )
        storage.handle_run_event(
            one,  # fail one after two has fails and three has succeeded
            DagsterEvent(
                message="a message",
                event_type_value=DagsterEventType.PIPELINE_FAILURE.value,
                pipeline_name="some_pipeline",
            ),
        )

        record_two = storage.get_run_records(
            filters=PipelineRunsFilter(run_ids=[two], updated_after=datetime(2020, 1, 1))
        )[0]
        run_two_update_timestamp = record_two.update_timestamp

        assert [
            record.pipeline_run.run_id
            for record in storage.get_run_records(
                filters=PipelineRunsFilter(updated_after=run_two_update_timestamp),
                order_by="update_timestamp",
                ascending=True,
            )
        ] == [three, one]

        assert [
            record.pipeline_run.run_id
            for record in storage.get_run_records(
                filters=PipelineRunsFilter(
                    statuses=[PipelineRunStatus.FAILURE], updated_after=run_two_update_timestamp
                ),
            )
        ] == [one]
Exemplo n.º 10
0
    def event_generator(
        self,
        execution_plan,
        run_config,
        pipeline_run,
        instance,
        scoped_resources_builder_cm,
        intermediate_storage=None,
        raise_on_error=False,
        resource_instances_to_override=None,
        output_capture=None,
    ):
        execution_plan = check.inst_param(execution_plan, "execution_plan",
                                          ExecutionPlan)
        pipeline_def = execution_plan.pipeline.get_definition()

        run_config = check.dict_param(run_config, "run_config", key_type=str)
        pipeline_run = check.inst_param(pipeline_run, "pipeline_run",
                                        PipelineRun)
        instance = check.inst_param(instance, "instance", DagsterInstance)

        scoped_resources_builder_cm = check.callable_param(
            scoped_resources_builder_cm, "scoped_resources_builder_cm")
        intermediate_storage = check.opt_inst_param(
            intermediate_storage, "intermediate_storage_data",
            IntermediateStorage)
        raise_on_error = check.bool_param(raise_on_error, "raise_on_error")
        resource_instances_to_override = check.opt_dict_param(
            resource_instances_to_override, "resource_instances_to_override")

        execution_context = None
        resources_manager = None

        try:
            context_creation_data = create_context_creation_data(
                execution_plan,
                run_config,
                pipeline_run,
                instance,
            )

            log_manager = create_log_manager(context_creation_data)
            resource_defs = execution_plan.pipeline_def.get_mode_definition(
                context_creation_data.environment_config.mode).resource_defs
            resources_manager = scoped_resources_builder_cm(
                resource_defs=resource_defs,
                resource_configs=context_creation_data.environment_config.
                resources,
                log_manager=log_manager,
                execution_plan=execution_plan,
                pipeline_run=context_creation_data.pipeline_run,
                resource_keys_to_init=context_creation_data.
                resource_keys_to_init,
                instance=instance,
                resource_instances_to_override=resource_instances_to_override,
                emit_persistent_events=True,
            )
            yield from resources_manager.generate_setup_events()
            scoped_resources_builder = check.inst(
                resources_manager.get_object(), ScopedResourcesBuilder)

            intermediate_storage = create_intermediate_storage(
                context_creation_data,
                intermediate_storage,
                scoped_resources_builder,
            )

            execution_context = self.construct_context(
                context_creation_data=context_creation_data,
                scoped_resources_builder=scoped_resources_builder,
                log_manager=log_manager,
                intermediate_storage=intermediate_storage,
                raise_on_error=raise_on_error,
                output_capture=output_capture,
            )

            _validate_plan_with_context(execution_context, execution_plan)

            yield execution_context
            yield from resources_manager.generate_teardown_events()
        except DagsterError as dagster_error:
            if execution_context is None:
                user_facing_exc_info = (
                    # pylint does not know original_exc_info exists is is_user_code_error is true
                    # pylint: disable=no-member
                    dagster_error.original_exc_info
                    if dagster_error.is_user_code_error else sys.exc_info())
                error_info = serializable_error_info_from_exc_info(
                    user_facing_exc_info)

                yield DagsterEvent.pipeline_init_failure(
                    pipeline_name=pipeline_def.name,
                    failure_data=PipelineInitFailureData(error=error_info),
                    log_manager=_create_context_free_log_manager(
                        instance, pipeline_run, pipeline_def),
                )
                if resources_manager:
                    yield from resources_manager.generate_teardown_events()
            else:
                # pipeline teardown failure
                raise dagster_error

            if raise_on_error:
                raise dagster_error
Exemplo n.º 11
0
def inner_plan_execution_iterator(pipeline_context, execution_plan):
    check.inst_param(pipeline_context, "pipeline_context", SystemExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    retries = pipeline_context.retries

    yield from copy_required_intermediates_for_execution(pipeline_context, execution_plan)

    with execution_plan.start(retries=retries) as active_execution:

        # It would be good to implement a reference tracking algorithm here to
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        while not active_execution.is_complete:
            step = active_execution.get_next_step()
            step_context = pipeline_context.for_step(step)
            step_event_list = []

            missing_resources = [
                resource_key
                for resource_key in step_context.required_resource_keys
                if not hasattr(step_context.resources, resource_key)
            ]
            check.invariant(
                len(missing_resources) == 0,
                (
                    "Expected step context for solid {solid_name} to have all required resources, but "
                    "missing {missing_resources}."
                ).format(solid_name=step_context.solid.name, missing_resources=missing_resources),
            )

            # capture all of the logs for this step
            with pipeline_context.instance.compute_log_manager.watch(
                step_context.pipeline_run, step_context.step.key
            ):
                missing_input_sources = pipeline_context.intermediate_storage.get_missing_input_sources(
                    step_context, step
                )
                if missing_input_sources:
                    # In partial pipeline execution, we may end up here without having validated the
                    # missing dependent outputs were optional
                    _assert_missing_sources_from_optional_outputs(
                        missing_input_sources, execution_plan, step.key
                    )

                    step_context.log.info(
                        (
                            "Not all inputs covered for {step}. Not executing. Sources missing: {missing_input_sources}"
                        ).format(missing_input_sources=missing_input_sources, step=step.key)
                    )
                    step_event = DagsterEvent.step_skipped_event(step_context)
                    step_event_list.append(step_event)
                    yield step_event
                    active_execution.mark_skipped(step.key)
                else:
                    for step_event in check.generator(
                        _dagster_event_sequence_for_step(step_context, retries)
                    ):
                        check.inst(step_event, DagsterEvent)
                        step_event_list.append(step_event)
                        yield step_event
                        active_execution.handle_event(step_event)

                active_execution.verify_complete(pipeline_context, step.key)

            # process skips from failures or uncovered inputs
            for event in active_execution.plan_events_iterator(pipeline_context):
                step_event_list.append(event)
                yield event

            # pass a list of step events to hooks
            for hook_event in _trigger_hook(step_context, step_event_list):
                yield hook_event
Exemplo n.º 12
0
def inner_plan_execution_iterator(
        pipeline_context: PlanExecutionContext,
        execution_plan: ExecutionPlan) -> Iterator[DagsterEvent]:
    check.inst_param(pipeline_context, "pipeline_context",
                     PlanExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    with execution_plan.start(
            retry_mode=pipeline_context.retry_mode) as active_execution:

        # It would be good to implement a reference tracking algorithm here to
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        while not active_execution.is_complete:
            step = active_execution.get_next_step()
            step_context = cast(
                StepExecutionContext,
                pipeline_context.for_step(
                    step,
                    active_execution.retry_state.get_attempt_count(step.key)),
            )
            step_event_list = []

            missing_resources = [
                resource_key
                for resource_key in step_context.required_resource_keys
                if not hasattr(step_context.resources, resource_key)
            ]
            check.invariant(
                len(missing_resources) == 0,
                ("Expected step context for solid {solid_name} to have all required resources, but "
                 "missing {missing_resources}.").format(
                     solid_name=step_context.solid.name,
                     missing_resources=missing_resources),
            )

            # capture all of the logs for this step
            with pipeline_context.instance.compute_log_manager.watch(
                    step_context.pipeline_run, step_context.step.key):
                yield DagsterEvent.capture_logs(step_context,
                                                log_key=step_context.step.key,
                                                steps=[step_context.step])

                for step_event in check.generator(
                        _dagster_event_sequence_for_step(step_context)):
                    check.inst(step_event, DagsterEvent)
                    step_event_list.append(step_event)
                    yield step_event
                    active_execution.handle_event(step_event)

                active_execution.verify_complete(pipeline_context, step.key)

            # process skips from failures or uncovered inputs
            for event in active_execution.plan_events_iterator(
                    pipeline_context):
                step_event_list.append(event)
                yield event

            # pass a list of step events to hooks
            for hook_event in _trigger_hook(step_context, step_event_list):
                yield hook_event
Exemplo n.º 13
0
def pipeline_initialization_event_generator(
    pipeline_def,
    environment_dict,
    pipeline_run,
    instance,
    execution_plan,
    scoped_resources_builder_cm,
    system_storage_data=None,
    raise_on_error=False,
):
    pipeline_def = check.inst_param(pipeline_def, 'pipeline_def',
                                    PipelineDefinition)
    environment_dict = check.dict_param(environment_dict,
                                        'environment_dict',
                                        key_type=str)
    pipeline_run = check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    instance = check.inst_param(instance, 'instance', DagsterInstance)
    execution_plan = check.inst_param(execution_plan, 'execution_plan',
                                      ExecutionPlan)
    scoped_resources_builder_cm = check.callable_param(
        scoped_resources_builder_cm, 'scoped_resources_builder_cm')
    system_storage_data = check.opt_inst_param(system_storage_data,
                                               'system_storage_data',
                                               SystemStorageData)
    raise_on_error = check.bool_param(raise_on_error, 'raise_on_error')

    pipeline_context = None
    resources_manager = None

    try:
        context_creation_data = create_context_creation_data(
            pipeline_def,
            environment_dict,
            pipeline_run,
            instance,
            execution_plan,
        )
        executor_config = create_executor_config(context_creation_data)
        log_manager = create_log_manager(context_creation_data)
        resources_manager = scoped_resources_builder_cm(
            execution_plan,
            context_creation_data.environment_config,
            context_creation_data.pipeline_run,
            log_manager,
            context_creation_data.resource_keys_to_init,
        )
        for event in resources_manager.generate_setup_events():
            yield event
        scoped_resources_builder = check.inst(resources_manager.get_object(),
                                              ScopedResourcesBuilder)
        system_storage_data = create_system_storage_data(
            context_creation_data, system_storage_data,
            scoped_resources_builder)
        pipeline_context = construct_pipeline_execution_context(
            context_creation_data=context_creation_data,
            scoped_resources_builder=scoped_resources_builder,
            system_storage_data=system_storage_data,
            log_manager=log_manager,
            executor_config=executor_config,
            raise_on_error=raise_on_error,
        )

        _validate_plan_with_context(pipeline_context, execution_plan)

        yield pipeline_context
        for event in resources_manager.generate_teardown_events():
            yield event
    except DagsterError as dagster_error:
        if pipeline_context is None:
            user_facing_exc_info = (
                # pylint does not know original_exc_info exists is is_user_code_error is true
                # pylint: disable=no-member
                dagster_error.original_exc_info
                if dagster_error.is_user_code_error else sys.exc_info())
            error_info = serializable_error_info_from_exc_info(
                user_facing_exc_info)

            yield DagsterEvent.pipeline_init_failure(
                pipeline_name=pipeline_def.name,
                failure_data=PipelineInitFailureData(error=error_info),
                log_manager=_create_context_free_log_manager(
                    instance, pipeline_run, pipeline_def),
            )
            if resources_manager:
                for event in resources_manager.generate_teardown_events():
                    yield event
        else:
            # pipeline teardown failure
            raise dagster_error

        if raise_on_error:
            raise dagster_error
Exemplo n.º 14
0
def _core_celery_execution_loop(pipeline_context, execution_plan, step_execution_fn):
    from .tasks import make_app

    check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.callable_param(step_execution_fn, 'step_execution_fn')

    check.param_invariant(
        isinstance(pipeline_context.executor_config, (CeleryConfig, CeleryK8sJobConfig)),
        'pipeline_context',
        'Expected executor_config to be Celery config got {}'.format(
            pipeline_context.executor_config
        ),
    )

    celery_config = pipeline_context.executor_config

    # https://github.com/dagster-io/dagster/issues/2440
    check.invariant(
        pipeline_context.system_storage_def.is_persistent,
        'Cannot use in-memory storage with Celery, use filesystem (on top of NFS or '
        'similar system that allows files to be available to all nodes), S3, or GCS',
    )

    app = make_app(celery_config)

    priority_for_step = lambda step: (
        -1 * int(step.tags.get(DAGSTER_CELERY_STEP_PRIORITY_TAG, task_default_priority))
        + -1 * _get_run_priority(pipeline_context)
    )
    priority_for_key = lambda step_key: (
        priority_for_step(execution_plan.get_step_by_key(step_key))
    )
    _warn_on_priority_misuse(pipeline_context, execution_plan)

    step_results = {}  # Dict[ExecutionStep, celery.AsyncResult]
    step_errors = {}
    completed_steps = set({})  # Set[step_key]
    active_execution = execution_plan.start(
        retries=pipeline_context.executor_config.retries, sort_key_fn=priority_for_step
    )
    stopping = False

    while (not active_execution.is_complete and not stopping) or step_results:

        results_to_pop = []
        for step_key, result in sorted(step_results.items(), key=lambda x: priority_for_key(x[0])):
            if result.ready():
                try:
                    step_events = result.get()
                except Exception:  # pylint: disable=broad-except
                    # We will want to do more to handle the exception here.. maybe subclass Task
                    # Certainly yield an engine or pipeline event
                    step_events = []
                    step_errors[step_key] = serializable_error_info_from_exc_info(sys.exc_info())
                    stopping = True
                for step_event in step_events:
                    event = deserialize_json_to_dagster_namedtuple(step_event)
                    yield event
                    active_execution.handle_event(event)

                results_to_pop.append(step_key)
                completed_steps.add(step_key)

        for step_key in results_to_pop:
            if step_key in step_results:
                del step_results[step_key]
                active_execution.verify_complete(pipeline_context, step_key)

        # process skips from failures or uncovered inputs
        for event in active_execution.skipped_step_events_iterator(pipeline_context):
            yield event

        # don't add any new steps if we are stopping
        if stopping:
            continue

        # This is a slight refinement. If we have n workers idle and schedule m > n steps for
        # execution, the first n steps will be picked up by the idle workers in the order in
        # which they are scheduled (and the following m-n steps will be executed in priority
        # order, provided that it takes longer to execute a step than to schedule it). The test
        # case has m >> n to exhibit this behavior in the absence of this sort step.
        for step in active_execution.get_steps_to_execute():
            try:
                queue = step.tags.get(DAGSTER_CELERY_QUEUE_TAG, task_default_queue)
                yield DagsterEvent.engine_event(
                    pipeline_context,
                    'Submitting celery task for step "{step_key}" to queue "{queue}".'.format(
                        step_key=step.key, queue=queue
                    ),
                    EngineEventData(marker_start=DELEGATE_MARKER),
                    step_key=step.key,
                )

                # Get the Celery priority for this step
                priority = _get_step_priority(pipeline_context, step)

                # Submit the Celery tasks
                step_results[step.key] = step_execution_fn(
                    app, pipeline_context, step, queue, priority
                )

            except Exception:
                yield DagsterEvent.engine_event(
                    pipeline_context,
                    'Encountered error during celery task submission.'.format(),
                    event_specific_data=EngineEventData.engine_error(
                        serializable_error_info_from_exc_info(sys.exc_info()),
                    ),
                )
                raise

        time.sleep(TICK_SECONDS)

    if step_errors:
        raise DagsterSubprocessError(
            'During celery execution errors occurred in workers:\n{error_list}'.format(
                error_list='\n'.join(
                    [
                        '[{step}]: {err}'.format(step=key, err=err.to_string())
                        for key, err in step_errors.items()
                    ]
                )
            ),
            subprocess_error_infos=list(step_errors.values()),
        )
Exemplo n.º 15
0
def test_multiline_logging_complex():
    msg = 'DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0'
    kwargs = {
        'pipeline':
        'example',
        'pipeline_name':
        'example',
        'step_key':
        'start.materialization.output.result.0',
        'solid':
        'start',
        'solid_definition':
        'emit_num',
        'dagster_event':
        DagsterEvent(
            event_type_value='STEP_FAILURE',
            pipeline_name='error_monster',
            step_key='start.materialization.output.result.0',
            solid_handle=SolidHandle('start', 'emit_num', None),
            step_kind_value='MATERIALIZATION_THUNK',
            logging_tags={
                'pipeline': 'error_monster',
                'step_key': 'start.materialization.output.result.0',
                'solid': 'start',
                'solid_definition': 'emit_num',
            },
            event_specific_data=StepFailureData(
                error=SerializableErrorInfo(
                    message=
                    "FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'\n",
                    stack=[
                        '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/errors.py", line 186, in user_code_error_boundary\n    yield\n',
                        '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/execution_plan/simple_engine.py", line 365, in _event_sequence_for_step_compute_fn\n    for step_output in gen:\n',
                        '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/execution_plan/materialization_thunk.py", line 28, in _fn\n    runtime_type.output_materialization_config.materialize_runtime_value(config_spec, runtime_value)\n',
                        '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/types/config_schema.py", line 93, in materialize_runtime_value\n    return func(config_value, runtime_value)\n',
                        '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/types/config_schema.py", line 110, in _selector\n    return func(selector_key, selector_value, runtime_value)\n',
                        '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/types/builtin_config_schemas.py", line 59, in _builtin_output_schema\n    with open(json_file_path, \'w\') as ff:\n',
                    ],
                    cls_name='FileNotFoundError',
                ),
                user_failure_data=None,
            ),
        ),
    }

    with _setup_logger(DAGSTER_DEFAULT_LOGGER) as (captured_results, logger):

        dl = DagsterLogManager('123', {}, [logger])
        dl.info(msg, **kwargs)

        kv_pairs = set(captured_results[0].split('\n')[1:])

    expected_pairs = [
        '        orig_message = "DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0"',
        '              run_id = "123"',
        '            pipeline = "example"',
        '    solid_definition = "emit_num"',
        '       pipeline_name = "example"',
        '               solid = "start"',
        '            step_key = "start.materialization.output.result.0"',
    ]
    for e in expected_pairs:
        assert e in kv_pairs

    assert _regex_match_kv_pair(
        r'      log_message_id = "{0}"'.format(REGEX_UUID), kv_pairs)
    assert _regex_match_kv_pair(
        r'       log_timestamp = "{0}"'.format(REGEX_TS), kv_pairs)

    expected_dagster_event = {
        'event_specific_data': [
            [
                "FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'\n",
                [
                    '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/errors.py", line 186, in user_code_error_boundary\n    yield\n',
                    '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/execution_plan/simple_engine.py", line 365, in _event_sequence_for_step_compute_fn\n    for step_output in gen:\n',
                    '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/execution_plan/materialization_thunk.py", line 28, in _fn\n    runtime_type.output_materialization_config.materialize_runtime_value(config_spec, runtime_value)\n',
                    '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/types/config_schema.py", line 93, in materialize_runtime_value\n    return func(config_value, runtime_value)\n',
                    '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/types/config_schema.py", line 110, in _selector\n    return func(selector_key, selector_value, runtime_value)\n',
                    '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/types/builtin_config_schemas.py", line 59, in _builtin_output_schema\n    with open(json_file_path, \'w\') as ff:\n',
                ],
                'FileNotFoundError',
            ],
            None,  # user_failure_data
        ],
        'event_type_value':
        'STEP_FAILURE',
        'message':
        None,
        'pipeline_name':
        'error_monster',
        'solid_handle': ['start', 'emit_num', None],
        'step_key':
        'start.materialization.output.result.0',
        'step_kind_value':
        'MATERIALIZATION_THUNK',
        'logging_tags': {
            'pipeline': 'error_monster',
            'solid': 'start',
            'solid_definition': 'emit_num',
            'step_key': 'start.materialization.output.result.0',
        },
    }
    dagster_event = json.loads([
        pair for pair in kv_pairs if 'dagster_event' in pair
    ][0].strip('       dagster_event = '))
    assert dagster_event == expected_dagster_event
Exemplo n.º 16
0
    def execute(self, plan_context: PlanOrchestrationContext,
                execution_plan: ExecutionPlan):
        check.inst_param(plan_context, "plan_context",
                         PlanOrchestrationContext)
        check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

        self._event_cursor = -1  # pylint: disable=attribute-defined-outside-init

        yield DagsterEvent.engine_event(
            plan_context,
            f"Starting execution with step handler {self._step_handler.name}",
            EngineEventData(),
        )

        with execution_plan.start(retry_mode=self.retries) as active_execution:
            running_steps: Dict[str, ExecutionStep] = {}

            if plan_context.resume_from_failure:
                yield DagsterEvent.engine_event(
                    plan_context,
                    "Resuming execution from failure",
                    EngineEventData(),
                )

                prior_events = self._pop_events(
                    plan_context.instance,
                    plan_context.run_id,
                )
                for dagster_event in prior_events:
                    yield dagster_event

                possibly_in_flight_steps = active_execution.rebuild_from_events(
                    prior_events)
                for step in possibly_in_flight_steps:

                    yield DagsterEvent.engine_event(
                        plan_context,
                        "Checking on status of possibly launched steps",
                        EngineEventData(),
                        step.handle,
                    )

                    # TODO: check if failure event included. For now, hacky assumption that
                    # we don't log anything on successful check
                    if self._step_handler.check_step_health(
                            self._get_step_handler_context(
                                plan_context, [step], active_execution)):
                        # health check failed, launch the step
                        self._log_new_events(
                            self._step_handler.launch_step(
                                self._get_step_handler_context(
                                    plan_context, [step], active_execution)),
                            plan_context,
                            {
                                step.key: step
                                for step in possibly_in_flight_steps
                            },
                        )

                    running_steps[step.key] = step

            last_check_step_health_time = pendulum.now("UTC")

            # Order of events is important here. During an interation, we call handle_event, then get_steps_to_execute,
            # then is_complete. get_steps_to_execute updates the state of ActiveExecution, and without it
            # is_complete can return true when we're just between steps.
            while not active_execution.is_complete:

                if active_execution.check_for_interrupts():
                    if not plan_context.instance.run_will_resume(
                            plan_context.run_id):
                        yield DagsterEvent.engine_event(
                            plan_context,
                            "Executor received termination signal, forwarding to steps",
                            EngineEventData.interrupted(
                                list(running_steps.keys())),
                        )
                        active_execution.mark_interrupted()
                        for _, step in running_steps.items():
                            self._log_new_events(
                                self._step_handler.terminate_step(
                                    self._get_step_handler_context(
                                        plan_context, [step],
                                        active_execution)),
                                plan_context,
                                running_steps,
                            )

                    else:
                        yield DagsterEvent.engine_event(
                            plan_context,
                            "Executor received termination signal, not forwarding to steps because "
                            "run will be resumed",
                            EngineEventData(metadata_entries=[
                                EventMetadataEntry.text(
                                    str(running_steps.keys()),
                                    "steps_in_flight")
                            ]),
                        )
                        active_execution.mark_interrupted()

                    return

                for dagster_event in self._pop_events(
                        plan_context.instance,
                        plan_context.run_id,
                ):  # type: ignore

                    # STEP_SKIPPED events are only emitted by ActiveExecution, which already handles
                    # and yields them.
                    if dagster_event.is_step_skipped:
                        assert isinstance(dagster_event.step_key, str)
                        active_execution.verify_complete(
                            plan_context, dagster_event.step_key)

                    else:
                        yield dagster_event
                        active_execution.handle_event(dagster_event)

                        if dagster_event.is_step_success or dagster_event.is_step_failure:
                            assert isinstance(dagster_event.step_key, str)
                            del running_steps[dagster_event.step_key]
                            active_execution.verify_complete(
                                plan_context, dagster_event.step_key)

                # process skips from failures or uncovered inputs
                for event in active_execution.plan_events_iterator(
                        plan_context):
                    yield event

                curr_time = pendulum.now("UTC")
                if (curr_time - last_check_step_health_time).total_seconds(
                ) >= self._check_step_health_interval_seconds:
                    last_check_step_health_time = curr_time
                    for _, step in running_steps.items():
                        self._log_new_events(
                            self._step_handler.check_step_health(
                                self._get_step_handler_context(
                                    plan_context, [step], active_execution)),
                            plan_context,
                            running_steps,
                        )

                for step in active_execution.get_steps_to_execute():
                    running_steps[step.key] = step
                    self._log_new_events(
                        self._step_handler.launch_step(
                            self._get_step_handler_context(
                                plan_context, [step], active_execution)),
                        plan_context,
                        running_steps,
                    )

                time.sleep(self._sleep_seconds)
Exemplo n.º 17
0
def host_mode_execution_context_event_generator(
    pipeline,
    execution_plan,
    run_config,
    pipeline_run,
    instance,
    raise_on_error,
    executor_defs,
    output_capture,
    resume_from_failure: bool = False,
):
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
    check.inst_param(pipeline, "pipeline", ReconstructablePipeline)

    check.dict_param(run_config, "run_config", key_type=str)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    executor_defs = check.list_param(executor_defs,
                                     "executor_defs",
                                     of_type=ExecutorDefinition)
    check.bool_param(raise_on_error, "raise_on_error")
    check.invariant(output_capture is None)

    execution_context = None

    loggers = []

    for (logger_def, logger_config) in default_system_loggers():
        loggers.append(
            logger_def.logger_fn(
                InitLoggerContext(
                    logger_config,
                    pipeline_def=None,
                    logger_def=logger_def,
                    run_id=pipeline_run.run_id,
                )))

    log_manager = DagsterLogManager.create(loggers=loggers,
                                           pipeline_run=pipeline_run,
                                           instance=instance)

    try:
        executor = _get_host_mode_executor(pipeline, run_config, executor_defs,
                                           instance)
        execution_context = PlanOrchestrationContext(
            plan_data=PlanData(
                pipeline=pipeline,
                pipeline_run=pipeline_run,
                instance=instance,
                execution_plan=execution_plan,
                raise_on_error=raise_on_error,
                retry_mode=executor.retries,
            ),
            log_manager=log_manager,
            executor=executor,
            output_capture=None,
            resume_from_failure=resume_from_failure,
        )

        yield execution_context

    except DagsterError as dagster_error:
        if execution_context is None:
            user_facing_exc_info = (
                # pylint does not know original_exc_info exists is is_user_code_error is true
                # pylint: disable=no-member
                dagster_error.original_exc_info  # type: ignore
                if dagster_error.is_user_code_error else sys.exc_info())
            error_info = serializable_error_info_from_exc_info(
                user_facing_exc_info)

            event = DagsterEvent.pipeline_failure(
                pipeline_context_or_name=pipeline_run.pipeline_name,
                context_msg=
                (f'Pipeline failure during initialization for pipeline "{pipeline_run.pipeline_name}". '
                 "This may be due to a failure in initializing the executor or one of the loggers."
                 ),
                error_info=error_info,
            )
            log_manager.log_dagster_event(
                level=logging.ERROR,
                msg=event.message,
                dagster_event=event  # type: ignore
            )
            yield event
        else:
            # pipeline teardown failure
            raise dagster_error

        if raise_on_error:
            raise dagster_error
Exemplo n.º 18
0
def core_celery_execution_loop(pipeline_context, execution_plan,
                               step_execution_fn):

    check.inst_param(pipeline_context, "pipeline_context",
                     SystemPipelineExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
    check.callable_param(step_execution_fn, "step_execution_fn")

    executor = pipeline_context.executor

    # https://github.com/dagster-io/dagster/issues/2440
    check.invariant(
        execution_plan.artifacts_persisted,
        "Cannot use in-memory storage with Celery, use filesystem (on top of NFS or "
        "similar system that allows files to be available to all nodes), S3, or GCS",
    )

    app = make_app(executor.app_args())

    priority_for_step = lambda step: (-1 * int(
        step.tags.get(DAGSTER_CELERY_STEP_PRIORITY_TAG, task_default_priority)
    ) + -1 * _get_run_priority(pipeline_context))
    priority_for_key = lambda step_key: (priority_for_step(
        execution_plan.get_step_by_key(step_key)))
    _warn_on_priority_misuse(pipeline_context, execution_plan)

    step_results = {}  # Dict[ExecutionStep, celery.AsyncResult]
    step_errors = {}

    with execution_plan.start(
            retries=pipeline_context.executor.retries,
            sort_key_fn=priority_for_step,
    ) as active_execution:

        stopping = False

        while (not active_execution.is_complete
               and not stopping) or step_results:
            if active_execution.check_for_interrupts():
                yield DagsterEvent.engine_event(
                    pipeline_context,
                    "Celery executor: received termination signal - revoking active tasks from workers",
                    EngineEventData.interrupted(list(step_results.keys())),
                )
                stopping = True
                active_execution.mark_interrupted()
                for result in step_results.values():
                    result.revoke()
            results_to_pop = []
            for step_key, result in sorted(
                    step_results.items(),
                    key=lambda x: priority_for_key(x[0])):
                if result.ready():
                    try:
                        step_events = result.get()
                    except TaskRevokedError:
                        step_events = []
                        yield DagsterEvent.engine_event(
                            pipeline_context,
                            'celery task for running step "{step_key}" was revoked.'
                            .format(step_key=step_key, ),
                            EngineEventData(marker_end=DELEGATE_MARKER),
                            step_handle=active_execution.get_step_by_key(
                                step_key).handle,
                        )
                    except Exception:  # pylint: disable=broad-except
                        # We will want to do more to handle the exception here.. maybe subclass Task
                        # Certainly yield an engine or pipeline event
                        step_events = []
                        step_errors[
                            step_key] = serializable_error_info_from_exc_info(
                                sys.exc_info())
                    for step_event in step_events:
                        event = deserialize_json_to_dagster_namedtuple(
                            step_event)
                        yield event
                        active_execution.handle_event(event)

                    results_to_pop.append(step_key)

            for step_key in results_to_pop:
                if step_key in step_results:
                    del step_results[step_key]
                    active_execution.verify_complete(pipeline_context,
                                                     step_key)

            # process skips from failures or uncovered inputs
            for event in active_execution.plan_events_iterator(
                    pipeline_context):
                yield event

            # don't add any new steps if we are stopping
            if stopping or step_errors:
                continue

            # This is a slight refinement. If we have n workers idle and schedule m > n steps for
            # execution, the first n steps will be picked up by the idle workers in the order in
            # which they are scheduled (and the following m-n steps will be executed in priority
            # order, provided that it takes longer to execute a step than to schedule it). The test
            # case has m >> n to exhibit this behavior in the absence of this sort step.
            for step in active_execution.get_steps_to_execute():
                try:
                    queue = step.tags.get(DAGSTER_CELERY_QUEUE_TAG,
                                          task_default_queue)
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        'Submitting celery task for step "{step_key}" to queue "{queue}".'
                        .format(step_key=step.key, queue=queue),
                        EngineEventData(marker_start=DELEGATE_MARKER),
                        step_handle=step.handle,
                    )

                    # Get the Celery priority for this step
                    priority = _get_step_priority(pipeline_context, step)

                    # Submit the Celery tasks
                    step_results[step.key] = step_execution_fn(
                        app, pipeline_context, step, queue, priority)

                except Exception:
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        "Encountered error during celery task submission.".
                        format(),
                        event_specific_data=EngineEventData.engine_error(
                            serializable_error_info_from_exc_info(
                                sys.exc_info()), ),
                    )
                    raise

            time.sleep(TICK_SECONDS)

        if step_errors:
            raise DagsterSubprocessError(
                "During celery execution errors occurred in workers:\n{error_list}"
                .format(error_list="\n".join([
                    "[{step}]: {err}".format(step=key, err=err.to_string())
                    for key, err in step_errors.items()
                ])),
                subprocess_error_infos=list(step_errors.values()),
            )
Exemplo n.º 19
0
    def execute(pipeline_context, execution_plan):
        check.inst_param(pipeline_context, 'pipeline_context',
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

        intermediates_manager = pipeline_context.intermediates_manager

        limit = pipeline_context.executor_config.max_concurrent

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Executing steps using multiprocess engine: parent process (pid: {pid})'
            .format(pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(
                os.getpid(),
                step_keys_to_execute=execution_plan.step_keys_to_execute),
        )

        # It would be good to implement a reference tracking algorithm here so we could
        # garbage collection results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        with time_execution_scope() as timer_result:

            active_execution = execution_plan.start(
                retries=pipeline_context.executor_config.retries)
            active_iters = {}
            errors = {}
            term_events = {}
            stopping = False

            while (not stopping
                   and not active_execution.is_complete) or active_iters:
                try:
                    # start iterators
                    while len(active_iters) < limit and not stopping:
                        steps = active_execution.get_steps_to_execute(
                            limit=(limit - len(active_iters)))

                        if not steps:
                            break

                        for step in steps:
                            step_context = pipeline_context.for_step(step)
                            term_events[
                                step.key] = get_multiprocessing_context(
                                ).Event()
                            active_iters[
                                step.key] = execute_step_out_of_process(
                                    step_context, step, errors, term_events)

                    # process active iterators
                    empty_iters = []
                    for key, step_iter in active_iters.items():
                        try:
                            event_or_none = next(step_iter)
                            if event_or_none is None:
                                continue
                            else:
                                yield event_or_none
                                active_execution.handle_event(event_or_none)

                        except StopIteration:
                            empty_iters.append(key)

                    # clear and mark complete finished iterators
                    for key in empty_iters:
                        del active_iters[key]
                        if term_events[key].is_set():
                            stopping = True
                        del term_events[key]
                        active_execution.verify_complete(pipeline_context, key)

                    # process skips from failures or uncovered inputs
                    for event in active_execution.skipped_step_events_iterator(
                            pipeline_context):
                        yield event

                # In the very small chance that we get interrupted in this coordination section and not
                # polling the subprocesses for events - try to clean up gracefully
                except KeyboardInterrupt:
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        'Multiprocess engine: received KeyboardInterrupt - forwarding to active child processes',
                        EngineEventData.interrupted(list(term_events.keys())),
                    )
                    stopping = True
                    for event in term_events.values():
                        event.set()

            errs = {pid: err for pid, err in errors.items() if err}
            if errs:
                raise DagsterSubprocessError(
                    'During multiprocess execution errors occurred in child processes:\n{error_list}'
                    .format(error_list='\n'.join([
                        'In process {pid}: {err}'.format(pid=pid,
                                                         err=err.to_string())
                        for pid, err in errs.items()
                    ])),
                    subprocess_error_infos=list(errs.values()),
                )

        yield DagsterEvent.engine_event(
            pipeline_context,
            'Multiprocess engine: parent process exiting after {duration} (pid: {pid})'
            .format(duration=format_duration(timer_result.millis),
                    pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(os.getpid()),
        )
Exemplo n.º 20
0
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")
    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context,
                                                prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    for input_name, input_value in _input_values_from_intermediate_storage(
            step_context):
        if isinstance(input_value, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(input_value,
                                                  value_name=input_name))
            inputs[input_name] = input_value.obj
        elif isinstance(input_value, MultipleStepOutputsListWrapper):
            for op in input_value:
                if isinstance(input_value, ObjectStoreOperation):
                    yield DagsterEvent.object_store_operation(
                        step_context,
                        ObjectStoreOperation.serializable(
                            op, value_name=input_name))
                elif isinstance(input_value, AssetStoreOperation):
                    yield DagsterEvent.asset_store_operation(
                        step_context, input_value)
            inputs[input_name] = [op.obj for op in input_value]
        elif isinstance(input_value, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(step_context, input_value)
            inputs[input_name] = input_value.obj
        else:
            inputs[input_name] = input_value

    for input_name, input_value in inputs.items():
        for evt in check.generator(
                _type_checked_event_sequence_for_input(step_context,
                                                       input_name,
                                                       input_value)):
            yield evt

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
                _step_output_error_checked_user_event_sequence(
                    step_context, user_event_sequence)):

            if isinstance(user_event, Output):
                for evt in _create_step_events_for_output(
                        step_context, user_event):
                    yield evt
            elif isinstance(user_event,
                            (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))
Exemplo n.º 21
0
    def execute(pipeline_context, execution_plan, step_keys_to_execute=None):
        check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
        check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
        check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

        step_key_set = None if step_keys_to_execute is None else set(step_keys_to_execute)

        check.param_invariant(
            isinstance(pipeline_context.executor_config, ExecutorConfig),
            'pipeline_context',
            'Expected executor_config to be ExecutorConfig got {}'.format(
                pipeline_context.executor_config
            ),
        )

        failed_or_skipped_steps = set()

        step_levels = execution_plan.topological_step_levels()

        # It would be good to implement a reference tracking algorithm here so we could
        # garbage collection results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        for step_level in step_levels:
            for step in step_level:
                if step_key_set and step.key not in step_key_set:
                    continue

                step_context = pipeline_context.for_step(step)

                failed_inputs = [
                    step_input.prev_output_handle.step_key
                    for step_input in step.step_inputs
                    if step_input.is_from_output
                    and step_input.prev_output_handle.step_key in failed_or_skipped_steps
                ]
                if failed_inputs:
                    step_context.log.info(
                        (
                            'Dependencies for step {step} failed: {failed_inputs}. Not executing.'
                        ).format(step=step.key, failed_inputs=failed_inputs)
                    )
                    failed_or_skipped_steps.add(step.key)
                    yield DagsterEvent.step_skipped_event(step_context)
                    continue

                uncovered_inputs = pipeline_context.intermediates_manager.uncovered_inputs(
                    step_context, step
                )
                if uncovered_inputs:
                    # In partial pipeline execution, we may end up here without having validated the
                    # missing dependent outputs were optional
                    _assert_missing_inputs_optional(uncovered_inputs, execution_plan, step.key)

                    step_context.log.info(
                        (
                            'Not all inputs covered for {step}. Not executing. Output missing for '
                            'inputs: {uncovered_inputs}'
                        ).format(uncovered_inputs=uncovered_inputs, step=step.key)
                    )
                    failed_or_skipped_steps.add(step.key)
                    yield DagsterEvent.step_skipped_event(step_context)
                    continue

                for step_event in check.generator(dagster_event_sequence_for_step(step_context)):
                    check.inst(step_event, DagsterEvent)
                    if step_event.is_step_failure:
                        failed_or_skipped_steps.add(step.key)

                    yield step_event
Exemplo n.º 22
0
def resource_initialization_event_generator(
    resource_defs: Dict[str, ResourceDefinition],
    resource_configs: Dict[str, ResourceConfig],
    log_manager: DagsterLogManager,
    execution_plan: Optional[ExecutionPlan],
    pipeline_run: Optional[PipelineRun],
    resource_keys_to_init: Optional[Set[str]],
    instance: Optional[DagsterInstance],
    resource_instances_to_override: Optional[Dict[str, "InitializedResource"]],
    emit_persistent_events: Optional[bool],
):
    check.inst_param(log_manager, "log_manager", DagsterLogManager)
    resource_keys_to_init = check.opt_set_param(resource_keys_to_init,
                                                "resource_keys_to_init",
                                                of_type=str)
    check.opt_inst_param(execution_plan, "execution_plan", ExecutionPlan)
    check.opt_inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.opt_inst_param(instance, "instance", DagsterInstance)
    check.opt_dict_param(resource_instances_to_override,
                         "resource_instances_to_override")

    if execution_plan and execution_plan.step_handle_for_single_step_plans():
        step = execution_plan.get_step(
            cast(
                StepHandleUnion,
                cast(ExecutionPlan,
                     execution_plan).step_handle_for_single_step_plans(),
            ))
        resource_log_manager = log_manager.with_tags(
            **cast(ExecutionStep, step).logging_tags)
    else:
        resource_log_manager = log_manager

    generator_closed = False
    resource_managers: Deque[EventGenerationManager] = deque()

    try:

        yield from _core_resource_initialization_event_generator(
            resource_defs=resource_defs,
            resource_configs=resource_configs,
            resource_log_manager=resource_log_manager,
            resource_managers=resource_managers,
            execution_plan=execution_plan,
            pipeline_run=pipeline_run,
            resource_keys_to_init=resource_keys_to_init,
            instance=instance,
            resource_instances_to_override=resource_instances_to_override,
            emit_persistent_events=emit_persistent_events,
        )
    except GeneratorExit:
        # Shouldn't happen, but avoid runtime-exception in case this generator gets GC-ed
        # (see https://amir.rachum.com/blog/2017/03/03/generator-cleanup/).
        generator_closed = True
        raise
    finally:
        if not generator_closed:
            error = None
            while len(resource_managers) > 0:
                manager = resource_managers.pop()
                try:
                    yield from manager.generate_teardown_events()
                except DagsterUserCodeExecutionError as dagster_user_error:
                    error = dagster_user_error
            if error:
                yield DagsterEvent.resource_teardown_failure(
                    execution_plan,
                    resource_log_manager,
                    resource_keys_to_init,
                    serializable_error_info_from_exc_info(
                        error.original_exc_info),
                )
Exemplo n.º 23
0
def _core_resource_initialization_event_generator(
    execution_plan,
    environment_config,
    pipeline_run,
    resource_keys_to_init,
    resource_log_manager,
    resource_managers,
):
    pipeline_def = execution_plan.pipeline_def
    resource_instances = {}
    mode_definition = pipeline_def.get_mode_definition(pipeline_run.mode)
    resource_init_times = {}
    try:
        if resource_keys_to_init:
            yield DagsterEvent.resource_init_start(
                execution_plan,
                resource_log_manager,
                resource_keys_to_init,
            )

        for resource_name, resource_def in sorted(
                mode_definition.resource_defs.items()):
            if not resource_name in resource_keys_to_init:
                continue

            resource_context = InitResourceContext(
                pipeline_def=pipeline_def,
                resource_def=resource_def,
                resource_config=environment_config.resources.get(
                    resource_name, {}).get("config"),
                run_id=pipeline_run.run_id,
                # Add tags with information about the resource
                log_manager=resource_log_manager.with_tags(
                    resource_name=resource_name,
                    resource_fn_name=str(resource_def.resource_fn.__name__),
                ),
            )
            manager = single_resource_generation_manager(
                resource_context, resource_name, resource_def)
            for event in manager.generate_setup_events():
                if event:
                    yield event
            initialized_resource = check.inst(manager.get_object(),
                                              InitializedResource)
            resource_instances[resource_name] = initialized_resource.resource
            resource_init_times[resource_name] = initialized_resource.duration
            resource_managers.append(manager)

        if resource_keys_to_init:
            yield DagsterEvent.resource_init_success(execution_plan,
                                                     resource_log_manager,
                                                     resource_instances,
                                                     resource_init_times)
        yield ScopedResourcesBuilder(resource_instances)
    except DagsterUserCodeExecutionError as dagster_user_error:
        yield DagsterEvent.resource_init_failure(
            execution_plan,
            resource_log_manager,
            resource_keys_to_init,
            serializable_error_info_from_exc_info(
                dagster_user_error.original_exc_info),
        )
        raise dagster_user_error
Exemplo n.º 24
0
def _core_resource_initialization_event_generator(
    resource_defs: Dict[str, ResourceDefinition],
    resource_configs: Dict[str, ResourceConfig],
    resource_log_manager: DagsterLogManager,
    resource_managers: Deque[EventGenerationManager],
    execution_plan: Optional[ExecutionPlan],
    pipeline_run: Optional[PipelineRun],
    resource_keys_to_init: Optional[Set[str]],
    instance: Optional[DagsterInstance],
    resource_instances_to_override: Optional[Dict[str, "InitializedResource"]],
    emit_persistent_events: Optional[bool],
):
    if emit_persistent_events:
        check.invariant(
            execution_plan,
            "If emit_persistent_events is enabled, then execution_plan must be provided",
        )
    resource_instances_to_override = check.opt_dict_param(
        resource_instances_to_override, "resource_instances_to_override")
    resource_keys_to_init = check.opt_set_param(resource_keys_to_init,
                                                "resource_keys_to_init")
    resource_instances: Dict[str, "InitializedResource"] = {}
    resource_init_times = {}
    try:
        if emit_persistent_events and resource_keys_to_init:
            yield DagsterEvent.resource_init_start(
                execution_plan,
                resource_log_manager,
                resource_keys_to_init,
            )

        resource_dependencies = _resolve_resource_dependencies(resource_defs)

        for level in toposort(resource_dependencies):
            for resource_name in level:

                if resource_name in resource_instances_to_override:
                    # use the given resource instances instead of re-initiating it from resource def
                    resource_def = ResourceDefinition.hardcoded_resource(
                        resource_instances_to_override[resource_name])
                else:
                    resource_def = resource_defs[resource_name]
                if not resource_name in resource_keys_to_init:
                    continue

                resource_context = InitResourceContext(
                    resource_def=resource_def,
                    resource_config=resource_configs[resource_name].config,
                    pipeline_run=pipeline_run,
                    # Add tags with information about the resource
                    log_manager=resource_log_manager.with_tags(
                        resource_name=resource_name,
                        resource_fn_name=str(
                            resource_def.resource_fn.__name__),
                    ),
                    resource_instance_dict=resource_instances,
                    required_resource_keys=resource_def.required_resource_keys,
                    instance=instance,
                    pipeline_def_for_backwards_compat=execution_plan.
                    pipeline_def if execution_plan else None,
                )
                manager = single_resource_generation_manager(
                    resource_context, resource_name, resource_def)
                for event in manager.generate_setup_events():
                    if event:
                        yield event
                initialized_resource = check.inst(manager.get_object(),
                                                  InitializedResource)
                resource_instances[
                    resource_name] = initialized_resource.resource
                resource_init_times[
                    resource_name] = initialized_resource.duration
                resource_managers.append(manager)

        if emit_persistent_events and resource_keys_to_init:
            yield DagsterEvent.resource_init_success(execution_plan,
                                                     resource_log_manager,
                                                     resource_instances,
                                                     resource_init_times)
        yield ScopedResourcesBuilder(resource_instances)
    except DagsterUserCodeExecutionError as dagster_user_error:
        # Can only end up in this state if we attempt to initialize a resource, so
        # resource_keys_to_init cannot be empty
        if emit_persistent_events:
            yield DagsterEvent.resource_init_failure(
                execution_plan,
                resource_log_manager,
                resource_keys_to_init,
                serializable_error_info_from_exc_info(
                    dagster_user_error.original_exc_info),
            )
        raise dagster_user_error
Exemplo n.º 25
0
def _dagster_event_sequence_for_step(step_context, retries):
    '''
    Yield a sequence of dagster events for the given step with the step context.

    This function also processes errors. It handles a few error cases:

        (1) User code requests to be retried:
            A RetryRequested has been raised. We will either put the step in to
            up_for_retry state or a failure state depending on the number of previous attempts
            and the max_retries on the received RetryRequested.

        (2) User code fails successfully:
            The user-space code has raised a Failure which may have
            explicit metadata attached.

        (3) User code fails unexpectedly:
            The user-space code has raised an Exception. It has been
            wrapped in an exception derived from DagsterUserCodeException. In that
            case the original user exc_info is stashed on the exception
            as the original_exc_info property.

        (4) User error:
            The framework raised a DagsterError that indicates a usage error
            or some other error not communicated by a user-thrown exception. For example,
            if the user yields an object out of a compute function that is not a
            proper event (not an Output, ExpectationResult, etc).

        (5) Framework failure or interrupt:
            An unexpected error occurred. This is a framework error. Either there
            has been an internal error in the framework OR we have forgotten to put a
            user code error boundary around invoked user-space code. These terminate
            the computation immediately (by re-raising).


    The "raised_dagster_errors" context manager can be used to force these errors to be
    re-raised and surfaced to the user. This is mostly to get sensible errors in test and
    ad-hoc contexts, rather than forcing the user to wade through the
    PipelineExecutionResult API in order to find the step that failed.

    For tools, however, this option should be false, and a sensible error message
    signaled to the user within that tool.
    '''

    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.inst_param(retries, 'retries', Retries)

    try:
        prior_attempt_count = retries.get_attempt_count(step_context.step.key)
        if step_context.step_launcher:
            step_events = step_context.step_launcher.launch_step(
                step_context, prior_attempt_count)
        else:
            step_events = core_dagster_event_sequence_for_step(
                step_context, prior_attempt_count)

        for step_event in check.generator(step_events):
            yield step_event

    # case (1) in top comment
    except RetryRequested as retry_request:
        retry_err_info = serializable_error_info_from_exc_info(sys.exc_info())

        if retries.disabled:
            fail_err = SerializableErrorInfo(
                message='RetryRequested but retries are disabled',
                stack=retry_err_info.stack,
                cls_name=retry_err_info.cls_name,
                cause=retry_err_info.cause,
            )
            yield DagsterEvent.step_failure_event(
                step_context=step_context,
                step_failure_data=StepFailureData(error=fail_err,
                                                  user_failure_data=None),
            )
        else:  # retries.enabled or retries.deferred
            prev_attempts = retries.get_attempt_count(step_context.step.key)
            if prev_attempts >= retry_request.max_retries:
                fail_err = SerializableErrorInfo(
                    message='Exceeded max_retries of {}'.format(
                        retry_request.max_retries),
                    stack=retry_err_info.stack,
                    cls_name=retry_err_info.cls_name,
                    cause=retry_err_info.cause,
                )
                yield DagsterEvent.step_failure_event(
                    step_context=step_context,
                    step_failure_data=StepFailureData(error=fail_err,
                                                      user_failure_data=None),
                )
            else:
                attempt_num = prev_attempts + 1
                yield DagsterEvent.step_retry_event(
                    step_context,
                    StepRetryData(
                        error=retry_err_info,
                        seconds_to_wait=retry_request.seconds_to_wait,
                    ),
                )

    # case (2) in top comment
    except Failure as failure:
        yield _step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            UserFailureData(
                label='intentional-failure',
                description=failure.description,
                metadata_entries=failure.metadata_entries,
            ),
        )
        if step_context.raise_on_error:
            raise failure

    # case (3) in top comment
    except DagsterUserCodeExecutionError as dagster_user_error:
        yield _step_failure_event_from_exc_info(
            step_context,
            dagster_user_error.original_exc_info,
        )

        if step_context.raise_on_error:
            raise dagster_user_error.user_exception

    # case (4) in top comment
    except DagsterError as dagster_error:
        yield _step_failure_event_from_exc_info(step_context, sys.exc_info())

        if step_context.raise_on_error:
            raise dagster_error

    # case (5) in top comment
    except (Exception, KeyboardInterrupt) as unexpected_exception:  # pylint: disable=broad-except
        yield _step_failure_event_from_exc_info(step_context, sys.exc_info())

        raise unexpected_exception
Exemplo n.º 26
0
    def execute(self, pipeline_context, execution_plan):
        check.inst_param(pipeline_context, "pipeline_context",
                         SystemPipelineExecutionContext)
        check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

        limit = self.max_concurrent

        yield DagsterEvent.engine_event(
            pipeline_context,
            "Executing steps using multiprocess executor: parent process (pid: {pid})"
            .format(pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(
                os.getpid(),
                step_keys_to_execute=execution_plan.step_keys_to_execute),
        )

        # It would be good to implement a reference tracking algorithm here so we could
        # garbage collect results that are no longer needed by any steps
        # https://github.com/dagster-io/dagster/issues/811
        with time_execution_scope() as timer_result:
            with execution_plan.start(
                    retry_mode=self.retries) as active_execution:
                active_iters = {}
                errors = {}
                term_events = {}
                stopping = False

                while (not stopping
                       and not active_execution.is_complete) or active_iters:
                    if active_execution.check_for_interrupts():
                        yield DagsterEvent.engine_event(
                            pipeline_context,
                            "Multiprocess executor: received termination signal - "
                            "forwarding to active child processes",
                            EngineEventData.interrupted(
                                list(term_events.keys())),
                        )
                        stopping = True
                        active_execution.mark_interrupted()
                        for key, event in term_events.items():
                            event.set()

                    # start iterators
                    while len(active_iters) < limit and not stopping:
                        steps = active_execution.get_steps_to_execute(
                            limit=(limit - len(active_iters)))

                        if not steps:
                            break

                        for step in steps:
                            step_context = pipeline_context.for_step(step)
                            term_events[step.key] = multiprocessing.Event()
                            active_iters[
                                step.key] = self.execute_step_out_of_process(
                                    step_context,
                                    step,
                                    errors,
                                    term_events,
                                    active_execution.get_known_state(),
                                )

                    # process active iterators
                    empty_iters = []
                    for key, step_iter in active_iters.items():
                        try:
                            event_or_none = next(step_iter)
                            if event_or_none is None:
                                continue
                            else:
                                yield event_or_none
                                active_execution.handle_event(event_or_none)

                        except ChildProcessCrashException as crash:
                            serializable_error = serializable_error_info_from_exc_info(
                                sys.exc_info())
                            yield DagsterEvent.engine_event(
                                pipeline_context,
                                ("Multiprocess executor: child process for step {step_key} "
                                 "unexpectedly exited with code {exit_code}"
                                 ).format(step_key=key,
                                          exit_code=crash.exit_code),
                                EngineEventData.engine_error(
                                    serializable_error),
                                step_handle=active_execution.get_step_by_key(
                                    key).handle,
                            )
                            step_failure_event = DagsterEvent.step_failure_event(
                                step_context=pipeline_context.for_step(
                                    active_execution.get_step_by_key(key)),
                                step_failure_data=StepFailureData(
                                    error=serializable_error,
                                    user_failure_data=None),
                            )
                            active_execution.handle_event(step_failure_event)
                            yield step_failure_event
                            empty_iters.append(key)
                        except StopIteration:
                            empty_iters.append(key)

                    # clear and mark complete finished iterators
                    for key in empty_iters:
                        del active_iters[key]
                        del term_events[key]
                        active_execution.verify_complete(pipeline_context, key)

                    # process skipped and abandoned steps
                    yield from active_execution.plan_events_iterator(
                        pipeline_context)

                errs = {pid: err for pid, err in errors.items() if err}

                # After termination starts, raise an interrupted exception once all subprocesses
                # have finished cleaning up (and the only errors were from being interrupted)
                if (stopping and (not active_iters) and all([
                        err_info.cls_name == "DagsterExecutionInterruptedError"
                        for err_info in errs.values()
                ])):
                    yield DagsterEvent.engine_event(
                        pipeline_context,
                        "Multiprocess executor: interrupted all active child processes",
                        event_specific_data=EngineEventData(),
                    )
                    raise DagsterExecutionInterruptedError()
                elif errs:
                    raise DagsterSubprocessError(
                        "During multiprocess execution errors occurred in child processes:\n{error_list}"
                        .format(error_list="\n".join([
                            "In process {pid}: {err}".format(
                                pid=pid, err=err.to_string())
                            for pid, err in errs.items()
                        ])),
                        subprocess_error_infos=list(errs.values()),
                    )

        yield DagsterEvent.engine_event(
            pipeline_context,
            "Multiprocess executor: parent process exiting after {duration} (pid: {pid})"
            .format(duration=format_duration(timer_result.millis),
                    pid=os.getpid()),
            event_specific_data=EngineEventData.multiprocess(os.getpid()),
        )
def scoped_pipeline_context(
    pipeline_def,
    environment_dict,
    run_config,
    instance,
    system_storage_data=None,
    scoped_resources_builder_cm=create_resource_builder,
    raise_on_error=False,
):
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    check.dict_param(environment_dict, 'environment_dict', key_type=str)
    check.inst_param(run_config, 'run_config', RunConfig)
    check.inst_param(instance, 'instance', DagsterInstance)
    check.opt_inst_param(system_storage_data, 'system_storage_data',
                         SystemStorageData)

    context_creation_data = create_context_creation_data(
        pipeline_def, environment_dict, run_config, instance)

    executor_config = create_executor_config(context_creation_data)

    # After this try block, a Dagster exception thrown will result in a pipeline init failure event.
    pipeline_context = None
    try:
        executor_config.check_requirements(
            instance, context_creation_data.system_storage_def)

        log_manager = create_log_manager(context_creation_data)

        with scoped_resources_builder_cm(
                context_creation_data.pipeline_def,
                context_creation_data.environment_config,
                context_creation_data.run_config,
                log_manager,
        ) as scoped_resources_builder:

            system_storage_data = create_system_storage_data(
                context_creation_data, system_storage_data,
                scoped_resources_builder)

            pipeline_context = construct_pipeline_execution_context(
                context_creation_data=context_creation_data,
                scoped_resources_builder=scoped_resources_builder,
                system_storage_data=system_storage_data,
                log_manager=log_manager,
                executor_config=executor_config,
                raise_on_error=raise_on_error,
            )
            yield pipeline_context

    except DagsterError as dagster_error:
        # only yield an init failure event if we haven't already yielded context
        if pipeline_context is None:
            user_facing_exc_info = (
                # pylint does not know original_exc_info exists is is_user_code_error is true
                # pylint: disable=no-member
                dagster_error.original_exc_info
                if dagster_error.is_user_code_error else sys.exc_info())

            error_info = serializable_error_info_from_exc_info(
                user_facing_exc_info)
            yield DagsterEvent.pipeline_init_failure(
                pipeline_name=pipeline_def.name,
                failure_data=PipelineInitFailureData(error=error_info),
                log_manager=_create_context_free_log_manager(
                    instance, run_config, pipeline_def),
            )

            if raise_on_error:
                raise dagster_error

        # if we've caught an error after context init we're in a problematic state and should just raise
        else:
            raise dagster_error
Exemplo n.º 28
0
def orchestration_context_event_generator(
    pipeline: IPipeline,
    execution_plan: ExecutionPlan,
    run_config: Dict[str, Any],
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    raise_on_error: bool,
    executor_defs: Optional[List[ExecutorDefinition]],
    output_capture: Optional[Dict["StepOutputHandle", Any]],
) -> Generator[Union[DagsterEvent, PlanOrchestrationContext], None, None]:
    check.invariant(executor_defs is None)
    context_creation_data = create_context_creation_data(
        pipeline,
        execution_plan,
        run_config,
        pipeline_run,
        instance,
    )

    log_manager = create_log_manager(context_creation_data)

    try:
        executor = create_executor(context_creation_data)

        execution_context = PlanOrchestrationContext(
            plan_data=create_plan_data(context_creation_data, raise_on_error,
                                       executor.retries),
            log_manager=log_manager,
            executor=executor,
            output_capture=output_capture,
        )

        _validate_plan_with_context(execution_context, execution_plan)

        yield execution_context
    except DagsterError as dagster_error:
        dagster_error = cast(DagsterUserCodeExecutionError, dagster_error)
        user_facing_exc_info = (
            # pylint does not know original_exc_info exists is is_user_code_error is true
            # pylint: disable=no-member
            dagster_error.original_exc_info
            if dagster_error.is_user_code_error else sys.exc_info())
        error_info = serializable_error_info_from_exc_info(
            user_facing_exc_info)

        event = DagsterEvent.pipeline_failure(
            pipeline_context_or_name=pipeline_run.pipeline_name,
            context_msg=
            (f'Pipeline failure during initialization for pipeline "{pipeline_run.pipeline_name}". '
             "This may be due to a failure in initializing the executor or one of the loggers."
             ),
            error_info=error_info,
        )
        log_manager.error(
            event.message,
            dagster_event=event,
            pipeline_name=pipeline_run.pipeline_name,
        )
        yield event

        if raise_on_error:
            raise dagster_error
Exemplo n.º 29
0
Arquivo: api.py Projeto: prezi/dagster
def pipeline_execution_iterator(
        pipeline_context: PlanOrchestrationContext,
        execution_plan: ExecutionPlan) -> Iterator[DagsterEvent]:
    """A complete execution of a pipeline. Yields pipeline start, success,
    and failure events.

    Args:
        pipeline_context (PlanOrchestrationContext):
        execution_plan (ExecutionPlan):
    """

    yield DagsterEvent.pipeline_start(pipeline_context)

    pipeline_exception_info = None
    pipeline_canceled_info = None
    failed_steps = []
    generator_closed = False
    try:
        for event in pipeline_context.executor.execute(pipeline_context,
                                                       execution_plan):
            if event.is_step_failure:
                failed_steps.append(event.step_key)

            yield event
    except GeneratorExit:
        # Shouldn't happen, but avoid runtime-exception in case this generator gets GC-ed
        # (see https://amir.rachum.com/blog/2017/03/03/generator-cleanup/).
        generator_closed = True
        pipeline_exception_info = serializable_error_info_from_exc_info(
            sys.exc_info())
        if pipeline_context.raise_on_error:
            raise
    except (KeyboardInterrupt, DagsterExecutionInterruptedError):
        pipeline_canceled_info = serializable_error_info_from_exc_info(
            sys.exc_info())
        if pipeline_context.raise_on_error:
            raise
    except Exception:  # pylint: disable=broad-except
        pipeline_exception_info = serializable_error_info_from_exc_info(
            sys.exc_info())
        if pipeline_context.raise_on_error:
            raise  # finally block will run before this is re-raised
    finally:
        if pipeline_canceled_info:
            reloaded_run = pipeline_context.instance.get_run_by_id(
                pipeline_context.run_id)
            if reloaded_run and reloaded_run.status == PipelineRunStatus.CANCELING:
                event = DagsterEvent.pipeline_canceled(pipeline_context,
                                                       pipeline_canceled_info)
            else:
                event = DagsterEvent.pipeline_failure(
                    pipeline_context,
                    "Execution was interrupted unexpectedly. "
                    "No user initiated termination request was found, treating as failure.",
                    pipeline_canceled_info,
                )
        elif pipeline_exception_info:
            event = DagsterEvent.pipeline_failure(
                pipeline_context,
                "An exception was thrown during execution.",
                pipeline_exception_info,
            )
        elif failed_steps:
            event = DagsterEvent.pipeline_failure(
                pipeline_context,
                "Steps failed: {}.".format(failed_steps),
            )
        else:
            event = DagsterEvent.pipeline_success(pipeline_context)
        if not generator_closed:
            yield event
Exemplo n.º 30
0
    def check_step_health(
            self,
            step_handler_context: StepHandlerContext) -> List[DagsterEvent]:
        step_key = step_handler_context.execute_step_args.step_keys_to_execute[
            0]

        client = self._get_client()

        container_name = self._get_container_name(
            step_handler_context.execute_step_args.pipeline_run_id,
            step_key,
        )

        try:
            container = client.containers.get(container_name)

        except Exception as e:
            return [
                DagsterEvent(
                    event_type_value=DagsterEventType.STEP_FAILURE.value,
                    pipeline_name=step_handler_context.execute_step_args.
                    pipeline_origin.pipeline_name,
                    step_key=step_key,
                    message=
                    f"Error when checking on step container health: {e}",
                    event_specific_data=StepFailureData(
                        error=None,
                        user_failure_data=None,
                    ),
                )
            ]

        if container.status == "running":
            return []

        try:
            container_info = container.wait(timeout=0.1)
        except Exception as e:
            return [
                DagsterEvent(
                    event_type_value=DagsterEventType.STEP_FAILURE.value,
                    pipeline_name=step_handler_context.execute_step_args.
                    pipeline_origin.pipeline_name,
                    step_key=step_key,
                    message=
                    f"Container status is {container.status}. Hit exception attempting to get its return code: {e}",
                    event_specific_data=StepFailureData(
                        error=None,
                        user_failure_data=None,
                    ),
                )
            ]

        ret_code = container_info.get("StatusCode")
        if ret_code == 0:
            return []

        return [
            DagsterEvent(
                event_type_value=DagsterEventType.STEP_FAILURE.value,
                pipeline_name=step_handler_context.execute_step_args.
                pipeline_origin.pipeline_name,
                step_key=step_key,
                message=
                f"Container status is {container.status}. Return code is {str(ret_code)}.",
                event_specific_data=StepFailureData(
                    error=None,
                    user_failure_data=None,
                ),
            )
        ]