def test_update_tick_to_failure(self, storage):
    """Create a job tick, flip it to FAILURE with an error, and check what storage returns."""
    assert storage

    now = time.time()
    created = storage.create_job_tick(self.build_tick(now))

    failed = created.with_status(
        JobTickStatus.FAILURE,
        error=SerializableErrorInfo(message="Error", stack=[], cls_name="TestError"),
    )
    assert failed.status == JobTickStatus.FAILURE

    storage.update_job_tick(failed)

    stored_ticks = storage.get_job_ticks("my_schedule")
    assert len(stored_ticks) == 1

    stored = stored_ticks[0]
    assert stored.tick_id == 1
    assert stored.job_name == "my_schedule"
    assert stored.timestamp == now
    assert stored.status == JobTickStatus.FAILURE
    assert stored.run_ids == []
    # Compare against a freshly built error so the check is by value.
    assert stored.error == SerializableErrorInfo(
        message="Error", stack=[], cls_name="TestError"
    )
def test_update_tick_to_failure(self, storage):
    """Create a schedule tick, flip it to FAILURE with an error, and check what storage returns.

    The tick is created and updated under the name of a ``RepositoryDefinition`` and
    read back with ``get_schedule_ticks_by_schedule`` for ``"my_schedule"``.
    """
    assert storage

    repository = RepositoryDefinition("repository_name")
    current_time = time.time()
    tick = storage.create_schedule_tick(repository.name, self.build_tick(current_time))

    updated_tick = tick.with_status(
        ScheduleTickStatus.FAILURE,
        error=SerializableErrorInfo(message="Error", stack=[], cls_name="TestError"),
    )
    assert updated_tick.status == ScheduleTickStatus.FAILURE

    storage.update_schedule_tick(repository.name, updated_tick)

    ticks = storage.get_schedule_ticks_by_schedule(repository.name, "my_schedule")
    assert len(ticks) == 1

    tick = ticks[0]
    assert tick.tick_id == 1
    assert tick.schedule_name == "my_schedule"
    assert tick.cron_schedule == "* * * * *"
    assert tick.timestamp == current_time
    assert tick.status == ScheduleTickStatus.FAILURE
    # A failed tick launched no run; None is a singleton, so compare by identity.
    assert tick.run_id is None
    assert tick.error == SerializableErrorInfo(message="Error", stack=[], cls_name="TestError")
def run_loop_error(_, _instance, _workspace):
    """Yield two error infos, then yield ``None`` forever with a 0.5s pause between yields."""
    # Positional arguments are: message, stack, cls_name, cause.
    for message in ("foobar", "bizbuz"):
        yield SerializableErrorInfo(message, None, None, None)

    while True:
        yield
        time.sleep(0.5)
def test_get_schedule_stats(self, storage):
    """Store 2 default, 3 SUCCESS, 4 SKIPPED and 5 FAILURE ticks, then check the tick stats."""
    assert storage

    repository = RepositoryDefinition("repository_name")
    current_time = time.time()
    error = SerializableErrorInfo(message="Error", stack=[], cls_name="TestError")

    def add_tick(*tick_args, **tick_kwargs):
        # Every tick shares the same repository and timestamp.
        storage.create_schedule_tick(
            repository.name,
            self.build_tick(current_time, *tick_args, **tick_kwargs),
        )

    # Create ticks
    for _ in range(2):
        add_tick()

    for run_number in range(3):
        add_tick(ScheduleTickStatus.SUCCESS, run_id=str(run_number))

    for _ in range(4):
        add_tick(ScheduleTickStatus.SKIPPED)

    for _ in range(5):
        add_tick(ScheduleTickStatus.FAILURE, error=error)

    stats = storage.get_schedule_tick_stats_by_schedule(repository.name, "my_schedule")
    assert stats.ticks_started == 2
    assert stats.ticks_succeeded == 3
    assert stats.ticks_skipped == 4
    assert stats.ticks_failed == 5
def test_error_backcompat():
    """A heartbeat serialized with a single ``error`` field deserializes with an ``errors`` list."""
    legacy_payload = '{"__class__": "DaemonHeartbeat", "daemon_id": "foobar", "daemon_type": {"__enum__": "DaemonType.SENSOR"}, "error": {"__class__": "SerializableErrorInfo", "cause": null, "cls_name": null, "message": "fizbuz", "stack": []}, "timestamp": 0.0}'

    restored = deserialize_json_to_dagster_namedtuple(legacy_payload)

    assert restored.daemon_id == "foobar"
    assert restored.daemon_type == "SENSOR"
    assert restored.timestamp == 0.0

    expected_errors = [SerializableErrorInfo("fizbuz", [], None)]
    assert restored.errors == expected_errors
def error_from_data(data):
    """Build a ``SerializableErrorInfo`` from a dict, recursing into a truthy ``"cause"`` entry.

    Reads the ``"message"``, ``"stack"`` and ``"className"`` keys; ``"cause"`` is optional.
    """
    message = data["message"]
    stack = data["stack"]
    cls_name = data["className"]

    # A missing or falsy cause ends the chain.
    if data.get("cause"):
        cause = error_from_data(data["cause"])
    else:
        cause = None

    return SerializableErrorInfo(message=message, stack=stack, cls_name=cls_name, cause=cause)
def error_from_data(data):
    '''Build a ``SerializableErrorInfo`` from a dict, recursing into a truthy ``'cause'`` entry.

    Reads the ``'message'``, ``'stack'`` and ``'className'`` keys; ``'cause'`` is optional.
    '''
    message = data['message']
    stack = data['stack']
    cls_name = data['className']

    # A missing or falsy cause ends the chain.
    if data.get('cause'):
        cause = error_from_data(data['cause'])
    else:
        cause = None

    return SerializableErrorInfo(message=message, stack=stack, cls_name=cls_name, cause=cause)
def test_ticks_filtered(self, storage):
    """Store one sensor tick per status and check that ``statuses`` filters the lookup."""
    for plain_status in (TickStatus.STARTED, TickStatus.SUCCESS, TickStatus.SKIPPED):
        storage.create_tick(self.build_sensor_tick(time.time(), status=plain_status))

    # The failure tick is the only one that carries an error.
    storage.create_tick(
        self.build_sensor_tick(
            time.time(),
            status=TickStatus.FAILURE,
            error=SerializableErrorInfo(message="foobar", stack=[], cls_name=None, cause=None),
        )
    )

    all_ticks = storage.get_ticks("my_sensor", "my_sensor")
    assert len(all_ticks) == 4

    for single_status in (
        TickStatus.STARTED,
        TickStatus.SUCCESS,
        TickStatus.SKIPPED,
        TickStatus.FAILURE,
    ):
        matching = storage.get_ticks("my_sensor", "my_sensor", statuses=[single_status])
        assert len(matching) == 1

    # everything but skips
    wanted = [TickStatus.STARTED, TickStatus.SUCCESS, TickStatus.FAILURE]
    non_skips = storage.get_ticks("my_sensor", "my_sensor", statuses=wanted)
    assert len(non_skips) == 3
def test_multiline_logging_complex():
    '''Log a STEP_FAILURE event through ``DagsterLogManager.info`` and check the captured lines.'''
    msg = 'DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0'

    failure_data = StepFailureData(
        error=SerializableErrorInfo(
            message="FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'\n",
            stack=['a stack message'],
            cls_name='FileNotFoundError',
        ),
        user_failure_data=None,
    )

    failure_event = DagsterEvent(
        event_type_value='STEP_FAILURE',
        pipeline_name='error_monster',
        step_key='start.materialization.output.result.0',
        solid_handle=SolidHandle('start', None),
        step_kind_value='MATERIALIZATION_THUNK',
        logging_tags={
            'pipeline': 'error_monster',
            'step_key': 'start.materialization.output.result.0',
            'solid': 'start',
            'solid_definition': 'emit_num',
        },
        event_specific_data=failure_data,
    )

    # The outer kwargs name pipeline 'example'; the expected output shows 'error_monster'.
    log_kwargs = {
        'pipeline': 'example',
        'pipeline_name': 'example',
        'step_key': 'start.materialization.output.result.0',
        'solid': 'start',
        'solid_definition': 'emit_num',
        'dagster_event': failure_event,
    }

    with _setup_logger(DAGSTER_DEFAULT_LOGGER) as (captured_results, logger):
        log_manager = DagsterLogManager('123', {}, [logger])
        log_manager.info(msg, **log_kwargs)

    expected_results = [
        'error_monster - 123 - STEP_FAILURE - DagsterEventType.STEP_FAILURE for step '
        'start.materialization.output.result.0',
        ' cls_name = "FileNotFoundError"',
        ' error_message = "FileNotFoundError: [Errno 2] No such file or directory:'
        ' \'/path/to/file\'\\n"',
        ' solid = "start"',
        ' solid_definition = "emit_num"',
        ' step_key = "start.materialization.output.result.0"',
        '',
        "FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'",
        '',
        'a stack message',
    ]

    logged_lines = captured_results[0].split('\n')
    assert logged_lines == expected_results
def _start_pipeline_execution_for_created_run(graphene_info, run_id):
    '''Validate the config of an existing run and start executing it.

    Args:
        graphene_info: The ``ResolveInfo`` of the current GraphQL request.
        run_id: Id of the run to look up on the instance and execute.

    Returns:
        One of the following GraphQL objects:
        - ``StartPipelineRunDisabledError`` when the instance's dagit settings mark the
          execution manager as disabled;
        - ``PipelineRunNotFoundError`` when no run with ``run_id`` exists;
        - ``DauphinPipelineConfigValidationInvalid`` when the run's config does not validate
          (the failure is also written to the event log and the run is marked failed);
        - ``StartPipelineRunSuccess`` wrapping the run otherwise.
    '''
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)

    instance = graphene_info.context.instance
    execution_manager_settings = instance.dagit_settings.get('execution_manager')
    if execution_manager_settings and execution_manager_settings.get('disabled'):
        return graphene_info.schema.type_named('StartPipelineRunDisabledError')()

    pipeline_run = instance.get_run_by_id(run_id)
    if not pipeline_run:
        return graphene_info.schema.type_named('PipelineRunNotFoundError')(run_id)

    external_pipeline = get_external_pipeline_subset_or_raise(
        graphene_info, pipeline_run.selector.name, pipeline_run.selector.solid_subset
    )

    validated_config = validate_config_from_snap(
        external_pipeline.config_schema_snapshot,
        external_pipeline.root_config_key_for_mode(pipeline_run.mode),
        pipeline_run.environment_dict,
    )
    if not validated_config.success:
        # If the config is invalid, we construct a DagsterInvalidConfigError exception and
        # insert it into the event log. We also return a PipelineConfigValidationInvalid user
        # facing graphql error.

        # We currently re-use the engine events machinery to add the error to the event log, but
        # may need to create a new event type and instance method to handle these errors.
        invalid_config_exception = DagsterInvalidConfigError(
            'Error in config for pipeline {}'.format(external_pipeline.name),
            validated_config.errors,
            pipeline_run.environment_dict,
        )

        instance.report_engine_event(
            str(invalid_config_exception.message),
            pipeline_run,
            EngineEventData.engine_error(
                SerializableErrorInfo(
                    invalid_config_exception.message,
                    [],
                    # Use the exception class's own name. `Cls.__class__.__name__` would be
                    # the name of its metaclass ("type"), not "DagsterInvalidConfigError".
                    DagsterInvalidConfigError.__name__,
                    None,
                )
            ),
        )

        instance.report_run_failed(pipeline_run)
        return DauphinPipelineConfigValidationInvalid.for_validation_errors(
            external_pipeline.pipeline_index, validated_config.errors
        )

    graphene_info.context.execute_pipeline(external_pipeline, pipeline_run)

    return graphene_info.schema.type_named('StartPipelineRunSuccess')(
        run=graphene_info.schema.type_named('PipelineRun')(pipeline_run)
    )
def test_multiline_logging_complex():
    """Log a STEP_FAILURE event through ``DagsterLogManager.info`` and check the captured lines."""
    msg = "DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0"

    failure_data = StepFailureData(
        error=SerializableErrorInfo(
            message="FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'\n",
            stack=["a stack message"],
            cls_name="FileNotFoundError",
        ),
        user_failure_data=None,
    )

    failure_event = DagsterEvent(
        event_type_value="STEP_FAILURE",
        pipeline_name="error_monster",
        step_key="start.materialization.output.result.0",
        solid_handle=SolidHandle("start", None),
        step_kind_value="MATERIALIZATION_THUNK",
        logging_tags={
            "pipeline": "error_monster",
            "step_key": "start.materialization.output.result.0",
            "solid": "start",
            "solid_definition": "emit_num",
        },
        event_specific_data=failure_data,
    )

    # The outer kwargs name pipeline "example"; the expected output shows "error_monster".
    log_kwargs = {
        "pipeline": "example",
        "pipeline_name": "example",
        "step_key": "start.materialization.output.result.0",
        "solid": "start",
        "solid_definition": "emit_num",
        "dagster_event": failure_event,
    }

    with _setup_logger(DAGSTER_DEFAULT_LOGGER) as (captured_results, logger):
        log_manager = DagsterLogManager("123", {}, [logger])
        log_manager.info(msg, **log_kwargs)

    expected_results = [
        "error_monster - 123 - STEP_FAILURE - DagsterEventType.STEP_FAILURE for step "
        "start.materialization.output.result.0",
        ' cls_name = "FileNotFoundError"',
        ' error_message = "FileNotFoundError: [Errno 2] No such file or directory:'
        " '/path/to/file'\\n\"",
        ' solid = "start"',
        ' solid_definition = "emit_num"',
        ' step_key = "start.materialization.output.result.0"',
        "",
        "FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'",
        "",
        "a stack message",
    ]

    logged_lines = captured_results[0].split("\n")
    assert logged_lines == expected_results
def dagster_event_from_dict(event_dict, pipeline_name):
    '''Convert an event dict (keyed by ``__typename``) into a ``DagsterEvent``.

    Raises:
        Exception: if the ``__typename`` is not one of the handled event types.
    '''
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    materialization = event_dict.get('intermediateMaterialization') or {}

    # Get event_type
    handled = _handled_events()
    typename = event_dict['__typename']
    event_type = handled.get(typename)
    if not event_type:
        raise Exception('unhandled event type %s' % typename)

    # Get event_specific_data
    event_specific_data = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        output_handle = StepOutputHandle(event_dict['step']['key'], event_dict['outputName'])
        event_specific_data = StepOutputData(
            step_output_handle=output_handle,
            value_repr=event_dict['valueRepr'],
            intermediate_materialization=Materialization(
                path=materialization.get('path'),
                description=materialization.get('description'),
            ),
        )

    elif event_type == DagsterEventType.STEP_SUCCESS:
        event_specific_data = StepSuccessData(0.0)

    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        step_materialization = Materialization(
            path=materialization.get('path'),
            description=materialization.get('description'),
        )
        event_specific_data = StepMaterializationData(materialization=step_materialization)

    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        raw_result = event_dict['expectationResult']
        metadata_json = raw_result['resultMetadataJsonString']
        expectation_result = ExpectationResult(
            raw_result['success'],
            raw_result['name'],
            raw_result['message'],
            json.loads(metadata_json) if metadata_json else None,
        )
        event_specific_data = StepExpectationResultData(expectation_result)

    elif event_type == DagsterEventType.STEP_FAILURE:
        # Only the message is taken from the dict; stack and class name are left unset.
        failure_info = SerializableErrorInfo(
            event_dict['error']['message'], stack=None, cls_name=None
        )
        event_specific_data = StepFailureData(failure_info)

    step = event_dict['step']
    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=step['key'],
        solid_handle=SolidHandle(step['solidHandleID'], None, None),
        step_kind_value=step['kind'],
        logging_tags=None,
        event_specific_data=event_specific_data,
    )
def construct_error_info(logger_message):
    '''Return the error info stored in a logger message's meta, or None when absent.

    The ``'error_info'`` meta entry is a JSON string that decodes to a
    ``(message, stack)`` pair.
    '''
    meta = logger_message.meta
    if 'error_info' not in meta:
        return None

    message, stack = json.loads(meta['error_info'])
    return SerializableErrorInfo(message, stack)
def create_failed_post_processing_error(
    context: ContextData, original_value: object, error_data: SerializableErrorInfo
) -> EvaluationError:
    """Build the ``EvaluationError`` reported when post processing of a value fails.

    Args:
        context: Supplies the stack used for the error and for the friendly path message.
        original_value: The value being post processed; interpolated into the message.
        error_data: The captured error; its ``to_string()`` output ends the message.
    """
    check.inst_param(context, "context", ContextData)
    check.inst_param(error_data, "error_data", SerializableErrorInfo)

    message_template = (
        "Post processing {path_msg} of original value {original_value} failed:\n{error}"
    )
    failure_message = message_template.format(
        path_msg=get_friendly_path_msg(context.stack),
        original_value=original_value,
        error=error_data.to_string(),
    )

    return EvaluationError(
        stack=context.stack,
        reason=DagsterEvaluationErrorReason.FAILED_POST_PROCESSING,
        message=failure_message,
        error_data=error_data,
    )
def test_multiline_logging_complex():
    """Log a STEP_FAILURE event via ``log_dagster_event`` and check the captured lines."""
    msg = "DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0"

    failure_data = StepFailureData(
        error=SerializableErrorInfo(
            message="FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'\n",
            stack=["a stack message"],
            cls_name="FileNotFoundError",
        ),
        user_failure_data=None,
    )

    dagster_event = DagsterEvent(
        event_type_value="STEP_FAILURE",
        pipeline_name="error_monster",
        step_key="start.materialization.output.result.0",
        solid_handle=NodeHandle("start", None),
        step_kind_value="MATERIALIZATION_THUNK",
        logging_tags={
            "pipeline": "error_monster",
            "step_key": "start.materialization.output.result.0",
            "solid": "start",
            "solid_definition": "emit_num",
        },
        event_specific_data=failure_data,
    )

    with _setup_logger(DAGSTER_DEFAULT_LOGGER) as (captured_results, logger):
        log_manager = DagsterLogManager.create(
            loggers=[logger],
            pipeline_run=PipelineRun(run_id="123", pipeline_name="error_monster"),
        )
        log_manager.log_dagster_event(logging.INFO, msg, dagster_event)

    expected_results = [
        "error_monster - 123 - STEP_FAILURE - DagsterEventType.STEP_FAILURE for step "
        "start.materialization.output.result.0",
        "",
        "FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'",
        "",
        "Stack Trace:",
        "a stack message",
    ]

    logged_lines = captured_results[0].split("\n")
    assert logged_lines == expected_results
def test_get_daemon_error(self, graphql_context):
    """Store a sensor-daemon heartbeat carrying an error and read it back via GraphQL."""
    if graphql_context.instance.is_ephemeral:
        pytest.skip("The daemon isn't compatible with an in-memory instance")

    failing_heartbeat = DaemonHeartbeat(
        timestamp=100.0,
        daemon_type=DaemonType.SENSOR,
        daemon_id=None,
        error=SerializableErrorInfo(message="foobar", stack=[], cls_name=None, cause=None),
    )
    graphql_context.instance.add_daemon_heartbeat(failing_heartbeat)

    results = execute_dagster_graphql(graphql_context, DAEMON_HEALTH_QUERY)

    sensor_health = results.data["instance"]["daemonHealth"]["sensor"]
    assert sensor_health == {"lastHeartbeatError": {"message": "foobar"}}
def test_update_sensor_tick_to_failure(self, storage):
    """Create a sensor tick, flip it to FAILURE with an error, and check what storage returns."""
    assert storage

    now = time.time()
    created = storage.create_tick(self.build_sensor_tick(now))

    failure_info = SerializableErrorInfo(message="Error", stack=[], cls_name="TestError")
    failed = created.with_status(TickStatus.FAILURE, error=failure_info)
    assert failed.status == TickStatus.FAILURE

    storage.update_tick(failed)

    stored_ticks = storage.get_ticks("my_sensor")
    assert len(stored_ticks) == 1

    stored = stored_ticks[0]
    # The id is only checked to be positive, not pinned to a value.
    assert stored.tick_id > 0
    assert stored.instigator_name == "my_sensor"
    assert stored.timestamp == now
    assert stored.status == TickStatus.FAILURE
    assert stored.run_ids == []
    assert stored.error == failure_info
def _dagster_event_sequence_for_step(step_context, retries):
    """
    Yield a sequence of dagster events for the given step with the step context.

    This function also processes errors. It handles a few error cases:

        (1) User code requests to be retried:
            A RetryRequested has been raised. We will either put the step in to
            up_for_retry state or a failure state depending on the number of previous
            attempts and the max_retries on the received RetryRequested.

        (2) User code fails successfully:
            The user-space code has raised a Failure which may have
            explicit metadata attached.

        (3) User code fails unexpectedly:
            The user-space code has raised an Exception. It has been
            wrapped in an exception derived from DagsterUserCodeExecutionError. In that
            case the original user exc_info is stashed on the exception
            as the original_exc_info property.

        (4) User error:
            The framework raised a DagsterError that indicates a usage error
            or some other error not communicated by a user-thrown exception. For example,
            if the user yields an object out of a compute function that is not a
            proper event (not an Output, ExpectationResult, etc).

        (5) Framework failure or interrupt:
            An unexpected error occurred. This is a framework error. Either there
            has been an internal error in the framework OR we have forgotten to put a
            user code error boundary around invoked user-space code. These terminate
            the computation immediately (by re-raising).

    The "raised_dagster_errors" context manager can be used to force these errors to be
    re-raised and surfaced to the user. This is mostly to get sensible errors in test and
    ad-hoc contexts, rather than forcing the user to wade through the
    PipelineExecutionResult API in order to find the step that failed.

    For tools, however, this option should be false, and a sensible error message
    signaled to the user within that tool.

    Args:
        step_context: The SystemStepExecutionContext of the step to run.
        retries: The Retries object consulted for the attempt count of this step and
            for whether retries are disabled.

    Yields:
        The events produced by the step, followed by a step failure or step retry
        event when one of the error cases above is hit.
    """

    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.inst_param(retries, "retries", Retries)

    try:
        prior_attempt_count = retries.get_attempt_count(step_context.step.key)
        # Delegate to the step launcher when one is configured, otherwise run in-process.
        if step_context.step_launcher:
            step_events = step_context.step_launcher.launch_step(
                step_context, prior_attempt_count)
        else:
            step_events = core_dagster_event_sequence_for_step(
                step_context, prior_attempt_count)

        for step_event in check.generator(step_events):
            yield step_event

    # case (1) in top comment
    except RetryRequested as retry_request:
        retry_err_info = serializable_error_info_from_exc_info(sys.exc_info())

        if retries.disabled:
            # Keep the stack, class name and cause of the retry request, but replace the message.
            fail_err = SerializableErrorInfo(
                message="RetryRequested but retries are disabled",
                stack=retry_err_info.stack,
                cls_name=retry_err_info.cls_name,
                cause=retry_err_info.cause,
            )
            yield DagsterEvent.step_failure_event(
                step_context=step_context,
                step_failure_data=StepFailureData(error=fail_err, user_failure_data=None),
            )
        else:  # retries.enabled or retries.deferred
            prev_attempts = retries.get_attempt_count(step_context.step.key)
            if prev_attempts >= retry_request.max_retries:
                fail_err = SerializableErrorInfo(
                    message="Exceeded max_retries of {}".format(
                        retry_request.max_retries),
                    stack=retry_err_info.stack,
                    cls_name=retry_err_info.cls_name,
                    cause=retry_err_info.cause,
                )
                yield DagsterEvent.step_failure_event(
                    step_context=step_context,
                    step_failure_data=StepFailureData(error=fail_err, user_failure_data=None),
                )
            else:
                yield DagsterEvent.step_retry_event(
                    step_context,
                    StepRetryData(
                        error=retry_err_info,
                        seconds_to_wait=retry_request.seconds_to_wait,
                    ),
                )

    # case (2) in top comment
    except Failure as failure:
        yield _step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            UserFailureData(
                label="intentional-failure",
                description=failure.description,
                metadata_entries=failure.metadata_entries,
            ),
        )
        if step_context.raise_on_error:
            raise failure

    # case (3) in top comment
    except DagsterUserCodeExecutionError as dagster_user_error:
        # Report the user's original exc_info rather than the wrapping framework error.
        yield _step_failure_event_from_exc_info(
            step_context,
            dagster_user_error.original_exc_info,
        )

        if step_context.raise_on_error:
            raise dagster_user_error.user_exception

    # case (4) in top comment
    except DagsterError as dagster_error:
        yield _step_failure_event_from_exc_info(step_context, sys.exc_info())

        if step_context.raise_on_error:
            raise dagster_error

    # case (5) in top comment
    except (Exception, KeyboardInterrupt) as unexpected_exception:  # pylint: disable=broad-except
        # Always re-raised, regardless of raise_on_error.
        yield _step_failure_event_from_exc_info(step_context, sys.exc_info())

        raise unexpected_exception
def test_sensor_ticks_filtered(graphql_context):
    """Create sensor ticks in several statuses and check the GraphQL ``statuses`` filter."""
    repo_location = graphql_context.get_repository_location(main_repo_location_name())
    external_repository = repo_location.get_repository(main_repo_name())

    sensor_name = "always_no_config_sensor"
    external_sensor = external_repository.get_external_sensor(sensor_name)
    sensor_selector = infer_sensor_selector(graphql_context, sensor_name)

    # turn the sensor on
    running_state = InstigatorState(
        external_sensor.get_external_origin(),
        InstigatorType.SENSOR,
        InstigatorStatus.RUNNING,
    )
    graphql_context.instance.add_instigator_state(running_state)

    now = pendulum.now("US/Central")
    with pendulum.test(now):
        _create_tick(graphql_context)  # create a success tick

    # create a started, a skipped and a failed tick; only the failed one carries an error
    for tick_status in (TickStatus.STARTED, TickStatus.SKIPPED, TickStatus.FAILURE):
        extra_fields = {}
        if tick_status == TickStatus.FAILURE:
            extra_fields["error"] = SerializableErrorInfo(
                message="foobar", stack=[], cls_name=None, cause=None
            )
        graphql_context.instance.create_tick(
            TickData(
                instigator_origin_id=external_sensor.get_external_origin().get_id(),
                instigator_name=sensor_name,
                instigator_type=InstigatorType.SENSOR,
                status=tick_status,
                timestamp=now.timestamp(),
                selector_id=external_sensor.selector_id,
                **extra_fields,
            )
        )

    def fetch_ticks(*statuses):
        # Omit the "statuses" variable entirely when no filter is requested.
        variables = {"sensorSelector": sensor_selector}
        if statuses:
            variables["statuses"] = list(statuses)
        result = execute_dagster_graphql(graphql_context, GET_TICKS_QUERY, variables=variables)
        return result.data["sensorOrError"]["sensorState"]["ticks"]

    assert len(fetch_ticks()) == 4

    for status_name in ("STARTED", "FAILURE", "SKIPPED"):
        filtered = fetch_ticks(status_name)
        assert len(filtered) == 1
        assert filtered[0]["status"] == status_name
def run_iteration_error(_, _instance, _workspace):
    """Yield two error infos ("foobar", then "bizbuz") and finish."""
    # Positional arguments are: message, stack, cls_name, cause.
    for message in ("foobar", "bizbuz"):
        yield SerializableErrorInfo(message, None, None, None)
def dagster_event_sequence_for_step(
    step_context: StepExecutionContext, force_local_execution: bool = False
) -> Iterator[DagsterEvent]:
    """
    Yield a sequence of dagster events for the given step with the step context.

    This function also processes errors. It handles a few error cases:

        (1) User code requests to be retried:
            A RetryRequested has been raised. We will either put the step in to
            up_for_retry state or a failure state depending on the number of previous
            attempts and the max_retries on the received RetryRequested.

        (2) User code fails successfully:
            The user-space code has raised a Failure which may have
            explicit metadata attached.

        (3) User code fails unexpectedly:
            The user-space code has raised an Exception. It has been
            wrapped in an exception derived from DagsterUserCodeExecutionError. In that
            case the original user exc_info is stashed on the exception
            as the original_exc_info property.

        (4) Execution interrupted:
            The run was interrupted in the middle of execution (typically by a
            termination request).

        (5) User error:
            The framework raised a DagsterError that indicates a usage error
            or some other error not communicated by a user-thrown exception. For example,
            if the user yields an object out of a compute function that is not a
            proper event (not an Output, ExpectationResult, etc).

        (6) Framework failure:
            An unexpected error occurred. This is a framework error. Either there
            has been an internal error in the framework OR we have forgotten to put a
            user code error boundary around invoked user-space code. These terminate
            the computation immediately (by re-raising).

    The "raised_dagster_errors" context manager can be used to force these errors to be
    re-raised and surfaced to the user. This is mostly to get sensible errors in test and
    ad-hoc contexts, rather than forcing the user to wade through the
    PipelineExecutionResult API in order to find the step that failed.

    For tools, however, this option should be false, and a sensible error message
    signaled to the user within that tool.

    When we launch a step that has a step launcher, we use this function on both the host
    process and the remote process. When we run the step in the remote process, to prevent an
    infinite loop of launching steps that then launch steps, and so on, the remote process will
    run this with the force_local_execution argument set to True.

    Args:
        step_context: Execution context of the step to run.
        force_local_execution: When True, the step launcher on the context is ignored and
            the step is executed through core_dagster_event_sequence_for_step.

    Yields:
        The events produced by the step, followed by a step failure or step retry
        event when one of the error cases above is hit.
    """

    check.inst_param(step_context, "step_context", StepExecutionContext)

    try:
        if step_context.step_launcher and not force_local_execution:
            # info all on step_context - should deprecate second arg
            step_events = step_context.step_launcher.launch_step(
                step_context, step_context.previous_attempt_count
            )
        else:
            step_events = core_dagster_event_sequence_for_step(step_context)

        for step_event in check.generator(step_events):
            yield step_event

    # case (1) in top comment
    except RetryRequested as retry_request:
        retry_err_info = serializable_error_info_from_exc_info(sys.exc_info())

        if step_context.retry_mode.disabled:
            # Keep the stack, class name and cause of the retry request, but replace the message.
            fail_err = SerializableErrorInfo(
                message="RetryRequested but retries are disabled",
                stack=retry_err_info.stack,
                cls_name=retry_err_info.cls_name,
                cause=retry_err_info.cause,
            )
            step_context.capture_step_exception(retry_request)
            yield DagsterEvent.step_failure_event(
                step_context=step_context,
                step_failure_data=StepFailureData(error=fail_err, user_failure_data=None),
            )
        else:  # retries.enabled or retries.deferred
            prev_attempts = step_context.previous_attempt_count
            if prev_attempts >= retry_request.max_retries:
                fail_err = SerializableErrorInfo(
                    message=f"Exceeded max_retries of {retry_request.max_retries}\n",
                    stack=retry_err_info.stack,
                    cls_name=retry_err_info.cls_name,
                    cause=retry_err_info.cause,
                )
                step_context.capture_step_exception(retry_request)
                yield DagsterEvent.step_failure_event(
                    step_context=step_context,
                    step_failure_data=StepFailureData(
                        error=fail_err,
                        user_failure_data=None,
                        # set the flag to omit the outer stack if we have a cause to show
                        error_source=ErrorSource.USER_CODE_ERROR if fail_err.cause else None,
                    ),
                )
            else:
                yield DagsterEvent.step_retry_event(
                    step_context,
                    StepRetryData(
                        error=retry_err_info,
                        seconds_to_wait=retry_request.seconds_to_wait,
                    ),
                )

    # case (2) in top comment
    except Failure as failure:
        step_context.capture_step_exception(failure)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            UserFailureData(
                label="intentional-failure",
                description=failure.description,
                metadata_entries=failure.metadata_entries,
            ),
        )
        if step_context.raise_on_error:
            raise failure

    # case (3) in top comment
    except DagsterUserCodeExecutionError as dagster_user_error:
        # Capture (and, below, re-raise) the user's own exception rather than the wrapper.
        step_context.capture_step_exception(dagster_user_error.user_exception)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.USER_CODE_ERROR,
        )

        if step_context.raise_on_error:
            raise dagster_user_error.user_exception

    # case (4) in top comment
    except (KeyboardInterrupt, DagsterExecutionInterruptedError) as interrupt_error:
        step_context.capture_step_exception(interrupt_error)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.INTERRUPT,
        )
        # Interrupts are always re-raised, regardless of raise_on_error.
        raise interrupt_error

    # case (5) in top comment
    except DagsterError as dagster_error:
        step_context.capture_step_exception(dagster_error)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.FRAMEWORK_ERROR,
        )

        if step_context.raise_on_error:
            raise dagster_error

    # case (6) in top comment
    except BaseException as unexpected_exception:
        step_context.capture_step_exception(unexpected_exception)
        yield step_failure_event_from_exc_info(
            step_context,
            sys.exc_info(),
            error_source=ErrorSource.UNEXPECTED_ERROR,
        )
        # Always re-raised, regardless of raise_on_error.
        raise unexpected_exception
def run_iteration_error(_, _instance, _grpc_server_registry):
    """Yield two error infos ("foobar", then "bizbuz") and finish."""
    # Positional arguments are: message, stack, cls_name, cause.
    for message in ("foobar", "bizbuz"):
        yield SerializableErrorInfo(message, None, None, None)
def dagster_event_from_dict(event_dict, pipeline_name):
    '''Convert an event dict (keyed by ``__typename``) into a ``DagsterEvent``.

    Raises:
        Exception: if the ``__typename`` is not present in ``HANDLED_EVENTS``.
    '''
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    # Get event_type
    typename = event_dict['__typename']
    event_type = HANDLED_EVENTS.get(typename)
    if not event_type:
        raise Exception('unhandled event type %s' % typename)

    # Get event_specific_data
    event_specific_data = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        output_handle = StepOutputHandle(event_dict['step']['key'], event_dict['outputName'])
        type_check = event_dict['typeCheck']
        event_specific_data = StepOutputData(
            step_output_handle=output_handle,
            type_check_data=TypeCheckData(
                success=type_check['success'],
                label=type_check['label'],
                description=event_dict.get('description'),
                metadata_entries=list(
                    event_metadata_entries(event_dict.get('metadataEntries')) or []
                ),
            ),
        )

    elif event_type == DagsterEventType.STEP_INPUT:
        input_name = event_dict['inputName']
        type_check = event_dict['typeCheck']
        event_specific_data = StepInputData(
            input_name=input_name,
            type_check_data=TypeCheckData(
                success=type_check['success'],
                label=type_check['label'],
                description=event_dict.get('description'),
                metadata_entries=list(
                    event_metadata_entries(event_dict.get('metadataEntries')) or []
                ),
            ),
        )

    elif event_type == DagsterEventType.STEP_SUCCESS:
        event_specific_data = StepSuccessData(0.0)

    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        raw_materialization = event_dict['materialization']
        event_specific_data = StepMaterializationData(
            materialization=materialization_from_data(raw_materialization)
        )

    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        event_specific_data = StepExpectationResultData(
            expectation_result_from_data(event_dict['expectationResult'])
        )

    elif event_type == DagsterEventType.STEP_FAILURE:
        # Only the message is taken from the dict; stack and class name are left unset.
        failure_info = SerializableErrorInfo(
            event_dict['error']['message'], stack=None, cls_name=None
        )

        user_failure_data = None
        if event_dict.get('failureMetadata'):
            failure_metadata = event_dict['failureMetadata']
            user_failure_data = UserFailureData(
                label=failure_metadata['label'],
                description=failure_metadata['description'],
                metadata_entries=list(
                    event_metadata_entries(event_dict.get('metadataEntries')) or []
                ),
            )

        event_specific_data = StepFailureData(failure_info, user_failure_data)

    # We should update the GraphQL response so that clients don't need to do this handle parsing.
    # See: https://github.com/dagster-io/dagster/issues/1559
    step = event_dict['step']
    handle = None
    # Build the handle chain outermost-first: each dotted segment is parented by the previous.
    for handle_name in step['solidHandleID'].split('.'):
        handle = SolidHandle(handle_name, definition_name=None, parent=handle)

    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=step['key'],
        solid_handle=handle,
        step_kind_value=step['kind'],
        logging_tags=None,
        event_specific_data=event_specific_data,
    )
def execute_backfill_iteration(instance, logger, debug_crash_flags=None):
    """Process every backfill currently in the ``REQUESTED`` state.

    This is a generator. It yields ``None`` after each checkpoint or
    completed backfill (and once when there is nothing to do), and yields a
    ``SerializableErrorInfo`` when the instance lacks the bulk actions table
    or when a backfill fails with error info attached.

    Args:
        instance: The ``DagsterInstance`` holding the backfills.
        logger: Logger used for progress and error messages.
        debug_crash_flags: Passed through to ``_check_for_debug_crash``
            before and after each chunk submission.
    """
    check.inst_param(instance, "instance", DagsterInstance)

    if not instance.has_bulk_actions_table():
        message = (
            "A schema migration is required before daemon-based backfills can be supported. "
            "Try running `dagster instance migrate` to migrate your instance and try again."
        )
        logger.error(message)
        yield SerializableErrorInfo(
            message=message,
            stack=[],
            cls_name="",
        )
        return

    backfill_jobs = instance.get_backfills(status=BulkActionStatus.REQUESTED)

    if not backfill_jobs:
        # Having nothing to do is the normal idle state, not a failure, so it
        # is reported at INFO rather than ERROR.
        logger.info("No backfill jobs requested.")
        yield
        return

    for backfill_job in backfill_jobs:
        backfill_id = backfill_job.backfill_id

        if not backfill_job.last_submitted_partition_name:
            logger.info(f"Starting backfill for {backfill_id}")
        else:
            logger.info(
                f"Resuming backfill for {backfill_id} from {backfill_job.last_submitted_partition_name}"
            )

        origin = (
            backfill_job.partition_set_origin.external_repository_origin.repository_location_origin
        )

        try:
            with origin.create_handle() as repo_location_handle:
                repo_location = repo_location_handle.create_location()
                has_more = True
                while has_more:
                    chunk, checkpoint, has_more = _get_partitions_chunk(
                        instance, logger, backfill_job, CHECKPOINT_COUNT
                    )
                    _check_for_debug_crash(debug_crash_flags, "BEFORE_SUBMIT")

                    if chunk:
                        submit_backfill_runs(instance, repo_location, backfill_job, chunk)

                    _check_for_debug_crash(debug_crash_flags, "AFTER_SUBMIT")

                    if has_more:
                        # Persist the checkpoint before pausing, then hand
                        # control back to the caller.
                        instance.update_backfill(
                            backfill_job.with_partition_checkpoint(checkpoint)
                        )
                        yield None
                        time.sleep(CHECKPOINT_INTERVAL)
                    else:
                        logger.info(
                            f"Backfill completed for {backfill_id} for {len(backfill_job.partition_names)} partitions"
                        )
                        instance.update_backfill(
                            backfill_job.with_status(BulkActionStatus.COMPLETED)
                        )
                        yield None
        except DagsterBackfillFailedError as e:
            # Record the failure on the backfill itself, then surface the
            # error to the caller when one is available.
            error_info = e.serializable_error_info
            instance.update_backfill(
                backfill_job.with_status(BulkActionStatus.FAILED).with_error(error_info)
            )
            if error_info:
                logger.error(f"Backfill failed for {backfill_id}: {error_info.to_string()}")
                yield error_info
def _launch_pipeline_execution_for_created_run(graphene_info, run_id):
    """Validate and launch a pipeline run that has already been created.

    Args:
        graphene_info: The GraphQL ``ResolveInfo``; its context supplies the
            instance and its schema supplies the result types.
        run_id: Id of the previously created run.

    Returns:
        A ``PipelineRunNotFoundError`` when the run does not exist, a
        ``DauphinPipelineConfigValidationInvalid`` when its config is
        invalid, otherwise a ``LaunchPipelineRunSuccess`` wrapping the run.
    """
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.str_param(run_id, 'run_id')

    # First retrieve the pipeline run
    instance = graphene_info.context.instance
    pipeline_run = instance.get_run_by_id(run_id)
    if not pipeline_run:
        return graphene_info.schema.type_named('PipelineRunNotFoundError')(run_id)

    external_pipeline = get_external_pipeline_or_raise(
        graphene_info,
        pipeline_run.selector.name,
        pipeline_run.selector.solid_subset,
    )

    # Run config validation
    # If there are any config errors, then inject them into the event log
    validated_config = ensure_valid_config(
        external_pipeline, pipeline_run.mode, pipeline_run.environment_dict
    )

    if not validated_config.success:
        # If the config is invalid, we construct a DagsterInvalidConfigError exception and
        # insert it into the event log. We also return a PipelineConfigValidationInvalid user
        # facing graphql error.

        # We currently re-use the engine events machinery to add the error to the event log, but
        # may need to create a new event type and instance method to handle these errors.
        invalid_config_exception = DagsterInvalidConfigError(
            'Error in config for pipeline {}'.format(external_pipeline.name),
            validated_config.errors,
            pipeline_run.environment_dict,
        )

        instance.report_engine_event(
            str(invalid_config_exception.message),
            pipeline_run,
            EngineEventData.engine_error(
                SerializableErrorInfo(
                    invalid_config_exception.message,
                    [],
                    # ``__name__`` on the class itself; ``__class__.__name__``
                    # would give the name of its metaclass instead.
                    DagsterInvalidConfigError.__name__,
                    None,
                )
            ),
        )

        instance.report_run_failed(pipeline_run)

        return DauphinPipelineConfigValidationInvalid.for_validation_errors(
            external_pipeline, validated_config.errors
        )

    try:
        pipeline_run = instance.launch_run(pipeline_run.run_id)
    except DagsterLaunchFailedError:
        # A failed launch is recorded on the run; the success type is still
        # returned below, wrapping the run that was marked as failed.
        error = serializable_error_info_from_exc_info(sys.exc_info())
        instance.report_engine_event(
            error.message,
            pipeline_run,
            EngineEventData.engine_error(error),
        )
        instance.report_run_failed(pipeline_run)

    return graphene_info.schema.type_named('LaunchPipelineRunSuccess')(
        run=graphene_info.schema.type_named('PipelineRun')(pipeline_run)
    )
def get_run_execution_info_for_created_run_or_error(
    graphene_info, repository_location_name, repository_name, run_id
):
    """Load a previously created run and validate its config for execution.

    The previously created run could have been created in a different
    process *or* during the launchScheduledRun call, where we want to keep a
    record of a run that was created but has invalid configuration.

    Args:
        graphene_info: The GraphQL ``ResolveInfo``; its context supplies the
            instance and its schema supplies the error types.
        repository_location_name: Name of the repository location.
        repository_name: Name of the repository.
        run_id: Id of the previously created run.

    Returns:
        A ``PipelineRunNotFoundError`` when the run does not exist, a
        ``DauphinPipelineConfigValidationInvalid`` when its config is
        invalid, otherwise a ``RunExecutionInfo`` holding the external
        pipeline and the run.
    """
    check.inst_param(graphene_info, "graphene_info", ResolveInfo)
    check.str_param(repository_location_name, "repository_location_name")
    check.str_param(repository_name, "repository_name")
    check.str_param(run_id, "run_id")

    instance = graphene_info.context.instance

    pipeline_run = instance.get_run_by_id(run_id)
    if not pipeline_run:
        return graphene_info.schema.type_named("PipelineRunNotFoundError")(run_id)

    external_pipeline = get_external_pipeline_or_raise(
        graphene_info,
        _get_selector_with_workaround(
            graphene_info.context, repository_location_name, repository_name, pipeline_run
        ),
    )

    validated_config = validate_config_from_snap(
        external_pipeline.config_schema_snapshot,
        external_pipeline.root_config_key_for_mode(pipeline_run.mode),
        pipeline_run.run_config,
    )

    if not validated_config.success:
        # If the config is invalid, we construct a DagsterInvalidConfigError exception and
        # insert it into the event log. We also return a PipelineConfigValidationInvalid user
        # facing graphql error.

        # We currently re-use the engine events machinery to add the error to the event log, but
        # may need to create a new event type and instance method to handle these errors.
        invalid_config_exception = DagsterInvalidConfigError(
            "Error in config for pipeline {}".format(external_pipeline.name),
            validated_config.errors,
            pipeline_run.run_config,
        )

        instance.report_engine_event(
            str(invalid_config_exception.message),
            pipeline_run,
            EngineEventData.engine_error(
                SerializableErrorInfo(
                    invalid_config_exception.message,
                    [],
                    # ``__name__`` on the class itself; ``__class__.__name__``
                    # would give the name of its metaclass instead.
                    DagsterInvalidConfigError.__name__,
                    None,
                )
            ),
        )

        instance.report_run_failed(pipeline_run)

        return DauphinPipelineConfigValidationInvalid.for_validation_errors(
            external_pipeline, validated_config.errors
        )

    return RunExecutionInfo(external_pipeline, pipeline_run)
def dagster_event_from_dict(event_dict, pipeline_name):
    """Rebuild a ``DagsterEvent`` from a single GraphQL event payload.

    Args:
        event_dict: Dict with ``str`` keys describing one event. Its
            ``__typename`` entry is looked up in the mapping returned by
            ``_handled_events()`` to pick the event type, and its ``step``
            entry supplies the step key, kind and solid handle id.
        pipeline_name: Pipeline name stored on the resulting event.

    Returns:
        A ``DagsterEvent`` whose ``event_specific_data`` is populated for
        step output, input, success, materialization, expectation-result
        and failure events, and is ``None`` for every other handled type.

    Raises:
        Exception: If ``__typename`` has no truthy entry in the handled
            events mapping.
    """
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    def _metadata_entries():
        # Top-level ``metadataEntries`` of the payload, materialized as a list.
        return list(
            event_metadata_entries(event_dict.get('metadataEntries')) or [])

    def _type_check_data():
        # Shared by the STEP_OUTPUT and STEP_INPUT branches.
        type_check = event_dict['typeCheck']
        return TypeCheckData(
            success=type_check['success'],
            label=type_check['label'],
            description=event_dict.get('description'),
            metadata_entries=_metadata_entries(),
        )

    # Resolve the event type from the GraphQL typename.
    typename = event_dict['__typename']
    event_type = _handled_events().get(typename)
    if not event_type:
        raise Exception('unhandled event type %s' % typename)

    # Build the payload specific to this event type (None when there is none).
    event_specific_data = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        output_handle = StepOutputHandle(event_dict['step']['key'],
                                         event_dict['outputName'])
        output_repr = event_dict['valueRepr']
        event_specific_data = StepOutputData(
            step_output_handle=output_handle,
            value_repr=output_repr,
            type_check_data=_type_check_data(),
        )
    elif event_type == DagsterEventType.STEP_INPUT:
        input_name = event_dict['inputName']
        input_repr = event_dict['valueRepr']
        event_specific_data = StepInputData(
            input_name=input_name,
            value_repr=input_repr,
            type_check_data=_type_check_data(),
        )
    elif event_type == DagsterEventType.STEP_SUCCESS:
        event_specific_data = StepSuccessData(0.0)
    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        event_specific_data = StepMaterializationData(
            materialization=materialization_from_data(
                event_dict['materialization']))
    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        event_specific_data = StepExpectationResultData(
            expectation_result_from_data(event_dict['expectationResult']))
    elif event_type == DagsterEventType.STEP_FAILURE:
        # Only the message is carried over; stack and class name are not.
        error_info = SerializableErrorInfo(event_dict['error']['message'],
                                           stack=None,
                                           cls_name=None)
        failure_metadata = event_dict.get('failureMetadata')
        if failure_metadata:
            user_failure_data = UserFailureData(
                label=failure_metadata['label'],
                description=failure_metadata['description'],
                metadata_entries=_metadata_entries(),
            )
        else:
            user_failure_data = None
        event_specific_data = StepFailureData(error_info, user_failure_data)

    step = event_dict['step']
    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=step['key'],
        # The raw handle id is used as the handle name, with no definition
        # name and no parent.
        solid_handle=SolidHandle(step['solidHandleID'], None, None),
        step_kind_value=step['kind'],
        logging_tags=None,
        event_specific_data=event_specific_data,
    )
def test_multiline_logging_complex():
    """Log a STEP_FAILURE event and check the key/value lines that come out.

    The simple key/value pairs are compared literally, the generated message
    id and timestamp are matched by regex, and the ``dagster_event`` pair is
    parsed as JSON and compared with the expected serialized form.
    """
    msg = 'DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0'

    # Shared by the event that is logged and by the expected serialized form.
    error_message = (
        "FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'\n"
    )
    error_stack = [
        ' File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/errors.py", line 186, in user_code_error_boundary\n yield\n',
        ' File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/execution_plan/simple_engine.py", line 365, in _iterate_step_outputs_within_boundary\n for step_output in gen:\n',
        ' File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/execution_plan/materialization_thunk.py", line 28, in _fn\n runtime_type.output_schema.materialize_runtime_value(config_spec, runtime_value)\n',
        ' File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/types/config_schema.py", line 93, in materialize_runtime_value\n return func(config_value, runtime_value)\n',
        ' File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/types/config_schema.py", line 110, in _selector\n return func(selector_key, selector_value, runtime_value)\n',
        ' File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/types/builtin_config_schemas.py", line 59, in _builtin_output_schema\n with open(json_file_path, \'w\') as ff:\n',
    ]

    kwargs = {
        'pipeline': 'example',
        'pipeline_name': 'example',
        'step_key': 'start.materialization.output.result.0',
        'solid': 'start',
        'solid_definition': 'emit_num',
        'dagster_event': DagsterEvent(
            event_type_value='STEP_FAILURE',
            pipeline_name='error_monster',
            step_key='start.materialization.output.result.0',
            solid_handle=SolidHandle('start', 'emit_num', None),
            step_kind_value='MATERIALIZATION_THUNK',
            logging_tags={
                'pipeline': 'error_monster',
                'step_key': 'start.materialization.output.result.0',
                'solid': 'start',
                'solid_definition': 'emit_num',
            },
            event_specific_data=StepFailureData(
                error=SerializableErrorInfo(
                    message=error_message,
                    stack=error_stack,
                    cls_name='FileNotFoundError',
                )
            ),
        ),
    }

    with _setup_logger(DAGSTER_DEFAULT_LOGGER) as (captured_results, logger):
        dl = DagsterLogManager('123', {}, [logger])
        dl.info(msg, **kwargs)

        # The first line of the record is skipped; the rest are key/value pairs.
        kv_pairs = set(captured_results[0].split('\n')[1:])

    expected_pairs = [
        ' orig_message = "DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0"',
        ' run_id = "123"',
        ' pipeline = "example"',
        ' solid_definition = "emit_num"',
        ' pipeline_name = "example"',
        ' solid = "start"',
        ' step_key = "start.materialization.output.result.0"',
    ]
    for e in expected_pairs:
        assert e in kv_pairs

    assert _regex_match_kv_pair(
        r' log_message_id = "{0}"'.format(REGEX_UUID), kv_pairs)
    assert _regex_match_kv_pair(
        r' log_timestamp = "{0}"'.format(REGEX_TS), kv_pairs)

    expected_dagster_event = {
        'event_specific_data': [[
            error_message,
            error_stack,
            'FileNotFoundError',
        ]],
        'event_type_value': 'STEP_FAILURE',
        'pipeline_name': 'error_monster',
        'solid_handle': ['start', 'emit_num', None],
        'step_key': 'start.materialization.output.result.0',
        'step_kind_value': 'MATERIALIZATION_THUNK',
        'logging_tags': {
            'pipeline': 'error_monster',
            'solid': 'start',
            'solid_definition': 'emit_num',
            'step_key': 'start.materialization.output.result.0',
        },
    }

    dagster_event_pair = [
        pair for pair in kv_pairs if 'dagster_event' in pair
    ][0]
    # Take everything after the first '=' as the JSON value. ``str.strip``
    # with an argument removes a *set of characters* from both ends rather
    # than a prefix, so it only worked because the JSON starts with '{' and
    # ends with '}'. ``json.loads`` tolerates the leading whitespace.
    dagster_event = json.loads(dagster_event_pair.split('=', 1)[1])
    assert dagster_event == expected_dagster_event
def _start_pipeline_execution_for_created_run(graphene_info, run_id):
    """Validate and start executing a pipeline run that already exists.

    Args:
        graphene_info: The GraphQL ``ResolveInfo``; its context supplies the
            instance, the execution manager and the handle, and its schema
            supplies the result types.
        run_id: Id of the previously created run.

    Returns:
        A ``StartPipelineRunDisabledError`` when the execution manager is
        disabled in the dagit settings, a ``PipelineRunNotFoundError`` when
        the run does not exist, a ``DauphinPipelineConfigValidationInvalid``
        when its config is invalid, otherwise a ``StartPipelineRunSuccess``
        wrapping the run.
    """
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)

    instance = graphene_info.context.instance
    execution_manager_settings = instance.dagit_settings.get('execution_manager')
    if execution_manager_settings and execution_manager_settings.get('disabled'):
        return graphene_info.schema.type_named('StartPipelineRunDisabledError')()

    pipeline_run = instance.get_run_by_id(run_id)
    if not pipeline_run:
        return graphene_info.schema.type_named('PipelineRunNotFoundError')(run_id)

    pipeline_def = get_pipeline_def_from_selector(graphene_info, pipeline_run.selector)

    environment_schema = create_environment_schema(pipeline_def, pipeline_run.mode)
    validated_config = validate_config(
        environment_schema.environment_type, pipeline_run.environment_dict
    )
    if not validated_config.success:
        # If the config is invalid, we construct a DagsterInvalidConfigError exception and
        # insert it into the event log. We also return a PipelineConfigValidationInvalid user
        # facing graphql error.

        # We currently re-use the engine events machinery to add the error to the event log, but
        # may need to create a new event type and instance method to handle these errors.
        invalid_config_exception = DagsterInvalidConfigError(
            'Error in config for pipeline {}'.format(pipeline_def.name),
            validated_config.errors,
            pipeline_run.environment_dict,
        )

        instance.report_engine_event(
            str(invalid_config_exception.message),
            pipeline_run,
            EngineEventData.engine_error(
                SerializableErrorInfo(
                    invalid_config_exception.message,
                    [],
                    # ``__name__`` on the class itself; ``__class__.__name__``
                    # would give the name of its metaclass instead.
                    DagsterInvalidConfigError.__name__,
                    None,
                )
            ),
        )

        # TODO: also insert a pipeline init failure event
        # https://github.com/dagster-io/dagster/issues/2385
        return DauphinPipelineConfigValidationInvalid.for_validation_errors(
            pipeline_def, validated_config.errors
        )

    # The plan itself is discarded here.
    # NOTE(review): presumably built only so that plan errors surface before
    # the run is handed to the execution manager - confirm.
    create_execution_plan(
        pipeline_def,
        pipeline_run.environment_dict,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    graphene_info.context.execution_manager.execute_pipeline(
        graphene_info.context.get_handle(),
        pipeline_def,
        pipeline_run,
        instance=instance,
    )

    return graphene_info.schema.type_named('StartPipelineRunSuccess')(
        run=graphene_info.schema.type_named('PipelineRun')(pipeline_run)
    )