def test_event_log_storage_store_with_multiple_runs(event_storage_factory_cm_fn):
    """Events stored under distinct run ids stay isolated per run, and
    wipe() clears every run's log."""
    run_ids = ['foo', 'bar', 'baz']

    def _success_record(run_id):
        # One synthetic STEP_SUCCESS event record for the given run.
        return DagsterEventRecord(
            None,
            'Message2',
            'debug',
            '',
            run_id,
            time.time(),
            dagster_event=DagsterEvent(
                DagsterEventType.STEP_SUCCESS.value,
                'nonce',
                event_specific_data=StepSuccessData(duration_ms=100.0),
            ),
        )

    with event_storage_factory_cm_fn() as storage:
        # Each run log starts empty; write exactly one event per run.
        for run_id in run_ids:
            assert len(storage.get_logs_for_run(run_id)) == 0
            storage.store_event(_success_record(run_id))

        # Every run now reports one log entry and one succeeded step.
        for run_id in run_ids:
            assert len(storage.get_logs_for_run(run_id)) == 1
            assert storage.get_stats_for_run(run_id).steps_succeeded == 1

        # Wiping the storage removes all runs' events.
        storage.wipe()
        for run_id in run_ids:
            assert len(storage.get_logs_for_run(run_id)) == 0
def _execute_steps_core_loop(step_context, inputs, intermediates_manager):
    """Run a single step's compute and yield the resulting DagsterEvents.

    Yields a STEP_START event, evaluates the raw ``inputs`` against the
    step's declared inputs, streams the step's outputs (output values,
    materializations, expectation results) as events, and finally yields
    a STEP_SUCCESS event carrying the timed duration of the compute.
    Exceptions raised by the compute are NOT caught here.
    """
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.dict_param(inputs, 'inputs', key_type=str)
    check.inst_param(intermediates_manager, 'intermediates_manager', IntermediatesManager)
    evaluated_inputs = {}
    # do runtime type checks of inputs versus step inputs
    for input_name, input_value in inputs.items():
        evaluated_inputs[input_name] = _get_evaluated_input(
            step_context.step, input_name, input_value)
    yield DagsterEvent.step_start_event(step_context)
    with time_execution_scope() as timer_result:
        step_output_iterator = check.generator(
            _iterate_step_outputs_within_boundary(step_context, evaluated_inputs))
        for step_output in check.generator(
                _error_check_step_outputs(step_context, step_output_iterator)):
            # Dispatch on the kind of object the step produced.
            if isinstance(step_output, StepOutputValue):
                yield _create_step_output_event(step_context, step_output,
                                                intermediates_manager)
            elif isinstance(step_output, Materialization):
                yield DagsterEvent.step_materialization(step_context, step_output)
            elif isinstance(step_output, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, step_output)
            else:
                check.failed(
                    'Unexpected step_output {step_output}, should have been caught earlier'
                    .format(step_output=step_output))
    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))
def test_event_log_storage_store_with_multiple_runs(self, storage):
    """Events stored under distinct run ids stay isolated per run; wiping
    (when the storage under test supports it) clears every run."""
    run_ids = ["foo", "bar", "baz"]

    def _make_record(run_id):
        # Minimal STEP_SUCCESS event record for the given run.
        return EventRecord(
            None,
            "Message2",
            "debug",
            "",
            run_id,
            time.time(),
            dagster_event=DagsterEvent(
                DagsterEventType.STEP_SUCCESS.value,
                "nonce",
                event_specific_data=StepSuccessData(duration_ms=100.0),
            ),
        )

    # Each run log starts empty; write a single event into each.
    for run_id in run_ids:
        assert len(storage.get_logs_for_run(run_id)) == 0
        storage.store_event(_make_record(run_id))

    # Each run now has exactly one log entry and one succeeded step.
    for run_id in run_ids:
        assert len(storage.get_logs_for_run(run_id)) == 1
        assert storage.get_stats_for_run(run_id).steps_succeeded == 1

    # Only storages that support wiping are checked for post-wipe emptiness.
    if self.can_wipe():
        storage.wipe()
        for run_id in run_ids:
            assert len(storage.get_logs_for_run(run_id)) == 0
def dagster_event_from_dict(event_dict, pipeline_name):
    """Reconstruct a DagsterEvent from a GraphQL event dictionary.

    Maps the GraphQL ``__typename`` to a DagsterEventType via
    ``_handled_events()`` and rebuilds the event-type-specific payload.
    Raises a plain Exception for unhandled typenames.
    """
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')
    materialization = event_dict.get('intermediateMaterialization') or {}

    # Get event_type
    event_type = _handled_events().get(event_dict['__typename'])
    if not event_type:
        raise Exception('unhandled event type %s' % event_dict['__typename'])

    # Get event_specific_data
    event_specific_data = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        event_specific_data = StepOutputData(
            step_output_handle=StepOutputHandle(event_dict['step']['key'],
                                                event_dict['outputName']),
            value_repr=event_dict['valueRepr'],
            intermediate_materialization=Materialization(
                path=materialization.get('path'),
                description=materialization.get('description')),
        )
    elif event_type == DagsterEventType.STEP_SUCCESS:
        # Step duration is not round-tripped through GraphQL; use a placeholder.
        event_specific_data = StepSuccessData(0.0)
    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        event_specific_data = StepMaterializationData(
            materialization=Materialization(path=materialization.get('path'),
                                            description=materialization.get(
                                                'description')))
    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        result_metadata = event_dict['expectationResult'][
            'resultMetadataJsonString']
        expectation_result = ExpectationResult(
            event_dict['expectationResult']['success'],
            event_dict['expectationResult']['name'],
            event_dict['expectationResult']['message'],
            json.loads(result_metadata) if result_metadata else None,
        )
        event_specific_data = StepExpectationResultData(expectation_result)
    elif event_type == DagsterEventType.STEP_FAILURE:
        # Only the message survives the GraphQL round trip; stack/class are lost.
        error_info = SerializableErrorInfo(event_dict['error']['message'],
                                           stack=None, cls_name=None)
        event_specific_data = StepFailureData(error_info)

    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=event_dict['step']['key'],
        solid_handle=SolidHandle(event_dict['step']['solidHandleID'], None, None),
        step_kind_value=event_dict['step']['kind'],
        logging_tags=None,
        event_specific_data=event_specific_data,
    )
def _core_dagster_event_sequence_for_step(step_context):
    '''
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    yield DagsterEvent.step_start_event(step_context)
    inputs = {}
    # Resolve step inputs from the intermediates manager, surfacing any
    # object-store reads as events and unwrapping the stored objects.
    for input_name, input_value in _input_values_from_intermediates_manager(
            step_context).items():
        if isinstance(input_value, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(input_value,
                                                  value_name=input_name))
            inputs[input_name] = input_value.obj
        else:
            inputs[input_name] = input_value
    # Type-check every resolved input, forwarding the resulting events.
    for input_name, input_value in inputs.items():
        for evt in check.generator(
                _type_checked_event_sequence_for_input(step_context, input_name,
                                                       input_value)):
            yield evt
    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
                _step_output_error_checked_user_event_sequence(
                    step_context, user_event_sequence)):
            if isinstance(user_event, Output):
                for evt in _create_step_events_for_output(
                        step_context, user_event):
                    yield evt
            elif isinstance(user_event, Materialization):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    'Unexpected event {event}, should have been caught earlier'
                    .format(event=user_event))
    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))
def test_recover_with_step_in_flight():
    """A crash after STEP_START leaves the step in flight; rebuilding a new
    active execution from the recorded events marks it possibly-in-flight
    and lets it complete via STEP_SUCCESS. The first execution still raises
    because its plan never finished."""
    foo_job = define_foo_job()
    with pytest.raises(
        DagsterInvariantViolationError,
        match="Execution finished without completing the execution plan",
    ):
        with create_execution_plan(foo_job).start(RetryMode.DISABLED) as active_execution:
            steps = active_execution.get_steps_to_execute()
            assert len(steps) == 1
            step_1 = steps[0]
            assert step_1.key == "foo_op"

            # Record only the start event, then let the context close
            # with the plan incomplete (this triggers the raise above).
            active_execution.handle_event(
                DagsterEvent(
                    DagsterEventType.STEP_START.value,
                    pipeline_name=foo_job.name,
                    step_key=step_1.key,
                )
            )

    # CRASH!- we've closed the active execution. Now we recover, spinning up a new one
    with create_execution_plan(foo_job).start(RetryMode.DISABLED) as active_execution:
        possibly_in_flight_steps = active_execution.rebuild_from_events(
            [
                DagsterEvent(
                    DagsterEventType.STEP_START.value,
                    pipeline_name=foo_job.name,
                    step_key=step_1.key,
                )
            ]
        )
        assert possibly_in_flight_steps == [step_1]
        # The recovered step is already in flight, so nothing new is scheduled.
        assert not active_execution.get_steps_to_execute()

        active_execution.handle_event(
            DagsterEvent(
                DagsterEventType.STEP_SUCCESS.value,
                pipeline_name=foo_job.name,
                event_specific_data=StepSuccessData(duration_ms=10.0),
                step_key=step_1.key,
            )
        )
def _core_dagster_event_sequence_for_step(step_context, inputs, intermediates_manager):
    '''
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.dict_param(inputs, 'inputs', key_type=str)
    check.inst_param(intermediates_manager, 'intermediates_manager', IntermediatesManager)
    evaluated_inputs = {}
    # do runtime type checks of inputs versus step inputs
    for input_name, input_value in inputs.items():
        evaluated_inputs[input_name] = _get_evaluated_input(
            step_context.step, input_name, input_value
        )
    yield DagsterEvent.step_start_event(step_context)
    with time_execution_scope() as timer_result:
        event_sequence = check.generator(
            _event_sequence_for_step_compute_fn(step_context, evaluated_inputs)
        )

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for event in check.generator(
            _step_output_error_checked_event_sequence(step_context, event_sequence)
        ):
            # Dispatch each user-produced object to its event constructor.
            if isinstance(event, Result):
                yield _create_step_output_event(step_context, event, intermediates_manager)
            elif isinstance(event, Materialization):
                yield DagsterEvent.step_materialization(step_context, event)
            elif isinstance(event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, event)
            else:
                check.failed(
                    'Unexpected event {event}, should have been caught earlier'.format(event=event)
                )
    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis)
    )
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")
    # A retried step gets a "restarted" event instead of a fresh "start".
    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context, prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    # Resolve inputs from intermediate storage, surfacing store reads as
    # events and unwrapping the stored objects (including fan-in lists).
    for input_name, input_value in _input_values_from_intermediate_storage(
            step_context):
        if isinstance(input_value, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(input_value,
                                                  value_name=input_name))
            inputs[input_name] = input_value.obj
        elif isinstance(input_value, MultipleStepOutputsListWrapper):
            # Fan-in input: one store read (and one event) per upstream output.
            for op in input_value:
                yield DagsterEvent.object_store_operation(
                    step_context,
                    ObjectStoreOperation.serializable(op, value_name=input_name))
            inputs[input_name] = [op.obj for op in input_value]
        else:
            inputs[input_name] = input_value

    # Type-check every resolved input, forwarding the resulting events.
    for input_name, input_value in inputs.items():
        for evt in check.generator(
                _type_checked_event_sequence_for_input(step_context, input_name,
                                                       input_value)):
            yield evt

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
                _step_output_error_checked_user_event_sequence(
                    step_context, user_event_sequence)):
            if isinstance(user_event, Output):
                for evt in _create_step_events_for_output(
                        step_context, user_event):
                    yield evt
            elif isinstance(user_event, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    # We only want to log exactly one step success event or failure event if possible,
    # so wait to handle any interrupts (that normally log a failure event) until the success
    # event has finished
    with delay_interrupts():
        yield DagsterEvent.step_success_event(
            step_context, StepSuccessData(duration_ms=timer_result.millis))
def dagster_event_from_dict(event_dict, pipeline_name):
    """Reconstruct a DagsterEvent from a GraphQL event dictionary.

    Maps the GraphQL ``__typename`` to a DagsterEventType via
    ``HANDLED_EVENTS`` and rebuilds the event-type-specific payload.
    Raises a plain Exception for unhandled typenames.
    """
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    # Get event_type
    event_type = HANDLED_EVENTS.get(event_dict['__typename'])
    if not event_type:
        raise Exception('unhandled event type %s' % event_dict['__typename'])

    # Get event_specific_data
    event_specific_data = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        event_specific_data = StepOutputData(
            step_output_handle=StepOutputHandle(event_dict['step']['key'],
                                                event_dict['outputName']),
            type_check_data=TypeCheckData(
                success=event_dict['typeCheck']['success'],
                label=event_dict['typeCheck']['label'],
                description=event_dict.get('description'),
                metadata_entries=list(
                    event_metadata_entries(event_dict.get('metadataEntries'))
                    or []),
            ),
        )
    elif event_type == DagsterEventType.STEP_INPUT:
        event_specific_data = StepInputData(
            input_name=event_dict['inputName'],
            type_check_data=TypeCheckData(
                success=event_dict['typeCheck']['success'],
                label=event_dict['typeCheck']['label'],
                description=event_dict.get('description'),
                metadata_entries=list(
                    event_metadata_entries(event_dict.get('metadataEntries'))
                    or []),
            ),
        )
    elif event_type == DagsterEventType.STEP_SUCCESS:
        # Step duration is not round-tripped through GraphQL; use a placeholder.
        event_specific_data = StepSuccessData(0.0)
    elif event_type == DagsterEventType.STEP_UP_FOR_RETRY:
        event_specific_data = StepRetryData(
            error=error_from_data(event_dict['retryError']),
            seconds_to_wait=event_dict['secondsToWait'],
        )
    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        materialization = event_dict['materialization']
        event_specific_data = StepMaterializationData(
            materialization=materialization_from_data(materialization))
    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        expectation_result = expectation_result_from_data(
            event_dict['expectationResult'])
        event_specific_data = StepExpectationResultData(expectation_result)
    elif event_type == DagsterEventType.STEP_FAILURE:
        event_specific_data = StepFailureData(
            error_from_data(event_dict['error']),
            UserFailureData(
                label=event_dict['failureMetadata']['label'],
                description=event_dict['failureMetadata']['description'],
                metadata_entries=list(
                    event_metadata_entries(event_dict.get('metadataEntries'))
                    or []),
            ) if event_dict.get('failureMetadata') else None,
        )
    elif event_type == DagsterEventType.ENGINE_EVENT:
        event_specific_data = EngineEventData(
            metadata_entries=list(
                event_metadata_entries(event_dict.get('metadataEntries'))),
            marker_start=event_dict.get('markerStart'),
            marker_end=event_dict.get('markerEnd'),
            error=error_from_data(event_dict['engineError'])
            if event_dict.get('engineError') else None,
        )

    # We should update the GraphQL response so that clients don't need to do this handle parsing.
    # See: https://github.com/dagster-io/dagster/issues/1559
    handle = None
    step_key = None
    step_kind_value = None
    if 'step' in event_dict and event_dict['step']:
        step_key = event_dict['step']['key']
        step_kind_value = event_dict['step']['kind']
        keys = event_dict['step']['solidHandleID'].split('.')
        while keys:
            # Build the handle chain outermost-first from the dotted path.
            handle = SolidHandle(keys.pop(0), parent=handle)

    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=step_key,
        solid_handle=handle,
        step_kind_value=step_kind_value,
        logging_tags=None,
        event_specific_data=event_specific_data,
    )
def _stats_records(run_id):
    """Build a fixed timeline of event records for ``run_id``: one success
    (A), one failure (B), one skip (C), and one step (D) that emits
    materializations and expectation results before succeeding."""
    base = time.time()

    def _mat(asset_key):
        # Shorthand for a materialization payload with the given asset key.
        return StepMaterializationData(AssetMaterialization(asset_key=asset_key))

    def _exp(success, label):
        # Shorthand for an expectation-result payload.
        return StepExpectationResultData(ExpectationResult(success=success, label=label))

    return [
        # Step A: starts, then succeeds 100s later.
        _event_record(run_id, "A", base - 325, DagsterEventType.STEP_START),
        _event_record(
            run_id,
            "A",
            base - 225,
            DagsterEventType.STEP_SUCCESS,
            StepSuccessData(duration_ms=100000.0),
        ),
        # Step B: starts, then fails.
        _event_record(run_id, "B", base - 225, DagsterEventType.STEP_START),
        _event_record(
            run_id,
            "B",
            base - 175,
            DagsterEventType.STEP_FAILURE,
            StepFailureData(error=None, user_failure_data=None),
        ),
        # Step C: starts, then is skipped.
        _event_record(run_id, "C", base - 175, DagsterEventType.STEP_START),
        _event_record(run_id, "C", base - 150, DagsterEventType.STEP_SKIPPED),
        # Step D: interleaves materializations and expectation results,
        # then succeeds.
        _event_record(run_id, "D", base - 150, DagsterEventType.STEP_START),
        _event_record(
            run_id, "D", base - 125, DagsterEventType.ASSET_MATERIALIZATION, _mat("mat_1")
        ),
        _event_record(
            run_id, "D", base - 100, DagsterEventType.STEP_EXPECTATION_RESULT, _exp(True, "exp 1")
        ),
        _event_record(
            run_id, "D", base - 75, DagsterEventType.ASSET_MATERIALIZATION, _mat("mat_2")
        ),
        _event_record(
            run_id, "D", base - 50, DagsterEventType.STEP_EXPECTATION_RESULT, _exp(False, "exp 2")
        ),
        _event_record(
            run_id, "D", base - 25, DagsterEventType.ASSET_MATERIALIZATION, _mat("mat_3")
        ),
        _event_record(
            run_id,
            "D",
            base,
            DagsterEventType.STEP_SUCCESS,
            StepSuccessData(duration_ms=150000.0),
        ),
    ]
def dagster_event_from_dict(event_dict, pipeline_name):
    """Reconstruct a DagsterEvent from a GraphQL event dictionary.

    Maps the GraphQL ``__typename`` to a DagsterEventType via
    ``HANDLED_EVENTS`` and rebuilds the event-type-specific payload.
    Raises a plain Exception for unhandled typenames.
    """
    check.dict_param(event_dict, "event_dict", key_type=str)
    check.str_param(pipeline_name, "pipeline_name")

    # Get event_type
    event_type = HANDLED_EVENTS.get(event_dict["__typename"])
    if not event_type:
        raise Exception("unhandled event type %s" % event_dict["__typename"])

    # Get event_specific_data
    event_specific_data = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        event_specific_data = StepOutputData(
            step_output_handle=StepOutputHandle(event_dict["stepKey"],
                                                event_dict["outputName"]),
            type_check_data=TypeCheckData(
                success=event_dict["typeCheck"]["success"],
                label=event_dict["typeCheck"]["label"],
                description=event_dict.get("description"),
                metadata_entries=list(
                    event_metadata_entries(event_dict.get("metadataEntries"))
                    or []),
            ),
        )
    elif event_type == DagsterEventType.STEP_INPUT:
        event_specific_data = StepInputData(
            input_name=event_dict["inputName"],
            type_check_data=TypeCheckData(
                success=event_dict["typeCheck"]["success"],
                label=event_dict["typeCheck"]["label"],
                description=event_dict.get("description"),
                metadata_entries=list(
                    event_metadata_entries(event_dict.get("metadataEntries"))
                    or []),
            ),
        )
    elif event_type == DagsterEventType.STEP_SUCCESS:
        # Step duration is not round-tripped through GraphQL; use a placeholder.
        event_specific_data = StepSuccessData(0.0)
    elif event_type == DagsterEventType.STEP_UP_FOR_RETRY:
        event_specific_data = StepRetryData(
            error=error_from_data(event_dict["retryError"]),
            seconds_to_wait=event_dict["secondsToWait"],
        )
    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        materialization = event_dict["materialization"]
        event_specific_data = StepMaterializationData(
            materialization=materialization_from_data(materialization))
    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        expectation_result = expectation_result_from_data(
            event_dict["expectationResult"])
        event_specific_data = StepExpectationResultData(expectation_result)
    elif event_type == DagsterEventType.STEP_FAILURE:
        event_specific_data = StepFailureData(
            error_from_data(event_dict["error"]),
            UserFailureData(
                label=event_dict["failureMetadata"]["label"],
                description=event_dict["failureMetadata"]["description"],
                metadata_entries=list(
                    event_metadata_entries(event_dict.get("metadataEntries"))
                    or []),
            ) if event_dict.get("failureMetadata") else None,
        )
    elif event_type == DagsterEventType.ENGINE_EVENT:
        event_specific_data = EngineEventData(
            metadata_entries=list(
                event_metadata_entries(event_dict.get("metadataEntries"))),
            marker_start=event_dict.get("markerStart"),
            marker_end=event_dict.get("markerEnd"),
            error=error_from_data(event_dict["engineError"])
            if event_dict.get("engineError") else None,
        )

    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=event_dict.get("stepKey"),
        solid_handle=SolidHandle.from_string(event_dict["solidHandleID"])
        if event_dict.get("solidHandleID") else None,
        # at the time of writing this:
        # * 'COMPUTE` is the only step kind
        # * this code should get deleted in the near future as we move away from
        #   dagster-graphql CLI as what we invoke in dask/k8s/etc.
        step_kind_value="COMPUTE" if event_dict.get("stepKey") else None,
        logging_tags=None,
        event_specific_data=event_specific_data,
    )
def dagster_event_from_dict(event_dict, pipeline_name):
    """Reconstruct a DagsterEvent from a GraphQL event dictionary.

    Maps the GraphQL ``__typename`` to a DagsterEventType via
    ``_handled_events()`` and rebuilds the event-type-specific payload.
    Raises a plain Exception for unhandled typenames.
    """
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    # Get event_type
    event_type = _handled_events().get(event_dict['__typename'])
    if not event_type:
        raise Exception('unhandled event type %s' % event_dict['__typename'])

    # Get event_specific_data
    event_specific_data = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        event_specific_data = StepOutputData(
            step_output_handle=StepOutputHandle(event_dict['step']['key'],
                                                event_dict['outputName']),
            type_check_data=TypeCheckData(
                success=event_dict['typeCheck']['success'],
                label=event_dict['typeCheck']['label'],
                description=event_dict.get('description'),
                metadata_entries=list(
                    event_metadata_entries(event_dict.get('metadataEntries'))
                    or []),
            ),
        )
    elif event_type == DagsterEventType.STEP_INPUT:
        event_specific_data = StepInputData(
            input_name=event_dict['inputName'],
            type_check_data=TypeCheckData(
                success=event_dict['typeCheck']['success'],
                label=event_dict['typeCheck']['label'],
                description=event_dict.get('description'),
                metadata_entries=list(
                    event_metadata_entries(event_dict.get('metadataEntries'))
                    or []),
            ),
        )
    elif event_type == DagsterEventType.STEP_SUCCESS:
        # Step duration is not round-tripped through GraphQL; use a placeholder.
        event_specific_data = StepSuccessData(0.0)
    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        materialization = event_dict['materialization']
        event_specific_data = StepMaterializationData(
            materialization=materialization_from_data(materialization))
    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        expectation_result = expectation_result_from_data(
            event_dict['expectationResult'])
        event_specific_data = StepExpectationResultData(expectation_result)
    elif event_type == DagsterEventType.STEP_FAILURE:
        # Only the message survives the GraphQL round trip; stack/class are lost.
        error_info = SerializableErrorInfo(event_dict['error']['message'],
                                           stack=None, cls_name=None)
        event_specific_data = StepFailureData(
            error_info,
            UserFailureData(
                label=event_dict['failureMetadata']['label'],
                description=event_dict['failureMetadata']['description'],
                metadata_entries=list(
                    event_metadata_entries(event_dict.get('metadataEntries'))
                    or []),
            ) if event_dict.get('failureMetadata') else None,
        )

    # We should update the GraphQL response so that clients don't need to do this handle parsing.
    # See: https://github.com/dagster-io/dagster/issues/1559
    keys = event_dict['step']['solidHandleID'].split('.')
    handle = None
    while keys:
        # Build the handle chain outermost-first from the dotted path.
        handle = SolidHandle(keys.pop(0), definition_name=None, parent=handle)

    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=event_dict['step']['key'],
        solid_handle=handle,
        step_kind_value=event_dict['step']['kind'],
        logging_tags=None,
        event_specific_data=event_specific_data,
    )
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")
    # A retried step gets a "restarted" event instead of a fresh "start".
    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context, prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    # Resolve step inputs, surfacing store interactions as events and
    # unwrapping the loaded objects (including fan-in lists).
    for input_name, input_value in _load_input_values(step_context):
        # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
        if isinstance(input_value, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context,
                ObjectStoreOperation.serializable(input_value,
                                                  value_name=input_name))
            inputs[input_name] = input_value.obj
        elif isinstance(input_value, FanInStepInputValuesWrapper):
            final_values = []
            for inner_value in input_value:
                # inner value is either a store interaction
                # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
                if isinstance(inner_value, ObjectStoreOperation):
                    yield DagsterEvent.object_store_operation(
                        step_context,
                        ObjectStoreOperation.serializable(
                            inner_value, value_name=input_name),
                    )
                    final_values.append(inner_value.obj)
                elif isinstance(inner_value, AssetStoreOperation):
                    yield DagsterEvent.asset_store_operation(
                        step_context, AssetStoreOperation.serializable(inner_value))
                    final_values.append(inner_value.obj)
                # or the value directly
                else:
                    final_values.append(inner_value)
            inputs[input_name] = final_values
        elif isinstance(input_value, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(
                step_context, AssetStoreOperation.serializable(input_value))
            inputs[input_name] = input_value.obj
        else:
            inputs[input_name] = input_value

    # Type-check every resolved input, forwarding the resulting events.
    for input_name, input_value in inputs.items():
        for evt in check.generator(
                _type_checked_event_sequence_for_input(step_context, input_name,
                                                       input_value)):
            yield evt

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
                _step_output_error_checked_user_event_sequence(
                    step_context, user_event_sequence)):
            if isinstance(user_event, (Output, DynamicOutput)):
                for evt in _create_step_events_for_output(
                        step_context, user_event):
                    yield evt
            elif isinstance(user_event, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))
def dagster_event_from_dict(event_dict, pipeline_name):
    """Reconstruct a DagsterEvent from a GraphQL event dictionary.

    Maps the GraphQL ``__typename`` to a DagsterEventType via
    ``HANDLED_EVENTS`` and rebuilds the event-type-specific payload.
    Raises a plain Exception for unhandled typenames.
    """
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    # Get event_type
    event_type = HANDLED_EVENTS.get(event_dict['__typename'])
    if not event_type:
        raise Exception('unhandled event type %s' % event_dict['__typename'])

    # Get event_specific_data
    event_specific_data = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        event_specific_data = StepOutputData(
            step_output_handle=StepOutputHandle(event_dict['stepKey'],
                                                event_dict['outputName']),
            type_check_data=TypeCheckData(
                success=event_dict['typeCheck']['success'],
                label=event_dict['typeCheck']['label'],
                description=event_dict.get('description'),
                metadata_entries=list(
                    event_metadata_entries(event_dict.get('metadataEntries')) or []
                ),
            ),
        )
    elif event_type == DagsterEventType.STEP_INPUT:
        event_specific_data = StepInputData(
            input_name=event_dict['inputName'],
            type_check_data=TypeCheckData(
                success=event_dict['typeCheck']['success'],
                label=event_dict['typeCheck']['label'],
                description=event_dict.get('description'),
                metadata_entries=list(
                    event_metadata_entries(event_dict.get('metadataEntries')) or []
                ),
            ),
        )
    elif event_type == DagsterEventType.STEP_SUCCESS:
        # Step duration is not round-tripped through GraphQL; use a placeholder.
        event_specific_data = StepSuccessData(0.0)
    elif event_type == DagsterEventType.STEP_UP_FOR_RETRY:
        event_specific_data = StepRetryData(
            error=error_from_data(event_dict['retryError']),
            seconds_to_wait=event_dict['secondsToWait'],
        )
    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        materialization = event_dict['materialization']
        event_specific_data = StepMaterializationData(
            materialization=materialization_from_data(materialization)
        )
    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        expectation_result = expectation_result_from_data(event_dict['expectationResult'])
        event_specific_data = StepExpectationResultData(expectation_result)
    elif event_type == DagsterEventType.STEP_FAILURE:
        event_specific_data = StepFailureData(
            error_from_data(event_dict['error']),
            UserFailureData(
                label=event_dict['failureMetadata']['label'],
                description=event_dict['failureMetadata']['description'],
                metadata_entries=list(
                    event_metadata_entries(event_dict.get('metadataEntries')) or []
                ),
            ) if event_dict.get('failureMetadata') else None,
        )
    elif event_type == DagsterEventType.ENGINE_EVENT:
        event_specific_data = EngineEventData(
            metadata_entries=list(event_metadata_entries(event_dict.get('metadataEntries'))),
            marker_start=event_dict.get('markerStart'),
            marker_end=event_dict.get('markerEnd'),
            error=error_from_data(event_dict['engineError'])
            if event_dict.get('engineError') else None,
        )

    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=event_dict.get('stepKey'),
        solid_handle=SolidHandle.from_string(event_dict['solidHandleID'])
        if event_dict.get('solidHandleID') else None,
        # at the time of writing this:
        # * 'COMPUTE` is the only step kind
        # * this code should get deleted in the near future as we move away from
        #   dagster-graphql CLI as what we invoke in dask/k8s/etc.
        step_kind_value='COMPUTE' if event_dict.get('stepKey') else None,
        logging_tags=None,
        event_specific_data=event_specific_data,
    )
def core_dagster_event_sequence_for_step(
    step_context: StepExecutionContext,
) -> Iterator[DagsterEvent]:
    """
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)

    # A retried step gets a "restarted" event instead of a fresh "start".
    if step_context.previous_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context, step_context.previous_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    for step_input in step_context.step.step_inputs:
        input_def = step_input.source.get_input_def(step_context.pipeline_def)
        dagster_type = input_def.dagster_type
        # Nothing-typed inputs carry no value and are skipped entirely.
        if dagster_type.kind == DagsterTypeKind.NOTHING:
            continue

        # Loading an input may interleave events (e.g. manager/store reads)
        # with the final value; forward the events and keep the value.
        for event_or_input_value in ensure_gen(step_input.source.load_input_object(step_context)):
            if isinstance(event_or_input_value, DagsterEvent):
                yield event_or_input_value
            else:
                check.invariant(step_input.name not in inputs)
                inputs[step_input.name] = event_or_input_value

    # Type-check every resolved input, forwarding the resulting events.
    for input_name, input_value in inputs.items():
        for evt in check.generator(
            _type_checked_event_sequence_for_input(step_context, input_name, input_value)
        ):
            yield evt

    input_lineage = step_context.get_input_lineage()

    # The core execution loop expects a compute generator in a specific format: a generator that
    # takes a context and dictionary of inputs as input, yields output events. If a solid definition
    # was generated from the @solid or @lambda_solid decorator, then compute_fn needs to be coerced
    # into this format. If the solid definition was created directly, then it is expected that the
    # compute_fn is already in this format.
    if isinstance(step_context.solid_def.compute_fn, DecoratedSolidFunction):
        core_gen = create_solid_compute_wrapper(step_context.solid_def)
    else:
        core_gen = step_context.solid_def.compute_fn

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            execute_core_compute(
                step_context,
                inputs,
                core_gen,
            )
        )

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
            _step_output_error_checked_user_event_sequence(step_context, user_event_sequence)
        ):
            if isinstance(user_event, DagsterEvent):
                yield user_event
            elif isinstance(user_event, (Output, DynamicOutput)):
                for evt in _type_check_and_store_output(step_context, user_event, input_lineage):
                    yield evt
            # for now, I'm ignoring AssetMaterializations yielded manually, but we might want
            # to do something with these in the above path eventually
            elif isinstance(user_event, (AssetMaterialization, Materialization)):
                yield DagsterEvent.asset_materialization(step_context, user_event, input_lineage)
            elif isinstance(user_event, AssetObservation):
                yield DagsterEvent.asset_observation(step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, user_event)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier".format(
                        event=user_event
                    )
                )

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis)
    )
def test_recover_in_between_steps():
    """After a crash between steps, rebuilding from the event log resumes at the next step."""
    two_op_job = define_two_op_job()

    def _evt(event_type, **kwargs):
        # Small factory so every event shares the pipeline name.
        return DagsterEvent(event_type.value, pipeline_name=two_op_job.name, **kwargs)

    events = [
        _evt(DagsterEventType.STEP_START, step_key="foo_op"),
        _evt(
            DagsterEventType.STEP_OUTPUT,
            event_specific_data=StepOutputData(
                StepOutputHandle(step_key="foo_op", output_name="result")
            ),
            step_key="foo_op",
        ),
        _evt(
            DagsterEventType.STEP_SUCCESS,
            event_specific_data=StepSuccessData(duration_ms=10.0),
            step_key="foo_op",
        ),
    ]

    # Closing the active execution before the plan is complete must raise.
    with pytest.raises(
        DagsterInvariantViolationError,
        match="Execution finished without completing the execution plan",
    ):
        with create_execution_plan(two_op_job).start(RetryMode.DISABLED) as active_execution:
            to_execute = active_execution.get_steps_to_execute()
            assert len(to_execute) == 1
            assert to_execute[0].key == "foo_op"

            for event in events[:3]:
                active_execution.handle_event(event)

    # CRASH!- we've closed the active execution. Now we recover, spinning up a new one
    with create_execution_plan(two_op_job).start(RetryMode.DISABLED) as active_execution:
        in_flight = active_execution.rebuild_from_events(events)
        assert len(in_flight) == 1
        assert in_flight[0].key == "bar_op"
        assert not active_execution.get_steps_to_execute()

        active_execution.handle_event(
            _evt(DagsterEventType.STEP_START, step_key="bar_op")
        )
        active_execution.handle_event(
            _evt(
                DagsterEventType.STEP_SUCCESS,
                event_specific_data=StepSuccessData(duration_ms=10.0),
                step_key="bar_op",
            )
        )
def _stats_records(run_id):
    """Build a fixed sequence of event records for exercising run-stats aggregation.

    Covers: two successful steps (A, D), one failure (B), one skip (C), plus
    materializations and expectation results on step D.
    """
    now = time.time()
    # (step_key, seconds-before-now, event type, *event_specific_data)
    spec = [
        ('A', 325, DagsterEventType.STEP_START),
        ('A', 225, DagsterEventType.STEP_SUCCESS, StepSuccessData(duration_ms=100000.0)),
        ('B', 225, DagsterEventType.STEP_START),
        (
            'B',
            175,
            DagsterEventType.STEP_FAILURE,
            StepFailureData(error=None, user_failure_data=None),
        ),
        ('C', 175, DagsterEventType.STEP_START),
        ('C', 150, DagsterEventType.STEP_SKIPPED),
        ('D', 150, DagsterEventType.STEP_START),
        (
            'D',
            125,
            DagsterEventType.STEP_MATERIALIZATION,
            StepMaterializationData(Materialization(label='mat 1')),
        ),
        (
            'D',
            100,
            DagsterEventType.STEP_EXPECTATION_RESULT,
            StepExpectationResultData(ExpectationResult(success=True, label='exp 1')),
        ),
        (
            'D',
            75,
            DagsterEventType.STEP_MATERIALIZATION,
            StepMaterializationData(Materialization(label='mat 2')),
        ),
        (
            'D',
            50,
            DagsterEventType.STEP_EXPECTATION_RESULT,
            StepExpectationResultData(ExpectationResult(success=False, label='exp 2')),
        ),
        (
            'D',
            25,
            DagsterEventType.STEP_MATERIALIZATION,
            StepMaterializationData(Materialization(label='mat 3')),
        ),
        ('D', 0, DagsterEventType.STEP_SUCCESS, StepSuccessData(duration_ms=150000.0)),
    ]
    return [
        _event_record(run_id, step_key, now - seconds_ago, event_type, *extra)
        for step_key, seconds_ago, event_type, *extra in spec
    ]
def core_dagster_event_sequence_for_step(
    step_context: SystemStepExecutionContext, prior_attempt_count: int
) -> Iterator[DagsterEvent]:
    """
    Execute the step within the step_context argument given the in-memory
    events. This function yields a sequence of DagsterEvents, but without
    catching any exceptions that have bubbled up during the computation
    of the step.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    # Announce the step: a restart event on retry attempts, otherwise a plain
    # start event. Exactly one of the two is yielded.
    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context, prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    # Loaded input values keyed by input name; events produced during loading
    # are forwarded to the caller as they occur.
    inputs = {}
    for step_input in step_context.step.step_inputs:
        input_def = step_input.source.get_input_def(step_context.pipeline_def)
        dagster_type = input_def.dagster_type

        # Nothing-typed inputs carry no value, so there is nothing to load.
        if dagster_type.kind == DagsterTypeKind.NOTHING:
            continue

        # load_input_object may yield events interleaved with the value itself;
        # forward the events, keep the (single) value.
        for event_or_input_value in ensure_gen(
            step_input.source.load_input_object(step_context)):
            if isinstance(event_or_input_value, DagsterEvent):
                yield event_or_input_value
            else:
                check.invariant(step_input.name not in inputs)
                inputs[step_input.name] = event_or_input_value

    # Run the type checks on every loaded input, forwarding the resulting events.
    for input_name, input_value in inputs.items():
        for evt in check.generator(
            _type_checked_event_sequence_for_input(step_context, input_name, input_value)):
            yield evt

    with time_execution_scope() as timer_result:
        user_event_sequence = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs))

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for user_event in check.generator(
            _step_output_error_checked_user_event_sequence(
                step_context, user_event_sequence)):
            # Outputs are type-checked and persisted; that path yields its own events.
            if isinstance(user_event, (Output, DynamicOutput)):
                for evt in _type_check_and_store_output(
                    step_context, user_event):
                    yield evt
            elif isinstance(user_event, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(
                    step_context, user_event)
            elif isinstance(user_event, ExpectationResult):
                yield DagsterEvent.step_expectation_result(
                    step_context, user_event)
            else:
                # Anything else should have been rejected by the error-checking
                # wrapper above; reaching here is a framework invariant violation.
                check.failed(
                    "Unexpected event {event}, should have been caught earlier"
                    .format(event=user_event))

    # Success event carries the wall-clock duration measured by the timer scope.
    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis))
def dagster_event_from_dict(event_dict, pipeline_name):
    """Reconstruct a DagsterEvent from a GraphQL event payload dict."""
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    # Resolve the event type from the GraphQL typename.
    typename = event_dict['__typename']
    event_type = _handled_events().get(typename)
    if not event_type:
        raise Exception('unhandled event type %s' % typename)

    def _type_check_data():
        # Shared shape between STEP_OUTPUT and STEP_INPUT payloads.
        return TypeCheckData(
            success=event_dict['typeCheck']['success'],
            label=event_dict['typeCheck']['label'],
            description=event_dict.get('description'),
            metadata_entries=list(
                event_metadata_entries(event_dict.get('metadataEntries')) or []
            ),
        )

    # Build the event-type-specific payload, when the type carries one.
    event_specific_data = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        event_specific_data = StepOutputData(
            step_output_handle=StepOutputHandle(
                event_dict['step']['key'], event_dict['outputName']
            ),
            value_repr=event_dict['valueRepr'],
            type_check_data=_type_check_data(),
        )
    elif event_type == DagsterEventType.STEP_INPUT:
        event_specific_data = StepInputData(
            input_name=event_dict['inputName'],
            value_repr=event_dict['valueRepr'],
            type_check_data=_type_check_data(),
        )
    elif event_type == DagsterEventType.STEP_SUCCESS:
        # Duration is not available in the payload; recorded as zero.
        event_specific_data = StepSuccessData(0.0)
    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        event_specific_data = StepMaterializationData(
            materialization=materialization_from_data(event_dict['materialization'])
        )
    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        event_specific_data = StepExpectationResultData(
            expectation_result_from_data(event_dict['expectationResult'])
        )
    elif event_type == DagsterEventType.STEP_FAILURE:
        error_info = SerializableErrorInfo(
            event_dict['error']['message'], stack=None, cls_name=None
        )
        failure_metadata = event_dict.get('failureMetadata')
        event_specific_data = StepFailureData(
            error_info,
            UserFailureData(
                label=failure_metadata['label'],
                description=failure_metadata['description'],
                metadata_entries=list(
                    event_metadata_entries(event_dict.get('metadataEntries')) or []
                ),
            )
            if failure_metadata
            else None,
        )

    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=event_dict['step']['key'],
        solid_handle=SolidHandle(event_dict['step']['solidHandleID'], None, None),
        step_kind_value=event_dict['step']['kind'],
        logging_tags=None,
        event_specific_data=event_specific_data,
    )