def _execute_steps_core_loop(step_context, inputs, intermediates_manager):
    '''Run one step and yield the DagsterEvents it produces.

    The raw inputs are first evaluated against the step via
    ``_get_evaluated_input``. A step start event is then yielded, followed by
    one event for every value coming out of the step, and finally a step
    success event carrying the duration reported by the timer.

    Args:
        step_context (SystemStepExecutionContext): Context of the step to run.
        inputs (dict): Input values keyed by input name (``str``).
        intermediates_manager (IntermediatesManager): Handed to
            ``_create_step_output_event`` for every ``StepOutputValue``.

    Yields:
        DagsterEvent: start event, per-value events, then the success event.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.dict_param(inputs, 'inputs', key_type=str)
    check.inst_param(intermediates_manager, 'intermediates_manager', IntermediatesManager)

    # do runtime type checks of inputs versus step inputs
    evaluated_inputs = {
        name: _get_evaluated_input(step_context.step, name, value)
        for name, value in inputs.items()
    }

    yield DagsterEvent.step_start_event(step_context)

    with time_execution_scope() as timer_result:
        raw_outputs = check.generator(
            _iterate_step_outputs_within_boundary(step_context, evaluated_inputs)
        )
        checked_outputs = check.generator(_error_check_step_outputs(step_context, raw_outputs))

        # The loop stays inside the timer block so the step's work is timed.
        for produced in checked_outputs:
            if isinstance(produced, StepOutputValue):
                yield _create_step_output_event(step_context, produced, intermediates_manager)
            elif isinstance(produced, Materialization):
                yield DagsterEvent.step_materialization(step_context, produced)
            elif isinstance(produced, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, produced)
            else:
                check.failed(
                    'Unexpected step_output {step_output}, should have been caught earlier'.format(
                        step_output=produced
                    )
                )

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis)
    )
def _core_dagster_event_sequence_for_step(step_context):
    '''Yield the sequence of DagsterEvents for executing a single step.

    Exceptions raised while the step computes are not caught here; they
    propagate to the caller.

    Order of events: step start, one object store operation event for each
    input that arrives as an ``ObjectStoreOperation``, the events from type
    checking each input, the events derived from what the compute function
    yields, and finally step success with the timed duration.

    Args:
        step_context (SystemStepExecutionContext): Context of the step to run.

    Yields:
        DagsterEvent: the events described above, in that order.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)

    yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    stored_values = _input_values_from_intermediates_manager(step_context)
    for name, value in stored_values.items():
        if not isinstance(value, ObjectStoreOperation):
            inputs[name] = value
            continue

        # The value came through the object store: report the operation and
        # unwrap the underlying object.
        yield DagsterEvent.object_store_operation(
            step_context, ObjectStoreOperation.serializable(value, value_name=name)
        )
        inputs[name] = value.obj

    for name, value in inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, name, value)
        )
        for type_check_event in type_check_events:
            yield type_check_event

    with time_execution_scope() as timer_result:
        raw_user_events = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs)
        )
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(step_context, raw_user_events)
        )

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for emitted in checked_user_events:
            if isinstance(emitted, Output):
                for output_event in _create_step_events_for_output(step_context, emitted):
                    yield output_event
            elif isinstance(emitted, Materialization):
                yield DagsterEvent.step_materialization(step_context, emitted)
            elif isinstance(emitted, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, emitted)
            else:
                check.failed(
                    'Unexpected event {event}, should have been caught earlier'.format(
                        event=emitted
                    )
                )

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis)
    )
def _core_dagster_event_sequence_for_step(step_context, inputs, intermediates_manager):
    '''Yield the sequence of DagsterEvents for executing a single step.

    Exceptions raised while the step computes are not caught here; they
    propagate to the caller.

    The supplied inputs are evaluated with ``_get_evaluated_input`` before
    the step start event is yielded. Each event from the compute function is
    then translated into a DagsterEvent, and a step success event with the
    timed duration closes the sequence.

    Args:
        step_context (SystemStepExecutionContext): Context of the step to run.
        inputs (dict): Input values keyed by input name (``str``).
        intermediates_manager (IntermediatesManager): Handed to
            ``_create_step_output_event`` for every ``Result``.

    Yields:
        DagsterEvent: start event, per-event translations, then success.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.dict_param(inputs, 'inputs', key_type=str)
    check.inst_param(intermediates_manager, 'intermediates_manager', IntermediatesManager)

    # do runtime type checks of inputs versus step inputs
    evaluated_inputs = {
        name: _get_evaluated_input(step_context.step, name, value)
        for name, value in inputs.items()
    }

    yield DagsterEvent.step_start_event(step_context)

    with time_execution_scope() as timer_result:
        raw_events = check.generator(
            _event_sequence_for_step_compute_fn(step_context, evaluated_inputs)
        )
        checked_events = check.generator(
            _step_output_error_checked_event_sequence(step_context, raw_events)
        )

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for emitted in checked_events:
            if isinstance(emitted, Result):
                yield _create_step_output_event(step_context, emitted, intermediates_manager)
            elif isinstance(emitted, Materialization):
                yield DagsterEvent.step_materialization(step_context, emitted)
            elif isinstance(emitted, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, emitted)
            else:
                check.failed(
                    'Unexpected event {event}, should have been caught earlier'.format(
                        event=emitted
                    )
                )

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis)
    )
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """Yield the sequence of DagsterEvents for executing a single step.

    Exceptions raised while the step computes are not caught here; they
    propagate to the caller.

    Order of events: a step restarted event when ``prior_attempt_count`` is
    positive (otherwise a step start event), object store operation events
    for inputs loaded through the object store, input type check events, the
    events derived from what the compute function yields, and finally step
    success with the timed duration.

    Args:
        step_context (SystemStepExecutionContext): Context of the step to run.
        prior_attempt_count (int): Number of earlier attempts of this step.

    Yields:
        DagsterEvent: the events described above, in that order.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context, prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    for name, value in _input_values_from_intermediate_storage(step_context):
        if isinstance(value, ObjectStoreOperation):
            # Single stored value: report the operation, keep the object.
            yield DagsterEvent.object_store_operation(
                step_context, ObjectStoreOperation.serializable(value, value_name=name)
            )
            inputs[name] = value.obj
        elif isinstance(value, MultipleStepOutputsListWrapper):
            # Several upstream outputs feed this input: report each
            # operation, then keep the list of underlying objects.
            for store_op in value:
                yield DagsterEvent.object_store_operation(
                    step_context, ObjectStoreOperation.serializable(store_op, value_name=name)
                )
            inputs[name] = [store_op.obj for store_op in value]
        else:
            inputs[name] = value

    for name, value in inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, name, value)
        )
        for type_check_event in type_check_events:
            yield type_check_event

    with time_execution_scope() as timer_result:
        raw_user_events = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs)
        )
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(step_context, raw_user_events)
        )

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for emitted in checked_user_events:
            if isinstance(emitted, Output):
                for output_event in _create_step_events_for_output(step_context, emitted):
                    yield output_event
            elif isinstance(emitted, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(step_context, emitted)
            elif isinstance(emitted, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, emitted)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier".format(
                        event=emitted
                    )
                )

    # We only want to log exactly one step success event or failure event if possible,
    # so wait to handle any interrupts (that normally log a failure event) until the success
    # event has finished
    with delay_interrupts():
        yield DagsterEvent.step_success_event(
            step_context, StepSuccessData(duration_ms=timer_result.millis)
        )
def core_dagster_event_sequence_for_step(
    step_context: SystemStepExecutionContext, prior_attempt_count: int
) -> Iterator[DagsterEvent]:
    """Yield the sequence of DagsterEvents for executing a single step.

    Exceptions raised while the step computes are not caught here; they
    propagate to the caller.

    Order of events: a step restarted event when ``prior_attempt_count`` is
    positive (otherwise a step start event), any DagsterEvents produced while
    loading inputs, input type check events, the events derived from what the
    compute function yields, and finally step success with the timed duration.

    Inputs whose dagster type kind is ``DagsterTypeKind.NOTHING`` are not
    loaded and are not passed to the compute function.

    Args:
        step_context: Context of the step to run.
        prior_attempt_count: Number of earlier attempts of this step.

    Yields:
        The events described above, in that order.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context, prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    for step_input in step_context.step.step_inputs:
        input_def = step_input.source.get_input_def(step_context.pipeline_def)
        dagster_type = input_def.dagster_type

        if dagster_type.kind == DagsterTypeKind.NOTHING:
            continue

        # Loading may yield DagsterEvents as well as the loaded value; events
        # are forwarded and the value is recorded under the input's name.
        loaded = ensure_gen(step_input.source.load_input_object(step_context))
        for item in loaded:
            if isinstance(item, DagsterEvent):
                yield item
            else:
                check.invariant(step_input.name not in inputs)
                inputs[step_input.name] = item

    for name, value in inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, name, value)
        )
        for type_check_event in type_check_events:
            yield type_check_event

    with time_execution_scope() as timer_result:
        raw_user_events = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs)
        )
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(step_context, raw_user_events)
        )

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for emitted in checked_user_events:
            if isinstance(emitted, (Output, DynamicOutput)):
                for output_event in _type_check_and_store_output(step_context, emitted):
                    yield output_event
            elif isinstance(emitted, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(step_context, emitted)
            elif isinstance(emitted, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, emitted)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier".format(
                        event=emitted
                    )
                )

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis)
    )
def core_dagster_event_sequence_for_step(
    step_context: StepExecutionContext,
) -> Iterator[DagsterEvent]:
    """Yield the sequence of DagsterEvents for executing a single step.

    Exceptions raised while the step computes are not caught here; they
    propagate to the caller.

    Order of events: a step restarted event when
    ``step_context.previous_attempt_count`` is positive (otherwise a step
    start event), any DagsterEvents produced while loading inputs, input type
    check events, the events derived from what the compute function yields,
    and finally step success with the timed duration.

    Inputs whose dagster type kind is ``DagsterTypeKind.NOTHING`` are not
    loaded and are not passed to the compute function.

    Args:
        step_context: Context of the step to run.

    Yields:
        The events described above, in that order.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)

    if step_context.previous_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context, step_context.previous_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    for step_input in step_context.step.step_inputs:
        input_def = step_input.source.get_input_def(step_context.pipeline_def)
        dagster_type = input_def.dagster_type

        if dagster_type.kind == DagsterTypeKind.NOTHING:
            continue

        # Loading may yield DagsterEvents as well as the loaded value; events
        # are forwarded and the value is recorded under the input's name.
        loaded = ensure_gen(step_input.source.load_input_object(step_context))
        for item in loaded:
            if isinstance(item, DagsterEvent):
                yield item
            else:
                check.invariant(step_input.name not in inputs)
                inputs[step_input.name] = item

    for name, value in inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, name, value)
        )
        for type_check_event in type_check_events:
            yield type_check_event

    input_lineage = step_context.get_input_lineage()

    # The core execution loop expects a compute generator in a specific format: a generator that
    # takes a context and dictionary of inputs as input, yields output events. If a solid definition
    # was generated from the @solid or @lambda_solid decorator, then compute_fn needs to be coerced
    # into this format. If the solid definition was created directly, then it is expected that the
    # compute_fn is already in this format.
    core_gen = (
        create_solid_compute_wrapper(step_context.solid_def)
        if isinstance(step_context.solid_def.compute_fn, DecoratedSolidFunction)
        else step_context.solid_def.compute_fn
    )

    with time_execution_scope() as timer_result:
        raw_user_events = check.generator(execute_core_compute(step_context, inputs, core_gen))
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(step_context, raw_user_events)
        )

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for emitted in checked_user_events:
            if isinstance(emitted, DagsterEvent):
                # Already a DagsterEvent: forward unchanged.
                yield emitted
            elif isinstance(emitted, (Output, DynamicOutput)):
                for output_event in _type_check_and_store_output(
                    step_context, emitted, input_lineage
                ):
                    yield output_event
            # for now, I'm ignoring AssetMaterializations yielded manually, but we might want
            # to do something with these in the above path eventually
            elif isinstance(emitted, (AssetMaterialization, Materialization)):
                yield DagsterEvent.asset_materialization(step_context, emitted, input_lineage)
            elif isinstance(emitted, AssetObservation):
                yield DagsterEvent.asset_observation(step_context, emitted)
            elif isinstance(emitted, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, emitted)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier".format(
                        event=emitted
                    )
                )

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis)
    )
def core_dagster_event_sequence_for_step(step_context, prior_attempt_count):
    """Yield the sequence of DagsterEvents for executing a single step.

    Exceptions raised while the step computes are not caught here; they
    propagate to the caller.

    Order of events: a step restarted event when ``prior_attempt_count`` is
    positive (otherwise a step start event), object store / asset store
    operation events for inputs loaded through a store, input type check
    events, the events derived from what the compute function yields, and
    finally step success with the timed duration.

    Args:
        step_context (SystemStepExecutionContext): Context of the step to run.
        prior_attempt_count (int): Number of earlier attempts of this step.

    Yields:
        DagsterEvent: the events described above, in that order.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.int_param(prior_attempt_count, "prior_attempt_count")

    if prior_attempt_count > 0:
        yield DagsterEvent.step_restarted_event(step_context, prior_attempt_count)
    else:
        yield DagsterEvent.step_start_event(step_context)

    inputs = {}
    for name, value in _load_input_values(step_context):
        # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
        if isinstance(value, ObjectStoreOperation):
            yield DagsterEvent.object_store_operation(
                step_context, ObjectStoreOperation.serializable(value, value_name=name)
            )
            inputs[name] = value.obj
        elif isinstance(value, FanInStepInputValuesWrapper):
            # Fan-in input: unwrap every member, reporting a store operation
            # event for each member that came through a store.
            unwrapped = []
            for member in value:
                # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
                if isinstance(member, ObjectStoreOperation):
                    yield DagsterEvent.object_store_operation(
                        step_context,
                        ObjectStoreOperation.serializable(member, value_name=name),
                    )
                    unwrapped.append(member.obj)
                elif isinstance(member, AssetStoreOperation):
                    yield DagsterEvent.asset_store_operation(
                        step_context, AssetStoreOperation.serializable(member)
                    )
                    unwrapped.append(member.obj)
                else:
                    # Not a store interaction: the member is the value itself.
                    unwrapped.append(member)
            inputs[name] = unwrapped
        elif isinstance(value, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(
                step_context, AssetStoreOperation.serializable(value)
            )
            inputs[name] = value.obj
        else:
            inputs[name] = value

    for name, value in inputs.items():
        type_check_events = check.generator(
            _type_checked_event_sequence_for_input(step_context, name, value)
        )
        for type_check_event in type_check_events:
            yield type_check_event

    with time_execution_scope() as timer_result:
        raw_user_events = check.generator(
            _user_event_sequence_for_step_compute_fn(step_context, inputs)
        )
        checked_user_events = check.generator(
            _step_output_error_checked_user_event_sequence(step_context, raw_user_events)
        )

        # It is important for this loop to be indented within the
        # timer block above in order for time to be recorded accurately.
        for emitted in checked_user_events:
            if isinstance(emitted, (Output, DynamicOutput)):
                for output_event in _create_step_events_for_output(step_context, emitted):
                    yield output_event
            elif isinstance(emitted, (AssetMaterialization, Materialization)):
                yield DagsterEvent.step_materialization(step_context, emitted)
            elif isinstance(emitted, ExpectationResult):
                yield DagsterEvent.step_expectation_result(step_context, emitted)
            else:
                check.failed(
                    "Unexpected event {event}, should have been caught earlier".format(
                        event=emitted
                    )
                )

    yield DagsterEvent.step_success_event(
        step_context, StepSuccessData(duration_ms=timer_result.millis)
    )