def _error_check_step_output_values(step, step_output_values):
    '''
    Pass step output values through while validating their output names.

    Each value is re-yielded after two checks against its ``output_name``:
    the step must report the name via ``step.has_step_output``, and the name
    must not already have been yielded by this generator.

    Args:
        step (ExecutionStep): The step whose outputs are being validated.
        step_output_values (Generator): The values to validate; each must
            expose an ``output_name`` attribute.

    Yields:
        Every incoming value, unchanged and in order.

    Raises:
        DagsterInvariantViolationError: If a value names an output the step
            does not have, or names the same output a second time.
    '''
    check.inst_param(step, 'step', ExecutionStep)
    check.generator_param(step_output_values, 'step_output_values')

    already_emitted = set()

    for value in step_output_values:
        name = value.output_name

        if not step.has_step_output(name):
            # Only gathered on the error path, to list the valid choices.
            available = [definition.name for definition in step.solid.definition.output_defs]
            unknown_message = (
                'Core transform for {step.solid.name} returned an output '
                '{step_output_value.output_name} that does not exist. The available '
                'outputs are {output_names}'
            ).format(step=step, step_output_value=value, output_names=available)
            raise DagsterInvariantViolationError(unknown_message)

        if name in already_emitted:
            duplicate_message = (
                'Core transform for {step.solid.name} returned an output '
                '{step_output_value.output_name} multiple times'
            ).format(step=step, step_output_value=value)
            raise DagsterInvariantViolationError(duplicate_message)

        yield value
        already_emitted.add(name)
def _step_output_error_checked_user_event_sequence(step_context, user_event_sequence):
    '''
    Process the event sequence to check for invariant violations in the event
    sequence related to Output events emitted from the compute_fn.

    This consumes and emits an event sequence.

    Events that are not ``Output`` instances are passed through untouched.
    Once the incoming sequence is exhausted, every non-optional step output
    that was never emitted is either filled in (when its runtime type reports
    ``is_nothing``) or reported as an error.

    Args:
        step_context (SystemStepExecutionContext): Context for the executing
            step; supplies ``step`` and ``log``.
        user_event_sequence (Generator): Events produced by the compute
            function.

    Yields:
        The incoming events in order, followed by an ``Output`` with
        ``value=None`` for each missing non-optional Nothing-typed output.

    Raises:
        DagsterInvariantViolationError: If an ``Output`` names an output the
            step does not have, or the same output name is emitted twice.
        DagsterStepOutputNotFoundError: If a non-optional output that is not
            Nothing-typed was never emitted.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.generator_param(user_event_sequence, 'user_event_sequence')

    step = step_context.step
    output_names = [output_def.name for output_def in step.step_outputs]
    seen_outputs = set()

    for user_event in user_event_sequence:
        if not isinstance(user_event, Output):
            yield user_event
            continue

        # do additional processing on Outputs
        output = user_event
        if not step.has_step_output(output.output_name):
            raise DagsterInvariantViolationError(
                'Core compute for solid "{handle}" returned an output '
                '"{output.output_name}" that does not exist. The available '
                'outputs are {output_names}'.format(
                    handle=str(step.solid_handle), output=output, output_names=output_names
                )
            )

        if output.output_name in seen_outputs:
            raise DagsterInvariantViolationError(
                'Core compute for solid "{handle}" returned an output '
                '"{output.output_name}" multiple times'.format(
                    handle=str(step.solid_handle), output=output
                )
            )

        yield output
        seen_outputs.add(output.output_name)

    for step_output_def in step.step_outputs:
        if step_output_def.name not in seen_outputs and not step_output_def.optional:
            if step_output_def.runtime_type.is_nothing:
                # Pass the handle as a plain string. Wrapping it in braces
                # builds a one-element set, whose repr ({'name'}) would leak
                # into the log line.
                step_context.log.info(
                    'Emitting implicit Nothing for output "{output}" on solid {solid}'.format(
                        output=step_output_def.name, solid=str(step.solid_handle)
                    )
                )
                yield Output(output_name=step_output_def.name, value=None)
            else:
                raise DagsterStepOutputNotFoundError(
                    'Core compute for solid "{handle}" did not return an output '
                    'for non-optional output "{step_output_def.name}"'.format(
                        handle=str(step.solid_handle), step_output_def=step_output_def
                    ),
                    step_key=step.key,
                    output_name=step_output_def.name,
                )
def _error_check_step_outputs(step_context, step_output_iter):
    '''
    Validate the ``StepOutputValue`` items flowing through a step's output
    iterator, passing every item along.

    Items that are not ``StepOutputValue`` instances are yielded untouched.
    Once the iterator is exhausted, every non-optional step output that was
    never emitted is either filled in (when its runtime type reports
    ``is_nothing``) or reported as an error.

    Args:
        step_context (SystemStepExecutionContext): Context for the executing
            step; supplies ``step`` and ``log``.
        step_output_iter (Generator): Items produced while executing the step.

    Yields:
        The incoming items in order, followed by a ``StepOutputValue`` with
        ``value=None`` for each missing non-optional Nothing-typed output.

    Raises:
        DagsterInvariantViolationError: If a ``StepOutputValue`` names an
            output the step does not have, or a name is emitted twice.
        DagsterStepOutputNotFoundError: If a non-optional output that is not
            Nothing-typed was never emitted.
    '''
    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)
    check.generator_param(step_output_iter, 'step_output_iter')

    step = step_context.step
    output_names = [output_def.name for output_def in step.step_outputs]
    seen_outputs = set()

    for step_output in step_output_iter:
        if not isinstance(step_output, StepOutputValue):
            yield step_output
            continue

        # do additional processing on StepOutputValues
        step_output_value = step_output
        if not step.has_step_output(step_output_value.output_name):
            raise DagsterInvariantViolationError(
                'Core compute for solid "{handle}" returned an output '
                '"{step_output_value.output_name}" that does not exist. The available '
                'outputs are {output_names}'.format(
                    handle=str(step.solid_handle),
                    step_output_value=step_output_value,
                    output_names=output_names,
                )
            )

        if step_output_value.output_name in seen_outputs:
            raise DagsterInvariantViolationError(
                'Core compute for solid "{handle}" returned an output '
                '"{step_output_value.output_name}" multiple times'.format(
                    handle=str(step.solid_handle), step_output_value=step_output_value
                )
            )

        yield step_output_value
        seen_outputs.add(step_output_value.output_name)

    for step_output_def in step.step_outputs:
        if step_output_def.name not in seen_outputs and not step_output_def.optional:
            if step_output_def.runtime_type.is_nothing:
                # Pass the handle as a plain string. Wrapping it in braces
                # builds a one-element set, whose repr ({'name'}) would leak
                # into the log line.
                step_context.log.info(
                    'Emitting implicit Nothing for output "{output}" on solid {solid}'.format(
                        output=step_output_def.name, solid=str(step.solid_handle)
                    )
                )
                yield StepOutputValue(output_name=step_output_def.name, value=None)
            else:
                raise DagsterStepOutputNotFoundError(
                    'Core compute for solid "{handle}" did not return an output '
                    'for non-optional output "{step_output_def.name}"'.format(
                        handle=str(step.solid_handle), step_output_def=step_output_def
                    ),
                    step_key=step.key,
                    output_name=step_output_def.name,
                )
def backoff(
    fn,
    retry_on,
    args=None,
    kwargs=None,
    max_retries=BACKOFF_MAX_RETRIES,
    delay_generator=None,
):
    """Straightforward backoff implementation.

    Note that this doesn't implement any jitter on the delays, so probably won't be appropriate for
    very parallel situations.

    Args:
        fn (Callable): The function to wrap in a backoff/retry loop.
        retry_on (Tuple[Exception, ...]): The exception classes on which to retry. Note that we
            don't (yet) have any support for matching the exception messages.
        args (Optional[List[Any]]): Positional args to pass to the callable.
        kwargs (Optional[Dict[str, Any]]): Keyword args to pass to the callable.
        max_retries (Optional[Int]): The maximum number of times to retry a failed fn call. Set to
            0 for no backoff. Default: 4
        delay_generator (Optional[Generator[float, None, None]]): Generates the successive delays
            between retry attempts. Default: None, in which case a new generator is obtained from
            ``backoff_delay_generator()`` for this call.

    Returns:
        Whatever ``fn(*args, **kwargs)`` returns on the first attempt that does not raise one of
        ``retry_on``.

    Raises:
        The most recent exception matching ``retry_on`` once the initial call and ``max_retries``
        retries have all failed. Exceptions not matching ``retry_on`` propagate immediately.
    """
    check.callable_param(fn, "fn")
    retry_on = check.tuple_param(retry_on, "retry_on")
    args = check.opt_list_param(args, "args")
    kwargs = check.opt_dict_param(kwargs, "kwargs", key_type=str)
    check.int_param(max_retries, "max_retries")

    # A generator placed directly in the signature is created once, at function definition time,
    # and then shared (and advanced) by every call that relies on the default. Build it per call
    # so each invocation starts its delay sequence from the beginning.
    if delay_generator is None:
        delay_generator = backoff_delay_generator()
    check.generator_param(delay_generator, "delay_generator")

    retries = 0
    to_raise = None

    # Initial attempt; it does not count against max_retries.
    try:
        return fn(*args, **kwargs)
    except retry_on as exc:
        to_raise = exc

    while retries < max_retries:
        time.sleep(six.next(delay_generator))
        try:
            return fn(*args, **kwargs)
        except retry_on as exc:
            retries += 1
            to_raise = exc

    raise to_raise
def _error_check_step_outputs(step, step_output_iter):
    '''
    Re-yield everything from ``step_output_iter`` while checking the
    ``StepOutputValue`` items against the outputs declared on ``step``.

    Args:
        step (ExecutionStep): The step whose outputs are being validated.
        step_output_iter (Generator): Items produced while executing the step.
            Items that are not ``StepOutputValue`` instances are passed
            through without inspection.

    Yields:
        Every incoming item, unchanged and in order.

    Raises:
        DagsterInvariantViolationError: If a ``StepOutputValue`` names an
            output the step does not have, or a name is emitted twice.
        DagsterStepOutputNotFoundError: After the iterator is exhausted, if a
            non-optional step output was never emitted.
    '''
    check.inst_param(step, 'step', ExecutionStep)
    check.generator_param(step_output_iter, 'step_output_iter')

    declared_names = [declared.name for declared in step.step_outputs]
    emitted_names = set()

    for item in step_output_iter:
        if isinstance(item, StepOutputValue):
            # Only StepOutputValues are subject to the name checks.
            name = item.output_name

            if not step.has_step_output(name):
                unknown_message = (
                    'Core transform for solid "{step.solid.name}" returned an output '
                    '"{step_output_value.output_name}" that does not exist. The available '
                    'outputs are {output_names}'
                ).format(step=step, step_output_value=item, output_names=declared_names)
                raise DagsterInvariantViolationError(unknown_message)

            if name in emitted_names:
                duplicate_message = (
                    'Core transform for solid "{step.solid.name}" returned an output '
                    '"{step_output_value.output_name}" multiple times'
                ).format(step=step, step_output_value=item)
                raise DagsterInvariantViolationError(duplicate_message)

            yield item
            emitted_names.add(name)
        else:
            yield item

    # Every required output must have shown up by the time the iterator ends.
    for declared in step.step_outputs:
        if declared.name in emitted_names or declared.optional:
            continue

        raise DagsterStepOutputNotFoundError(
            'Core transform for solid "{step.solid.name}" did not return an output '
            'for non-optional output "{step_output_def.name}"'.format(
                step=step, step_output_def=declared
            ),
            step_key=step.key,
            output_name=declared.name,
        )
def test_generator_param():
    """check.generator / check.generator_param accept generator objects only.

    A generator object passes both checks whether or not it has been
    consumed; a list, ``None`` and the un-called generator function are each
    rejected by ``generator_param`` with ``ParameterCheckError``.
    """

    def _single_item():
        yield 1

    assert check.generator_param(_single_item(), "gen")

    stream = _single_item()

    # The check holds before and after the generator is drained.
    assert check.generator(stream)
    assert list(stream) == [1]
    assert check.generator(stream)
    assert list(stream) == []

    # `stream` is exhausted here, so list(stream) is just an empty list.
    rejected_values = (list(stream), None, _single_item)
    for rejected in rejected_values:
        with pytest.raises(ParameterCheckError):
            assert check.generator_param(rejected, "gen")
def _step_output_error_checked_user_event_sequence(
    step_context: SystemStepExecutionContext, user_event_sequence: Iterator[SolidOutputUnion]
) -> Iterator[SolidOutputUnion]:
    """
    Process the event sequence to check for invariant violations in the event
    sequence related to Output events emitted from the compute_fn.

    This consumes and emits an event sequence.

    Events that are neither ``Output`` nor ``DynamicOutput`` are passed
    through untouched. Once the incoming sequence is exhausted, every
    non-optional step output that was never emitted is either filled in (when
    its dagster type kind is ``DagsterTypeKind.NOTHING``) or reported as an
    error.

    Args:
        step_context: Context for the executing step; supplies ``step``,
            ``pipeline_def``, ``solid_def`` and ``log``.
        user_event_sequence: Events produced by the compute function.

    Yields:
        The incoming events in order, followed by an ``Output`` with
        ``value=None`` for each missing non-optional Nothing-typed output.

    Raises:
        DagsterInvariantViolationError: If an event names an output the step
            does not have; an ``Output`` is emitted twice for the same name;
            an ``Output`` targets an output definition flagged ``is_dynamic``;
            a ``DynamicOutput`` targets one that is not; or a
            ``DynamicOutput`` repeats a mapping key for the same output.
        DagsterStepOutputNotFoundError: If a non-optional output that is not
            Nothing-typed was never emitted.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.generator_param(user_event_sequence, "user_event_sequence")

    step = step_context.step
    output_names = [output_def.name for output_def in step.step_outputs]
    seen_outputs: Set[str] = set()
    # Mapping keys already emitted, tracked separately per output name.
    seen_mapping_keys: Dict[str, Set[str]] = defaultdict(set)

    for user_event in user_event_sequence:
        if not isinstance(user_event, (Output, DynamicOutput)):
            yield user_event
            continue

        # do additional processing on Outputs
        output = user_event
        if not step.has_step_output(output.output_name):
            raise DagsterInvariantViolationError(
                'Core compute for solid "{handle}" returned an output '
                '"{output.output_name}" that does not exist. The available '
                "outputs are {output_names}".format(
                    handle=str(step.solid_handle), output=output, output_names=output_names
                )
            )

        step_output = step.step_output_named(output.output_name)
        output_def = step_context.pipeline_def.get_solid(step_output.solid_handle).output_def_named(
            step_output.name
        )

        if isinstance(output, Output):
            if output.output_name in seen_outputs:
                raise DagsterInvariantViolationError(
                    'Compute for solid "{handle}" returned an output '
                    '"{output.output_name}" multiple times'.format(
                        handle=str(step.solid_handle), output=output
                    )
                )

            if output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f'Compute for solid "{step.solid_handle}" for output "{output.output_name}" '
                    "defined as dynamic must yield DynamicOutput, got Output."
                )
        else:
            if not output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f'Compute for solid "{step.solid_handle}" yielded a DynamicOutput, '
                    "but did not use DynamicOutputDefinition."
                )
            if output.mapping_key in seen_mapping_keys[output.output_name]:
                raise DagsterInvariantViolationError(
                    f'Compute for solid "{step.solid_handle}" yielded a DynamicOutput with '
                    f'mapping_key "{output.mapping_key}" multiple times.'
                )
            seen_mapping_keys[output.output_name].add(output.mapping_key)

        yield output
        seen_outputs.add(output.output_name)

    for step_output in step.step_outputs:
        step_output_def = step_context.solid_def.output_def_named(step_output.name)
        if step_output_def.name not in seen_outputs and not step_output_def.optional:
            if step_output_def.dagster_type.kind == DagsterTypeKind.NOTHING:
                # Pass the handle as a plain string. Wrapping it in braces
                # builds a one-element set, whose repr ({'name'}) would leak
                # into the log line.
                step_context.log.info(
                    'Emitting implicit Nothing for output "{output}" on solid {solid}'.format(
                        output=step_output_def.name, solid=str(step.solid_handle)
                    )
                )
                yield Output(output_name=step_output_def.name, value=None)
            else:
                raise DagsterStepOutputNotFoundError(
                    'Core compute for solid "{handle}" did not return an output '
                    'for non-optional output "{step_output_def.name}"'.format(
                        handle=str(step.solid_handle), step_output_def=step_output_def
                    ),
                    step_key=step.key,
                    output_name=step_output_def.name,
                )
def _step_output_error_checked_user_event_sequence(
    step_context: StepExecutionContext, user_event_sequence: Iterator[SolidOutputUnion]
) -> Iterator[SolidOutputUnion]:
    """
    Validate the Output / DynamicOutput events coming out of the compute
    function and re-emit the event stream.

    Events of any other type are forwarded untouched. Each ``Output`` or
    ``DynamicOutput`` is checked against the step's declared outputs, reported
    to the context through ``step_context.observe_output``, and re-created
    with the entries derived from ``step_context.get_output_metadata``
    appended to its own ``metadata_entries``.

    After the incoming sequence is exhausted, each declared output that the
    context has not seen and that is not optional is handled as follows: a
    Nothing-typed output gets an implicit ``Output(value=None)``; any other
    output that is not dynamic raises.

    Raises:
        DagsterInvariantViolationError: For an unknown output name, a repeated
            ``Output``, a repeated ``DynamicOutput`` mapping key, or an event
            type that does not match the output definition's ``is_dynamic``.
        DagsterStepOutputNotFoundError: For a missing required output that is
            neither Nothing-typed nor dynamic.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.generator_param(user_event_sequence, "user_event_sequence")

    def _with_logged_metadata(own_entries, logged_metadata):
        # Own entries first, then whatever the context had stored for the output.
        return own_entries + normalize_metadata(cast(Dict[str, Any], logged_metadata), [])

    step = step_context.step
    op_label = step_context.describe_op()
    output_names = [declared.name for declared in step.step_outputs]

    for event in user_event_sequence:
        if not isinstance(event, (Output, DynamicOutput)):
            yield event
            continue

        # Everything below applies to Output / DynamicOutput events only.
        output = event
        if not step.has_step_output(cast(str, output.output_name)):
            raise DagsterInvariantViolationError(
                f'Core compute for {op_label} returned an output "{output.output_name}" that does '
                f"not exist. The available outputs are {output_names}"
            )

        declared_output = step.step_output_named(cast(str, output.output_name))
        owning_solid = step_context.pipeline_def.get_solid(declared_output.solid_handle)
        output_definition = owning_solid.output_def_named(declared_output.name)

        if isinstance(output, Output):
            if step_context.has_seen_output(output.output_name):
                raise DagsterInvariantViolationError(
                    f'Compute for {op_label} returned an output "{output.output_name}" multiple '
                    "times"
                )

            if output_definition.is_dynamic:
                raise DagsterInvariantViolationError(
                    f'Compute for {op_label} for output "{output.output_name}" defined as dynamic '
                    "must yield DynamicOutput, got Output."
                )

            step_context.observe_output(output.output_name)

            logged = step_context.get_output_metadata(output.output_name)
            output = Output(
                value=output.value,
                output_name=output.output_name,
                metadata_entries=_with_logged_metadata(output.metadata_entries, logged),
            )
        else:
            if not output_definition.is_dynamic:
                raise DagsterInvariantViolationError(
                    f"Compute for {op_label} yielded a DynamicOutput, but did not use "
                    "DynamicOutputDefinition."
                )

            if step_context.has_seen_output(output.output_name, output.mapping_key):
                raise DagsterInvariantViolationError(
                    f"Compute for {op_label} yielded a DynamicOutput with mapping_key "
                    f'"{output.mapping_key}" multiple times.'
                )

            step_context.observe_output(output.output_name, output.mapping_key)

            logged = step_context.get_output_metadata(
                output.output_name, mapping_key=output.mapping_key
            )
            output = DynamicOutput(
                value=output.value,
                output_name=output.output_name,
                metadata_entries=_with_logged_metadata(output.metadata_entries, logged),
                mapping_key=output.mapping_key,
            )

        yield output

    for declared_output in step.step_outputs:
        step_output_def = step_context.solid_def.output_def_named(declared_output.name)

        if step_context.has_seen_output(step_output_def.name) or step_output_def.optional:
            continue

        if step_output_def.dagster_type.kind == DagsterTypeKind.NOTHING:
            step_context.log.info(
                f'Emitting implicit Nothing for output "{step_output_def.name}" on {op_label}'
            )
            yield Output(output_name=step_output_def.name, value=None)
        elif not step_output_def.is_dynamic:
            raise DagsterStepOutputNotFoundError(
                (
                    f"Core compute for {op_label} did not return an output for non-optional "
                    f'output "{step_output_def.name}"'
                ),
                step_key=step.key,
                output_name=step_output_def.name,
            )