def resolve_to_config_type(dagster_type):
    """Resolve ``dagster_type`` to a ``ConfigType`` instance.

    Accepts an existing ``ConfigType`` (returned as-is), a supported python
    builtin, ``None`` (mapped to the Any config type), a ``BuiltinEnum``
    member, or a wrapping List/Nullable type. Set and Tuple wrappers are
    explicitly rejected, as are ``typing`` module types and runtime types.

    Returns:
        ConfigType: on successful resolution.
        False: when the value cannot be resolved; callers are expected to
            report a reasonable error with their additional context.
    """
    # Local imports — presumably to avoid a circular import cycle; TODO confirm.
    from dagster.core.types.wrapping.mapping import (
        remap_python_builtin_for_config,
        is_supported_config_python_builtin,
    )
    from dagster.core.types.runtime.runtime_type import RuntimeType

    # Passing a config type *class* (rather than an instance) is a caller error.
    if _is_config_type_class(dagster_type):
        check.param_invariant(
            False,
            'dagster_type',
            'Cannot pass a config type class to resolve_to_config_type. Got {dagster_type}'.format(
                dagster_type=dagster_type
            ),
        )

    # Runtime types belong to a separate type system and may not appear here.
    check.invariant(
        not (isinstance(dagster_type, type) and issubclass(dagster_type, RuntimeType)),
        'Cannot resolve a runtime type to a config type',
    )

    # typing.* generics (e.g. typing.List) are rejected with an explicit error.
    if is_typing_type(dagster_type):
        raise DagsterInvariantViolationError(
            (
                'You have passed in {dagster_type} in the config system. Types from '
                'the typing module in python are not allowed in the config system. '
                'You must use types that are imported from dagster or primitive types '
                'such as bool, int, etc.'
            ).format(dagster_type=dagster_type)
        )

    if isinstance(dagster_type, (WrappingSetType, DagsterSetApi)):
        raise DagsterInvalidDefinitionError(
            'Cannot use Set in the context of a config field. Please use List instead.'
        )

    if isinstance(dagster_type, (WrappingTupleType, DagsterTupleApi)):
        raise DagsterInvalidDefinitionError(
            'Cannot use Tuple in the context of a config field. Please use List instead.'
        )

    # Short circuit if it's already a Config Type
    if isinstance(dagster_type, ConfigType):
        return dagster_type

    # If we reach this point we have been passed either:
    #  1) a supported python builtin (e.g. int, str);
    #  2) a dagster wrapping type that needs to be converted to its config
    #     variant (e.g. dagster.List);
    #  3) an invalid value — we return False to signify this, and it is up
    #     to callers to report a reasonable error.
    if is_supported_config_python_builtin(dagster_type):
        return remap_python_builtin_for_config(dagster_type)

    # None is shorthand for the Any config type.
    if dagster_type is None:
        return ConfigAnyInstance

    if BuiltinEnum.contains(dagster_type):
        return ConfigType.from_builtin_enum(dagster_type)

    if isinstance(dagster_type, (WrappingListType, DagsterListApi)):
        return resolve_to_config_list(dagster_type)

    if isinstance(dagster_type, WrappingNullableType):
        return resolve_to_config_nullable(dagster_type)

    # This means that this is an error and we are returning False to the callsite.
    # We do the error reporting there because those callsites have more context.
    return False
def resource_def(self) -> Optional[ResourceDefinition]:
    """Unavailable on an unbound context; unconditionally raises.

    Raises:
        DagsterInvariantViolationError: always — the context must first be
            validated against a logger definition.
    """
    failure_message = (
        "UnboundInitLoggerContext has not been validated against a logger definition."
    )
    raise DagsterInvariantViolationError(failure_message)
def _check_execute_pipeline_args(pipeline, run_config, mode, preset, tags, solid_selection=None):
    """Validate and normalize the arguments to a pipeline execution call.

    Enforces that `mode` and `preset` are mutually exclusive, merges values
    from the named preset (run_config, solid_selection, mode, tags) into the
    explicit arguments while checking they agree, resolves a default mode for
    single-mode pipelines, and subsets the pipeline when a solid selection is
    given.

    Returns:
        Tuple of (pipeline, run_config, mode, tags, solids_to_execute,
        solid_selection).

    Raises:
        DagsterInvariantViolationError: on an unknown mode, or when a
            multi-mode pipeline is executed without specifying a mode.
    """
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)

    run_config = check.opt_dict_param(run_config, "run_config")
    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    # mode and preset are mutually exclusive: the preset itself carries a mode.
    check.invariant(
        not (mode is not None and preset is not None),
        "You may set only one of `mode` (got {mode}) or `preset` (got {preset}).".format(
            mode=mode, preset=preset
        ),
    )

    tags = check.opt_dict_param(tags, "tags", key_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)

    if preset is not None:
        pipeline_preset = pipeline_def.get_preset(preset)

        # An explicit run_config may only restate the preset's run_config exactly.
        if pipeline_preset.run_config is not None:
            check.invariant(
                (not run_config) or (pipeline_preset.run_config == run_config),
                "The environment set in preset '{preset}' does not agree with the environment "
                "passed in the `run_config` argument.".format(preset=preset),
            )
            run_config = pipeline_preset.run_config

        # load solid_selection from preset
        if pipeline_preset.solid_selection is not None:
            check.invariant(
                solid_selection is None or solid_selection == pipeline_preset.solid_selection,
                "The solid_selection set in preset '{preset}', {preset_subset}, does not agree with "
                "the `solid_selection` argument: {solid_selection}".format(
                    preset=preset,
                    preset_subset=pipeline_preset.solid_selection,
                    solid_selection=solid_selection,
                ),
            )
            solid_selection = pipeline_preset.solid_selection

        check.invariant(
            mode is None or mode == pipeline_preset.mode,
            "Mode {mode} does not agree with the mode set in preset '{preset}': "
            "('{preset_mode}')".format(preset=preset, preset_mode=pipeline_preset.mode, mode=mode),
        )
        mode = pipeline_preset.mode

        # Explicit tags win over preset tags on key collision.
        tags = merge_dicts(pipeline_preset.tags, tags)

    if mode is not None:
        if not pipeline_def.has_mode_definition(mode):
            raise DagsterInvariantViolationError(
                (
                    "You have attempted to execute pipeline {name} with mode {mode}. "
                    "Available modes: {modes}"
                ).format(
                    name=pipeline_def.name,
                    mode=mode,
                    modes=pipeline_def.available_modes,
                )
            )
    else:
        if pipeline_def.is_multi_mode:
            raise DagsterInvariantViolationError(
                (
                    "Pipeline {name} has multiple modes (Available modes: {modes}) and you have "
                    "attempted to execute it without specifying a mode. Set "
                    "mode property on the PipelineRun object."
                ).format(name=pipeline_def.name, modes=pipeline_def.available_modes)
            )
        mode = pipeline_def.get_default_mode_name()

    # Run-level tags win over pipeline-level tags on key collision.
    tags = merge_dicts(pipeline_def.tags, tags)

    # generate pipeline subset from the given solid_selection
    if solid_selection:
        pipeline = pipeline.subset_for_execution(solid_selection)

    return (
        pipeline,
        run_config,
        mode,
        tags,
        pipeline.solids_to_execute,
        solid_selection,
    )
def get_pipeline_subset_def(self, solids_to_execute):
    """Subsetting an already-subset pipeline is unsupported; unconditionally raises.

    Raises:
        DagsterInvariantViolationError: always.
    """
    message = 'Pipeline subsets may not be subset again.'
    raise DagsterInvariantViolationError(message)
def _step_output_error_checked_user_event_sequence(
    step_context: SystemStepExecutionContext, user_event_sequence: Iterator[SolidOutputUnion]
) -> Iterator[SolidOutputUnion]:
    """
    Process the event sequence to check for invariant violations in the event
    sequence related to Output events emitted from the compute_fn.

    This consumes and emits an event sequence. Non-Output events are passed
    through untouched. For each Output/DynamicOutput it verifies that the
    output name exists on the step, that a plain Output is emitted at most
    once, and that dynamic/non-dynamic output definitions are matched with
    DynamicOutput/Output respectively (including unique mapping keys). After
    the sequence is exhausted, missing non-optional outputs either produce an
    implicit Nothing Output (for Nothing-typed outputs) or raise
    DagsterStepOutputNotFoundError.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.generator_param(user_event_sequence, "user_event_sequence")

    step = step_context.step
    # Idiom fix: the comprehension already yields a list; no list() wrapper needed.
    output_names = [output_def.name for output_def in step.step_outputs]
    seen_outputs: Set[str] = set()
    seen_mapping_keys: Dict[str, Set[str]] = defaultdict(set)

    for user_event in user_event_sequence:
        if not isinstance(user_event, (Output, DynamicOutput)):
            yield user_event
            continue

        # do additional processing on Outputs
        output = user_event
        if not step.has_step_output(output.output_name):
            raise DagsterInvariantViolationError(
                'Core compute for solid "{handle}" returned an output '
                '"{output.output_name}" that does not exist. The available '
                "outputs are {output_names}".format(
                    handle=str(step.solid_handle), output=output, output_names=output_names
                )
            )

        step_output = step.step_output_named(output.output_name)
        output_def = step_context.pipeline_def.get_solid(step_output.solid_handle).output_def_named(
            step_output.name
        )

        if isinstance(output, Output):
            if output.output_name in seen_outputs:
                raise DagsterInvariantViolationError(
                    'Compute for solid "{handle}" returned an output '
                    '"{output.output_name}" multiple times'.format(
                        handle=str(step.solid_handle), output=output
                    )
                )
            if output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f'Compute for solid "{step.solid_handle}" for output "{output.output_name}" '
                    "defined as dynamic must yield DynamicOutput, got Output."
                )
        else:
            if not output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f'Compute for solid "{step.solid_handle}" yielded a DynamicOutput, '
                    "but did not use DynamicOutputDefinition."
                )
            if output.mapping_key in seen_mapping_keys[output.output_name]:
                raise DagsterInvariantViolationError(
                    f'Compute for solid "{step.solid_handle}" yielded a DynamicOutput with '
                    f'mapping_key "{output.mapping_key}" multiple times.'
                )
            seen_mapping_keys[output.output_name].add(output.mapping_key)

        yield output
        seen_outputs.add(output.output_name)

    for step_output in step.step_outputs:
        step_output_def = step_context.solid_def.output_def_named(step_output.name)
        if step_output_def.name not in seen_outputs and not step_output_def.optional:
            if step_output_def.dagster_type.kind == DagsterTypeKind.NOTHING:
                # Bug fix: `solid={str(...)}` built a one-element *set* literal,
                # so the log message rendered as e.g. {'my_solid'}. Pass the
                # string itself instead.
                step_context.log.info(
                    'Emitting implicit Nothing for output "{output}" on solid {solid}'.format(
                        output=step_output_def.name, solid=str(step.solid_handle)
                    )
                )
                yield Output(output_name=step_output_def.name, value=None)
            else:
                raise DagsterStepOutputNotFoundError(
                    'Core compute for solid "{handle}" did not return an output '
                    'for non-optional output "{step_output_def.name}"'.format(
                        handle=str(step.solid_handle), step_output_def=step_output_def
                    ),
                    step_key=step.key,
                    output_name=step_output_def.name,
                )
def _check_execute_pipeline_args(
    pipeline, environment_dict, mode, preset, tags, solid_subset, instance
):
    """Validate and normalize pipeline execution arguments (environment_dict era).

    Enforces mode/preset mutual exclusion, resolves a solid-subset selection
    query into qualified solid names, merges preset-provided values
    (environment_dict, solid_subset, mode) into the explicit arguments while
    checking they agree, resolves the default mode for single-mode pipelines,
    falls back to an ephemeral DagsterInstance, and subsets the pipeline when
    a solid subset is given.

    Returns:
        Tuple of (pipeline, pipeline_def, environment_dict, instance, mode,
        tags, solid_subset).

    Raises:
        DagsterInvariantViolationError: on an empty resolved solid subset, an
            unknown mode, or a multi-mode pipeline executed without a mode.
    """
    pipeline, pipeline_def = _check_pipeline(pipeline)

    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict')
    check.opt_str_param(mode, 'mode')
    check.opt_str_param(preset, 'preset')
    # mode and preset are mutually exclusive: the preset itself carries a mode.
    check.invariant(
        not (mode is not None and preset is not None),
        'You may set only one of `mode` (got {mode}) or `preset` (got {preset}).'.format(
            mode=mode, preset=preset
        ),
    )
    tags = check.opt_dict_param(tags, 'tags', key_type=str)
    check.opt_list_param(solid_subset, 'solid_subset', of_type=str)

    if solid_subset:
        # resolve solid selection queries to a list of qualified solid names
        parsed_solid_subset = parse_solid_subset(pipeline_def, solid_subset)
        if len(parsed_solid_subset) == 0:
            raise DagsterInvariantViolationError(
                'No qualified solid subset found for solid_subset={input}'.format(
                    input=solid_subset
                )
            )
        solid_subset = parsed_solid_subset

    if preset is not None:
        pipeline_preset = pipeline_def.get_preset(preset)

        # An explicit environment_dict may only restate the preset's exactly.
        if pipeline_preset.environment_dict is not None:
            check.invariant(
                (not environment_dict)
                or (pipeline_preset.environment_dict == environment_dict),
                'The environment set in preset \'{preset}\' does not agree with the environment '
                'passed in the `environment_dict` argument.'.format(preset=preset),
            )
            environment_dict = pipeline_preset.environment_dict

        if pipeline_preset.solid_subset is not None:
            check.invariant(
                solid_subset is None or solid_subset == pipeline_preset.solid_subset,
                'The solid_subset set in preset \'{preset}\', {preset_subset}, does not agree with '
                'the `solid_subset` argument: {solid_subset}'.format(
                    preset=preset,
                    preset_subset=pipeline_preset.solid_subset,
                    solid_subset=solid_subset,
                ),
            )
            solid_subset = pipeline_preset.solid_subset

        check.invariant(
            mode is None or mode == pipeline_preset.mode,
            'Mode {mode} does not agree with the mode set in preset \'{preset}\': '
            '(\'{preset_mode}\')'.format(
                preset=preset, preset_mode=pipeline_preset.mode, mode=mode
            ),
        )
        mode = pipeline_preset.mode

    if mode is not None:
        if not pipeline_def.has_mode_definition(mode):
            raise DagsterInvariantViolationError(
                (
                    'You have attempted to execute pipeline {name} with mode {mode}. '
                    'Available modes: {modes}'
                ).format(
                    name=pipeline_def.name,
                    mode=mode,
                    modes=pipeline_def.available_modes,
                )
            )
    else:
        if pipeline_def.is_multi_mode:
            raise DagsterInvariantViolationError(
                (
                    'Pipeline {name} has multiple modes (Available modes: {modes}) and you have '
                    'attempted to execute it without specifying a mode. Set '
                    'mode property on the PipelineRun object.'
                ).format(name=pipeline_def.name, modes=pipeline_def.available_modes)
            )
        mode = pipeline_def.get_default_mode_name()

    # Run-level tags win over pipeline-level tags on key collision.
    tags = merge_dicts(pipeline_def.tags, tags)

    check.opt_inst_param(instance, 'instance', DagsterInstance)
    instance = instance or DagsterInstance.ephemeral()

    if solid_subset:
        pipeline = pipeline.subset_for_execution(solid_subset)
        pipeline_def = pipeline.get_definition()
    else:
        solid_subset = pipeline_def.solid_subset

    return (pipeline, pipeline_def, environment_dict, instance, mode, tags, solid_subset)
def get_pipeline_subset_def(
    self, solids_to_execute: AbstractSet[str]
) -> "PipelineSubsetDefinition":
    """A subset pipeline cannot be subset a second time; unconditionally raises.

    Raises:
        DagsterInvariantViolationError: always.
    """
    error_message = "Pipeline subsets may not be subset again."
    raise DagsterInvariantViolationError(error_message)
def _step_output_error_checked_user_event_sequence(
    step_context: StepExecutionContext, user_event_sequence: Iterator[SolidOutputUnion]
) -> Iterator[SolidOutputUnion]:
    """
    Process the event sequence to check for invariant violations in the event
    sequence related to Output events emitted from the compute_fn.

    This consumes and emits an event sequence. Non-Output events pass through
    untouched. Each Output/DynamicOutput is validated (name exists on the
    step, plain Outputs emitted at most once, dynamic/non-dynamic definitions
    matched to the right event type, unique mapping keys), recorded via
    step_context.observe_output, and re-emitted with any metadata logged via
    the context appended to its metadata entries. After the sequence is
    exhausted, a missing non-optional Nothing-typed output yields an implicit
    Nothing Output; any other missing non-optional, non-dynamic output raises
    DagsterStepOutputNotFoundError.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.generator_param(user_event_sequence, "user_event_sequence")

    step = step_context.step
    op_label = step_context.describe_op()
    # Idiom fix: the comprehension already yields a list; no list() wrapper needed.
    output_names = [output_def.name for output_def in step.step_outputs]

    for user_event in user_event_sequence:
        if not isinstance(user_event, (Output, DynamicOutput)):
            yield user_event
            continue

        # do additional processing on Outputs
        output = user_event
        if not step.has_step_output(cast(str, output.output_name)):
            raise DagsterInvariantViolationError(
                f'Core compute for {op_label} returned an output "{output.output_name}" that does '
                f"not exist. The available outputs are {output_names}"
            )

        step_output = step.step_output_named(cast(str, output.output_name))
        output_def = step_context.pipeline_def.get_solid(step_output.solid_handle).output_def_named(
            step_output.name
        )

        if isinstance(output, Output):
            if step_context.has_seen_output(output.output_name):
                raise DagsterInvariantViolationError(
                    f'Compute for {op_label} returned an output "{output.output_name}" multiple '
                    "times"
                )
            if output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f'Compute for {op_label} for output "{output.output_name}" defined as dynamic '
                    "must yield DynamicOutput, got Output."
                )

            step_context.observe_output(output.output_name)

            # Fold metadata logged on the context into the output event itself.
            metadata = step_context.get_output_metadata(output.output_name)
            output = Output(
                value=output.value,
                output_name=output.output_name,
                metadata_entries=output.metadata_entries
                + normalize_metadata(cast(Dict[str, Any], metadata), []),
            )
        else:
            if not output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f"Compute for {op_label} yielded a DynamicOutput, but did not use "
                    "DynamicOutputDefinition."
                )
            if step_context.has_seen_output(output.output_name, output.mapping_key):
                raise DagsterInvariantViolationError(
                    f"Compute for {op_label} yielded a DynamicOutput with mapping_key "
                    f'"{output.mapping_key}" multiple times.'
                )
            step_context.observe_output(output.output_name, output.mapping_key)
            metadata = step_context.get_output_metadata(
                output.output_name, mapping_key=output.mapping_key
            )
            output = DynamicOutput(
                value=output.value,
                output_name=output.output_name,
                metadata_entries=output.metadata_entries
                + normalize_metadata(cast(Dict[str, Any], metadata), []),
                mapping_key=output.mapping_key,
            )

        yield output

    for step_output in step.step_outputs:
        step_output_def = step_context.solid_def.output_def_named(step_output.name)
        if not step_context.has_seen_output(step_output_def.name) and not step_output_def.optional:
            if step_output_def.dagster_type.is_nothing:
                step_context.log.info(
                    f'Emitting implicit Nothing for output "{step_output_def.name}" on {op_label}'
                )
                yield Output(output_name=step_output_def.name, value=None)
            elif not step_output_def.is_dynamic:
                raise DagsterStepOutputNotFoundError(
                    (
                        f"Core compute for {op_label} did not return an output for non-optional "
                        f'output "{step_output_def.name}"'
                    ),
                    step_key=step.key,
                    output_name=step_output_def.name,
                )
def schedule_partition_range(
    start,
    end,
    cron_schedule,
    fmt,
    timezone,
    execution_time_to_partition_fn,
):
    """Build a partition-generating function for a cron-scheduled date range.

    Args:
        start (datetime.datetime): Start of the range (inclusive).
        end (Optional[datetime.datetime]): End of the range; when None, the
            range is bounded by ``current_time`` (or "now") at call time.
        cron_schedule (str): Cron expression driving the execution times.
        fmt (str): strftime format used for partition names.
        timezone (Optional[str]): Timezone for partition values; defaults to
            the local timezone at call time.
        execution_time_to_partition_fn (Callable): Maps each scheduled
            execution time to the partition time it covers.

    Returns:
        Callable[[Optional[datetime.datetime]], List[Partition]]

    Raises:
        DagsterInvariantViolationError: if ``start`` is after ``end``.
    """
    check.inst_param(start, "start", datetime.datetime)
    check.opt_inst_param(end, "end", datetime.datetime)
    check.str_param(cron_schedule, "cron_schedule")
    check.str_param(fmt, "fmt")
    check.opt_str_param(timezone, "timezone")
    check.callable_param(execution_time_to_partition_fn, "execution_time_to_partition_fn")

    if end and start > end:
        # Bug fix: the message previously ended '...range end "{end}' with no
        # closing quote after the end date.
        raise DagsterInvariantViolationError(
            'Selected date range start "{start}" is after date range end "{end}"'.format(
                start=start.strftime(fmt),
                end=end.strftime(fmt),
            )
        )

    def get_schedule_range_partitions(current_time=None):
        # Compute the partitions that fall within [start, end/current_time/now].
        check.opt_inst_param(current_time, "current_time", datetime.datetime)
        tz = timezone if timezone else pendulum.now().timezone.name
        _start = (
            start.in_tz(tz)
            if isinstance(start, pendulum.Pendulum)
            else pendulum.instance(start, tz=tz)
        )

        if end:
            _end = end
        elif current_time:
            _end = current_time
        else:
            _end = pendulum.now(tz)

        # coerce to the definition timezone
        if isinstance(_end, pendulum.Pendulum):
            _end = _end.in_tz(tz)
        else:
            _end = pendulum.instance(_end, tz=tz)

        end_timestamp = _end.timestamp()

        partitions = []
        for next_time in schedule_execution_time_iterator(_start.timestamp(), cron_schedule, tz):
            partition_time = execution_time_to_partition_fn(next_time)

            if partition_time.timestamp() > end_timestamp:
                break

            if partition_time.timestamp() < _start.timestamp():
                continue

            partitions.append(Partition(value=partition_time, name=partition_time.strftime(fmt)))

        # Drop the final partition: only fully completed intervals are returned.
        return partitions[:-1]

    return get_schedule_range_partitions
def date_partition_range(
    start,
    end=None,
    delta=None,
    delta_range="days",
    fmt=None,
    inclusive=False,
    timezone=None,
):
    """Utility function that returns a partition generating function to be used in
    creating a `PartitionSet` definition.

    Args:
        start (datetime): Datetime capturing the start of the time range.
        end  (Optional(datetime)): Datetime capturing the end of the partition.  By default, the
                                   current time is used.  The range is not inclusive of the end
                                   value.
        delta (Optional(timedelta)): Timedelta representing the time duration of each partition.
            DEPRECATED: use 'delta_range' instead, which handles timezone transitions correctly.
        delta_range (Optional(str)): string representing the time duration of each partition.
            Must be a valid argument to pendulum.period.range ("days", "hours", "months", etc.).
        fmt (Optional(str)): Format string to represent each partition by its start time
        inclusive (Optional(bool)): By default, the partition set only contains date interval
            partitions for which the end time of the interval is less than current time. In other
            words, the partition set contains date interval partitions that are completely in the
            past. If inclusive is set to True, then the partition set will include all date
            interval partitions for which the start time of the interval is less than the
            current time.
        timezone (Optional(str)): Timezone in which the partition values should be expressed.

    Returns:
        Callable[[], List[Partition]]

    Raises:
        DagsterInvariantViolationError: if ``start`` is after ``end``.
    """
    check.inst_param(start, "start", datetime.datetime)
    check.opt_inst_param(end, "end", datetime.datetime)
    check.opt_str_param(delta_range, "delta_range")
    fmt = check.opt_str_param(fmt, "fmt", default=DEFAULT_DATE_FORMAT)
    check.opt_str_param(timezone, "timezone")

    check.opt_inst_param(delta, "delta", (datetime.timedelta, relativedelta))
    if delta:
        # 'delta' is deprecated in favor of 'delta_range'; they are exclusive.
        check.invariant(not delta_range, "cannot supply both 'delta' and 'delta_range' parameters")
        warnings.warn(
            "The 'delta' argument to date_partition_range has been deprecated - use 'delta_range' "
            "instead, which has better support for timezones. For example, if you previously "
            "passed in delta=timedelta(days=1), pass in delta_range='days' instead. The 'delta' "
            "argument will be removed in the dagster 0.10.0 release."
        )
        delta_range, delta_amount = _delta_to_delta_range(delta)
    else:
        check.invariant(delta_range, "Must include either a 'delta' or 'delta_range' parameter")
        delta_amount = 1

    if end and start > end:
        # Bug fix: the message previously ended '...range end "{end}' with no
        # closing quote after the end date.
        raise DagsterInvariantViolationError(
            'Selected date range start "{start}" is after date range end "{end}"'.format(
                start=start.strftime(fmt),
                end=end.strftime(fmt),
            )
        )

    def get_date_range_partitions():
        # Build one Partition per delta_range interval between start and end/now.
        tz = timezone if timezone else pendulum.now().timezone.name
        _start = (
            start.in_tz(tz)
            if isinstance(start, pendulum.Pendulum)
            else pendulum.instance(start, tz=tz)
        )

        if not end:
            _end = pendulum.now(tz)
        elif isinstance(end, pendulum.Pendulum):
            _end = end.in_tz(tz)
        else:
            _end = pendulum.instance(end, tz=tz)

        period = pendulum.period(_start, _end)
        date_names = [
            Partition(value=current, name=current.strftime(fmt))
            for current in period.range(delta_range, delta_amount)
        ]

        # We don't include the last element here by default since we only want
        # fully completed intervals, and the _end time is in the middle of the interval
        # represented by the last element of date_names
        if inclusive:
            return date_names

        return date_names[:-1]

    return get_date_range_partitions
def _store_output(
    step_context: StepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Union[Output, DynamicOutput],
    input_lineage: List[AssetLineageInfo],
) -> Iterator[DagsterEvent]:
    """Hand an output value to its IO manager and emit the resulting events.

    Calls the IO manager's handle_output inside an error boundary, forwarding
    any DagsterEvents it yields, collecting AssetMaterializations and metadata
    entries it yields, then emitting asset-materialization events (both for
    materializations returned by the manager and for the output's asset key,
    if any) and finally a handled_output event.

    Raises:
        DagsterInvariantViolationError: when handle_output yields an
            unsupported value type, or when a materialization carries metadata
            while context.add_output_metadata was also used in the same call.
    """
    output_def = step_context.solid_def.output_def_named(step_output_handle.output_name)
    output_manager = step_context.get_io_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)

    manager_materializations = []
    manager_metadata_entries: List[Union[PartitionMetadataEntry, MetadataEntry]] = []

    # output_manager.handle_output is either a generator function, or a normal function with or
    # without a return value. In the case that handle_output is a normal function, we need to
    # catch errors should they be raised before a return value. We can do this by wrapping
    # handle_output in a generator so that errors will be caught within iterate_with_context.
    if not inspect.isgeneratorfunction(output_manager.handle_output):

        def _gen_fn():
            # Wrap the plain-function call so errors surface inside
            # iterate_with_context, and forward any events logged during it.
            gen_output = output_manager.handle_output(output_context, output.value)
            for event in output_context.consume_events():
                yield event
            if gen_output:
                yield gen_output

        handle_output_gen = _gen_fn()
    else:
        handle_output_gen = output_manager.handle_output(output_context, output.value)

    for elt in iterate_with_context(
        lambda: solid_execution_error_boundary(
            DagsterExecutionHandleOutputError,
            msg_fn=lambda: (
                f'Error occurred while handling output "{output_context.name}" of '
                f'step "{step_context.step.key}":'
            ),
            step_context=step_context,
            step_key=step_context.step.key,
            output_name=output_context.name,
        ),
        handle_output_gen,
    ):
        # Forward events/metadata logged on the context between yields.
        for event in output_context.consume_events():
            yield event

        manager_metadata_entries.extend(output_context.consume_logged_metadata_entries())
        if isinstance(elt, DagsterEvent):
            yield elt
        elif isinstance(elt, AssetMaterialization):
            manager_materializations.append(elt)
        elif isinstance(elt, (MetadataEntry, PartitionMetadataEntry)):
            experimental_functionality_warning(
                "Yielding metadata from an IOManager's handle_output() function"
            )
            manager_metadata_entries.append(elt)
        else:
            raise DagsterInvariantViolationError(
                f"IO manager on output {output_def.name} has returned "
                f"value {elt} of type {type(elt).__name__}. The return type can only be "
                "one of AssetMaterialization, MetadataEntry, PartitionMetadataEntry."
            )

    # Final flush: pick up anything logged after the last yield of handle_output.
    for event in output_context.consume_events():
        yield event

    manager_metadata_entries.extend(output_context.consume_logged_metadata_entries())
    # do not alter explicitly created AssetMaterializations
    for materialization in manager_materializations:
        if materialization.metadata_entries and manager_metadata_entries:
            raise DagsterInvariantViolationError(
                f"When handling output '{output_context.name}' of {output_context.solid_def.node_type_str} '{output_context.solid_def.name}', received a materialization with metadata, while context.add_output_metadata was used within the same call to handle_output. Due to potential conflicts, this is not allowed. Please specify metadata in one place within the `handle_output` function."
            )

        if manager_metadata_entries:
            # Rebuild the materialization with the logged metadata attached.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=ExperimentalWarning)

                materialization = AssetMaterialization(
                    asset_key=materialization.asset_key,
                    description=materialization.description,
                    metadata_entries=manager_metadata_entries,
                    partition=materialization.partition,
                    tags=materialization.tags,
                    metadata=None,
                )

        yield DagsterEvent.asset_materialization(step_context, materialization, input_lineage)

    asset_key, partitions = _asset_key_and_partitions_for_output(
        output_context, output_def, output_manager
    )
    if asset_key:
        for materialization in _get_output_asset_materializations(
            asset_key,
            partitions,
            output,
            output_def,
            manager_metadata_entries,
        ):
            yield DagsterEvent.asset_materialization(step_context, materialization, input_lineage)

    yield DagsterEvent.handled_output(
        step_context,
        output_name=step_output_handle.output_name,
        manager_key=output_def.io_manager_key,
        metadata_entries=[
            entry for entry in manager_metadata_entries if isinstance(entry, MetadataEntry)
        ],
    )
def rebuild_from_snapshot(pipeline_name, execution_plan_snapshot):
    """Reconstruct an ExecutionPlan from an ExecutionPlanSnapshot.

    Rebuilds each step (inputs, outputs, and the concrete step class keyed on
    the snapshot's StepKind), the handles to execute, and the step maps, then
    assembles a new ExecutionPlan carrying the snapshot's known state and
    artifacts-persisted flag.

    Raises:
        DagsterInvariantViolationError: when the snapshot predates full
            reconstructability.
        Exception: on an unrecognized step kind.
    """
    if not execution_plan_snapshot.can_reconstruct_plan:
        raise DagsterInvariantViolationError(
            "Tried to reconstruct an old ExecutionPlanSnapshot that was created before snapshots "
            "had enough information to fully reconstruct the ExecutionPlan"
        )

    # Dispatch table: snapshot step kind -> concrete step class.
    step_cls_by_kind = {
        StepKind.COMPUTE: ExecutionStep,
        StepKind.UNRESOLVED_MAPPED: UnresolvedMappedExecutionStep,
        StepKind.UNRESOLVED_COLLECT: UnresolvedCollectExecutionStep,
    }

    step_dict = {}
    for step_snap in execution_plan_snapshot.steps:
        rebuilt_inputs = [
            ExecutionPlan.rebuild_step_input(input_snap) for input_snap in step_snap.inputs
        ]
        rebuilt_outputs = [
            StepOutput(
                output_snap.solid_handle,
                output_snap.name,
                output_snap.dagster_type_key,
                output_snap.properties,
            )
            for output_snap in step_snap.outputs
        ]

        step_cls = step_cls_by_kind.get(step_snap.kind)
        if step_cls is None:
            raise Exception(f"Unexpected step kind {str(step_snap.kind)}")

        rebuilt_step = step_cls(
            step_snap.step_handle,
            pipeline_name,
            rebuilt_inputs,
            rebuilt_outputs,
            step_snap.tags,
        )
        step_dict[rebuilt_step.handle] = rebuilt_step

    step_handles_to_execute = [
        StepHandle.parse_from_key(key) for key in execution_plan_snapshot.step_keys_to_execute
    ]

    executable_map, resolvable_map = _compute_step_maps(
        step_dict,
        step_handles_to_execute,
        execution_plan_snapshot.initial_known_state,
    )

    return ExecutionPlan(
        step_dict,
        executable_map,
        resolvable_map,
        step_handles_to_execute,
        execution_plan_snapshot.initial_known_state,
        execution_plan_snapshot.artifacts_persisted,
    )
def get_step_input_source(
    plan_builder: _PlanBuilder,
    solid: Solid,
    input_name: str,
    input_def: InputDefinition,
    dependency_structure: DependencyStructure,
    handle: SolidHandle,
    parent_step_inputs: Optional[
        List[Union[StepInput, UnresolvedMappedStepInput, UnresolvedCollectStepInput]]
    ],
):
    """Determine where a solid input's value comes from during plan building.

    Resolution order: root input manager, direct dependency (possibly
    unresolved or dynamic), fan-in dependencies, dynamic fan-in collect,
    solid config, a parent composite's mapped input, then the input's default
    value. Returns None for unconnected Nothing-typed inputs; otherwise
    raises when no source can be found.

    Raises:
        DagsterInvariantViolationError: when the input has no dependency, no
            config, and no default value (and is not Nothing-typed).
    """
    check.inst_param(plan_builder, "plan_builder", _PlanBuilder)
    check.inst_param(solid, "solid", Solid)
    check.str_param(input_name, "input_name")
    check.inst_param(input_def, "input_def", InputDefinition)
    check.inst_param(dependency_structure, "dependency_structure", DependencyStructure)
    check.opt_inst_param(handle, "handle", SolidHandle)
    check.opt_list_param(
        parent_step_inputs,
        "parent_step_inputs",
        of_type=(StepInput, UnresolvedMappedStepInput, UnresolvedCollectStepInput),
    )

    input_handle = solid.input_handle(input_name)
    solid_config = plan_builder.environment_config.solids.get(str(handle))

    # NOTE(review): the input_def parameter is immediately shadowed by this
    # fresh lookup — presumably intentional; confirm before relying on the
    # passed-in value.
    input_def = solid.definition.input_def_named(input_name)
    if input_def.root_manager_key and not dependency_structure.has_deps(input_handle):
        return FromRootInputManager(solid_handle=handle, input_name=input_name)

    if dependency_structure.has_direct_dep(input_handle):
        solid_output_handle = dependency_structure.get_direct_dep(input_handle)
        step_output_handle = plan_builder.get_output_handle(solid_output_handle)
        if isinstance(step_output_handle, UnresolvedStepOutputHandle):
            return FromUnresolvedStepOutput(
                unresolved_step_output_handle=step_output_handle,
                solid_handle=handle,
                input_name=input_name,
            )

        if solid_output_handle.output_def.is_dynamic:
            return FromPendingDynamicStepOutput(
                step_output_handle=step_output_handle,
                solid_handle=handle,
                input_name=input_name,
            )

        return FromStepOutput(
            step_output_handle=step_output_handle,
            solid_handle=handle,
            input_name=input_name,
            fan_in=False,
        )

    if dependency_structure.has_fan_in_deps(input_handle):
        sources: List[StepInputSource] = []
        deps = dependency_structure.get_fan_in_deps(input_handle)
        for idx, handle_or_placeholder in enumerate(deps):
            if isinstance(handle_or_placeholder, SolidOutputHandle):
                step_output_handle = plan_builder.get_output_handle(handle_or_placeholder)
                # Dynamic outputs are not allowed in a regular fan-in.
                if (
                    isinstance(step_output_handle, UnresolvedStepOutputHandle)
                    or handle_or_placeholder.output_def.is_dynamic
                ):
                    check.failed(
                        "Unexpected dynamic output dependency in regular fan in, "
                        "should have been caught at definition time."
                    )

                sources.append(
                    FromStepOutput(
                        step_output_handle=step_output_handle,
                        solid_handle=handle,
                        input_name=input_name,
                        fan_in=True,
                    )
                )
            else:
                # Placeholder entries are satisfied by the parent composite's inputs.
                check.invariant(
                    handle_or_placeholder is MappedInputPlaceholder,
                    f"Expected SolidOutputHandle or MappedInputPlaceholder, got {handle_or_placeholder}",
                )
                if parent_step_inputs is None:
                    check.failed("unexpected error in composition descent during plan building")

                parent_name = solid.container_mapped_fan_in_input(input_name, idx).definition.name
                parent_inputs = {step_input.name: step_input for step_input in parent_step_inputs}
                parent_input = parent_inputs[parent_name]
                source = parent_input.source
                if not isinstance(source, StepInputSource):
                    check.failed(f"Unexpected parent mapped input source type {source}")
                sources.append(source)

        return FromMultipleSources(solid_handle=handle, input_name=input_name, sources=sources)

    if dependency_structure.has_dynamic_fan_in_dep(input_handle):
        solid_output_handle = dependency_structure.get_dynamic_fan_in_dep(input_handle)
        step_output_handle = plan_builder.get_output_handle(solid_output_handle)
        if isinstance(step_output_handle, UnresolvedStepOutputHandle):
            return FromDynamicCollect(
                solid_handle=handle,
                input_name=input_name,
                source=FromUnresolvedStepOutput(
                    unresolved_step_output_handle=step_output_handle,
                    solid_handle=handle,
                    input_name=input_name,
                ),
            )
        elif solid_output_handle.output_def.is_dynamic:
            return FromDynamicCollect(
                solid_handle=handle,
                input_name=input_name,
                source=FromPendingDynamicStepOutput(
                    step_output_handle=step_output_handle,
                    solid_handle=handle,
                    input_name=input_name,
                ),
            )
        # NOTE(review): a dynamic fan-in dep whose output is neither
        # unresolved nor dynamic falls through to the checks below — confirm
        # this is intentional.

    if solid_config and input_name in solid_config.inputs:
        return FromConfig(solid_handle=handle, input_name=input_name)

    if solid.container_maps_input(input_name):
        if parent_step_inputs is None:
            check.failed("unexpected error in composition descent during plan building")

        parent_name = solid.container_mapped_input(input_name).definition.name
        parent_inputs = {step_input.name: step_input for step_input in parent_step_inputs}

        if parent_name in parent_inputs:
            parent_input = parent_inputs[parent_name]
            return parent_input.source
        # else fall through to Nothing case or raise

    if solid.definition.input_has_default(input_name):
        return FromDefaultValue(solid_handle=handle, input_name=input_name)

    # At this point we have an input that is not hooked up to
    # the output of another solid or provided via environment config.
    # We will allow this for "Nothing" type inputs and continue.
    if input_def.dagster_type.kind == DagsterTypeKind.NOTHING:
        return None

    # Otherwise we throw an error.
    raise DagsterInvariantViolationError(
        (
            "In pipeline {pipeline_name} solid {solid_name}, input {input_name} "
            "must get a value either (a) from a dependency or (b) from the "
            "inputs section of its configuration."
        ).format(
            pipeline_name=plan_builder.pipeline_name, solid_name=solid.name, input_name=input_name
        )
    )
def _check_execute_pipeline_args(fn_name, pipeline, environment_dict, mode, preset, tags, run_config, instance):
    """Validate and normalize the arguments shared by the pipeline execution entry points.

    Reconciles `mode`/`preset`/`run_config` (which may conflict), applies preset overrides,
    merges tags, and builds the execution plan.

    Args:
        fn_name (str): Name of the calling API, used only in the deprecation warning text.
        pipeline (Union[PipelineDefinition, ExecutablePipeline]): Pipeline to execute; a bare
            PipelineDefinition is wrapped for backcompat.
        environment_dict (Optional[dict]): Environment config; may be replaced by the preset's.
        mode (Optional[str]): Mode name; mutually exclusive with `preset`.
        preset (Optional[str]): Preset name; mutually exclusive with `mode`.
        tags (Optional[Dict[str, str]]): User tags, merged over pipeline and run_config tags.
        run_config (Optional[RunConfig]): Legacy run config carrier (mode/tags on it are
            deprecated per the warning below).
        instance (Optional[DagsterInstance]): Instance; an ephemeral one is created if absent.

    Returns:
        Tuple of (pipeline, environment_dict, instance, mode, tags, run_config, execution_plan).

    Raises:
        DagsterInvariantViolationError: If the mode is unknown, or no mode is given for a
            multi-mode pipeline.
    """
    # backcompat
    if isinstance(pipeline, PipelineDefinition):
        pipeline = InMemoryExecutablePipeline(pipeline)
    check.inst_param(pipeline, 'pipeline', ExecutablePipeline)
    pipeline_def = pipeline.get_definition()
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict')
    check.opt_str_param(mode, 'mode')
    check.opt_str_param(preset, 'preset')
    # `mode` and `preset` are mutually exclusive: a preset carries its own mode.
    check.invariant(
        not (mode is not None and preset is not None),
        'You may set only one of `mode` (got {mode}) or `preset` (got {preset}).'
        .format(mode=mode, preset=preset),
    )
    tags = check.opt_dict_param(tags, 'tags', key_type=str)
    run_config = check.opt_inst_param(run_config, 'run_config', RunConfig, default=RunConfig())
    if preset is not None:
        pipeline_preset = pipeline_def.get_preset(preset)
        # The preset's mode must agree with any mode carried on the legacy run_config.
        check.invariant(
            run_config.mode is None or pipeline_preset.mode == run_config.mode,
            'The mode set in preset \'{preset}\' (\'{preset_mode}\') does not agree with the mode '
            'set in the `run_config` (\'{run_config_mode}\')'.format(
                preset=preset,
                preset_mode=pipeline_preset.mode,
                run_config_mode=run_config.mode),
        )
        if pipeline_preset.environment_dict is not None:
            # An explicitly passed environment_dict must match the preset's, if both exist.
            check.invariant(
                (not environment_dict) or (pipeline_preset.environment_dict == environment_dict),
                'The environment set in preset \'{preset}\' does not agree with the environment '
                'passed in the `environment_dict` argument.'.format(preset=preset),
            )
            environment_dict = pipeline_preset.environment_dict
        if pipeline_preset.solid_subset is not None:
            # Presets may restrict execution to a subset of solids.
            pipeline = pipeline.build_sub_pipeline(pipeline_preset.solid_subset)
        check.invariant(
            mode is None or mode == pipeline_preset.mode,
            'Mode {mode} does not agree with the mode set in preset \'{preset}\': '
            '(\'{preset_mode}\')'.format(
                preset=preset, preset_mode=pipeline_preset.mode, mode=mode),
        )
        mode = pipeline_preset.mode
    if run_config.mode is not None or run_config.tags:
        # Deprecation path: mode/tags on RunConfig are superseded by the explicit arguments.
        warnings.warn((
            'In 0.8.0, the use of `run_config` to set pipeline mode and tags will be '
            'deprecated. Please use the `mode` and `tags` arguments to `{fn_name}` '
            'instead.').format(fn_name=fn_name))
    if run_config.mode is not None:
        if mode is not None:
            check.invariant(
                run_config.mode == mode,
                'Mode \'{mode}\' does not agree with the mode set in the `run_config`: '
                '\'{run_config_mode}\''.format(
                    mode=mode, run_config_mode=run_config.mode),
            )
        mode = run_config.mode
    if mode is not None:
        if not pipeline_def.has_mode_definition(mode):
            raise DagsterInvariantViolationError((
                'You have attempted to execute pipeline {name} with mode {mode}. '
                'Available modes: {modes}').format(
                    name=pipeline_def.name,
                    mode=mode,
                    modes=pipeline_def.available_modes,
                ))
    else:
        # No mode anywhere: only legal for single-mode pipelines, whose default mode is used.
        if not pipeline_def.is_single_mode:
            raise DagsterInvariantViolationError((
                'Pipeline {name} has multiple modes (Available modes: {modes}) and you have '
                'attempted to execute it without specifying a mode. Set '
                'mode property on the PipelineRun object.').format(
                    name=pipeline_def.name, modes=pipeline_def.available_modes))
        mode = pipeline_def.get_default_mode_name()
    # Precedence (innermost wins): pipeline tags < run_config tags < explicit tags argument.
    tags = merge_dicts(merge_dicts(pipeline_def.tags, run_config.tags or {}), tags)
    check.opt_inst_param(instance, 'instance', DagsterInstance)
    instance = instance or DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(
        pipeline,
        environment_dict,
        mode=mode,
        step_keys_to_execute=run_config.step_keys_to_execute,
    )
    return pipeline, environment_dict, instance, mode, tags, run_config, execution_plan
def execute_run(
    pipeline: IPipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    raise_on_error: bool = False,
) -> PipelineExecutionResult:
    """Executes an existing pipeline run synchronously.

    Synchronous version of execute_run_iterator.

    Args:
        pipeline (IPipeline): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute
        instance (DagsterInstance): The instance in which the run has been created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.

    Raises:
        DagsterInvariantViolationError: If a bare definition (rather than a reconstructable
            IPipeline) was passed, or the run was canceled before execution could start.
    """
    # Definitions cannot cross process boundaries; require an IPipeline wrapper.
    if isinstance(pipeline, PipelineDefinition):
        if isinstance(pipeline, JobDefinition):
            error = "execute_run requires a reconstructable job but received job definition directly instead."
        else:
            error = (
                "execute_run requires a reconstructable pipeline but received pipeline definition "
                "directly instead.")
        raise DagsterInvariantViolationError(
            f"{error} To support hand-off to other processes please wrap your definition in "
            "a call to reconstructable(). Learn more about reconstructable here: https://docs.dagster.io/_apidocs/execution#dagster.reconstructable"
        )
    check.inst_param(pipeline, "pipeline", IPipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    if pipeline_run.status == PipelineRunStatus.CANCELED:
        # Record the no-op in the event log before refusing to execute.
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)
    # Only fresh runs may be executed here; anything else indicates a state-machine violation.
    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Run {} ({}) in state {}, expected NOT_STARTED or STARTING".format(
            pipeline_run.pipeline_name, pipeline_run.run_id, pipeline_run.status),
    )
    pipeline_def = pipeline.get_definition()
    if pipeline_run.solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            # An already-subsetted pipeline must agree with the run's recorded subset.
            check.invariant(
                pipeline_run.solids_to_execute == pipeline.solids_to_execute,
                "Cannot execute PipelineRun with solids_to_execute {solids_to_execute} that "
                "conflicts with pipeline subset {pipeline_solids_to_execute}.".format(
                    pipeline_solids_to_execute=str_format_set(pipeline.solids_to_execute),
                    solids_to_execute=str_format_set(pipeline_run.solids_to_execute),
                ),
            )
        else:
            # when `execute_run` is directly called, the sub pipeline hasn't been created
            # note that when we receive the solids to execute via PipelineRun, it won't support
            # solid selection query syntax
            pipeline = pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute)
    execution_plan = _get_execution_plan_from_run(pipeline, pipeline_run, instance)
    # Collector for in-process step outputs, threaded through to the result object.
    output_capture: Optional[Dict[StepOutputHandle, Any]] = {}
    _execute_run_iterable = ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=pipeline_execution_iterator,
        execution_context_manager=PlanOrchestrationContextManager(
            context_event_generator=orchestration_context_event_generator,
            pipeline=pipeline,
            execution_plan=execution_plan,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            raise_on_error=raise_on_error,
            executor_defs=None,
            output_capture=output_capture,
        ),
    )
    # Draining the iterable drives the run to completion; events are retained for the result.
    event_list = list(_execute_run_iterable)
    return PipelineExecutionResult(
        pipeline.get_definition(),
        pipeline_run.run_id,
        event_list,
        # Lazily reconstructs a scoped context for post-hoc inspection of results.
        lambda: scoped_pipeline_context(
            execution_plan,
            pipeline,
            pipeline_run.run_config,
            pipeline_run,
            instance,
        ),
        output_capture=output_capture,
    )
def date_partition_range(
    start,
    end=None,
    delta_range="days",
    fmt=None,
    inclusive=False,
    timezone=None,
):
    """
    Utility function that returns a partition generating function to be used in creating a
    `PartitionSet` definition.

    Args:
        start (datetime): Datetime capturing the start of the time range.
        end  (Optional(datetime)): Datetime capturing the end of the partition.  By default, the
                                   current time is used.  The range is not inclusive of the end
                                   value.
        delta_range (Optional(str)): string representing the time duration of each partition.
            Must be a valid argument to pendulum.period.range ("days", "hours", "months", etc.).
        fmt (Optional(str)): Format string to represent each partition by its start time
        inclusive (Optional(bool)): By default, the partition set only contains date interval
            partitions for which the end time of the interval is less than current time. In other
            words, the partition set contains date interval partitions that are completely in the
            past. If inclusive is set to True, then the partition set will include all date
            interval partitions for which the start time of the interval is less than the
            current time.
        timezone (Optional(str)): Timezone in which the partition values should be expressed.

    Returns:
        Callable[[], List[Partition]]

    Raises:
        DagsterInvariantViolationError: If `start` is after `end`.
    """
    check.inst_param(start, "start", datetime.datetime)
    check.opt_inst_param(end, "end", datetime.datetime)
    check.str_param(delta_range, "delta_range")
    fmt = check.opt_str_param(fmt, "fmt", default=DEFAULT_DATE_FORMAT)
    check.opt_str_param(timezone, "timezone")

    # One unit of `delta_range` per partition.
    delta_amount = 1

    if end and start > end:
        # BUGFIX: message previously read '...date range end "{end}' with an unbalanced
        # quote; closing quote added so the rendered error is well-formed.
        raise DagsterInvariantViolationError(
            'Selected date range start "{start}" is after date range end "{end}"'.format(
                start=start.strftime(fmt),
                end=end.strftime(fmt),
            ))

    def get_date_range_partitions(current_time=None):
        # Generates one Partition per `delta_range` interval between start and the effective end.
        check.opt_inst_param(current_time, "current_time", datetime.datetime)
        tz = timezone if timezone else pendulum.now().timezone.name
        _start = (start.in_tz(tz) if isinstance(start, pendulum.Pendulum)
                  else pendulum.instance(start, tz=tz))
        # Effective end: explicit `end` wins, then the caller-supplied clock, then "now".
        if end:
            _end = end
        elif current_time:
            _end = current_time
        else:
            _end = pendulum.now(tz)
        # coerce to the definition timezone
        if isinstance(_end, pendulum.Pendulum):
            _end = _end.in_tz(tz)
        else:
            _end = pendulum.instance(_end, tz=tz)
        period = pendulum.period(_start, _end)
        date_names = [
            Partition(value=current, name=current.strftime(fmt))
            for current in period.range(delta_range, delta_amount)
        ]
        # We don't include the last element here by default since we only want
        # fully completed intervals, and the _end time is in the middle of the interval
        # represented by the last element of date_names
        if inclusive:
            return date_names
        return date_names[:-1]

    return get_date_range_partitions
def _store_output(
    step_context: StepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Union[Output, DynamicOutput],
    input_lineage: List[AssetLineageInfo],
) -> Iterator[DagsterEvent]:
    """Hand a step output to its IO manager and yield the resulting Dagster events.

    Invokes the output's IO manager, collects any AssetMaterializations / metadata entries it
    yields, then emits asset-materialization events followed by a terminal `handled_output`
    event.

    Args:
        step_context: Execution context for the current step.
        step_output_handle: Identifies which output of the step is being stored.
        output: The Output/DynamicOutput value produced by the step.
        input_lineage: Asset lineage of the step's inputs, attached to materialization events.

    Yields:
        DagsterEvent: asset materialization events (if any) and a final handled_output event.

    Raises:
        DagsterInvariantViolationError: If the IO manager yields an unsupported value type.
    """
    output_def = step_context.solid_def.output_def_named(step_output_handle.output_name)
    output_manager = step_context.get_io_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)
    manager_materializations = []
    manager_metadata_entries = []

    # output_manager.handle_output is either a generator function, or a normal function with or
    # without a return value. In the case that handle_output is a normal function, we need to
    # catch errors should they be raised before a return value. We can do this by wrapping
    # handle_output in a generator so that errors will be caught within iterate_with_context.
    if not inspect.isgeneratorfunction(output_manager.handle_output):

        def _gen_fn():
            # Adapter: run the plain function and yield its (optional) return value.
            gen_output = output_manager.handle_output(output_context, output.value)
            if gen_output:
                yield gen_output

        handle_output_gen = _gen_fn()
    else:
        handle_output_gen = output_manager.handle_output(output_context, output.value)

    # Drive the generator inside an error boundary so user IO-manager failures are wrapped
    # in DagsterExecutionHandleOutputError with step/output context attached.
    for elt in iterate_with_context(
        lambda: solid_execution_error_boundary(
            DagsterExecutionHandleOutputError,
            msg_fn=lambda: (
                f'Error occurred while handling output "{output_context.name}" of '
                f'step "{step_context.step.key}":'
            ),
            step_context=step_context,
            step_key=step_context.step.key,
            output_name=output_context.name,
        ),
        handle_output_gen,
    ):
        if isinstance(elt, AssetMaterialization):
            manager_materializations.append(elt)
        elif isinstance(elt, (EventMetadataEntry, PartitionMetadataEntry)):
            experimental_functionality_warning(
                "Yielding metadata from an IOManager's handle_output() function"
            )
            manager_metadata_entries.append(elt)
        else:
            # Anything other than a materialization or metadata entry is a user error.
            raise DagsterInvariantViolationError(
                f"IO manager on output {output_def.name} has returned "
                f"value {elt} of type {type(elt).__name__}. The return type can only be "
                "one of AssetMaterialization, EventMetadataEntry, PartitionMetadataEntry."
            )

    # do not alter explicitly created AssetMaterializations
    for materialization in manager_materializations:
        yield DagsterEvent.asset_materialization(step_context, materialization, input_lineage)

    asset_key, partitions = _asset_key_and_partitions_for_output(
        output_context, output_def, output_manager
    )
    if asset_key:
        # Synthesize materializations for outputs bound to an asset key.
        for materialization in _get_output_asset_materializations(
            asset_key,
            partitions,
            output,
            output_def,
            manager_metadata_entries,
        ):
            yield DagsterEvent.asset_materialization(step_context, materialization, input_lineage)

    # Terminal event: only EventMetadataEntry instances are attached here (partition metadata
    # is consumed by the asset materializations above).
    yield DagsterEvent.handled_output(
        step_context,
        output_name=step_output_handle.output_name,
        manager_key=output_def.io_manager_key,
        metadata_entries=[
            entry for entry in manager_metadata_entries if isinstance(entry, EventMetadataEntry)
        ],
    )
def reconstructable(target):
    """
    Create a ReconstructablePipeline from a function that returns a PipelineDefinition,
    or a function decorated with :py:func:`@pipeline <dagster.pipeline>`

    When your pipeline must cross process boundaries, e.g., for execution on multiple nodes or
    in different systems (like dagstermill), Dagster must know how to reconstruct the pipeline
    on the other side of the process boundary.

    This function implements a very conservative strategy for reconstructing pipelines, so that
    its behavior is easy to predict, but as a consequence it is not able to reconstruct certain
    kinds of pipelines, such as those defined by lambdas, in nested scopes (e.g., dynamically
    within a method call), or in interactive environments such as the Python REPL or Jupyter
    notebooks.

    If you need to reconstruct pipelines constructed in these ways, you should use
    :py:func:`build_reconstructable_pipeline` instead, which allows you to specify your own
    strategy for reconstructing a pipeline.

    Examples:

    .. code-block:: python

        from dagster import PipelineDefinition, pipeline, reconstructable

        @pipeline
        def foo_pipeline():
            ...

        reconstructable_foo_pipeline = reconstructable(foo_pipeline)

        def make_bar_pipeline():
            return PipelineDefinition(...)

        reconstructable_bar_pipeline = reconstructable(make_bar_pipeline)
    """
    from dagster.core.definitions import PipelineDefinition

    # Only plain functions or decorated definitions are reconstructable.
    if not seven.is_function_or_decorator_instance_of(target, PipelineDefinition):
        raise DagsterInvariantViolationError(
            "Reconstructable target should be a function or definition produced "
            "by a decorated function, got {type}.".format(type=type(target)),
        )
    # Lambdas have no importable name and so cannot be reconstructed in another process.
    if seven.is_lambda(target):
        raise DagsterInvariantViolationError(
            "Reconstructable target can not be a lambda. Use a function or "
            "decorated function defined at module scope instead, or use "
            "build_reconstructable_pipeline.")
    # A __qualname__ differing from __name__ indicates a nested (non-module-scope) definition.
    if seven.qualname_differs(target):
        raise DagsterInvariantViolationError(
            'Reconstructable target "{target.__name__}" has a different '
            '__qualname__ "{target.__qualname__}" indicating it is not '
            "defined at module scope. Use a function or decorated function "
            "defined at module scope instead, or use build_reconstructable_pipeline."
            .format(target=target))
    # Locate the file of the caller (the frame that invoked reconstructable()).
    python_file = get_python_file_from_previous_stack_frame()
    if python_file.endswith("<stdin>"):
        raise DagsterInvariantViolationError(
            "reconstructable() can not reconstruct pipelines from <stdin>, unable to "
            "target file {}. Use a pipeline defined in a module or file instead, or "
            "use build_reconstructable_pipeline.".format(python_file))
    pointer = FileCodePointer(
        python_file=python_file, fn_name=target.__name__, working_directory=os.getcwd())
    # ipython:
    # Exception: Can not import module <ipython-input-3-70f55f9e97d2> from path /Users/max/Desktop/richard_brady_repro/<ipython-input-3-70f55f9e97d2>, unable to load spec.
    # Exception: Can not import module  from path /private/var/folders/zc/zyv5jx615157j4mypwcx_kxr0000gn/T/b3edec1e-b4c5-4ea4-a4ae-24a01e566aba/, unable to load spec.
    return bootstrap_standalone_recon_pipeline(pointer)
def executor_failing(_):
    """Executor hook that unconditionally fails by raising DagsterInvariantViolationError."""
    failure = DagsterInvariantViolationError()
    raise failure
def create_location(self) -> NoReturn:
    """Always raises: a registered origin alone carries too little data to build a location."""
    message = (
        "A RegisteredRepositoryLocationOrigin does not have enough information to load its "
        "repository location on its own."
    )
    raise DagsterInvariantViolationError(message)
def reconstructable(target):
    """
    Create a :py:class:`~dagster.core.definitions.reconstructable.ReconstructablePipeline` from a
    function that returns a :py:class:`~dagster.PipelineDefinition`/:py:class:`~dagster.JobDefinition`,
    or a function decorated with :py:func:`@pipeline <dagster.pipeline>`/:py:func:`@job <dagster.job>`.

    When your pipeline/job must cross process boundaries, e.g., for execution on multiple nodes or
    in different systems (like ``dagstermill``), Dagster must know how to reconstruct the
    pipeline/job on the other side of the process boundary.

    Passing a job created with ``~dagster.GraphDefinition.to_job`` to ``reconstructable()``,
    requires you to wrap that job's definition in a module-scoped function, and pass that function
    instead:

    .. code-block:: python

        from dagster import graph, reconstructable

        @graph
        def my_graph():
            ...

        def define_my_job():
            return my_graph.to_job()

        reconstructable(define_my_job)

    This function implements a very conservative strategy for reconstruction, so that its behavior
    is easy to predict, but as a consequence it is not able to reconstruct certain kinds of
    pipelines or jobs, such as those defined by lambdas, in nested scopes (e.g., dynamically
    within a method call), or in interactive environments such as the Python REPL or Jupyter
    notebooks.

    If you need to reconstruct objects constructed in these ways, you should use
    :py:func:`~dagster.reconstructable.build_reconstructable_job` instead, which allows you to
    specify your own reconstruction strategy.

    Examples:

    .. code-block:: python

        from dagster import job, reconstructable

        @job
        def foo_job():
            ...

        reconstructable_foo_job = reconstructable(foo_job)

        @graph
        def foo():
            ...

        def make_bar_job():
            return foo.to_job()

        reconstructable_bar_job = reconstructable(make_bar_job)
    """
    from dagster.core.definitions import PipelineDefinition, JobDefinition

    if not seven.is_function_or_decorator_instance_of(target, PipelineDefinition):
        if isinstance(target, JobDefinition):
            # Jobs built via GraphDefinition.to_job must be wrapped in a module-scope function.
            raise DagsterInvariantViolationError(
                "Reconstructable target was not a function returning a job definition, or a job "
                "definition produced by a decorated function. If your job was constructed using "
                "``GraphDefinition.to_job``, you must wrap the ``to_job`` call in a function at "
                "module scope, ie not within any other functions. "
                "To learn more, check out the docs on ``reconstructable``: "
                "https://docs.dagster.io/_apidocs/execution#dagster.reconstructable"
            )
        raise DagsterInvariantViolationError(
            "Reconstructable target should be a function or definition produced "
            "by a decorated function, got {type}.".format(type=type(target)),
        )
    # Lambdas have no importable name and so cannot be reconstructed in another process.
    if seven.is_lambda(target):
        raise DagsterInvariantViolationError(
            "Reconstructable target can not be a lambda. Use a function or "
            "decorated function defined at module scope instead, or use "
            "build_reconstructable_job.")
    # A __qualname__ differing from __name__ indicates a nested (non-module-scope) definition.
    if seven.qualname_differs(target):
        raise DagsterInvariantViolationError(
            'Reconstructable target "{target.__name__}" has a different '
            '__qualname__ "{target.__qualname__}" indicating it is not '
            "defined at module scope. Use a function or decorated function "
            "defined at module scope instead, or use build_reconstructable_job."
            .format(target=target))
    try:
        # Prefer module-based reconstruction when the target is importable from a real module.
        # inspect.getmodule may return None (e.g., for dynamically created functions); the
        # original code relied on a bare `except:` to swallow the resulting AttributeError,
        # which also swallowed SystemExit/KeyboardInterrupt. Guard None explicitly and catch
        # only Exception, preserving the fall-through to file-based reconstruction.
        module = inspect.getmodule(target)
        if (hasattr(target, "__module__") and hasattr(target, "__name__")
                and module is not None and module.__name__ != "__main__"):
            return ReconstructablePipeline.for_module(target.__module__, target.__name__)
    except Exception:
        pass
    python_file = get_python_file_from_target(target)
    if not python_file:
        raise DagsterInvariantViolationError(
            "reconstructable() can not reconstruct jobs or pipelines defined in interactive "
            "environments like <stdin>, IPython, or Jupyter notebooks. "
            "Use a pipeline defined in a module or file instead, or use build_reconstructable_job."
        )
    pointer = FileCodePointer(
        python_file=python_file, fn_name=target.__name__, working_directory=os.getcwd())
    return bootstrap_standalone_recon_pipeline(pointer)
def create_location(self) -> NoReturn:
    """Always raises: this origin requires a DynamicWorkspace to produce a handle."""
    message = (
        "A ManagedGrpcPythonEnvRepositoryLocationOrigin needs a DynamicWorkspace"
        " in order to create a handle."
    )
    raise DagsterInvariantViolationError(message)
def create_run_for_pipeline(
    self,
    pipeline_def,
    execution_plan=None,
    run_id=None,
    run_config=None,
    mode=None,
    solids_to_execute=None,
    step_keys_to_execute=None,
    status=None,
    tags=None,
    root_run_id=None,
    parent_run_id=None,
    solid_selection=None,
):
    """Create a PipelineRun for `pipeline_def`, building and snapshotting its execution plan.

    Applies solid subsetting, resolves memoized step keys when the run is tagged as memoized,
    and delegates final persistence to `self.create_run`.

    Args:
        pipeline_def (PipelineDefinition): The pipeline to create a run for.
        execution_plan (Optional[ExecutionPlan]): Pre-built full plan; built here if absent.
        run_id / run_config / mode / status / tags / root_run_id / parent_run_id: Passed
            through to `self.create_run`.
        solids_to_execute (Optional[AbstractSet[str]]): Frozen solid subset to execute.
        step_keys_to_execute (Optional[List[str]]): Step-level subset of the plan.
        solid_selection (Optional[List[str]]): Raw user selection queries, passed through only.

    Returns:
        PipelineRun: The newly created run.

    Raises:
        DagsterInvariantViolationError: If `step_keys_to_execute` is combined with a
            memoized run.
    """
    from dagster.core.execution.api import create_execution_plan
    from dagster.core.execution.plan.plan import ExecutionPlan
    from dagster.core.snap import snapshot_from_execution_plan

    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)
    check.opt_inst_param(execution_plan, "execution_plan", ExecutionPlan)
    # note that solids_to_execute is required to execute the solid subset, which is the
    # frozenset version of the previous solid_subset.
    # solid_selection is not required and will not be converted to solids_to_execute here.
    # i.e. this function doesn't handle solid queries.
    # solid_selection is only used to pass the user queries further down.
    check.opt_set_param(solids_to_execute, "solids_to_execute", of_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)

    if solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            # for the case when pipeline_def is created by IPipeline or ExternalPipeline
            check.invariant(
                solids_to_execute == pipeline_def.solids_to_execute,
                "Cannot create a PipelineRun from pipeline subset {pipeline_solids_to_execute} "
                "that conflicts with solids_to_execute arg {solids_to_execute}"
                .format(
                    pipeline_solids_to_execute=str_format_list(
                        pipeline_def.solids_to_execute),
                    solids_to_execute=str_format_list(solids_to_execute),
                ),
            )
        else:
            # for cases when `create_run_for_pipeline` is directly called
            pipeline_def = pipeline_def.get_pipeline_subset_def(
                solids_to_execute=solids_to_execute)

    # Build the full (unsubsetted) plan unless one was supplied by the caller.
    full_execution_plan = execution_plan or create_execution_plan(
        pipeline_def,
        run_config=run_config,
        mode=mode,
    )
    # A caller-supplied plan must itself be full (no step subset already applied).
    check.invariant(
        len(full_execution_plan.step_keys_to_execute) == len(
            full_execution_plan.steps))

    if _is_memoized_run(tags):
        # Memoization computes the step subset itself; an explicit one would conflict.
        if step_keys_to_execute:
            raise DagsterInvariantViolationError(
                "step_keys_to_execute parameter cannot be used in conjunction with memoized "
                "pipeline runs.")
        step_keys_to_execute = self.resolve_unmemoized_steps(
            full_execution_plan,
            run_config=run_config,
            mode=mode,
        )  # TODO: tighter integration with existing step_keys_to_execute functionality

    subsetted_execution_plan = (
        full_execution_plan.build_subset_plan(step_keys_to_execute)
        if step_keys_to_execute else full_execution_plan)

    return self.create_run(
        pipeline_name=pipeline_def.name,
        run_id=run_id,
        run_config=run_config,
        mode=check.opt_str_param(
            mode, "mode", default=pipeline_def.get_default_mode_name()),
        solid_selection=solid_selection,
        solids_to_execute=solids_to_execute,
        step_keys_to_execute=step_keys_to_execute,
        status=status,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=snapshot_from_execution_plan(
            subsetted_execution_plan,
            pipeline_def.get_pipeline_snapshot_id()),
        parent_pipeline_snapshot=pipeline_def.get_parent_pipeline_snapshot(),
    )
def assert_in_composition(solid_name: str) -> None:
    """Raise unless a composition function is currently active on the composition stack."""
    if _composition_stack:
        return
    message = (
        'Attempted to call solid "{solid_name}" outside of a composition function. '
        "Calling solids is only valid in a function decorated with "
        "@pipeline or @composite_solid."
    ).format(solid_name=solid_name)
    raise DagsterInvariantViolationError(message)
def execute_run(
    pipeline: IPipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    raise_on_error: bool = False,
) -> PipelineExecutionResult:
    """Executes an existing pipeline run synchronously.

    Synchronous version of execute_run_iterator.

    Args:
        pipeline (IPipeline): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute
        instance (DagsterInstance): The instance in which the run has been created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.

    Raises:
        DagsterInvariantViolationError: If a bare PipelineDefinition is passed, or the run
            was canceled before execution could start.
    """
    # Definitions cannot cross process boundaries; require an IPipeline wrapper.
    if isinstance(pipeline, PipelineDefinition):
        raise DagsterInvariantViolationError(
            "execute_run requires an IPipeline but received a PipelineDefinition "
            "directly instead. To support hand-off to other processes provide a "
            "ReconstructablePipeline which can be done using reconstructable(). For in "
            "process only execution you can use InMemoryPipeline."
        )

    check.inst_param(pipeline, "pipeline", IPipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)

    if pipeline_run.status == PipelineRunStatus.CANCELED:
        # Record the no-op in the event log before refusing to execute.
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    # Only fresh runs may be executed here; anything else indicates a state-machine violation.
    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING".format(
            pipeline_run.pipeline_name, pipeline_run.run_id, pipeline_run.status
        ),
    )
    pipeline_def = pipeline.get_definition()
    if pipeline_run.solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            # An already-subsetted pipeline must agree with the run's recorded subset.
            check.invariant(
                pipeline_run.solids_to_execute == pipeline.solids_to_execute,
                "Cannot execute PipelineRun with solids_to_execute {solids_to_execute} that "
                "conflicts with pipeline subset {pipeline_solids_to_execute}.".format(
                    pipeline_solids_to_execute=str_format_set(pipeline.solids_to_execute),
                    solids_to_execute=str_format_set(pipeline_run.solids_to_execute),
                ),
            )
        else:
            # when `execute_run` is directly called, the sub pipeline hasn't been created
            # note that when we receive the solids to execute via PipelineRun, it won't support
            # solid selection query syntax
            pipeline = pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute
            )

    execution_plan = create_execution_plan(
        pipeline,
        run_config=pipeline_run.run_config,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    if is_memoized_run(pipeline_run.tags):
        # Memoized runs re-execute only steps whose outputs are not already available.
        execution_plan = resolve_memoized_execution_plan(execution_plan, pipeline_run.run_config)

    _execute_run_iterable = ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=pipeline_execution_iterator,
        execution_context_manager=PipelineExecutionContextManager(
            execution_plan=execution_plan,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            raise_on_error=raise_on_error,
        ),
    )
    # Draining the iterable drives the run to completion; events are retained for the result.
    event_list = list(_execute_run_iterable)
    pipeline_context = _execute_run_iterable.pipeline_context

    # workaround for mem_io_manager to work in reconstruct_context, e.g. result.result_for_solid
    # in-memory values dict will get lost when the resource is re-initiated in reconstruct_context
    # so instead of re-initiating every single resource, we pass the resource instances to
    # reconstruct_context directly to avoid re-building from resource def.
    resource_instances_to_override = {}
    if pipeline_context:  # None if we have a pipeline failure
        for (
            key,
            resource_instance,
        ) in pipeline_context.scoped_resources_builder.resource_instance_dict.items():
            if isinstance(resource_instance, InMemoryIOManager):
                resource_instances_to_override[key] = resource_instance

    return PipelineExecutionResult(
        pipeline.get_definition(),
        pipeline_run.run_id,
        event_list,
        # NOTE(review): this lambda reads pipeline_context.intermediate_storage, but
        # pipeline_context may be None after a pipeline failure (see the guard above) —
        # invoking the reconstruct-context callback in that case would raise; confirm intended.
        lambda hardcoded_resources_arg: scoped_pipeline_context(
            execution_plan,
            pipeline_run.run_config,
            pipeline_run,
            instance,
            intermediate_storage=pipeline_context.intermediate_storage,
            resource_instances_to_override=hardcoded_resources_arg,
        ),
        resource_instances_to_override=resource_instances_to_override,
    )
def _validate_and_coerce_solid_result_to_iterator(result, context, output_defs):
    """Coerce the raw return value of a solid compute function into an iterator of Outputs.

    Handles, in order: disallowed returned (not yielded) event types; generators (passed
    through); a single Output; a bare value for a single output def; a tuple matching multiple
    output defs; and error cases for everything else.

    Args:
        result: Whatever the user's compute function returned.
        context: Step execution context (used for logging, metadata, and error text).
        output_defs (List[OutputDefinition]): The solid's declared outputs.

    Yields:
        Output (or user-yielded events when `result` is a generator).

    Raises:
        DagsterInvariantViolationError: For returned event types, outputs on a no-output
            solid, or a non-Output/non-tuple return that cannot be coerced.
    """
    # Event types must be yielded, not returned — returning them is ambiguous with an
    # implied single output value.
    if isinstance(result, (AssetMaterialization, Materialization, ExpectationResult)):
        raise DagsterInvariantViolationError((
            "Error in {described_op}: If you are returning an AssetMaterialization "
            "or an ExpectationResult from {node_type} you must yield them to avoid "
            "ambiguity with an implied result from returning a value.".format(
                described_op=context.describe_op(),
                node_type=context.solid_def.node_type_str,
            )))
    if inspect.isgenerator(result):
        # this happens when a user explicitly returns a generator in the solid
        for event in result:
            yield event
    elif isinstance(result, Output):
        yield result
    elif len(output_defs) == 1:
        # A bare value is wrapped in an Output for the solid's single output def.
        if result is None and output_defs[0].is_required is False:
            context.log.warn(
                'Value "None" returned for non-required output "{output_name}" of {described_op}. '
                "This value will be passed to downstream {node_type}s. For conditional execution use\n"
                ' yield Output(value, "{output_name}")\n'
                "when you want the downstream {node_type}s to execute, "
                "and do not yield it when you want downstream solids to skip.".
                format(
                    output_name=output_defs[0].name,
                    described_op=context.describe_op(),
                    node_type=context.solid_def.node_type_str,
                ))
        metadata = context.get_output_metadata(output_defs[0].name)
        yield Output(value=result, output_name=output_defs[0].name, metadata=metadata)
    elif len(output_defs) > 1 and isinstance(result, tuple):
        # Positional tuple maps element-wise onto the declared output defs.
        if len(result) != len(output_defs):
            check.failed(
                f"Solid '{context.solid_name}' has {len(output_defs)} output definitions, but "
                f"returned a tuple with {len(result)} elements")
        for output_def, element in zip(output_defs, result):
            metadata = context.get_output_metadata(output_def.name)
            yield Output(output_name=output_def.name, value=element, metadata=metadata)
    elif result is not None:
        # Anything else is an error; distinguish "declared no outputs" from "wrong shape".
        if not output_defs:
            raise DagsterInvariantViolationError((
                "Error in {described_op}: Unexpectedly returned output {result} "
                "of type {type_}. {node_type} is explicitly defined to return no "
                "results.").format(
                    described_op=context.describe_op(),
                    result=result,
                    type_=type(result),
                    node_type=context.solid_def.node_type_str.capitalize(),
                ))
        raise DagsterInvariantViolationError(
            ("Error in {described_op}: {node_type} unexpectedly returned "
             "output {result} of type {type_}. Should "
             "be a generator, containing or yielding "
             "{n_results} results: {{{expected_results}}}.").format(
                 described_op=context.describe_op(),
                 node_type=context.solid_def.node_type_str,
                 result=result,
                 type_=type(result),
                 n_results=len(output_defs),
                 expected_results=", ".join([
                     "'{result_name}': {dagster_type}".format(
                         result_name=output_def.name,
                         dagster_type=output_def.dagster_type,
                     ) for output_def in output_defs
                 ]),
             ))
def get_step_input(plan_builder, solid, input_name, input_def, dependency_structure, handle, parent_step_inputs):
    '''Build the StepInput describing where ``input_name`` of ``solid`` gets its value.

    Sources are checked in priority order: explicit solid config, a single
    upstream output, multiple upstream outputs (fan-in), an input mapped down
    from the enclosing composite, then the input definition's default value.

    Returns:
        StepInput, or None when the input has the Nothing type kind and no
        source (such inputs require no value).

    Raises:
        DagsterInvariantViolationError: when no source can be determined for a
            non-Nothing input.
    '''
    check.inst_param(plan_builder, 'plan_builder', _PlanBuilder)
    check.inst_param(solid, 'solid', Solid)
    check.str_param(input_name, 'input_name')
    check.inst_param(input_def, 'input_def', InputDefinition)
    check.inst_param(dependency_structure, 'dependency_structure', DependencyStructure)
    check.opt_inst_param(handle, 'handle', SolidHandle)
    # Bugfix: capture the checked value. opt_list_param coerces None to [],
    # but the original call discarded its return value, so a None
    # parent_step_inputs would crash the comprehension in the
    # container_maps_input branch below.
    parent_step_inputs = check.opt_list_param(parent_step_inputs, 'parent_step_inputs', of_type=StepInput)

    # 1) Value supplied directly in the solid's config section.
    solid_config = plan_builder.environment_config.solids.get(str(handle))
    if solid_config and input_name in solid_config.inputs:
        return StepInput(
            name=input_name,
            dagster_type=input_def.dagster_type,
            source_type=StepInputSourceType.CONFIG,
            config_data=solid_config.inputs[input_name],
        )

    input_handle = solid.input_handle(input_name)

    # 2) A single upstream output feeds this input.
    if dependency_structure.has_singular_dep(input_handle):
        solid_output_handle = dependency_structure.get_singular_dep(input_handle)
        return StepInput(
            name=input_name,
            dagster_type=input_def.dagster_type,
            source_type=StepInputSourceType.SINGLE_OUTPUT,
            source_handles=[plan_builder.get_output_handle(solid_output_handle)],
        )

    # 3) Multiple upstream outputs (fan-in) feed this input.
    if dependency_structure.has_multi_deps(input_handle):
        solid_output_handles = dependency_structure.get_multi_deps(input_handle)
        return StepInput(
            name=input_name,
            dagster_type=input_def.dagster_type,
            source_type=StepInputSourceType.MULTIPLE_OUTPUTS,
            source_handles=[
                plan_builder.get_output_handle(solid_output_handle)
                for solid_output_handle in solid_output_handles
            ],
        )

    # 4) Input mapped down from the enclosing composite: reuse the source
    # already resolved for the parent's corresponding input, if present.
    if solid.container_maps_input(input_name):
        parent_name = solid.container_mapped_input(input_name).definition.name
        parent_inputs = {step_input.name: step_input for step_input in parent_step_inputs}
        if parent_name in parent_inputs:
            parent_input = parent_inputs[parent_name]
            return StepInput(
                name=input_name,
                dagster_type=input_def.dagster_type,
                source_type=parent_input.source_type,
                source_handles=parent_input.source_handles,
                config_data=parent_input.config_data,
            )

    # 5) Fall back to the default value declared on the input definition.
    if solid.definition.input_has_default(input_name):
        return StepInput(
            name=input_name,
            dagster_type=input_def.dagster_type,
            source_type=StepInputSourceType.DEFAULT_VALUE,
            config_data=solid.definition.default_value_for_input(input_name),
        )

    # At this point we have an input that is not hooked up to
    # the output of another solid or provided via environment config.

    # We will allow this for "Nothing" type inputs and continue.
    if input_def.dagster_type.kind == DagsterTypeKind.NOTHING:
        return None

    # Otherwise we throw an error.
    raise DagsterInvariantViolationError(
        ('In pipeline {pipeline_name} solid {solid_name}, input {input_name} '
         'must get a value either (a) from a dependency or (b) from the '
         'inputs section of its configuration.').format(
            pipeline_name=plan_builder.pipeline_name,
            solid_name=solid.name,
            input_name=input_name))
def _store_output(
    step_context: StepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Union[Output, DynamicOutput],
    input_lineage: List[AssetLineageInfo],
) -> Iterator[DagsterEvent]:
    """Hand ``output`` to its IO manager and emit the resulting events.

    Yields AssetMaterialization events — both those returned by the IO
    manager's handle_output() and those derived from the output's asset key —
    followed by a single handled-output event.
    """
    output_def = step_context.solid_def.output_def_named(step_output_handle.output_name)
    output_manager = step_context.get_io_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)

    # User-code boundary: failures inside handle_output are wrapped in
    # DagsterExecutionHandleOutputError, except for the control-flow
    # exceptions Failure / RetryRequested which pass through.
    with user_code_error_boundary(
        DagsterExecutionHandleOutputError,
        control_flow_exceptions=[Failure, RetryRequested],
        msg_fn=lambda: (
            f'Error occurred while handling output "{output_context.name}" of '
            f'step "{step_context.step.key}":'
        ),
        step_key=step_context.step.key,
        output_name=output_context.name,
    ):
        handle_output_res = output_manager.handle_output(output_context, output.value)

    manager_materializations = []
    manager_metadata_entries = []
    if handle_output_res is not None:
        # handle_output may return a single event or a generator of events;
        # ensure_gen normalizes both cases.
        for elt in ensure_gen(handle_output_res):
            if isinstance(elt, AssetMaterialization):
                manager_materializations.append(elt)
            elif isinstance(elt, (EventMetadataEntry, PartitionMetadataEntry)):
                experimental_functionality_warning(
                    "Yielding metadata from an IOManager's handle_output() function"
                )
                manager_metadata_entries.append(elt)
            else:
                raise DagsterInvariantViolationError(
                    f"IO manager on output {output_def.name} has returned "
                    f"value {elt} of type {type(elt).__name__}. The return type can only be "
                    "one of AssetMaterialization, EventMetadataEntry, PartitionMetadataEntry."
                )

    # do not alter explicitly created AssetMaterializations
    for materialization in manager_materializations:
        yield DagsterEvent.asset_materialization(step_context, materialization, input_lineage)

    asset_key, partitions = _asset_key_and_partitions_for_output(
        output_context, output_def, output_manager
    )
    if asset_key:
        for materialization in _get_output_asset_materializations(
            asset_key,
            partitions,
            output,
            output_def,
            manager_metadata_entries,
        ):
            yield DagsterEvent.asset_materialization(step_context, materialization, input_lineage)

    yield DagsterEvent.handled_output(
        step_context,
        output_name=step_output_handle.output_name,
        manager_key=output_def.io_manager_key,
        # Bugfix: this is the handled-*output* event; the override previously
        # read 'Handled input "..."', which misreported the event in logs.
        message_override=f'Handled output "{step_output_handle.output_name}" using intermediate storage'
        if isinstance(output_manager, IntermediateStorageAdapter)
        else None,
        metadata_entries=[
            entry for entry in manager_metadata_entries if isinstance(entry, EventMetadataEntry)
        ],
    )
def execute_run(pipeline, pipeline_run, instance, raise_on_error=False):
    """Executes an existing pipeline run synchronously.

    Synchronous version of execute_run_iterator.

    Args:
        pipeline (ExecutablePipeline): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute
        instance (DagsterInstance): The instance in which the run has been created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.
    """
    if isinstance(pipeline, PipelineDefinition):
        raise DagsterInvariantViolationError(
            "execute_run requires an ExecutablePipeline but received a PipelineDefinition "
            "directly instead. To support hand-off to other processes provide a "
            "ReconstructablePipeline which can be done using reconstructable(). For in "
            "process only execution you can use InMemoryExecutablePipeline.")

    check.inst_param(pipeline, "pipeline", ExecutablePipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    # Improvement: the bare invariant previously gave no context on failure;
    # include the run id and its actual status in the error message.
    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED,
        "Pipeline run {run_id} has status {status}, expected NOT_STARTED".format(
            run_id=pipeline_run.run_id, status=pipeline_run.status
        ),
    )

    pipeline_def = pipeline.get_definition()
    if pipeline_run.solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            # The subset was already built; it must agree with the run's record.
            check.invariant(
                pipeline_run.solids_to_execute == pipeline.solids_to_execute,
                "Cannot execute PipelineRun with solids_to_execute {solids_to_execute} that "
                "conflicts with pipeline subset {pipeline_solids_to_execute}.".format(
                    pipeline_solids_to_execute=str_format_set(pipeline.solids_to_execute),
                    solids_to_execute=str_format_set(pipeline_run.solids_to_execute),
                ),
            )
        else:
            # when `execute_run` is directly called, the sub pipeline hasn't been created
            # note that when we receive the solids to execute via PipelineRun, it won't support
            # solid selection query syntax
            pipeline = pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute)

    execution_plan = create_execution_plan(
        pipeline,
        run_config=pipeline_run.run_config,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    _execute_run_iterable = _ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=_pipeline_execution_iterator,
        execution_context_manager=PipelineExecutionContextManager(
            execution_plan=execution_plan,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            raise_on_error=raise_on_error,
        ),
    )
    # Drain the iterable to run the pipeline to completion, keeping all events
    # for the result object.
    event_list = list(_execute_run_iterable)
    pipeline_context = _execute_run_iterable.pipeline_context

    return PipelineExecutionResult(
        pipeline.get_definition(),
        pipeline_run.run_id,
        event_list,
        # Lazily re-open a scoped pipeline context so the result can access
        # intermediate storage on demand.
        lambda: scoped_pipeline_context(
            execution_plan,
            pipeline_run.run_config,
            pipeline_run,
            instance,
            intermediate_storage=pipeline_context.intermediate_storage,
            system_storage_data=SystemStorageData(
                intermediate_storage=pipeline_context.intermediate_storage,
                file_manager=pipeline_context.file_manager,
            ),
        ),
    )
def _check_execute_pipeline_args(pipeline, run_config, mode, preset, tags, instance, solid_selection=None):
    '''Validate and normalize the arguments to a pipeline execution call.

    Reconciles ``preset`` against explicitly-passed ``run_config``, ``mode``,
    and ``solid_selection`` (the preset's values win only where they agree or
    where nothing was passed), resolves the mode, merges tags, materializes an
    ephemeral instance if none was given, and applies any solid selection as a
    pipeline subset.

    Returns:
        Tuple of (pipeline, run_config, instance, mode, tags,
        solids_to_execute, solid_selection), all normalized.

    Raises:
        DagsterInvariantViolationError: if the mode is unknown, or if a
            multi-mode pipeline is executed without specifying a mode.
    '''
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    run_config = check.opt_dict_param(run_config, 'run_config')
    check.opt_str_param(mode, 'mode')
    check.opt_str_param(preset, 'preset')
    # mode and preset are mutually exclusive: a preset carries its own mode.
    check.invariant(
        not (mode is not None and preset is not None),
        'You may set only one of `mode` (got {mode}) or `preset` (got {preset}).'
        .format(mode=mode, preset=preset),
    )
    tags = check.opt_dict_param(tags, 'tags', key_type=str)
    check.opt_list_param(solid_selection, 'solid_selection', of_type=str)

    if preset is not None:
        pipeline_preset = pipeline_def.get_preset(preset)

        # Any explicitly-passed run_config must agree with the preset's.
        if pipeline_preset.run_config is not None:
            check.invariant(
                (not run_config) or (pipeline_preset.run_config == run_config),
                'The environment set in preset \'{preset}\' does not agree with the environment '
                'passed in the `run_config` argument.'.format(preset=preset),
            )
            run_config = pipeline_preset.run_config

        # load solid_selection from preset
        if pipeline_preset.solid_selection is not None:
            check.invariant(
                solid_selection is None or solid_selection == pipeline_preset.solid_selection,
                'The solid_selection set in preset \'{preset}\', {preset_subset}, does not agree with '
                'the `solid_selection` argument: {solid_selection}'.format(
                    preset=preset,
                    preset_subset=pipeline_preset.solid_selection,
                    solid_selection=solid_selection,
                ),
            )
            solid_selection = pipeline_preset.solid_selection

        # The preset's mode always takes effect (mode is None here per the
        # mutual-exclusion invariant above, so this check is belt-and-braces).
        check.invariant(
            mode is None or mode == pipeline_preset.mode,
            'Mode {mode} does not agree with the mode set in preset \'{preset}\': '
            '(\'{preset_mode}\')'.format(preset=preset, preset_mode=pipeline_preset.mode, mode=mode),
        )
        mode = pipeline_preset.mode

    if mode is not None:
        # An explicit (or preset-derived) mode must exist on the pipeline.
        if not pipeline_def.has_mode_definition(mode):
            raise DagsterInvariantViolationError((
                'You have attempted to execute pipeline {name} with mode {mode}. '
                'Available modes: {modes}').format(
                    name=pipeline_def.name,
                    mode=mode,
                    modes=pipeline_def.available_modes,
                ))
    else:
        # No mode given: only allowed when the pipeline has a single mode.
        if pipeline_def.is_multi_mode:
            raise DagsterInvariantViolationError((
                'Pipeline {name} has multiple modes (Available modes: {modes}) and you have '
                'attempted to execute it without specifying a mode. Set '
                'mode property on the PipelineRun object.').format(
                    name=pipeline_def.name, modes=pipeline_def.available_modes))
        mode = pipeline_def.get_default_mode_name()

    # Pipeline-level tags are the base; caller-supplied tags override them.
    tags = merge_dicts(pipeline_def.tags, tags)

    check.opt_inst_param(instance, 'instance', DagsterInstance)
    instance = instance or DagsterInstance.ephemeral()

    # generate pipeline subset from the given solid_selection
    if solid_selection:
        pipeline = pipeline.subset_for_execution(solid_selection)

    return (
        pipeline,
        run_config,
        instance,
        mode,
        tags,
        pipeline.solids_to_execute,
        solid_selection,
    )