Example #1
def resolve_to_config_type(dagster_type):
    from dagster.core.types.wrapping.mapping import (
        remap_python_builtin_for_config,
        is_supported_config_python_builtin,
    )
    from dagster.core.types.runtime.runtime_type import RuntimeType

    if _is_config_type_class(dagster_type):
        check.param_invariant(
            False,
            'dagster_type',
            'Cannot pass a config type class to resolve_to_config_type. Got {dagster_type}'
            .format(dagster_type=dagster_type),
        )

    check.invariant(
        not (isinstance(dagster_type, type)
             and issubclass(dagster_type, RuntimeType)),
        'Cannot resolve a runtime type to a config type',
    )

    if is_typing_type(dagster_type):
        raise DagsterInvariantViolationError((
            'You have passed in {dagster_type} in the config system. Types from '
            'the typing module in python are not allowed in the config system. '
            'You must use types that are imported from dagster or primitive types '
            'such as bool, int, etc.').format(dagster_type=dagster_type))

    if isinstance(dagster_type, (WrappingSetType, DagsterSetApi)):
        raise DagsterInvalidDefinitionError(
            'Cannot use Set in the context of a config field. Please use List instead.'
        )
    if isinstance(dagster_type, (WrappingTupleType, DagsterTupleApi)):
        raise DagsterInvalidDefinitionError(
            'Cannot use Tuple in the context of a config field. Please use List instead.'
        )

    # Short circuit if it's already a Config Type
    if isinstance(dagster_type, ConfigType):
        return dagster_type

    # If we have reached this point then either:
    #  1) We have been passed a python builtin
    #  2) We have been passed a dagster wrapping type that needs to be converted to
    #     its config variant, e.g. dagster.List
    #  3) We have been passed an invalid thing. We return False to signify this. It is
    #     up to callers to report a reasonable error.

    if is_supported_config_python_builtin(dagster_type):
        return remap_python_builtin_for_config(dagster_type)

    if dagster_type is None:
        return ConfigAnyInstance
    if BuiltinEnum.contains(dagster_type):
        return ConfigType.from_builtin_enum(dagster_type)
    if isinstance(dagster_type, (WrappingListType, DagsterListApi)):
        return resolve_to_config_list(dagster_type)
    if isinstance(dagster_type, WrappingNullableType):
        return resolve_to_config_nullable(dagster_type)

    # This means we were passed something invalid, and we are returning False to the
    # callsite. We do the error reporting there because those callsites have more context.
    return False
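
For reference, a minimal caller-side sketch of the contract above (the names and the error raised here are illustrative assumptions, not part of the original): the function returns a ConfigType on success and False when it cannot resolve the input, leaving error reporting to the caller.

# Hedged usage sketch -- handling the False sentinel at a call site.
config_type = resolve_to_config_type(int)  # a supported builtin resolves to a config type
if config_type is False:
    # hypothetical caller-side error; real callers raise their own, more contextual errors
    raise TypeError("could not resolve the given type to a config type")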
Example #2
File: init.py  Project: keyz/dagster
 def resource_def(self) -> Optional[ResourceDefinition]:
     raise DagsterInvariantViolationError(
         "UnboundInitLoggerContext has not been validated against a logger definition."
     )
Example #3
def _check_execute_pipeline_args(pipeline,
                                 run_config,
                                 mode,
                                 preset,
                                 tags,
                                 solid_selection=None):
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)

    run_config = check.opt_dict_param(run_config, "run_config")
    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    check.invariant(
        not (mode is not None and preset is not None),
        "You may set only one of `mode` (got {mode}) or `preset` (got {preset})."
        .format(mode=mode, preset=preset),
    )

    tags = check.opt_dict_param(tags, "tags", key_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)

    if preset is not None:
        pipeline_preset = pipeline_def.get_preset(preset)

        if pipeline_preset.run_config is not None:
            check.invariant(
                (not run_config) or (pipeline_preset.run_config == run_config),
                "The environment set in preset '{preset}' does not agree with the environment "
                "passed in the `run_config` argument.".format(preset=preset),
            )

            run_config = pipeline_preset.run_config

        # load solid_selection from preset
        if pipeline_preset.solid_selection is not None:
            check.invariant(
                solid_selection is None
                or solid_selection == pipeline_preset.solid_selection,
                "The solid_selection set in preset '{preset}', {preset_subset}, does not agree with "
                "the `solid_selection` argument: {solid_selection}".format(
                    preset=preset,
                    preset_subset=pipeline_preset.solid_selection,
                    solid_selection=solid_selection,
                ),
            )
            solid_selection = pipeline_preset.solid_selection

        check.invariant(
            mode is None or mode == pipeline_preset.mode,
            "Mode {mode} does not agree with the mode set in preset '{preset}': "
            "('{preset_mode}')".format(preset=preset,
                                       preset_mode=pipeline_preset.mode,
                                       mode=mode),
        )

        mode = pipeline_preset.mode

        tags = merge_dicts(pipeline_preset.tags, tags)

    if mode is not None:
        if not pipeline_def.has_mode_definition(mode):
            raise DagsterInvariantViolationError((
                "You have attempted to execute pipeline {name} with mode {mode}. "
                "Available modes: {modes}").format(
                    name=pipeline_def.name,
                    mode=mode,
                    modes=pipeline_def.available_modes,
                ))
    else:
        if pipeline_def.is_multi_mode:
            raise DagsterInvariantViolationError((
                "Pipeline {name} has multiple modes (Available modes: {modes}) and you have "
                "attempted to execute it without specifying a mode. Set "
                "mode property on the PipelineRun object.").format(
                    name=pipeline_def.name,
                    modes=pipeline_def.available_modes))
        mode = pipeline_def.get_default_mode_name()

    tags = merge_dicts(pipeline_def.tags, tags)

    # generate pipeline subset from the given solid_selection
    if solid_selection:
        pipeline = pipeline.subset_for_execution(solid_selection)

    return (
        pipeline,
        run_config,
        mode,
        tags,
        pipeline.solids_to_execute,
        solid_selection,
    )
Example #4
 def get_pipeline_subset_def(self, solids_to_execute):
     raise DagsterInvariantViolationError(
         'Pipeline subsets may not be subset again.')
Example #5
def _step_output_error_checked_user_event_sequence(
    step_context: SystemStepExecutionContext,
    user_event_sequence: Iterator[SolidOutputUnion]
) -> Iterator[SolidOutputUnion]:
    """
    Process the event sequence to check for invariant violations in the event
    sequence related to Output events emitted from the compute_fn.

    This consumes and emits an event sequence.
    """
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)
    check.generator_param(user_event_sequence, "user_event_sequence")

    step = step_context.step
    output_names = [output_def.name for output_def in step.step_outputs]
    seen_outputs: Set[str] = set()
    seen_mapping_keys: Dict[str, Set[str]] = defaultdict(set)

    for user_event in user_event_sequence:
        if not isinstance(user_event, (Output, DynamicOutput)):
            yield user_event
            continue

        # do additional processing on Outputs
        output = user_event
        if not step.has_step_output(output.output_name):
            raise DagsterInvariantViolationError(
                'Core compute for solid "{handle}" returned an output '
                '"{output.output_name}" that does not exist. The available '
                "outputs are {output_names}".format(handle=str(
                    step.solid_handle),
                                                    output=output,
                                                    output_names=output_names))

        step_output = step.step_output_named(output.output_name)
        output_def = step_context.pipeline_def.get_solid(
            step_output.solid_handle).output_def_named(step_output.name)

        if isinstance(output, Output):
            if output.output_name in seen_outputs:
                raise DagsterInvariantViolationError(
                    'Compute for solid "{handle}" returned an output '
                    '"{output.output_name}" multiple times'.format(
                        handle=str(step.solid_handle), output=output))

            if output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f'Compute for solid "{step.solid_handle}" for output "{output.output_name}" '
                    "defined as dynamic must yield DynamicOutput, got Output.")
        else:
            if not output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f'Compute for solid "{step.solid_handle}" yielded a DynamicOutput, '
                    "but did not use DynamicOutputDefinition.")
            if output.mapping_key in seen_mapping_keys[output.output_name]:
                raise DagsterInvariantViolationError(
                    f'Compute for solid "{step.solid_handle}" yielded a DynamicOutput with '
                    f'mapping_key "{output.mapping_key}" multiple times.')
            seen_mapping_keys[output.output_name].add(output.mapping_key)

        yield output
        seen_outputs.add(output.output_name)

    for step_output in step.step_outputs:
        step_output_def = step_context.solid_def.output_def_named(
            step_output.name)
        if step_output_def.name not in seen_outputs and not step_output_def.optional:
            if step_output_def.dagster_type.kind == DagsterTypeKind.NOTHING:
                step_context.log.info(
                    'Emitting implicit Nothing for output "{output}" on solid {solid}'
                    .format(output=step_output_def.name,
                            solid=str(step.solid_handle)))
                yield Output(output_name=step_output_def.name, value=None)
            else:
                raise DagsterStepOutputNotFoundError(
                    'Core compute for solid "{handle}" did not return an output '
                    'for non-optional output "{step_output_def.name}"'.format(
                        handle=str(step.solid_handle),
                        step_output_def=step_output_def),
                    step_key=step.key,
                    output_name=step_output_def.name,
                )
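
The docstring above describes a consume-and-emit generator that validates outputs as they stream through. A distilled, dagster-free sketch of that pattern (the types and errors here are simplifications, not the real API) might look like:

def error_checked(events, valid_names):
    # Pass every event through unchanged, but validate anything that looks
    # like an output: it must have a known name and must not repeat.
    seen = set()
    for event in events:
        name = getattr(event, "output_name", None)
        if name is not None:
            if name not in valid_names:
                raise ValueError(f"unknown output {name!r}; expected one of {sorted(valid_names)}")
            if name in seen:
                raise ValueError(f"output {name!r} emitted multiple times")
            seen.add(name)
        yield event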
Example #6
def _check_execute_pipeline_args(pipeline, environment_dict, mode, preset,
                                 tags, solid_subset, instance):
    pipeline, pipeline_def = _check_pipeline(pipeline)
    environment_dict = check.opt_dict_param(environment_dict,
                                            'environment_dict')
    check.opt_str_param(mode, 'mode')
    check.opt_str_param(preset, 'preset')
    check.invariant(
        not (mode is not None and preset is not None),
        'You may set only one of `mode` (got {mode}) or `preset` (got {preset}).'
        .format(mode=mode, preset=preset),
    )

    tags = check.opt_dict_param(tags, 'tags', key_type=str)
    check.opt_list_param(solid_subset, 'solid_subset', of_type=str)
    if solid_subset:
        # resolve solid selection queries to a list of qualified solid names
        parsed_solid_subset = parse_solid_subset(pipeline_def, solid_subset)
        if len(parsed_solid_subset) == 0:
            raise DagsterInvariantViolationError(
                'No qualified solid subset found for solid_subset={input}'.
                format(input=solid_subset))
        solid_subset = parsed_solid_subset

    if preset is not None:
        pipeline_preset = pipeline_def.get_preset(preset)

        if pipeline_preset.environment_dict is not None:
            check.invariant(
                (not environment_dict)
                or (pipeline_preset.environment_dict == environment_dict),
                'The environment set in preset \'{preset}\' does not agree with the environment '
                'passed in the `environment_dict` argument.'.format(
                    preset=preset),
            )

            environment_dict = pipeline_preset.environment_dict

        if pipeline_preset.solid_subset is not None:
            check.invariant(
                solid_subset is None
                or solid_subset == pipeline_preset.solid_subset,
                'The solid_subset set in preset \'{preset}\', {preset_subset}, does not agree with '
                'the `solid_subset` argument: {solid_subset}'.format(
                    preset=preset,
                    preset_subset=pipeline_preset.solid_subset,
                    solid_subset=solid_subset,
                ),
            )
            solid_subset = pipeline_preset.solid_subset

        check.invariant(
            mode is None or mode == pipeline_preset.mode,
            'Mode {mode} does not agree with the mode set in preset \'{preset}\': '
            '(\'{preset_mode}\')'.format(preset=preset,
                                         preset_mode=pipeline_preset.mode,
                                         mode=mode),
        )

        mode = pipeline_preset.mode

    if mode is not None:
        if not pipeline_def.has_mode_definition(mode):
            raise DagsterInvariantViolationError((
                'You have attempted to execute pipeline {name} with mode {mode}. '
                'Available modes: {modes}').format(
                    name=pipeline_def.name,
                    mode=mode,
                    modes=pipeline_def.available_modes,
                ))
    else:
        if pipeline_def.is_multi_mode:
            raise DagsterInvariantViolationError((
                'Pipeline {name} has multiple modes (Available modes: {modes}) and you have '
                'attempted to execute it without specifying a mode. Set '
                'mode property on the PipelineRun object.').format(
                    name=pipeline_def.name,
                    modes=pipeline_def.available_modes))
        mode = pipeline_def.get_default_mode_name()

    tags = merge_dicts(pipeline_def.tags, tags)

    check.opt_inst_param(instance, 'instance', DagsterInstance)
    instance = instance or DagsterInstance.ephemeral()

    if solid_subset:
        pipeline = pipeline.subset_for_execution(solid_subset)
        pipeline_def = pipeline.get_definition()
    else:
        solid_subset = pipeline_def.solid_subset

    return (pipeline, pipeline_def, environment_dict, instance, mode, tags,
            solid_subset)
Example #7
 def get_pipeline_subset_def(
         self,
         solids_to_execute: AbstractSet[str]) -> "PipelineSubsetDefinition":
     raise DagsterInvariantViolationError(
         "Pipeline subsets may not be subset again.")
Example #8
def _step_output_error_checked_user_event_sequence(
    step_context: StepExecutionContext,
    user_event_sequence: Iterator[SolidOutputUnion]
) -> Iterator[SolidOutputUnion]:
    """
    Process the event sequence to check for invariant violations in the event
    sequence related to Output events emitted from the compute_fn.

    This consumes and emits an event sequence.
    """
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.generator_param(user_event_sequence, "user_event_sequence")

    step = step_context.step
    op_label = step_context.describe_op()
    output_names = [output_def.name for output_def in step.step_outputs]

    for user_event in user_event_sequence:
        if not isinstance(user_event, (Output, DynamicOutput)):
            yield user_event
            continue

        # do additional processing on Outputs
        output = user_event
        if not step.has_step_output(cast(str, output.output_name)):
            raise DagsterInvariantViolationError(
                f'Core compute for {op_label} returned an output "{output.output_name}" that does '
                f"not exist. The available outputs are {output_names}")

        step_output = step.step_output_named(cast(str, output.output_name))
        output_def = step_context.pipeline_def.get_solid(
            step_output.solid_handle).output_def_named(step_output.name)

        if isinstance(output, Output):
            if step_context.has_seen_output(output.output_name):
                raise DagsterInvariantViolationError(
                    f'Compute for {op_label} returned an output "{output.output_name}" multiple '
                    "times")

            if output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f'Compute for {op_label} for output "{output.output_name}" defined as dynamic '
                    "must yield DynamicOutput, got Output.")

            step_context.observe_output(output.output_name)

            metadata = step_context.get_output_metadata(output.output_name)
            output = Output(
                value=output.value,
                output_name=output.output_name,
                metadata_entries=output.metadata_entries +
                normalize_metadata(cast(Dict[str, Any], metadata), []),
            )
        else:
            if not output_def.is_dynamic:
                raise DagsterInvariantViolationError(
                    f"Compute for {op_label} yielded a DynamicOutput, but did not use "
                    "DynamicOutputDefinition.")
            if step_context.has_seen_output(output.output_name,
                                            output.mapping_key):
                raise DagsterInvariantViolationError(
                    f"Compute for {op_label} yielded a DynamicOutput with mapping_key "
                    f'"{output.mapping_key}" multiple times.')
            step_context.observe_output(output.output_name, output.mapping_key)
            metadata = step_context.get_output_metadata(
                output.output_name, mapping_key=output.mapping_key)
            output = DynamicOutput(
                value=output.value,
                output_name=output.output_name,
                metadata_entries=output.metadata_entries +
                normalize_metadata(cast(Dict[str, Any], metadata), []),
                mapping_key=output.mapping_key,
            )

        yield output

    for step_output in step.step_outputs:
        step_output_def = step_context.solid_def.output_def_named(
            step_output.name)
        if not step_context.has_seen_output(
                step_output_def.name) and not step_output_def.optional:
            if step_output_def.dagster_type.is_nothing:
                step_context.log.info(
                    f'Emitting implicit Nothing for output "{step_output_def.name}" on {op_label}'
                )
                yield Output(output_name=step_output_def.name, value=None)
            elif not step_output_def.is_dynamic:
                raise DagsterStepOutputNotFoundError(
                    (f"Core compute for {op_label} did not return an output for non-optional "
                     f'output "{step_output_def.name}"'),
                    step_key=step.key,
                    output_name=step_output_def.name,
                )
Example #9
def schedule_partition_range(
    start,
    end,
    cron_schedule,
    fmt,
    timezone,
    execution_time_to_partition_fn,
):
    check.inst_param(start, "start", datetime.datetime)
    check.opt_inst_param(end, "end", datetime.datetime)
    check.str_param(cron_schedule, "cron_schedule")
    check.str_param(fmt, "fmt")
    check.opt_str_param(timezone, "timezone")
    check.callable_param(execution_time_to_partition_fn,
                         "execution_time_to_partition_fn")

    if end and start > end:
        raise DagsterInvariantViolationError(
            'Selected date range start "{start}" is after date range end "{end}"'
            .format(
                start=start.strftime(fmt),
                end=end.strftime(fmt),
            ))

    def get_schedule_range_partitions(current_time=None):
        check.opt_inst_param(current_time, "current_time", datetime.datetime)
        tz = timezone if timezone else pendulum.now().timezone.name
        _start = (start.in_tz(tz) if isinstance(start, pendulum.Pendulum) else
                  pendulum.instance(start, tz=tz))

        if end:
            _end = end
        elif current_time:
            _end = current_time
        else:
            _end = pendulum.now(tz)

        # coerce to the definition timezone
        if isinstance(_end, pendulum.Pendulum):
            _end = _end.in_tz(tz)
        else:
            _end = pendulum.instance(_end, tz=tz)

        end_timestamp = _end.timestamp()

        partitions = []
        for next_time in schedule_execution_time_iterator(
                _start.timestamp(), cron_schedule, tz):

            partition_time = execution_time_to_partition_fn(next_time)

            if partition_time.timestamp() > end_timestamp:
                break

            if partition_time.timestamp() < _start.timestamp():
                continue

            partitions.append(
                Partition(value=partition_time,
                          name=partition_time.strftime(fmt)))

        return partitions[:-1]

    return get_schedule_range_partitions
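
A hedged usage sketch of schedule_partition_range follows; the cron string and the execution-time-to-partition mapping are illustrative assumptions, and execution times are assumed to be pendulum datetimes, per the iterator above.

import datetime

partition_fn = schedule_partition_range(
    start=datetime.datetime(2020, 1, 1),
    end=None,                                # open-ended: falls back to the current time
    cron_schedule="0 0 * * *",               # daily at midnight
    fmt="%Y-%m-%d",
    timezone="UTC",
    execution_time_to_partition_fn=lambda t: t.subtract(days=1),  # partition = day before the tick
)
partitions = partition_fn()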
Example #10
def date_partition_range(
    start, end=None, delta=None, delta_range="days", fmt=None, inclusive=False, timezone=None,
):
    """ Utility function that returns a partition generating function to be used in creating a
    `PartitionSet` definition.

    Args:
        start (datetime): Datetime capturing the start of the time range.
        end  (Optional(datetime)): Datetime capturing the end of the partition.  By default, the
                                   current time is used.  The range is not inclusive of the end
                                   value.
        delta (Optional(timedelta)): Timedelta representing the time duration of each partition.
            DEPRECATED: use 'delta_range' instead, which handles timezone transitions correctly.
        delta_range (Optional(str)): string representing the time duration of each partition.
            Must be a valid argument to pendulum.period.range ("days", "hours", "months", etc.).
        fmt (Optional(str)): Format string to represent each partition by its start time
        inclusive (Optional(bool)): By default, the partition set only contains date interval
            partitions for which the end time of the interval is less than current time. In other
            words, the partition set contains date interval partitions that are completely in the
            past. If inclusive is set to True, then the partition set will include all date
            interval partitions for which the start time of the interval is less than the
            current time.
        timezone (Optional(str)): Timezone in which the partition values should be expressed.
    Returns:
        Callable[[], List[Partition]]
    """

    check.inst_param(start, "start", datetime.datetime)
    check.opt_inst_param(end, "end", datetime.datetime)
    check.opt_str_param(delta_range, "delta_range")
    fmt = check.opt_str_param(fmt, "fmt", default=DEFAULT_DATE_FORMAT)
    check.opt_str_param(timezone, "timezone")

    check.opt_inst_param(delta, "delta", (datetime.timedelta, relativedelta))

    if delta:
        check.invariant(not delta_range, "cannot supply both 'delta' and 'delta_range' parameters")
        warnings.warn(
            "The 'delta' argument to date_partition_range has been deprecated - use 'delta_range' "
            "instead, which has better support for timezones. For example, if you previously "
            "passed in delta=timedelta(days=1), pass in delta_range='days' instead. The 'delta' "
            "argument will be removed in the dagster 0.10.0 release."
        )
        delta_range, delta_amount = _delta_to_delta_range(delta)
    else:
        check.invariant(delta_range, "Must include either a 'delta' or 'delta_range' parameter")
        delta_amount = 1

    if end and start > end:
        raise DagsterInvariantViolationError(
            'Selected date range start "{start}" is after date range end "{end}"'.format(
                start=start.strftime(fmt), end=end.strftime(fmt),
            )
        )

    def get_date_range_partitions():
        tz = timezone if timezone else pendulum.now().timezone.name
        _start = (
            start.in_tz(tz)
            if isinstance(start, pendulum.Pendulum)
            else pendulum.instance(start, tz=tz)
        )

        if not end:
            _end = pendulum.now(tz)
        elif isinstance(end, pendulum.Pendulum):
            _end = end.in_tz(tz)
        else:
            _end = pendulum.instance(end, tz=tz)

        period = pendulum.period(_start, _end)
        date_names = [
            Partition(value=current, name=current.strftime(fmt))
            for current in period.range(delta_range, delta_amount)
        ]

        # We don't include the last element here by default since we only want
        # fully completed intervals, and the _end time is in the middle of the interval
        # represented by the last element of date_names
        if inclusive:
            return date_names

        return date_names[:-1]

    return get_date_range_partitions
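
Per the docstring, the returned callable produces one Partition per completed interval. A hedged usage sketch (the dates are arbitrary):

import datetime

partition_fn = date_partition_range(
    start=datetime.datetime(2020, 1, 1),
    delta_range="days",       # one partition per day
    fmt="%Y-%m-%d",
    timezone="UTC",
)
partitions = partition_fn()   # completed daily intervals from 2020-01-01 up to "now"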
Example #11
def _store_output(
    step_context: StepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Union[Output, DynamicOutput],
    input_lineage: List[AssetLineageInfo],
) -> Iterator[DagsterEvent]:

    output_def = step_context.solid_def.output_def_named(
        step_output_handle.output_name)
    output_manager = step_context.get_io_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)

    manager_materializations = []
    manager_metadata_entries: List[Union[PartitionMetadataEntry,
                                         MetadataEntry]] = []

    # output_manager.handle_output is either a generator function, or a normal function with or
    # without a return value. In the case that handle_output is a normal function, we need to
    # catch errors should they be raised before a return value. We can do this by wrapping
    # handle_output in a generator so that errors will be caught within iterate_with_context.

    if not inspect.isgeneratorfunction(output_manager.handle_output):

        def _gen_fn():
            gen_output = output_manager.handle_output(output_context,
                                                      output.value)
            for event in output_context.consume_events():
                yield event
            if gen_output:
                yield gen_output

        handle_output_gen = _gen_fn()
    else:
        handle_output_gen = output_manager.handle_output(
            output_context, output.value)

    for elt in iterate_with_context(
            lambda: solid_execution_error_boundary(
                DagsterExecutionHandleOutputError,
                msg_fn=lambda:
                (f'Error occurred while handling output "{output_context.name}" of '
                 f'step "{step_context.step.key}":'),
                step_context=step_context,
                step_key=step_context.step.key,
                output_name=output_context.name,
            ),
            handle_output_gen,
    ):
        for event in output_context.consume_events():
            yield event

        manager_metadata_entries.extend(
            output_context.consume_logged_metadata_entries())
        if isinstance(elt, DagsterEvent):
            yield elt
        elif isinstance(elt, AssetMaterialization):
            manager_materializations.append(elt)
        elif isinstance(elt, (MetadataEntry, PartitionMetadataEntry)):
            experimental_functionality_warning(
                "Yielding metadata from an IOManager's handle_output() function"
            )
            manager_metadata_entries.append(elt)
        else:
            raise DagsterInvariantViolationError(
                f"IO manager on output {output_def.name} has returned "
                f"value {elt} of type {type(elt).__name__}. The return type can only be "
                "one of AssetMaterialization, MetadataEntry, PartitionMetadataEntry."
            )

    for event in output_context.consume_events():
        yield event

    manager_metadata_entries.extend(
        output_context.consume_logged_metadata_entries())
    # do not alter explicitly created AssetMaterializations
    for materialization in manager_materializations:
        if materialization.metadata_entries and manager_metadata_entries:
            raise DagsterInvariantViolationError(
                f"When handling output '{output_context.name}' of {output_context.solid_def.node_type_str} '{output_context.solid_def.name}', received a materialization with metadata, while context.add_output_metadata was used within the same call to handle_output. Due to potential conflicts, this is not allowed. Please specify metadata in one place within the `handle_output` function."
            )

        if manager_metadata_entries:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=ExperimentalWarning)

                materialization = AssetMaterialization(
                    asset_key=materialization.asset_key,
                    description=materialization.description,
                    metadata_entries=manager_metadata_entries,
                    partition=materialization.partition,
                    tags=materialization.tags,
                    metadata=None,
                )
        yield DagsterEvent.asset_materialization(step_context, materialization,
                                                 input_lineage)

    asset_key, partitions = _asset_key_and_partitions_for_output(
        output_context, output_def, output_manager)
    if asset_key:
        for materialization in _get_output_asset_materializations(
                asset_key,
                partitions,
                output,
                output_def,
                manager_metadata_entries,
        ):
            yield DagsterEvent.asset_materialization(step_context,
                                                     materialization,
                                                     input_lineage)

    yield DagsterEvent.handled_output(
        step_context,
        output_name=step_output_handle.output_name,
        manager_key=output_def.io_manager_key,
        metadata_entries=[
            entry for entry in manager_metadata_entries
            if isinstance(entry, MetadataEntry)
        ],
    )
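
The comment in the middle of this example, about wrapping a plain handle_output in a generator so errors surface during iteration, is a self-contained trick. A minimal sketch of it outside dagster (names are illustrative):

import inspect

def as_generator(fn, *args):
    # If fn is already a generator function, just call it; otherwise wrap it so
    # that any exception it raises is deferred until iteration, where a single
    # error boundary around the consuming loop can catch it.
    if inspect.isgeneratorfunction(fn):
        return fn(*args)

    def _gen():
        result = fn(*args)
        if result is not None:
            yield result

    return _gen()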
Example #12
    def rebuild_from_snapshot(pipeline_name, execution_plan_snapshot):
        if not execution_plan_snapshot.can_reconstruct_plan:
            raise DagsterInvariantViolationError(
                "Tried to reconstruct an old ExecutionPlanSnapshot that was created before snapshots "
                "had enough information to fully reconstruct the ExecutionPlan"
            )

        step_dict = {}

        for step_snap in execution_plan_snapshot.steps:
            input_snaps = step_snap.inputs
            output_snaps = step_snap.outputs

            step_inputs = [
                ExecutionPlan.rebuild_step_input(step_input_snap)
                for step_input_snap in input_snaps
            ]

            step_outputs = [
                StepOutput(
                    step_output_snap.solid_handle,
                    step_output_snap.name,
                    step_output_snap.dagster_type_key,
                    step_output_snap.properties,
                ) for step_output_snap in output_snaps
            ]

            if step_snap.kind == StepKind.COMPUTE:
                step = ExecutionStep(
                    step_snap.step_handle,
                    pipeline_name,
                    step_inputs,
                    step_outputs,
                    step_snap.tags,
                )
            elif step_snap.kind == StepKind.UNRESOLVED_MAPPED:
                step = UnresolvedMappedExecutionStep(
                    step_snap.step_handle,
                    pipeline_name,
                    step_inputs,
                    step_outputs,
                    step_snap.tags,
                )
            elif step_snap.kind == StepKind.UNRESOLVED_COLLECT:
                step = UnresolvedCollectExecutionStep(
                    step_snap.step_handle,
                    pipeline_name,
                    step_inputs,
                    step_outputs,
                    step_snap.tags,
                )
            else:
                raise Exception(f"Unexpected step kind {str(step_snap.kind)}")

            step_dict[step.handle] = step

        step_handles_to_execute = [
            StepHandle.parse_from_key(key)
            for key in execution_plan_snapshot.step_keys_to_execute
        ]

        executable_map, resolvable_map = _compute_step_maps(
            step_dict,
            step_handles_to_execute,
            execution_plan_snapshot.initial_known_state,
        )

        return ExecutionPlan(
            step_dict,
            executable_map,
            resolvable_map,
            step_handles_to_execute,
            execution_plan_snapshot.initial_known_state,
            execution_plan_snapshot.artifacts_persisted,
        )
Example #13
def get_step_input_source(
    plan_builder: _PlanBuilder,
    solid: Solid,
    input_name: str,
    input_def: InputDefinition,
    dependency_structure: DependencyStructure,
    handle: SolidHandle,
    parent_step_inputs: Optional[List[Union[StepInput,
                                            UnresolvedMappedStepInput,
                                            UnresolvedCollectStepInput]]],
):
    check.inst_param(plan_builder, "plan_builder", _PlanBuilder)
    check.inst_param(solid, "solid", Solid)
    check.str_param(input_name, "input_name")
    check.inst_param(input_def, "input_def", InputDefinition)
    check.inst_param(dependency_structure, "dependency_structure",
                     DependencyStructure)
    check.opt_inst_param(handle, "handle", SolidHandle)
    check.opt_list_param(
        parent_step_inputs,
        "parent_step_inputs",
        of_type=(StepInput, UnresolvedMappedStepInput,
                 UnresolvedCollectStepInput),
    )

    input_handle = solid.input_handle(input_name)
    solid_config = plan_builder.environment_config.solids.get(str(handle))

    input_def = solid.definition.input_def_named(input_name)
    if input_def.root_manager_key and not dependency_structure.has_deps(
            input_handle):
        return FromRootInputManager(solid_handle=handle, input_name=input_name)

    if dependency_structure.has_direct_dep(input_handle):
        solid_output_handle = dependency_structure.get_direct_dep(input_handle)
        step_output_handle = plan_builder.get_output_handle(
            solid_output_handle)
        if isinstance(step_output_handle, UnresolvedStepOutputHandle):
            return FromUnresolvedStepOutput(
                unresolved_step_output_handle=step_output_handle,
                solid_handle=handle,
                input_name=input_name,
            )

        if solid_output_handle.output_def.is_dynamic:
            return FromPendingDynamicStepOutput(
                step_output_handle=step_output_handle,
                solid_handle=handle,
                input_name=input_name,
            )

        return FromStepOutput(
            step_output_handle=step_output_handle,
            solid_handle=handle,
            input_name=input_name,
            fan_in=False,
        )

    if dependency_structure.has_fan_in_deps(input_handle):
        sources: List[StepInputSource] = []
        deps = dependency_structure.get_fan_in_deps(input_handle)
        for idx, handle_or_placeholder in enumerate(deps):
            if isinstance(handle_or_placeholder, SolidOutputHandle):
                step_output_handle = plan_builder.get_output_handle(
                    handle_or_placeholder)
                if (isinstance(step_output_handle, UnresolvedStepOutputHandle)
                        or handle_or_placeholder.output_def.is_dynamic):
                    check.failed(
                        "Unexpected dynamic output dependency in regular fan in, "
                        "should have been caught at definition time.")

                sources.append(
                    FromStepOutput(
                        step_output_handle=step_output_handle,
                        solid_handle=handle,
                        input_name=input_name,
                        fan_in=True,
                    ))
            else:
                check.invariant(
                    handle_or_placeholder is MappedInputPlaceholder,
                    f"Expected SolidOutputHandle or MappedInputPlaceholder, got {handle_or_placeholder}",
                )
                if parent_step_inputs is None:
                    check.failed(
                        "unexpected error in composition descent during plan building"
                    )

                parent_name = solid.container_mapped_fan_in_input(
                    input_name, idx).definition.name
                parent_inputs = {
                    step_input.name: step_input
                    for step_input in parent_step_inputs
                }
                parent_input = parent_inputs[parent_name]
                source = parent_input.source
                if not isinstance(source, StepInputSource):
                    check.failed(
                        f"Unexpected parent mapped input source type {source}")
                sources.append(source)

        return FromMultipleSources(solid_handle=handle,
                                   input_name=input_name,
                                   sources=sources)

    if dependency_structure.has_dynamic_fan_in_dep(input_handle):
        solid_output_handle = dependency_structure.get_dynamic_fan_in_dep(
            input_handle)
        step_output_handle = plan_builder.get_output_handle(
            solid_output_handle)
        if isinstance(step_output_handle, UnresolvedStepOutputHandle):
            return FromDynamicCollect(
                solid_handle=handle,
                input_name=input_name,
                source=FromUnresolvedStepOutput(
                    unresolved_step_output_handle=step_output_handle,
                    solid_handle=handle,
                    input_name=input_name,
                ),
            )
        elif solid_output_handle.output_def.is_dynamic:
            return FromDynamicCollect(
                solid_handle=handle,
                input_name=input_name,
                source=FromPendingDynamicStepOutput(
                    step_output_handle=step_output_handle,
                    solid_handle=handle,
                    input_name=input_name,
                ),
            )

    if solid_config and input_name in solid_config.inputs:
        return FromConfig(solid_handle=handle, input_name=input_name)

    if solid.container_maps_input(input_name):
        if parent_step_inputs is None:
            check.failed(
                "unexpected error in composition descent during plan building")

        parent_name = solid.container_mapped_input(input_name).definition.name
        parent_inputs = {
            step_input.name: step_input
            for step_input in parent_step_inputs
        }
        if parent_name in parent_inputs:
            parent_input = parent_inputs[parent_name]
            return parent_input.source
        # else fall through to Nothing case or raise

    if solid.definition.input_has_default(input_name):
        return FromDefaultValue(solid_handle=handle, input_name=input_name)

    # At this point we have an input that is not hooked up to
    # the output of another solid or provided via environment config.

    # We will allow this for "Nothing" type inputs and continue.
    if input_def.dagster_type.kind == DagsterTypeKind.NOTHING:
        return None

    # Otherwise we throw an error.
    raise DagsterInvariantViolationError(
        ("In pipeline {pipeline_name} solid {solid_name}, input {input_name} "
         "must get a value either (a) from a dependency or (b) from the "
         "inputs section of its configuration.").format(
             pipeline_name=plan_builder.pipeline_name,
             solid_name=solid.name,
             input_name=input_name))
Example #14
def _check_execute_pipeline_args(fn_name, pipeline, environment_dict, mode,
                                 preset, tags, run_config, instance):
    # backcompat
    if isinstance(pipeline, PipelineDefinition):
        pipeline = InMemoryExecutablePipeline(pipeline)

    check.inst_param(pipeline, 'pipeline', ExecutablePipeline)
    pipeline_def = pipeline.get_definition()

    environment_dict = check.opt_dict_param(environment_dict,
                                            'environment_dict')

    check.opt_str_param(mode, 'mode')
    check.opt_str_param(preset, 'preset')
    check.invariant(
        not (mode is not None and preset is not None),
        'You may set only one of `mode` (got {mode}) or `preset` (got {preset}).'
        .format(mode=mode, preset=preset),
    )

    tags = check.opt_dict_param(tags, 'tags', key_type=str)

    run_config = check.opt_inst_param(run_config,
                                      'run_config',
                                      RunConfig,
                                      default=RunConfig())

    if preset is not None:
        pipeline_preset = pipeline_def.get_preset(preset)

        check.invariant(
            run_config.mode is None or pipeline_preset.mode == run_config.mode,
            'The mode set in preset \'{preset}\' (\'{preset_mode}\') does not agree with the mode '
            'set in the `run_config` (\'{run_config_mode}\')'.format(
                preset=preset,
                preset_mode=pipeline_preset.mode,
                run_config_mode=run_config.mode),
        )

        if pipeline_preset.environment_dict is not None:
            check.invariant(
                (not environment_dict)
                or (pipeline_preset.environment_dict == environment_dict),
                'The environment set in preset \'{preset}\' does not agree with the environment '
                'passed in the `environment_dict` argument.'.format(
                    preset=preset),
            )

            environment_dict = pipeline_preset.environment_dict

        if pipeline_preset.solid_subset is not None:
            pipeline = pipeline.build_sub_pipeline(
                pipeline_preset.solid_subset)

        check.invariant(
            mode is None or mode == pipeline_preset.mode,
            'Mode {mode} does not agree with the mode set in preset \'{preset}\': '
            '(\'{preset_mode}\')'.format(preset=preset,
                                         preset_mode=pipeline_preset.mode,
                                         mode=mode),
        )

        mode = pipeline_preset.mode

    if run_config.mode is not None or run_config.tags:
        warnings.warn((
            'In 0.8.0, the use of `run_config` to set pipeline mode and tags will be '
            'deprecated. Please use the `mode` and `tags` arguments to `{fn_name}` '
            'instead.').format(fn_name=fn_name))

    if run_config.mode is not None:
        if mode is not None:
            check.invariant(
                run_config.mode == mode,
                'Mode \'{mode}\' does not agree with the mode set in the `run_config`: '
                '\'{run_config_mode}\''.format(
                    mode=mode, run_config_mode=run_config.mode),
            )
        mode = run_config.mode

    if mode is not None:
        if not pipeline_def.has_mode_definition(mode):
            raise DagsterInvariantViolationError((
                'You have attempted to execute pipeline {name} with mode {mode}. '
                'Available modes: {modes}').format(
                    name=pipeline_def.name,
                    mode=mode,
                    modes=pipeline_def.available_modes,
                ))
    else:
        if not pipeline_def.is_single_mode:
            raise DagsterInvariantViolationError((
                'Pipeline {name} has multiple modes (Available modes: {modes}) and you have '
                'attempted to execute it without specifying a mode. Set '
                'mode property on the PipelineRun object.').format(
                    name=pipeline_def.name,
                    modes=pipeline_def.available_modes))
        mode = pipeline_def.get_default_mode_name()

    tags = merge_dicts(merge_dicts(pipeline_def.tags, run_config.tags or {}),
                       tags)

    check.opt_inst_param(instance, 'instance', DagsterInstance)
    instance = instance or DagsterInstance.ephemeral()

    execution_plan = create_execution_plan(
        pipeline,
        environment_dict,
        mode=mode,
        step_keys_to_execute=run_config.step_keys_to_execute,
    )

    return pipeline, environment_dict, instance, mode, tags, run_config, execution_plan
Example #15
def execute_run(
    pipeline: IPipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    raise_on_error: bool = False,
) -> PipelineExecutionResult:
    """Executes an existing pipeline run synchronously.

    Synchronous version of execute_run_iterator.

    Args:
        pipeline (IPipeline): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute
        instance (DagsterInstance): The instance in which the run has been created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.
    """
    if isinstance(pipeline, PipelineDefinition):
        if isinstance(pipeline, JobDefinition):
            error = "execute_run requires a reconstructable job but received job definition directly instead."
        else:
            error = (
                "execute_run requires a reconstructable pipeline but received pipeline definition "
                "directly instead.")
        raise DagsterInvariantViolationError(
            f"{error} To support hand-off to other processes please wrap your definition in "
            "a call to reconstructable(). Learn more about reconstructable here: https://docs.dagster.io/_apidocs/execution#dagster.reconstructable"
        )

    check.inst_param(pipeline, "pipeline", IPipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)

    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Run {} ({}) in state {}, expected NOT_STARTED or STARTING".
        format(pipeline_run.pipeline_name, pipeline_run.run_id,
               pipeline_run.status),
    )
    pipeline_def = pipeline.get_definition()
    if pipeline_run.solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            check.invariant(
                pipeline_run.solids_to_execute == pipeline.solids_to_execute,
                "Cannot execute PipelineRun with solids_to_execute {solids_to_execute} that "
                "conflicts with pipeline subset {pipeline_solids_to_execute}.".
                format(
                    pipeline_solids_to_execute=str_format_set(
                        pipeline.solids_to_execute),
                    solids_to_execute=str_format_set(
                        pipeline_run.solids_to_execute),
                ),
            )
        else:
            # when `execute_run` is directly called, the sub pipeline hasn't been created
            # note that when we receive the solids to execute via PipelineRun, it won't support
            # solid selection query syntax
            pipeline = pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute)

    execution_plan = _get_execution_plan_from_run(pipeline, pipeline_run,
                                                  instance)

    output_capture: Optional[Dict[StepOutputHandle, Any]] = {}

    _execute_run_iterable = ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=pipeline_execution_iterator,
        execution_context_manager=PlanOrchestrationContextManager(
            context_event_generator=orchestration_context_event_generator,
            pipeline=pipeline,
            execution_plan=execution_plan,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            raise_on_error=raise_on_error,
            executor_defs=None,
            output_capture=output_capture,
        ),
    )
    event_list = list(_execute_run_iterable)

    return PipelineExecutionResult(
        pipeline.get_definition(),
        pipeline_run.run_id,
        event_list,
        lambda: scoped_pipeline_context(
            execution_plan,
            pipeline,
            pipeline_run.run_config,
            pipeline_run,
            instance,
        ),
        output_capture=output_capture,
    )
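
A hedged end-to-end sketch of calling execute_run; make_my_pipeline is a hypothetical module-scope factory, and create_run_for_pipeline is assumed from the DagsterInstance API of the same era.

from dagster import DagsterInstance, reconstructable

instance = DagsterInstance.ephemeral()
recon = reconstructable(make_my_pipeline)   # hypothetical factory; must not be a lambda
run = instance.create_run_for_pipeline(pipeline_def=recon.get_definition())  # assumed API
result = execute_run(recon, run, instance, raise_on_error=True)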
Example #16
def date_partition_range(
    start,
    end=None,
    delta_range="days",
    fmt=None,
    inclusive=False,
    timezone=None,
):
    """ Utility function that returns a partition generating function to be used in creating a
    `PartitionSet` definition.

    Args:
        start (datetime): Datetime capturing the start of the time range.
        end  (Optional(datetime)): Datetime capturing the end of the partition.  By default, the
                                   current time is used.  The range is not inclusive of the end
                                   value.
        delta_range (Optional(str)): string representing the time duration of each partition.
            Must be a valid argument to pendulum.period.range ("days", "hours", "months", etc.).
        fmt (Optional(str)): Format string to represent each partition by its start time
        inclusive (Optional(bool)): By default, the partition set only contains date interval
            partitions for which the end time of the interval is less than current time. In other
            words, the partition set contains date interval partitions that are completely in the
            past. If inclusive is set to True, then the partition set will include all date
            interval partitions for which the start time of the interval is less than the
            current time.
        timezone (Optional(str)): Timezone in which the partition values should be expressed.
    Returns:
        Callable[[], List[Partition]]
    """

    check.inst_param(start, "start", datetime.datetime)
    check.opt_inst_param(end, "end", datetime.datetime)
    check.str_param(delta_range, "delta_range")
    fmt = check.opt_str_param(fmt, "fmt", default=DEFAULT_DATE_FORMAT)
    check.opt_str_param(timezone, "timezone")

    delta_amount = 1

    if end and start > end:
        raise DagsterInvariantViolationError(
            'Selected date range start "{start}" is after date range end "{end}"'
            .format(
                start=start.strftime(fmt),
                end=end.strftime(fmt),
            ))

    def get_date_range_partitions(current_time=None):
        check.opt_inst_param(current_time, "current_time", datetime.datetime)
        tz = timezone if timezone else pendulum.now().timezone.name
        _start = (start.in_tz(tz) if isinstance(start, pendulum.Pendulum) else
                  pendulum.instance(start, tz=tz))

        if end:
            _end = end
        elif current_time:
            _end = current_time
        else:
            _end = pendulum.now(tz)

        # coerce to the definition timezone
        if isinstance(_end, pendulum.Pendulum):
            _end = _end.in_tz(tz)
        else:
            _end = pendulum.instance(_end, tz=tz)

        period = pendulum.period(_start, _end)
        date_names = [
            Partition(value=current, name=current.strftime(fmt))
            for current in period.range(delta_range, delta_amount)
        ]

        # We don't include the last element here by default since we only want
        # fully completed intervals, and the _end time is in the middle of the interval
        # represented by the last element of date_names
        if inclusive:
            return date_names

        return date_names[:-1]

    return get_date_range_partitions
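
Unlike the earlier variant, this generator accepts current_time, which makes the output deterministic and is convenient in tests. A hedged sketch (dates arbitrary; the expected names assume naive datetimes are interpreted in UTC):

import datetime

partition_fn = date_partition_range(
    start=datetime.datetime(2020, 1, 1), fmt="%Y-%m-%d", timezone="UTC"
)
# Pin "now" so the partition list is stable: the 2020-01-05 interval is still
# in progress, so only the four completed days remain after the [:-1] slice.
partitions = partition_fn(current_time=datetime.datetime(2020, 1, 5))
assert [p.name for p in partitions] == ["2020-01-01", "2020-01-02", "2020-01-03", "2020-01-04"]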
Example #17
def _store_output(
    step_context: StepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Union[Output, DynamicOutput],
    input_lineage: List[AssetLineageInfo],
) -> Iterator[DagsterEvent]:

    output_def = step_context.solid_def.output_def_named(step_output_handle.output_name)
    output_manager = step_context.get_io_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)

    manager_materializations = []
    manager_metadata_entries = []

    # output_manager.handle_output is either a generator function, or a normal function with or
    # without a return value. In the case that handle_output is a normal function, we need to
    # catch errors should they be raised before a return value. We can do this by wrapping
    # handle_output in a generator so that errors will be caught within iterate_with_context.

    if not inspect.isgeneratorfunction(output_manager.handle_output):

        def _gen_fn():
            gen_output = output_manager.handle_output(output_context, output.value)
            if gen_output:
                yield gen_output

        handle_output_gen = _gen_fn()
    else:
        handle_output_gen = output_manager.handle_output(output_context, output.value)

    for elt in iterate_with_context(
        lambda: solid_execution_error_boundary(
            DagsterExecutionHandleOutputError,
            msg_fn=lambda: (
                f'Error occurred while handling output "{output_context.name}" of '
                f'step "{step_context.step.key}":'
            ),
            step_context=step_context,
            step_key=step_context.step.key,
            output_name=output_context.name,
        ),
        handle_output_gen,
    ):
        if isinstance(elt, AssetMaterialization):
            manager_materializations.append(elt)
        elif isinstance(elt, (EventMetadataEntry, PartitionMetadataEntry)):
            experimental_functionality_warning(
                "Yielding metadata from an IOManager's handle_output() function"
            )
            manager_metadata_entries.append(elt)
        else:
            raise DagsterInvariantViolationError(
                f"IO manager on output {output_def.name} has returned "
                f"value {elt} of type {type(elt).__name__}. The return type can only be "
                "one of AssetMaterialization, EventMetadataEntry, PartitionMetadataEntry."
            )

    # do not alter explicitly created AssetMaterializations
    for materialization in manager_materializations:
        yield DagsterEvent.asset_materialization(step_context, materialization, input_lineage)

    asset_key, partitions = _asset_key_and_partitions_for_output(
        output_context, output_def, output_manager
    )
    if asset_key:
        for materialization in _get_output_asset_materializations(
            asset_key,
            partitions,
            output,
            output_def,
            manager_metadata_entries,
        ):
            yield DagsterEvent.asset_materialization(step_context, materialization, input_lineage)

    yield DagsterEvent.handled_output(
        step_context,
        output_name=step_output_handle.output_name,
        manager_key=output_def.io_manager_key,
        metadata_entries=[
            entry for entry in manager_metadata_entries if isinstance(entry, EventMetadataEntry)
        ],
    )
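
The generator-wrapping trick described in the comment above generalizes: deferring a plain function call into a one-shot generator means an error boundary entered per element also covers the function body itself. A minimal sketch of the pattern, independent of Dagster's internal iterate_with_context and solid_execution_error_boundary (all names below are illustrative stand-ins):

import inspect
from contextlib import contextmanager

@contextmanager
def error_boundary():
    # Stand-in for solid_execution_error_boundary: normalize user errors.
    try:
        yield
    except Exception as exc:
        raise RuntimeError("error while handling output") from exc

def iterate_with_boundary(iterator):
    # Advance one element at a time, entering the boundary around each
    # next() so failures raised inside user code are uniformly wrapped.
    while True:
        with error_boundary():
            try:
                elt = next(iterator)
            except StopIteration:
                return
        yield elt

def as_generator(fn, *args):
    # If fn is already a generator function, just call it; otherwise wrap
    # it so its body runs lazily, inside the boundary, on the first next().
    if inspect.isgeneratorfunction(fn):
        return fn(*args)

    def _gen():
        result = fn(*args)
        if result is not None:
            yield result

    return _gen()

def flaky_handle_output(value):
    raise ValueError("user code failed")

# The ValueError surfaces as the boundary's RuntimeError even though
# flaky_handle_output is not a generator function:
# list(iterate_with_boundary(as_generator(flaky_handle_output, 1)))
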
Example no. 18
0
def reconstructable(target):
    """
    Create a ReconstructablePipeline from a function that returns a PipelineDefinition, or a
    function decorated with :py:func:`@pipeline <dagster.pipeline>`.

    When your pipeline must cross process boundaries, e.g., for execution on multiple nodes or
    in different systems (like dagstermill), Dagster must know how to reconstruct the pipeline
    on the other side of the process boundary.
    
    This function implements a very conservative strategy for reconstructing pipelines, so that
    its behavior is easy to predict, but as a consequence it is not able to reconstruct certain
    kinds of pipelines, such as those defined by lambdas, in nested scopes (e.g., dynamically
    within a method call), or in interactive environments such as the Python REPL or Jupyter
    notebooks.

    If you need to reconstruct pipelines constructed in these ways, you should use
    :py:func:`build_reconstructable_pipeline` instead, which allows you to specify your own
    strategy for reconstructing a pipeline.

    Examples:

    .. code-block:: python

        from dagster import PipelineDefinition, pipeline, reconstructable

        @pipeline
        def foo_pipeline():
            ...

        reconstructable_foo_pipeline = reconstructable(foo_pipeline)


        def make_bar_pipeline():
            return PipelineDefinition(...)
            
        reconstructable_bar_pipeline = reconstructable(make_bar_pipeline)
    """
    from dagster.core.definitions import PipelineDefinition

    if not seven.is_function_or_decorator_instance_of(target,
                                                      PipelineDefinition):
        raise DagsterInvariantViolationError(
            "Reconstructable target should be a function or definition produced "
            "by a decorated function, got {type}.".format(type=type(target)), )

    if seven.is_lambda(target):
        raise DagsterInvariantViolationError(
            "Reconstructable target can not be a lambda. Use a function or "
            "decorated function defined at module scope instead, or use "
            "build_reconstructable_pipeline.")

    if seven.qualname_differs(target):
        raise DagsterInvariantViolationError(
            'Reconstructable target "{target.__name__}" has a different '
            '__qualname__ "{target.__qualname__}" indicating it is not '
            "defined at module scope. Use a function or decorated function "
            "defined at module scope instead, or use build_reconstructable_pipeline."
            .format(target=target))

    python_file = get_python_file_from_previous_stack_frame()
    if python_file.endswith("<stdin>"):
        raise DagsterInvariantViolationError(
            "reconstructable() can not reconstruct pipelines from <stdin>, unable to "
            "target file {}. Use a pipeline defined in a module or file instead, or "
            "use build_reconstructable_pipeline.".format(python_file))
    pointer = FileCodePointer(python_file=python_file,
                              fn_name=target.__name__,
                              working_directory=os.getcwd())

    # Note: modules created in interactive environments (e.g. <ipython-input-...> cells
    # or tempfile-backed notebook kernels) fail at import time with "unable to load
    # spec", so reconstruction from IPython/Jupyter is unsupported; the <stdin> guard
    # above catches only the plain-REPL case.
    return bootstrap_standalone_recon_pipeline(pointer)
Example no. 19
0
def executor_failing(_):
    raise DagsterInvariantViolationError()
Example no. 20
0
def create_location(self) -> NoReturn:
    raise DagsterInvariantViolationError(
        "A RegisteredRepositoryLocationOrigin does not have enough information to load its "
        "repository location on its own.")
Example no. 21
0
def reconstructable(target):
    """
    Create a :py:class:`~dagster.core.definitions.reconstructable.ReconstructablePipeline` from a
    function that returns a :py:class:`~dagster.PipelineDefinition`/:py:class:`~dagster.JobDefinition`,
    or a function decorated with :py:func:`@pipeline <dagster.pipeline>`/:py:func:`@job <dagster.job>`.

    When your pipeline/job must cross process boundaries, e.g., for execution on multiple nodes or
    in different systems (like ``dagstermill``), Dagster must know how to reconstruct the pipeline/job
    on the other side of the process boundary.

    Passing a job created with ``GraphDefinition.to_job`` to ``reconstructable()``
    requires you to wrap that job's definition in a module-scoped function, and to pass
    that function instead:

    .. code-block:: python

        from dagster import graph, reconstructable

        @graph
        def my_graph():
            ...

        def define_my_job():
            return my_graph.to_job()

        reconstructable(define_my_job)

    This function implements a very conservative strategy for reconstruction, so that its behavior
    is easy to predict, but as a consequence it is not able to reconstruct certain kinds of pipelines
    or jobs, such as those defined by lambdas, in nested scopes (e.g., dynamically within a method
    call), or in interactive environments such as the Python REPL or Jupyter notebooks.

    If you need to reconstruct objects constructed in these ways, you should use
    :py:func:`~dagster.reconstructable.build_reconstructable_job` instead, which allows you to
    specify your own reconstruction strategy.

    Examples:

    .. code-block:: python

        from dagster import graph, job, reconstructable

        @job
        def foo_job():
            ...

        reconstructable_foo_job = reconstructable(foo_job)


        @graph
        def foo():
            ...

        def make_bar_job():
            return foo.to_job()

        reconstructable_bar_job = reconstructable(make_bar_job)
    """
    from dagster.core.definitions import PipelineDefinition, JobDefinition

    if not seven.is_function_or_decorator_instance_of(target,
                                                      PipelineDefinition):
        if isinstance(target, JobDefinition):
            raise DagsterInvariantViolationError(
                "Reconstructable target was not a function returning a job definition, or a job "
                "definition produced by a decorated function. If your job was constructed using "
                "``GraphDefinition.to_job``, you must wrap the ``to_job`` call in a function at "
                "module scope, i.e. not within any other functions. "
                "To learn more, check out the docs on ``reconstructable``: "
                "https://docs.dagster.io/_apidocs/execution#dagster.reconstructable"
            )
        raise DagsterInvariantViolationError(
            "Reconstructable target should be a function or definition produced "
            "by a decorated function, got {type}.".format(type=type(target)), )

    if seven.is_lambda(target):
        raise DagsterInvariantViolationError(
            "Reconstructable target can not be a lambda. Use a function or "
            "decorated function defined at module scope instead, or use "
            "build_reconstructable_job.")

    if seven.qualname_differs(target):
        raise DagsterInvariantViolationError(
            'Reconstructable target "{target.__name__}" has a different '
            '__qualname__ "{target.__qualname__}" indicating it is not '
            "defined at module scope. Use a function or decorated function "
            "defined at module scope instead, or use build_reconstructable_job."
            .format(target=target))

    try:
        if (hasattr(target, "__module__") and hasattr(target, "__name__")
                and inspect.getmodule(target).__name__ != "__main__"):
            return ReconstructablePipeline.for_module(target.__module__,
                                                      target.__name__)
    except Exception:
        pass

    python_file = get_python_file_from_target(target)
    if not python_file:
        raise DagsterInvariantViolationError(
            "reconstructable() can not reconstruct jobs or pipelines defined in interactive "
            "environments like <stdin>, IPython, or Jupyter notebooks. "
            "Use a pipeline defined in a module or file instead, or use build_reconstructable_job."
        )

    pointer = FileCodePointer(python_file=python_file,
                              fn_name=target.__name__,
                              working_directory=os.getcwd())

    return bootstrap_standalone_recon_pipeline(pointer)
Example no. 22
0
def create_location(self) -> NoReturn:
    raise DagsterInvariantViolationError(
        "A ManagedGrpcPythonEnvRepositoryLocationOrigin needs a DynamicWorkspace"
        " in order to create a handle.")
Example no. 23
0
    def create_run_for_pipeline(
        self,
        pipeline_def,
        execution_plan=None,
        run_id=None,
        run_config=None,
        mode=None,
        solids_to_execute=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        solid_selection=None,
    ):
        from dagster.core.execution.api import create_execution_plan
        from dagster.core.execution.plan.plan import ExecutionPlan
        from dagster.core.snap import snapshot_from_execution_plan

        check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)
        check.opt_inst_param(execution_plan, "execution_plan", ExecutionPlan)

        # note that solids_to_execute is required to execute the solid subset, which is the
        # frozenset version of the previous solid_subset.
        # solid_selection is not required and will not be converted to solids_to_execute here.
        # i.e. this function doesn't handle solid queries.
        # solid_selection is only used to pass the user queries further down.
        check.opt_set_param(solids_to_execute,
                            "solids_to_execute",
                            of_type=str)
        check.opt_list_param(solid_selection, "solid_selection", of_type=str)

        if solids_to_execute:
            if isinstance(pipeline_def, PipelineSubsetDefinition):
                # for the case when pipeline_def is created by IPipeline or ExternalPipeline
                check.invariant(
                    solids_to_execute == pipeline_def.solids_to_execute,
                    "Cannot create a PipelineRun from pipeline subset {pipeline_solids_to_execute} "
                    "that conflicts with solids_to_execute arg {solids_to_execute}"
                    .format(
                        pipeline_solids_to_execute=str_format_list(
                            pipeline_def.solids_to_execute),
                        solids_to_execute=str_format_list(solids_to_execute),
                    ),
                )
            else:
                # for cases when `create_run_for_pipeline` is directly called
                pipeline_def = pipeline_def.get_pipeline_subset_def(
                    solids_to_execute=solids_to_execute)

        full_execution_plan = execution_plan or create_execution_plan(
            pipeline_def,
            run_config=run_config,
            mode=mode,
        )
        check.invariant(
            len(full_execution_plan.step_keys_to_execute) == len(
                full_execution_plan.steps))

        if _is_memoized_run(tags):
            if step_keys_to_execute:
                raise DagsterInvariantViolationError(
                    "step_keys_to_execute parameter cannot be used in conjunction with memoized "
                    "pipeline runs.")

            step_keys_to_execute = self.resolve_unmemoized_steps(
                full_execution_plan,
                run_config=run_config,
                mode=mode,
            )  # TODO: tighter integration with existing step_keys_to_execute functionality

        subsetted_execution_plan = (
            full_execution_plan.build_subset_plan(step_keys_to_execute)
            if step_keys_to_execute else full_execution_plan)

        return self.create_run(
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            run_config=run_config,
            mode=check.opt_str_param(
                mode, "mode", default=pipeline_def.get_default_mode_name()),
            solid_selection=solid_selection,
            solids_to_execute=solids_to_execute,
            step_keys_to_execute=step_keys_to_execute,
            status=status,
            tags=tags,
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
            pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                subsetted_execution_plan,
                pipeline_def.get_pipeline_snapshot_id()),
            parent_pipeline_snapshot=pipeline_def.get_parent_pipeline_snapshot(
            ),
        )
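
As the comments in this method stress, solid_selection carries the raw user queries while solids_to_execute is the already-resolved frozenset of solid names; create_run_for_pipeline consumes only the latter. A toy illustration of the distinction, with a deliberately simplified prefix-only resolver standing in for Dagster's much richer selection syntax:

ALL_SOLIDS = ["add_one", "add_two", "emit_num"]

def resolve_selection(solid_selection):
    # Hypothetical resolver: Dagster's query language supports far more
    # than the trailing-* prefix match sketched here.
    resolved = set()
    for query in solid_selection:
        if query.endswith("*"):
            resolved |= {s for s in ALL_SOLIDS if s.startswith(query[:-1])}
        else:
            resolved.add(query)
    return frozenset(resolved)

solid_selection = ["add_*"]            # what the user typed
solids_to_execute = resolve_selection(solid_selection)
assert solids_to_execute == frozenset({"add_one", "add_two"})
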
Example no. 24
0
def assert_in_composition(solid_name: str) -> None:
    if len(_composition_stack) < 1:
        raise DagsterInvariantViolationError(
            'Attempted to call solid "{solid_name}" outside of a composition function. '
            "Calling solids is only valid in a function decorated with "
            "@pipeline or @composite_solid.".format(solid_name=solid_name))
Example no. 25
0
def execute_run(
    pipeline: IPipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    raise_on_error: bool = False,
) -> PipelineExecutionResult:
    """Executes an existing pipeline run synchronously.

    Synchronous version of execute_run_iterator.

    Args:
        pipeline (IPipeline): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute.
        instance (DagsterInstance): The instance in which the run has been created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.
    """
    if isinstance(pipeline, PipelineDefinition):
        raise DagsterInvariantViolationError(
            "execute_run requires an IPipeline but received a PipelineDefinition "
            "directly instead. To support hand-off to other processes provide a "
            "ReconstructablePipeline which can be done using reconstructable(). For in "
            "process only execution you can use InMemoryPipeline."
        )

    check.inst_param(pipeline, "pipeline", IPipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)

    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING".format(
            pipeline_run.pipeline_name, pipeline_run.run_id, pipeline_run.status
        ),
    )
    pipeline_def = pipeline.get_definition()
    if pipeline_run.solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            check.invariant(
                pipeline_run.solids_to_execute == pipeline.solids_to_execute,
                "Cannot execute PipelineRun with solids_to_execute {solids_to_execute} that "
                "conflicts with pipeline subset {pipeline_solids_to_execute}.".format(
                    pipeline_solids_to_execute=str_format_set(pipeline.solids_to_execute),
                    solids_to_execute=str_format_set(pipeline_run.solids_to_execute),
                ),
            )
        else:
            # When `execute_run` is called directly, the sub-pipeline hasn't been created.
            # Note that when we receive the solids to execute via PipelineRun, solid
            # selection query syntax is not supported.
            pipeline = pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute
            )

    execution_plan = create_execution_plan(
        pipeline,
        run_config=pipeline_run.run_config,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    if is_memoized_run(pipeline_run.tags):
        execution_plan = resolve_memoized_execution_plan(execution_plan, pipeline_run.run_config)

    _execute_run_iterable = ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=pipeline_execution_iterator,
        execution_context_manager=PipelineExecutionContextManager(
            execution_plan=execution_plan,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            raise_on_error=raise_on_error,
        ),
    )
    event_list = list(_execute_run_iterable)
    pipeline_context = _execute_run_iterable.pipeline_context

    # Workaround so that mem_io_manager works in reconstruct_context (e.g. for
    # result.result_for_solid): the in-memory values dict would be lost if the resource
    # were re-initiated in reconstruct_context, so instead of re-initiating every single
    # resource we pass the resource instances to reconstruct_context directly and avoid
    # re-building them from the resource def.
    resource_instances_to_override = {}
    if pipeline_context:  # None if we have a pipeline failure
        for (
            key,
            resource_instance,
        ) in pipeline_context.scoped_resources_builder.resource_instance_dict.items():
            if isinstance(resource_instance, InMemoryIOManager):
                resource_instances_to_override[key] = resource_instance

    return PipelineExecutionResult(
        pipeline.get_definition(),
        pipeline_run.run_id,
        event_list,
        lambda hardcoded_resources_arg: scoped_pipeline_context(
            execution_plan,
            pipeline_run.run_config,
            pipeline_run,
            instance,
            intermediate_storage=pipeline_context.intermediate_storage,
            resource_instances_to_override=hardcoded_resources_arg,
        ),
        resource_instances_to_override=resource_instances_to_override,
    )
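
Because execute_run rejects a bare PipelineDefinition, in-process callers wrap the definition first, as the error message above suggests. A hedged usage sketch against the solid/pipeline-era API shown in this snippet; the InMemoryPipeline import path varied across the Dagster versions these examples span, so treat it as an assumption:

from dagster import DagsterInstance, pipeline, solid
from dagster.core.definitions.pipeline_base import InMemoryPipeline  # path may vary by version
from dagster.core.execution.api import execute_run

@solid
def say_hello(_context):
    return "hello"

@pipeline
def hello_pipeline():
    say_hello()

instance = DagsterInstance.ephemeral()
pipeline_run = instance.create_run_for_pipeline(hello_pipeline)
# Wrap the definition so execute_run receives an IPipeline rather than a
# PipelineDefinition, which it explicitly rejects above.
result = execute_run(InMemoryPipeline(hello_pipeline), pipeline_run, instance)
assert result.success
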
Example no. 26
0
def _validate_and_coerce_solid_result_to_iterator(result, context,
                                                  output_defs):

    if isinstance(result,
                  (AssetMaterialization, Materialization, ExpectationResult)):
        raise DagsterInvariantViolationError((
            "Error in {described_op}: If you are returning an AssetMaterialization "
            "or an ExpectationResult from {node_type} you must yield them to avoid "
            "ambiguity with an implied result from returning a value.".format(
                described_op=context.describe_op(),
                node_type=context.solid_def.node_type_str,
            )))

    if inspect.isgenerator(result):
        # this happens when a user explicitly returns a generator in the solid
        for event in result:
            yield event
    elif isinstance(result, Output):
        yield result
    elif len(output_defs) == 1:
        if result is None and output_defs[0].is_required is False:
            context.log.warn(
                'Value "None" returned for non-required output "{output_name}" of {described_op}. '
                "This value will be passed to downstream {node_type}s. For conditional execution use\n"
                '  yield Output(value, "{output_name}")\n'
                "when you want the downstream {node_type}s to execute, "
                "and do not yield it when you want them to skip.".
                format(
                    output_name=output_defs[0].name,
                    described_op=context.describe_op(),
                    node_type=context.solid_def.node_type_str,
                ))
        metadata = context.get_output_metadata(output_defs[0].name)
        yield Output(value=result,
                     output_name=output_defs[0].name,
                     metadata=metadata)
    elif len(output_defs) > 1 and isinstance(result, tuple):
        if len(result) != len(output_defs):
            check.failed(
                f"Solid '{context.solid_name}' has {len(output_defs)} output definitions, but "
                f"returned a tuple with {len(result)} elements")

        for output_def, element in zip(output_defs, result):
            metadata = context.get_output_metadata(output_def.name)
            yield Output(output_name=output_def.name,
                         value=element,
                         metadata=metadata)
    elif result is not None:
        if not output_defs:
            raise DagsterInvariantViolationError((
                "Error in {described_op}: Unexpectedly returned output {result} "
                "of type {type_}. {node_type} is explicitly defined to return no "
                "results.").format(
                    described_op=context.describe_op(),
                    result=result,
                    type_=type(result),
                    node_type=context.solid_def.node_type_str.capitalize(),
                ))

        raise DagsterInvariantViolationError(
            ("Error in {described_op}: {node_type} unexpectedly returned "
             "output {result} of type {type_}. Should "
             "be a generator, containing or yielding "
             "{n_results} results: {{{expected_results}}}.").format(
                 described_op=context.describe_op(),
                 node_type=context.solid_def.node_type_str,
                 result=result,
                 type_=type(result),
                 n_results=len(output_defs),
                 expected_results=", ".join([
                     "'{result_name}': {dagster_type}".format(
                         result_name=output_def.name,
                         dagster_type=output_def.dagster_type,
                     ) for output_def in output_defs
                 ]),
             ))
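
The branches above define every shape a solid body may legally produce: a generator, an explicit Output, a bare value when there is exactly one output definition, or a tuple zipped positionally against multiple output definitions. A sketch of solids exercising each accepted shape under the same decorator-era API (solid names are illustrative):

from dagster import Output, OutputDefinition, solid

@solid
def returns_bare_value(_context):
    # One output definition: the bare value is coerced into
    # Output(5, "result") by the logic above.
    return 5

@solid(output_defs=[OutputDefinition(name="a"), OutputDefinition(name="b")])
def returns_tuple(_context):
    # Two output definitions: the tuple is zipped positionally with them.
    return 1, 2

@solid
def yields_outputs(_context):
    # Generator results are passed through untouched.
    yield Output(5)
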
Example no. 27
0
def get_step_input(plan_builder, solid, input_name, input_def,
                   dependency_structure, handle, parent_step_inputs):
    check.inst_param(plan_builder, 'plan_builder', _PlanBuilder)
    check.inst_param(solid, 'solid', Solid)
    check.str_param(input_name, 'input_name')
    check.inst_param(input_def, 'input_def', InputDefinition)
    check.inst_param(dependency_structure, 'dependency_structure',
                     DependencyStructure)
    check.opt_inst_param(handle, 'handle', SolidHandle)
    check.opt_list_param(parent_step_inputs,
                         'parent_step_inputs',
                         of_type=StepInput)

    solid_config = plan_builder.environment_config.solids.get(str(handle))
    if solid_config and input_name in solid_config.inputs:
        return StepInput(
            name=input_name,
            dagster_type=input_def.dagster_type,
            source_type=StepInputSourceType.CONFIG,
            config_data=solid_config.inputs[input_name],
        )

    input_handle = solid.input_handle(input_name)
    if dependency_structure.has_singular_dep(input_handle):
        solid_output_handle = dependency_structure.get_singular_dep(
            input_handle)
        return StepInput(
            name=input_name,
            dagster_type=input_def.dagster_type,
            source_type=StepInputSourceType.SINGLE_OUTPUT,
            source_handles=[
                plan_builder.get_output_handle(solid_output_handle)
            ],
        )

    if dependency_structure.has_multi_deps(input_handle):
        solid_output_handles = dependency_structure.get_multi_deps(
            input_handle)
        return StepInput(
            name=input_name,
            dagster_type=input_def.dagster_type,
            source_type=StepInputSourceType.MULTIPLE_OUTPUTS,
            source_handles=[
                plan_builder.get_output_handle(solid_output_handle)
                for solid_output_handle in solid_output_handles
            ],
        )

    if solid.container_maps_input(input_name):
        parent_name = solid.container_mapped_input(input_name).definition.name
        parent_inputs = {
            step_input.name: step_input
            for step_input in parent_step_inputs
        }
        if parent_name in parent_inputs:
            parent_input = parent_inputs[parent_name]
            return StepInput(
                name=input_name,
                dagster_type=input_def.dagster_type,
                source_type=parent_input.source_type,
                source_handles=parent_input.source_handles,
                config_data=parent_input.config_data,
            )

    if solid.definition.input_has_default(input_name):
        return StepInput(
            name=input_name,
            dagster_type=input_def.dagster_type,
            source_type=StepInputSourceType.DEFAULT_VALUE,
            config_data=solid.definition.default_value_for_input(input_name),
        )

    # At this point we have an input that is not hooked up to
    # the output of another solid or provided via environment config.

    # We will allow this for "Nothing" type inputs and continue.
    if input_def.dagster_type.kind == DagsterTypeKind.NOTHING:
        return None

    # Otherwise we throw an error.
    raise DagsterInvariantViolationError(
        ('In pipeline {pipeline_name} solid {solid_name}, input {input_name} '
         'must get a value either (a) from a dependency or (b) from the '
         'inputs section of its configuration.').format(
             pipeline_name=plan_builder.pipeline_name,
             solid_name=solid.name,
             input_name=input_name))
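
Read top to bottom, the function above encodes a strict first-match-wins order for resolving a step input: solid config, a single upstream output, fanned-in outputs, a parent composite's mapped input, the input's default value, the Nothing-type escape hatch, and only then the error. A schematic of that control flow, not Dagster's API:

def resolve_first(*sources):
    # Each source is a zero-argument callable returning a resolved input
    # or None; the first non-None result wins, mirroring the fall-through
    # order of the checks above.
    for source in sources:
        resolved = source()
        if resolved is not None:
            return resolved
    raise ValueError(
        "input must get a value either (a) from a dependency or "
        "(b) from the inputs section of its configuration"
    )

# resolve_first(from_config, from_single_dep, from_multi_deps,
#               from_parent_mapping, from_default, from_nothing_type)
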
Example no. 28
0
def _store_output(
    step_context: StepExecutionContext,
    step_output_handle: StepOutputHandle,
    output: Union[Output, DynamicOutput],
    input_lineage: List[AssetLineageInfo],
) -> Iterator[DagsterEvent]:

    output_def = step_context.solid_def.output_def_named(step_output_handle.output_name)
    output_manager = step_context.get_io_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)

    with user_code_error_boundary(
        DagsterExecutionHandleOutputError,
        control_flow_exceptions=[Failure, RetryRequested],
        msg_fn=lambda: (
            f'Error occurred while handling output "{output_context.name}" of '
            f'step "{step_context.step.key}":'
        ),
        step_key=step_context.step.key,
        output_name=output_context.name,
    ):
        handle_output_res = output_manager.handle_output(output_context, output.value)

    manager_materializations = []
    manager_metadata_entries = []
    if handle_output_res is not None:
        for elt in ensure_gen(handle_output_res):
            if isinstance(elt, AssetMaterialization):
                manager_materializations.append(elt)
            elif isinstance(elt, (EventMetadataEntry, PartitionMetadataEntry)):
                experimental_functionality_warning(
                    "Yielding metadata from an IOManager's handle_output() function"
                )
                manager_metadata_entries.append(elt)
            else:
                raise DagsterInvariantViolationError(
                    f"IO manager on output {output_def.name} has returned "
                    f"value {elt} of type {type(elt).__name__}. The return type can only be "
                    "one of AssetMaterialization, EventMetadataEntry, PartitionMetadataEntry."
                )

    # do not alter explicitly created AssetMaterializations
    for materialization in manager_materializations:
        yield DagsterEvent.asset_materialization(step_context, materialization, input_lineage)

    asset_key, partitions = _asset_key_and_partitions_for_output(
        output_context, output_def, output_manager
    )
    if asset_key:
        for materialization in _get_output_asset_materializations(
            asset_key,
            partitions,
            output,
            output_def,
            manager_metadata_entries,
        ):
            yield DagsterEvent.asset_materialization(step_context, materialization, input_lineage)

    yield DagsterEvent.handled_output(
        step_context,
        output_name=step_output_handle.output_name,
        manager_key=output_def.io_manager_key,
        message_override=f'Handled output "{step_output_handle.output_name}" using intermediate storage'
        if isinstance(output_manager, IntermediateStorageAdapter)
        else None,
        metadata_entries=[
            entry for entry in manager_metadata_entries if isinstance(entry, EventMetadataEntry)
        ],
    )
Example no. 29
0
def execute_run(pipeline, pipeline_run, instance, raise_on_error=False):
    """Executes an existing pipeline run synchronously.

    Synchronous version of execute_run_iterator.

    Args:
        pipeline (ExecutablePipeline): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute.
        instance (DagsterInstance): The instance in which the run has been created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.
    """
    if isinstance(pipeline, PipelineDefinition):
        raise DagsterInvariantViolationError(
            "execute_run requires an ExecutablePipeline but received a PipelineDefinition "
            "directly instead. To support hand-off to other processes provide a "
            "ReconstructablePipeline which can be done using reconstructable(). For in "
            "process only execution you can use InMemoryExecutablePipeline.")

    check.inst_param(pipeline, "pipeline", ExecutablePipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    check.invariant(pipeline_run.status == PipelineRunStatus.NOT_STARTED)

    pipeline_def = pipeline.get_definition()
    if pipeline_run.solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            check.invariant(
                pipeline_run.solids_to_execute == pipeline.solids_to_execute,
                "Cannot execute PipelineRun with solids_to_execute {solids_to_execute} that "
                "conflicts with pipeline subset {pipeline_solids_to_execute}.".
                format(
                    pipeline_solids_to_execute=str_format_set(
                        pipeline.solids_to_execute),
                    solids_to_execute=str_format_set(
                        pipeline_run.solids_to_execute),
                ),
            )
        else:
            # When `execute_run` is called directly, the sub-pipeline hasn't been created.
            # Note that when we receive the solids to execute via PipelineRun, solid
            # selection query syntax is not supported.
            pipeline = pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute)

    execution_plan = create_execution_plan(
        pipeline,
        run_config=pipeline_run.run_config,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    _execute_run_iterable = _ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=_pipeline_execution_iterator,
        execution_context_manager=PipelineExecutionContextManager(
            execution_plan=execution_plan,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            raise_on_error=raise_on_error,
        ),
    )
    event_list = list(_execute_run_iterable)
    pipeline_context = _execute_run_iterable.pipeline_context

    return PipelineExecutionResult(
        pipeline.get_definition(),
        pipeline_run.run_id,
        event_list,
        lambda: scoped_pipeline_context(
            execution_plan,
            pipeline_run.run_config,
            pipeline_run,
            instance,
            intermediate_storage=pipeline_context.intermediate_storage,
            system_storage_data=SystemStorageData(
                intermediate_storage=pipeline_context.intermediate_storage,
                file_manager=pipeline_context.file_manager,
            ),
        ),
    )
Example no. 30
0
def _check_execute_pipeline_args(pipeline,
                                 run_config,
                                 mode,
                                 preset,
                                 tags,
                                 instance,
                                 solid_selection=None):
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)

    run_config = check.opt_dict_param(run_config, 'run_config')
    check.opt_str_param(mode, 'mode')
    check.opt_str_param(preset, 'preset')
    check.invariant(
        not (mode is not None and preset is not None),
        'You may set only one of `mode` (got {mode}) or `preset` (got {preset}).'
        .format(mode=mode, preset=preset),
    )

    tags = check.opt_dict_param(tags, 'tags', key_type=str)
    check.opt_list_param(solid_selection, 'solid_selection', of_type=str)

    if preset is not None:
        pipeline_preset = pipeline_def.get_preset(preset)

        if pipeline_preset.run_config is not None:
            check.invariant(
                (not run_config) or (pipeline_preset.run_config == run_config),
                'The environment set in preset \'{preset}\' does not agree with the environment '
                'passed in the `run_config` argument.'.format(preset=preset),
            )

            run_config = pipeline_preset.run_config

        # load solid_selection from preset
        if pipeline_preset.solid_selection is not None:
            check.invariant(
                solid_selection is None
                or solid_selection == pipeline_preset.solid_selection,
                'The solid_selection set in preset \'{preset}\', {preset_subset}, does not agree with '
                'the `solid_selection` argument: {solid_selection}'.format(
                    preset=preset,
                    preset_subset=pipeline_preset.solid_selection,
                    solid_selection=solid_selection,
                ),
            )
            solid_selection = pipeline_preset.solid_selection

        check.invariant(
            mode is None or mode == pipeline_preset.mode,
            'Mode {mode} does not agree with the mode set in preset \'{preset}\': '
            '(\'{preset_mode}\')'.format(preset=preset,
                                         preset_mode=pipeline_preset.mode,
                                         mode=mode),
        )

        mode = pipeline_preset.mode

    if mode is not None:
        if not pipeline_def.has_mode_definition(mode):
            raise DagsterInvariantViolationError((
                'You have attempted to execute pipeline {name} with mode {mode}. '
                'Available modes: {modes}').format(
                    name=pipeline_def.name,
                    mode=mode,
                    modes=pipeline_def.available_modes,
                ))
    else:
        if pipeline_def.is_multi_mode:
            raise DagsterInvariantViolationError((
                'Pipeline {name} has multiple modes (Available modes: {modes}) and you have '
                'attempted to execute it without specifying a mode. Set '
                'mode property on the PipelineRun object.').format(
                    name=pipeline_def.name,
                    modes=pipeline_def.available_modes))
        mode = pipeline_def.get_default_mode_name()

    tags = merge_dicts(pipeline_def.tags, tags)

    check.opt_inst_param(instance, 'instance', DagsterInstance)
    instance = instance or DagsterInstance.ephemeral()

    # generate pipeline subset from the given solid_selection
    if solid_selection:
        pipeline = pipeline.subset_for_execution(solid_selection)

    return (
        pipeline,
        run_config,
        instance,
        mode,
        tags,
        pipeline.solids_to_execute,
        solid_selection,
    )
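
The invariants above reduce to two rules: mode and preset are mutually exclusive, and any value a preset supplies must agree with the same value passed explicitly. A compact sketch of that validation detached from Dagster's check module (validate_mode_and_preset and preset_modes are illustrative names):

def validate_mode_and_preset(mode, preset, preset_modes):
    # Rule 1: at most one of `mode` / `preset` may be set.
    if mode is not None and preset is not None:
        raise ValueError(
            f"You may set only one of `mode` (got {mode}) or `preset` (got {preset})."
        )
    if preset is not None:
        preset_mode = preset_modes[preset]
        # Rule 2 (defensive, mirroring the original): an explicit mode must
        # agree with the mode the preset pins.
        if mode is not None and mode != preset_mode:
            raise ValueError(f"Mode {mode} does not agree with preset '{preset}'.")
        mode = preset_mode
    return mode

assert validate_mode_and_preset(None, "dev", {"dev": "local"}) == "local"
assert validate_mode_and_preset("prod", None, {}) == "prod"
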