示例#1
0
def _validate_subset_info(subset_info, execution_plan, pipeline_context):
    """Check that a subset description only names steps and inputs the plan has.

    Returns immediately when ``subset_info`` is falsy.

    Raises:
        DagsterExecutionStepNotFoundError: a key in ``subset_info.subset`` or in
            ``subset_info.input_step_factory_fns`` is not a step of
            ``execution_plan``.
        DagsterInvalidSubplanInputNotFoundError: an input name listed under a
            step in ``subset_info.input_step_factory_fns`` is not an input of
            that step.
    """
    if not subset_info:
        return

    def _require_step(key):
        # Same existence check is applied to subset keys and to injected-input keys.
        if execution_plan.has_step(key):
            return
        raise DagsterExecutionStepNotFoundError(
            'Step {step_key} does not exist.'.format(step_key=key),
            step_key=key)

    for subset_key in subset_info.subset:
        _require_step(subset_key)

    for step_key, input_dict in subset_info.input_step_factory_fns.items():
        _require_step(step_key)
        step = execution_plan.get_step_by_key(step_key)

        for input_name in input_dict.keys():
            if step.has_step_input(input_name):
                continue
            message = 'Input {input_name} on {step_key} does not exist.'.format(
                input_name=input_name, step_key=step_key)
            raise DagsterInvalidSubplanInputNotFoundError(
                message,
                pipeline_name=pipeline_context.pipeline_def.name,
                step_keys=list(subset_info.subset),
                input_name=input_name,
                step=step,
            )
示例#2
0
    def __new__(
        cls,
        pipeline,
        step_dict,
        step_handles_to_execute,
        environment_config,
        known_state=None,
    ):
        """Validate the selected handles, derive the step maps and build the plan.

        Raises:
            DagsterExecutionStepNotFoundError: a handle in
                ``step_handles_to_execute`` is not a key of ``step_dict``.
            DagsterInvariantViolationError: a selected ``UnresolvedExecutionStep``
                is resolved by a step key that is not among the selected
                executable steps.
        """
        check.list_param(
            step_handles_to_execute,
            "step_handles_to_execute",
            of_type=(StepHandle, UnresolvedStepHandle, ResolvedFromDynamicStepHandle),
        )

        unknown_keys = []
        for requested in step_handles_to_execute:
            if requested not in step_dict:
                unknown_keys.append(requested.to_key())
        if unknown_keys:
            plural_suffix = "s" if len(unknown_keys) > 1 else ""
            raise DagsterExecutionStepNotFoundError(
                "Execution plan does not contain step{plural}: {steps}".format(
                    plural=plural_suffix, steps=", ".join(unknown_keys)
                ),
                step_keys=unknown_keys,
            )

        selected_steps = [step_dict[requested] for requested in step_handles_to_execute]

        # Executable steps are collected first; the unresolved-step check below
        # needs the complete map.
        executable_map = {
            step.key: step.handle for step in selected_steps if isinstance(step, ExecutionStep)
        }

        resolvable_map: Dict[str, List[UnresolvedStepHandle]] = defaultdict(list)
        for step in selected_steps:
            if not isinstance(step, UnresolvedExecutionStep):
                continue
            if step.resolved_by_step_key not in executable_map:
                raise DagsterInvariantViolationError(
                    f'UnresolvedExecutionStep "{step.key}" is resolved by "{step.resolved_by_step_key}" '
                    "which is not part of the current step selection"
                )
            resolvable_map[step.resolved_by_step_key].append(step.handle)

        # Remaining parameter checks, in the order the fields are passed on.
        checked_pipeline = check.inst_param(pipeline, "pipeline", IPipeline)
        checked_step_dict = check.dict_param(
            step_dict,
            "step_dict",
            key_type=StepHandleTypes,
            value_type=(ExecutionStep, UnresolvedExecutionStep),
        )
        checked_environment_config = check.inst_param(
            environment_config, "environment_config", EnvironmentConfig
        )
        checked_known_state = check.opt_inst_param(known_state, "known_state", KnownExecutionState)

        return super(ExecutionPlan, cls).__new__(
            cls,
            pipeline=checked_pipeline,
            step_dict=checked_step_dict,
            executable_map=executable_map,
            resolvable_map=resolvable_map,
            step_handles_to_execute=step_handles_to_execute,
            environment_config=checked_environment_config,
            known_state=checked_known_state,
        )
示例#3
0
    def build_subset_plan(self, step_keys_to_execute: List[str]) -> "ExecutionPlan":
        """Return a new plan over the same steps, restricted to the given keys.

        Each key is parsed into a handle with ``StepHandle.parse_from_key``.

        Raises:
            DagsterExecutionStepNotFoundError: a parsed handle is not a key of
                ``self.step_dict``.
        """
        check.list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)

        step_handles_to_execute = []
        for step_key in step_keys_to_execute:
            step_handles_to_execute.append(StepHandle.parse_from_key(step_key))

        bad_keys = [
            candidate.to_key()
            for candidate in step_handles_to_execute
            if candidate not in self.step_dict
        ]
        if bad_keys:
            raise DagsterExecutionStepNotFoundError(
                f"Can not build subset plan from unknown step{'s' if len(bad_keys)> 1 else ''}: {', '.join(bad_keys)}",
                step_keys=bad_keys,
            )

        return ExecutionPlan(
            self.pipeline,
            self.step_dict,
            step_handles_to_execute,
            self.environment_config,
            self.known_state,
        )
示例#4
0
文件: plan.py 项目: iamahern/dagster
 def __new__(
     cls,
     pipeline,
     step_dict,
     deps,
     artifacts_persisted,
     step_keys_to_execute,
 ):
     """Build the plan after confirming every requested step key is known.

     Raises:
         DagsterExecutionStepNotFoundError: an entry of
             ``step_keys_to_execute`` is not a key of ``step_dict``.
     """
     absent_keys = []
     for requested_key in step_keys_to_execute:
         if requested_key not in step_dict:
             absent_keys.append(requested_key)

     if absent_keys:
         plural_suffix = "s" if len(absent_keys) > 1 else ""
         raise DagsterExecutionStepNotFoundError(
             "Execution plan does not contain step{plural}: {steps}".format(
                 plural=plural_suffix, steps=", ".join(absent_keys)),
             step_keys=absent_keys,
         )

     # Parameter checks run in the same order as the fields are passed on.
     checked_pipeline = check.inst_param(pipeline, "pipeline", IPipeline)
     checked_step_dict = check.dict_param(
         step_dict, "step_dict", key_type=str, value_type=ExecutionStep)
     checked_deps = check.dict_param(deps, "deps", key_type=str, value_type=set)
     all_steps = list(step_dict.values())
     checked_artifacts_persisted = check.bool_param(
         artifacts_persisted, "artifacts_persisted")
     checked_step_keys = check.list_param(
         step_keys_to_execute, "step_keys_to_execute", of_type=str)

     return super(ExecutionPlan, cls).__new__(
         cls,
         pipeline=checked_pipeline,
         step_dict=checked_step_dict,
         deps=checked_deps,
         steps=all_steps,
         artifacts_persisted=checked_artifacts_persisted,
         step_keys_to_execute=checked_step_keys,
     )
示例#5
0
文件: api.py 项目: cuulee/dagster
def _check_reexecution_config(pipeline_context, execution_plan, run_config):
    """Validate the re-execution settings on ``run_config`` against storage and plan.

    Raises:
        DagsterInvariantViolationError: the context's run storage is not persistent.
        DagsterRunNotFoundError: the configured previous run id is not in run storage.
        DagsterExecutionStepNotFoundError: a step output handle names a step that
            ``execution_plan`` does not contain.
        DagsterStepOutputNotFoundError: a step output handle names an output its
            step does not have.
    """
    run_storage = pipeline_context.run_storage
    check.invariant(run_storage)

    if not run_storage.is_persistent:
        raise DagsterInvariantViolationError(
            'Cannot perform reexecution with non persistent run storage.')

    reexecution_config = run_config.reexecution_config
    previous_run_id = reexecution_config.previous_run_id

    if not run_storage.has_run(previous_run_id):
        raise DagsterRunNotFoundError(
            'Run id {} set as previous run id was not found in run storage'.format(
                previous_run_id),
            invalid_run_id=previous_run_id,
        )

    for handle in reexecution_config.step_output_handles:
        step_key = handle.step_key
        if not execution_plan.has_step(step_key):
            raise DagsterExecutionStepNotFoundError(
                ('Step {step_key} was specified as a step from a previous run. '
                 'It does not exist.').format(step_key=step_key),
                step_key=step_key,
            )

        output_name = handle.output_name
        if execution_plan.get_step_by_key(step_key).has_step_output(output_name):
            continue

        raise DagsterStepOutputNotFoundError(
            ('You specified a step_output_handle in the ReexecutionConfig that does '
             'not exist: Step {step_key} does not have output {output_name}.'
             ).format(step_key=step_key, output_name=output_name),
            step_key=step_key,
            output_name=output_name,
        )
示例#6
0
文件: api.py 项目: cuulee/dagster
def execute_plan(execution_plan,
                 environment_dict=None,
                 run_config=None,
                 step_keys_to_execute=None):
    '''Entry point of dagster-graphql executions (the dagster CLI goes through
    execute_pipeline() instead).

    Validates the arguments, checks that every requested step key is in the plan,
    then runs the plan inside a scoped pipeline context and returns the resulting
    events as a list.

    Raises:
        DagsterExecutionStepNotFoundError: a key in ``step_keys_to_execute`` is not
            a step of ``execution_plan``.
    '''
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict')
    pipeline_def = execution_plan.pipeline_def
    run_config = check_run_config_param(run_config, pipeline_def)
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    # A missing or empty selection has nothing to validate.
    for requested_key in step_keys_to_execute or ():
        if execution_plan.has_step(requested_key):
            continue
        raise DagsterExecutionStepNotFoundError(
            'Execution plan does not contain step "{}"'.format(requested_key),
            step_key=requested_key)

    with scoped_pipeline_context(pipeline_def, environment_dict,
                                 run_config) as pipeline_context:
        _setup_reexecution(run_config, pipeline_context, execution_plan)

        event_stream = invoke_executor_on_plan(
            pipeline_context, execution_plan, step_keys_to_execute)
        # Materialize while the context is still open.
        return list(event_stream)
示例#7
0
def _validate_added_outputs(added_outputs, execution_plan, subset_info,
                            pipeline_context):
    """Check that every added output targets a step in the subset and a real output.

    Returns immediately when ``added_outputs`` is falsy.

    Raises:
        DagsterExecutionStepNotFoundError: ``_is_step_in_subset`` rejects a step
            key from ``added_outputs.output_step_factory_fns``.
        DagsterInvalidSubplanOutputNotFoundError: an entry names an output its
            step does not have.
    """
    if not added_outputs:
        return

    factory_fns = added_outputs.output_step_factory_fns
    for step_key, outputs_for_step in factory_fns.items():
        if not _is_step_in_subset(execution_plan, subset_info, step_key):
            raise DagsterExecutionStepNotFoundError(
                'Step {step_key} does not exist.'.format(step_key=step_key),
                step_key=step_key)

        step = execution_plan.get_step_by_key(step_key)

        for entry in outputs_for_step:
            check.inst(entry, OutputStepFactoryEntry)
            output_name = entry.output_name
            if step.has_step_output(output_name):
                continue
            message = 'Execution step {step_key} does not have output {output}'.format(
                step_key=step_key, output=output_name)
            raise DagsterInvalidSubplanOutputNotFoundError(
                message,
                pipeline_name=pipeline_context.pipeline_def.name,
                step_keys=list(subset_info.subset),
                step=step,
                output_name=output_name,
            )
示例#8
0
文件: plan.py 项目: cmrajan/dagster
 def __new__(
     cls,
     pipeline_def,
     step_dict,
     deps,
     artifacts_persisted,
     previous_run_id,
     step_keys_to_execute,
 ):
     '''Build the plan after confirming every requested step key is known.

     Raises:
         DagsterExecutionStepNotFoundError: an entry of
             ``step_keys_to_execute`` is not a key of ``step_dict``.
     '''
     absent_keys = []
     for requested_key in step_keys_to_execute:
         if requested_key not in step_dict:
             absent_keys.append(requested_key)

     if absent_keys:
         plural_suffix = 's' if len(absent_keys) > 1 else ''
         raise DagsterExecutionStepNotFoundError(
             'Execution plan does not contain step{plural}: {steps}'.format(
                 plural=plural_suffix, steps=', '.join(absent_keys)
             ),
             step_keys=absent_keys,
         )

     # Parameter checks run in the same order as the fields are passed on.
     checked_pipeline_def = check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
     checked_step_dict = check.dict_param(
         step_dict, 'step_dict', key_type=str, value_type=ExecutionStep
     )
     checked_deps = check.dict_param(deps, 'deps', key_type=str, value_type=set)
     all_steps = list(step_dict.values())
     checked_artifacts_persisted = check.bool_param(artifacts_persisted, 'artifacts_persisted')
     checked_previous_run_id = check.opt_str_param(previous_run_id, 'previous_run_id')
     checked_step_keys = check.list_param(
         step_keys_to_execute, 'step_keys_to_execute', of_type=str
     )

     return super(ExecutionPlan, cls).__new__(
         cls,
         pipeline_def=checked_pipeline_def,
         step_dict=checked_step_dict,
         deps=checked_deps,
         steps=all_steps,
         artifacts_persisted=checked_artifacts_persisted,
         previous_run_id=checked_previous_run_id,
         step_keys_to_execute=checked_step_keys,
     )
示例#9
0
    def __new__(
        cls, pipeline, step_dict, step_handles_to_execute, artifacts_persisted, environment_config,
    ):
        """Validate the selected handles, derive the executable map and build the plan.

        Raises:
            DagsterExecutionStepNotFoundError: a handle in
                ``step_handles_to_execute`` is not a key of ``step_dict``.
        """
        check.list_param(step_handles_to_execute, "step_handles_to_execute", of_type=StepHandle)

        unknown_keys = []
        for requested in step_handles_to_execute:
            if requested not in step_dict:
                unknown_keys.append(requested.to_key())
        if unknown_keys:
            plural_suffix = "s" if len(unknown_keys) > 1 else ""
            raise DagsterExecutionStepNotFoundError(
                "Execution plan does not contain step{plural}: {steps}".format(
                    plural=plural_suffix, steps=", ".join(unknown_keys)
                ),
                step_keys=unknown_keys,
            )

        # Step key -> handle for every selected step.
        selected_steps = (step_dict[requested] for requested in step_handles_to_execute)
        executable_map = {step.key: step.handle for step in selected_steps}

        # Remaining parameter checks, in the order the fields are passed on.
        checked_pipeline = check.inst_param(pipeline, "pipeline", IPipeline)
        checked_step_dict = check.dict_param(
            step_dict, "step_dict", key_type=StepHandle, value_type=ExecutionStep,
        )
        checked_artifacts_persisted = check.bool_param(artifacts_persisted, "artifacts_persisted")
        checked_environment_config = check.inst_param(
            environment_config, "environment_config", EnvironmentConfig
        )

        return super(ExecutionPlan, cls).__new__(
            cls,
            pipeline=checked_pipeline,
            step_dict=checked_step_dict,
            executable_map=executable_map,
            artifacts_persisted=checked_artifacts_persisted,
            step_handles_to_execute=step_handles_to_execute,
            environment_config=checked_environment_config,
        )
示例#10
0
文件: api.py 项目: lceames/dagster
def _resolve_step_keys(execution_plan, step_keys_to_execute):
    """Return the step keys to run, defaulting to every step in topological order.

    When ``step_keys_to_execute`` is ``None`` the keys of
    ``execution_plan.topological_steps()`` are returned. Otherwise the given list
    is validated and returned unchanged.

    Raises:
        DagsterExecutionStepNotFoundError: on the first given key the plan does
            not contain.
    """
    if step_keys_to_execute is None:
        return [step.key for step in execution_plan.topological_steps()]

    for requested_key in step_keys_to_execute:
        if execution_plan.has_step(requested_key):
            continue
        raise DagsterExecutionStepNotFoundError(
            'Execution plan does not contain step \'{}\''.format(requested_key),
            step_key=requested_key,
        )
    return step_keys_to_execute
示例#11
0
def _compute_step_maps(step_dict, step_handles_to_execute, known_state):
    """Build the executable and resolvable step maps for a handle selection.

    Args:
        step_dict: mapping from step handle to step object.
        step_handles_to_execute: list of handles selected for execution; each
            must be a key of ``step_dict``.
        known_state: optional. When truthy, its ``dynamic_mappings`` are handed
            to ``_update_from_resolved_dynamic_outputs`` together with both maps.

    Returns:
        tuple: ``(executable_map, resolvable_map)``. ``executable_map`` maps the
        key of each selected ``ExecutionStep`` to its handle. ``resolvable_map``
        groups the handles of selected unresolved steps under their
        ``resolved_by_step_keys``.

    Raises:
        DagsterExecutionStepNotFoundError: a selected handle is not in
            ``step_dict``.
        DagsterInvariantViolationError: an unresolved step depends on a
            resolving step key that is not part of the selection.
    """
    check.list_param(
        step_handles_to_execute,
        "step_handles_to_execute",
        of_type=(StepHandle, UnresolvedStepHandle,
                 ResolvedFromDynamicStepHandle),
    )

    missing_steps = [
        step_handle.to_key() for step_handle in step_handles_to_execute
        if step_handle not in step_dict
    ]
    if missing_steps:
        raise DagsterExecutionStepNotFoundError(
            "Execution plan does not contain step{plural}: {steps}".format(
                plural="s" if len(missing_steps) > 1 else "",
                steps=", ".join(missing_steps)),
            step_keys=missing_steps,
        )

    # Built once as a set: it is only used for membership tests in the loop below.
    step_keys_to_execute = {
        step_handle.to_key() for step_handle in step_handles_to_execute
    }

    executable_map = {}
    # NOTE(review): entries are keyed by the whole ``resolved_by_step_keys``
    # value, which must therefore be hashable (presumably a frozenset of str).
    # The ``Dict[str, ...]`` annotation looks stale -- confirm.
    resolvable_map: Dict[str, List[UnresolvedStepHandle]] = defaultdict(list)
    for handle in step_handles_to_execute:
        step = step_dict[handle]
        if isinstance(step, ExecutionStep):
            executable_map[step.key] = step.handle
        elif isinstance(
                step,
            (UnresolvedMappedExecutionStep, UnresolvedCollectExecutionStep)):
            for key in step.resolved_by_step_keys:
                if key not in step_keys_to_execute:
                    # Report the specific key that failed the check.
                    raise DagsterInvariantViolationError(
                        f'Unresolved ExecutionStep "{step.key}" is resolved by "{key}" '
                        "which is not part of the current step selection")

            resolvable_map[step.resolved_by_step_keys].append(step.handle)
        else:
            check.invariant(step.key in executable_map,
                            "Expect all steps to be executable or resolvable")

    if known_state:
        _update_from_resolved_dynamic_outputs(
            step_dict,
            executable_map,
            resolvable_map,
            step_handles_to_execute,
            known_state.dynamic_mappings,
        )

    return (executable_map, resolvable_map)
示例#12
0
def invoke_executor_on_plan(pipeline_context,
                            execution_plan,
                            step_keys_to_execute=None):
    """Validate the requested step keys, then hand the plan to the configured engine.

    Returns whatever the engine's ``execute`` returns. The original comment
    describes this as a generator of yielded events.

    Raises:
        DagsterExecutionStepNotFoundError: a key in ``step_keys_to_execute`` is
            not a step of ``execution_plan``.
    """
    # A missing or empty selection has nothing to validate.
    for requested_key in step_keys_to_execute or ():
        if not execution_plan.has_step(requested_key):
            raise DagsterExecutionStepNotFoundError(step_key=requested_key)

    # The engine's result is returned as-is, so this function returns a
    # generator whenever the engine does.
    engine = pipeline_context.executor_config.get_engine()
    return engine.execute(pipeline_context, execution_plan, step_keys_to_execute)
示例#13
0
def _resolve_step_keys(execution_plan, step_keys_to_execute):
    """Return the step keys to run, defaulting to every step in topological order.

    When ``step_keys_to_execute`` is ``None`` the keys of
    ``execution_plan.topological_steps()`` are returned. Otherwise the given list
    is validated and returned unchanged.

    Raises:
        DagsterExecutionStepNotFoundError: one or more given keys are not in the
            plan; all of them are reported together.
    """
    if step_keys_to_execute is None:
        return [step.key for step in execution_plan.topological_steps()]

    missing_steps = []
    for requested_key in step_keys_to_execute:
        if not execution_plan.has_step(requested_key):
            missing_steps.append(requested_key)

    if missing_steps:
        plural_suffix = 's' if len(missing_steps) > 1 else ''
        raise DagsterExecutionStepNotFoundError(
            'Execution plan does not contain step{plural}: {steps}'.format(
                plural=plural_suffix, steps=', '.join(missing_steps)
            ),
            step_keys=missing_steps,
        )
    return step_keys_to_execute
示例#14
0
def parse_step_selection(step_deps, step_selection):
    """Resolve a list of step selection queries into a set of step keys.

    The upstream dependency dictionary is inverted to get downstream
    dependencies, and each selection clause is expanded against that graph with
    ``clause_to_subset``.

    Args:
        step_deps (Dict[str, Set[str]]): execution step dependencies, keyed by
            step key, each value being the set of that step's direct upstream
            dependencies.
        step_selection (List[str]): step key selection queries (a plain step key
            is also a query).

    Returns:
        FrozenSet[str]: the union of the subsets produced for every clause.

    Raises:
        DagsterExecutionStepNotFoundError: the selection refers to keys that are
            not in ``step_deps``.
        DagsterInvalidSubsetError: a clause expands to an empty subset.
    """
    check.list_param(step_selection, "step_selection", of_type=str)

    # Every known step gets an entry, including steps nothing depends on.
    downstream_deps = defaultdict(set)
    for known_key in step_deps.keys():
        downstream_deps[known_key] = set()

    # Invert the upstream edges to obtain the downstream edges.
    for dependent_key, upstream_keys in step_deps.items():
        for upstream_key in upstream_keys:
            downstream_deps[upstream_key].add(dependent_key)

    graph = {"upstream": step_deps, "downstream": downstream_deps}

    invalid_keys = []
    for selected_key in parse_items_from_selection(step_selection):
        if selected_key not in step_deps:
            invalid_keys.append(selected_key)
    if invalid_keys:
        raise DagsterExecutionStepNotFoundError(
            f"Step selection refers to unknown step{'s' if len(invalid_keys)> 1 else ''}: {', '.join(invalid_keys)}",
            step_keys=invalid_keys,
        )

    selected = set()
    for clause in step_selection:
        subset = clause_to_subset(graph, clause)
        if len(subset) > 0:
            selected.update(subset)
            continue
        raise DagsterInvalidSubsetError(
            "No qualified steps to execute found for step_selection={requested}"
            .format(requested=step_selection), )

    return frozenset(selected)
示例#15
0
文件: plan.py 项目: pawelad/dagster
    def build_subset_plan(
        self,
        step_keys_to_execute: List[str],
        pipeline_def: PipelineDefinition,
        environment_config: EnvironmentConfig,
    ) -> "ExecutionPlan":
        """Return a new plan over the same steps, restricted to the given keys.

        Each key is parsed into a handle with ``StepHandle.parse_from_key``. The
        step maps and the artifacts-persisted value are recomputed for the
        selection.

        Raises:
            DagsterExecutionStepNotFoundError: a parsed handle is not a key of
                ``self.step_dict``.
        """
        check.list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)

        step_handles_to_execute = []
        for step_key in step_keys_to_execute:
            step_handles_to_execute.append(StepHandle.parse_from_key(step_key))

        bad_keys = [
            candidate.to_key()
            for candidate in step_handles_to_execute
            if candidate not in self.step_dict
        ]
        if bad_keys:
            raise DagsterExecutionStepNotFoundError(
                f"Can not build subset plan from unknown step{'s' if len(bad_keys)> 1 else ''}: {', '.join(bad_keys)}",
                step_keys=bad_keys,
            )

        step_maps = _compute_step_maps(
            self.step_dict,
            step_handles_to_execute,
            self.known_state,
        )
        executable_map, resolvable_map = step_maps

        return ExecutionPlan(
            self.step_dict,
            executable_map,
            resolvable_map,
            step_handles_to_execute,
            self.known_state,
            _compute_artifacts_persisted(
                self.step_dict,
                step_handles_to_execute,
                pipeline_def,
                environment_config,
                executable_map,
            ),
        )
示例#16
0
文件: plan.py 项目: joxxperez/dagster
    def build_subset_plan(self, step_keys_to_execute: List[str]) -> "ExecutionPlan":
        """Return a new plan over the same steps, restricted to the given keys.

        Each key is parsed into a handle with ``StepHandle.parse_from_key``. A
        ``ResolvedFromDynamicStepHandle`` that is not yet in ``self.step_dict``,
        but whose unresolved form is, gets resolved on the fly via
        ``self.resolve``.

        Raises:
            DagsterExecutionStepNotFoundError: a handle is neither present in
                ``self.step_dict`` nor resolvable as described above.
        """
        check.list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)

        step_handles_to_execute = []
        for step_key in step_keys_to_execute:
            step_handles_to_execute.append(StepHandle.parse_from_key(step_key))

        bad_keys = []
        for handle in step_handles_to_execute:
            if handle in self.step_dict:
                continue  # already known, nothing to do

            resolvable = (isinstance(handle, ResolvedFromDynamicStepHandle)
                          and handle.unresolved_form in self.step_dict)
            if not resolvable:
                bad_keys.append(handle.to_key())
                continue

            unresolved_step = cast(UnresolvedExecutionStep,
                                   self.step_dict[handle.unresolved_form])
            # self.step_dict is updated as a side effect of resolve()
            self.resolve(
                unresolved_step.resolved_by_step_key,
                {unresolved_step.resolved_by_output_name: [handle.mapping_key]},
            )
            check.invariant(
                handle in self.step_dict,
                f"Handle did not resolve as expected, not found in step dict {handle}",
            )

        if bad_keys:
            raise DagsterExecutionStepNotFoundError(
                f"Can not build subset plan from unknown step{'s' if len(bad_keys)> 1 else ''}: {', '.join(bad_keys)}",
                step_keys=bad_keys,
            )

        return ExecutionPlan(
            self.pipeline,
            self.step_dict,
            step_handles_to_execute,
            self.environment_config,
        )
示例#17
0
def invoke_executor_on_plan(pipeline_context, execution_plan, step_keys_to_execute=None):
    """Validate the requested step keys, then run the plan on the matching engine.

    The engine class is chosen from the type of
    ``pipeline_context.executor_config``. Returns whatever the engine's
    ``execute`` returns; the original comment describes this as a generator of
    yielded events.

    Raises:
        DagsterExecutionStepNotFoundError: a key in ``step_keys_to_execute`` is
            not a step of ``execution_plan``.
    """
    # A missing or empty selection has nothing to validate.
    for requested_key in step_keys_to_execute or ():
        if not execution_plan.has_step(requested_key):
            raise DagsterExecutionStepNotFoundError(step_key=requested_key)

    # Toggle engine based on executor config supplied by the pipeline context
    executor_config = pipeline_context.executor_config
    if isinstance(executor_config, InProcessExecutorConfig):
        engine = InProcessEngine
    elif isinstance(executor_config, MultiprocessExecutorConfig):
        engine = MultiprocessingEngine
    else:
        check.failed('Unsupported config {}'.format(executor_config))
        # Only reached if check.failed returns instead of raising.
        engine = None

    # The engine's result is returned as-is, so this function returns a
    # generator whenever the engine does.
    return engine.execute(pipeline_context, execution_plan, step_keys_to_execute)
示例#18
0
文件: api.py 项目: databill86/dagster
def _steps_execution_iterator(pipeline_context, execution_plan, run_config,
                              step_keys_to_execute):
    '''Run the selected steps of a plan and return the resulting events.

    Pipeline-level events are limited to two short-circuit cases: an init-failure
    event passed in place of the context is returned as-is (wrapped by
    ensure_gen), and a plan with no steps returns a single pipeline success
    event.

    When no step keys are given, every step of the plan in topological order is
    used.

    Raises:
        DagsterExecutionStepNotFoundError: a step key is not in the plan.
    '''
    check.inst_param(pipeline_context, 'pipeline_context',
                     (DagsterEvent, SystemPipelineExecutionContext))
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.inst_param(run_config, 'run_config', RunConfig)
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    # Context construction failed upstream: the "context" is the failure event.
    if isinstance(pipeline_context, DagsterEvent):
        event_type = pipeline_context.event_type  # pylint: disable=no-member
        if event_type == DagsterEventType.PIPELINE_INIT_FAILURE:
            return ensure_gen(pipeline_context)

    selected_keys = step_keys_to_execute or [
        step.key for step in execution_plan.topological_steps()
    ]

    if not selected_keys:
        pipeline_context.log.debug(
            'Pipeline {pipeline} has no steps to execute and no execution will happen'
            .format(pipeline=pipeline_context.pipeline_def.display_name))
        return ensure_gen(DagsterEvent.pipeline_success(pipeline_context))

    for selected_key in selected_keys:
        if execution_plan.has_step(selected_key):
            continue
        raise DagsterExecutionStepNotFoundError(
            'Execution plan does not contain step \'{}\''.format(selected_key),
            step_key=selected_key,
        )

    _setup_reexecution(run_config, pipeline_context, execution_plan)

    return _invoke_executor_on_plan(pipeline_context, execution_plan, selected_keys)
示例#19
0
def _execute_plan_iterator(pipeline_context, execution_plan, run_config,
                           step_keys_to_execute):
    '''Run the selected steps of a plan and return the resulting events.

    An init-failure event passed in place of the context is returned as-is
    (wrapped by ensure_gen), and a plan with no steps returns a single pipeline
    success event. When no step keys are given, every step of the plan in
    topological order is used.

    Raises:
        DagsterExecutionStepNotFoundError: a step key is not in the plan.
    '''
    check.inst_param(pipeline_context, 'pipeline_context',
                     (DagsterEvent, SystemPipelineExecutionContext))
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.inst_param(run_config, 'run_config', RunConfig)
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    # Context construction failed upstream: the "context" is the failure event.
    if isinstance(pipeline_context, DagsterEvent):
        event_type = pipeline_context.event_type  # pylint: disable=no-member
        if event_type == DagsterEventType.PIPELINE_INIT_FAILURE:
            return ensure_gen(pipeline_context)

    selected_keys = step_keys_to_execute or [
        step.key for step in execution_plan.topological_steps()
    ]

    if not selected_keys:
        pipeline_context.log.debug(
            'Pipeline {pipeline} has no steps to execute and no execution will happen'
            .format(pipeline=pipeline_context.pipeline_def.display_name))
        return ensure_gen(DagsterEvent.pipeline_success(pipeline_context))

    for selected_key in selected_keys:
        if execution_plan.has_step(selected_key):
            continue
        raise DagsterExecutionStepNotFoundError(
            'Execution plan does not contain step "{}"'.format(selected_key),
            step_key=selected_key)

    _setup_reexecution(run_config, pipeline_context, execution_plan)

    return _invoke_executor_on_plan(pipeline_context, execution_plan, selected_keys)