Пример #1
0
def create_execution_plan_core(execution_info):
    '''Assemble an execution plan for the pipeline carried by ``execution_info``.

    Solids are visited in the order yielded by ``solids_in_topological_order``.
    For every solid, one transform step is created and recorded, followed by
    the steps of one subplan per output definition. Each subplan's terminal
    step output handle is stored in the builder state's ``step_output_map``
    under the corresponding solid output handle.

    Args:
        execution_info (ExecutionPlanInfo): Planning context; its ``pipeline``
            attribute supplies the solids to plan.

    Returns:
        Whatever ``create_execution_plan_from_steps`` builds from the
        accumulated list of steps.
    '''
    check.inst_param(execution_info, 'execution_info', ExecutionPlanInfo)

    builder_state = StepBuilderState(steps=[], step_output_map=StepOutputMap())

    for solid in solids_in_topological_order(execution_info.pipeline):
        inputs_for_step = create_step_inputs(execution_info, builder_state, solid)
        user_config = get_solid_user_config(execution_info, solid)

        transform_step = create_transform_step(
            execution_info, solid, inputs_for_step, user_config
        )
        builder_state.steps.append(transform_step)

        for out_def in solid.definition.output_defs:
            output_subplan = create_subplan_for_output(
                execution_info, solid, transform_step, out_def
            )
            builder_state.steps.extend(output_subplan.steps)

            # Map the solid's output handle to the subplan's terminal step output handle.
            solid_output_handle = solid.output_handle(out_def.name)
            builder_state.step_output_map[solid_output_handle] = (
                output_subplan.terminal_step_output_handle
            )

    return create_execution_plan_from_steps(builder_state.steps)
Пример #2
0
    def build(self):
        '''Assemble and return the ``ExecutionPlan`` for ``self.pipeline_def``.

        The steps are first created by ``_build_from_sorted_solids``; afterwards
        a mapping from each step key to the set of keys it depends on is derived
        from the ``dependency_keys`` of every step input.

        Returns:
            ExecutionPlan: Constructed from the pipeline definition, the steps
            keyed by step key, the dependency mapping, the ``is_persistent``
            flag of the configured system storage definition, the run config's
            ``previous_run_id`` and the step keys to execute (every step key
            when the run config does not provide any).
        '''

        # Recursively build the execution plan starting at the root pipeline
        self._build_from_sorted_solids(
            solids_in_topological_order(self.pipeline_def),
            self.pipeline_def.dependency_structure,
        )

        planned_steps = self._steps.values()

        # Every step gets an entry, even when none of its inputs contribute keys.
        deps = {}
        for plan_step in planned_steps:
            upstream_keys = deps.setdefault(plan_step.key, set())
            for step_input in plan_step.step_inputs:
                upstream_keys.update(step_input.dependency_keys)

        step_dict = {}
        for plan_step in planned_steps:
            step_dict[plan_step.key] = plan_step

        system_storage_def = self.mode_definition.get_system_storage_def(
            self.environment_config.storage.system_storage_name
        )

        run_config = self.run_config
        previous_run_id = run_config.previous_run_id

        # A falsy selection (None or empty) means "execute every step".
        step_keys_to_execute = run_config.step_keys_to_execute
        if not step_keys_to_execute:
            step_keys_to_execute = list(step_dict)

        return ExecutionPlan(
            self.pipeline_def,
            step_dict,
            deps,
            system_storage_def.is_persistent,
            previous_run_id,
            step_keys_to_execute,
        )
Пример #3
0
def test_diamond_toposort():
    '''Check the solid-name order produced for the diamond pipeline.'''
    expected_order = ['A_source', 'A', 'B', 'C', 'D']

    sorted_solids = solids_in_topological_order(create_diamond_pipeline())
    actual_order = [solid.name for solid in sorted_solids]

    assert actual_order == expected_order
Пример #4
0
def create_execution_plan_core(execution_info,
                               execution_metadata,
                               subset_info=None,
                               added_outputs=None):
    '''Build the execution plan for ``execution_info.pipeline``.

    Solids are visited in the order yielded by ``solids_in_topological_order``.
    For each solid a transform step is created and appended to the plan
    builder, followed by the steps of one subplan per output definition. The
    terminal step output handle of each subplan is recorded in the builder's
    ``step_output_map`` under the solid's output handle. Step creation happens
    inside ``plan_builder.push_tags`` contexts carrying the solid name, the
    solid definition name and, for outputs, the output name.

    Args:
        execution_info (ExecutionPlanInfo): Planning context holding the
            pipeline.
        execution_metadata (ExecutionMetadata): Supplies the initial tags
            handed to the plan builder.
        subset_info (Optional[ExecutionPlanSubsetInfo]): When truthy, the
            finished plan is passed through ``_create_augmented_subplan``.
        added_outputs (Optional[ExecutionPlanAddedOutputs]): Forwarded to
            ``_create_augmented_subplan``.

    Returns:
        The plan from ``create_execution_plan_from_steps``, or the result of
        ``_create_augmented_subplan`` when ``subset_info`` is truthy.
    '''
    check.inst_param(execution_info, 'execution_info', ExecutionPlanInfo)
    check.inst_param(execution_metadata, 'execution_metadata',
                     ExecutionMetadata)
    check.opt_inst_param(subset_info, 'subset_info', ExecutionPlanSubsetInfo)
    # The label must match the real parameter name so a failed check points
    # the caller at the right argument.
    check.opt_inst_param(added_outputs, 'added_outputs',
                         ExecutionPlanAddedOutputs)

    plan_builder = PlanBuilder(pipeline_name=execution_info.pipeline.name,
                               initial_tags=execution_metadata.tags)

    for solid in solids_in_topological_order(execution_info.pipeline):

        with plan_builder.push_tags(solid=solid.name,
                                    solid_definition=solid.definition.name):
            step_inputs = create_step_inputs(execution_info, plan_builder,
                                             solid)

            solid_transform_step = create_transform_step(
                execution_info,
                plan_builder,
                solid,
                step_inputs,
                get_solid_user_config(execution_info, solid),
            )

            plan_builder.steps.append(solid_transform_step)

            for output_def in solid.definition.output_defs:
                with plan_builder.push_tags(output=output_def.name):
                    subplan = create_subplan_for_output(
                        execution_info, plan_builder, solid,
                        solid_transform_step, output_def)
                    plan_builder.steps.extend(subplan.steps)

                    # Map the solid's output handle to the subplan's terminal
                    # step output handle.
                    output_handle = solid.output_handle(output_def.name)
                    plan_builder.step_output_map[
                        output_handle] = subplan.terminal_step_output_handle

    execution_plan = create_execution_plan_from_steps(plan_builder.steps)

    # NOTE(review): added_outputs is only consulted when subset_info is
    # truthy; passing added_outputs alone returns the unmodified plan.
    # Confirm that this is intended.
    if subset_info:
        return _create_augmented_subplan(execution_info, plan_builder,
                                         execution_plan, subset_info,
                                         added_outputs)
    else:
        return execution_plan
Пример #5
0
def create_execution_plan_core(pipeline_context,
                               execution_metadata,
                               subset_info=None,
                               added_outputs=None):
    '''Build the execution plan for ``pipeline_context.pipeline_def``.

    Solids are visited in the order yielded by ``solids_in_topological_order``.
    For each solid a transform step is created and appended to the plan
    builder, followed by the steps of one subplan per output definition. The
    terminal step output handle of each subplan is recorded in the builder's
    ``step_output_map`` under the solid's output handle.

    Args:
        pipeline_context (PipelineExecutionContext): Context holding the
            pipeline definition.
        execution_metadata (ExecutionMetadata): Type-checked here but not
            otherwise used by this function.
        subset_info (Optional[ExecutionPlanSubsetInfo]): Forwarded to
            ``_create_augmented_subplan``.
        added_outputs (Optional[ExecutionPlanAddedOutputs]): Forwarded to
            ``_create_augmented_subplan``.

    Returns:
        The plan from ``create_execution_plan_from_steps``, or the result of
        ``_create_augmented_subplan`` when either ``subset_info`` or
        ``added_outputs`` is truthy.
    '''
    check.inst_param(pipeline_context, 'pipeline_context',
                     PipelineExecutionContext)
    check.inst_param(execution_metadata, 'execution_metadata',
                     ExecutionMetadata)
    check.opt_inst_param(subset_info, 'subset_info', ExecutionPlanSubsetInfo)
    # The label must match the real parameter name so a failed check points
    # the caller at the right argument.
    check.opt_inst_param(added_outputs, 'added_outputs',
                         ExecutionPlanAddedOutputs)

    plan_builder = PlanBuilder()

    for solid in solids_in_topological_order(pipeline_context.pipeline_def):

        step_inputs = create_step_inputs(pipeline_context, plan_builder, solid)

        solid_transform_step = create_transform_step(pipeline_context, solid,
                                                     step_inputs)

        plan_builder.steps.append(solid_transform_step)

        for output_def in solid.definition.output_defs:
            subplan = create_subplan_for_output(pipeline_context, solid,
                                                solid_transform_step,
                                                output_def)
            plan_builder.steps.extend(subplan.steps)

            # Map the solid's output handle to the subplan's terminal step
            # output handle.
            output_handle = solid.output_handle(output_def.name)
            plan_builder.step_output_map[
                output_handle] = subplan.terminal_step_output_handle

    execution_plan = create_execution_plan_from_steps(pipeline_context,
                                                      plan_builder.steps)

    if subset_info or added_outputs:
        return _create_augmented_subplan(pipeline_context, execution_plan,
                                         subset_info, added_outputs)
    else:
        return execution_plan
Пример #6
0
def execute_list_command(cli_args, print_fn):
    '''Print the repository's pipelines and their solids through ``print_fn``.

    The output starts with the repository title underlined with asterisks.
    Each pipeline then contributes its title, its description (only when it
    has one) and the names of its solids in the order yielded by
    ``solids_in_topological_order``. Every pipeline except the first is
    preceded by a row of asterisks as wide as its own title.

    Args:
        cli_args: Passed to ``handle_for_repo_cli_args`` to obtain the handle
            from which the repository definition is built.
        print_fn: Callable invoked once per output line with a string.
    '''
    repository = handle_for_repo_cli_args(cli_args).build_repository_definition()

    header = 'Repository {name}'.format(name=repository.name)
    print_fn(header)
    print_fn('*' * len(header))

    for position, pipeline in enumerate(repository.get_all_pipelines()):
        pipeline_header = 'Pipeline: {name}'.format(name=pipeline.name)

        # Separator between consecutive pipelines; nothing precedes the first one.
        if position > 0:
            print_fn('*' * len(pipeline_header))

        print_fn(pipeline_header)

        if pipeline.description:
            print_fn('Description:')
            print_fn(format_description(pipeline.description, indent=' ' * 4))

        print_fn('Solids: (Execution Order)')
        for solid in solids_in_topological_order(pipeline):
            print_fn('    ' + solid.name)
Пример #7
0
    def build(self):
        '''Assemble and return the ``ExecutionPlan`` for ``self.pipeline_def``.

        The steps are first created by ``_build_from_sorted_solids``; afterwards
        a mapping from each step key to the set of step keys it depends on is
        derived from the ``prev_output_handle`` of every step input.

        Returns:
            ExecutionPlan: Constructed from the pipeline definition, the steps
            keyed by step key, the dependency mapping and the
            ``is_persistent`` flag of the run storage constructed from the
            environment config.
        '''

        # Recursively build the execution plan starting at the root pipeline
        self._build_from_sorted_solids(
            solids_in_topological_order(self.pipeline_def),
            self.pipeline_def.dependency_structure,
        )

        planned_steps = self.steps

        # Every step gets an entry, even when it has no inputs at all.
        deps = {}
        for plan_step in planned_steps:
            upstream_keys = deps.setdefault(plan_step.key, set())
            for step_input in plan_step.step_inputs:
                upstream_keys.add(step_input.prev_output_handle.step_key)

        step_dict = {}
        for plan_step in planned_steps:
            step_dict[plan_step.key] = plan_step

        return ExecutionPlan(
            self.pipeline_def,
            step_dict,
            deps,
            self.environment_config.storage.construct_run_storage().is_persistent,
        )
Пример #8
0
def create_execution_plan_core(pipeline_def, environment_config):
    '''Build the execution plan for ``pipeline_def`` under ``environment_config``.

    Solids are visited in the order yielded by ``solids_in_topological_order``.
    For every solid, one transform step is created and recorded, followed by
    the steps of one subplan per output definition. Each subplan's terminal
    step output handle is stored in the plan builder's ``step_output_map``
    under the corresponding solid output handle.

    Args:
        pipeline_def (PipelineDefinition): Pipeline whose solids are planned.
        environment_config (EnvironmentConfig): Passed on to the input and
            output planning helpers and to the final plan construction.

    Returns:
        Whatever ``create_execution_plan_from_steps`` builds from the pipeline
        definition, the accumulated steps and the environment config.
    '''
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    check.inst_param(environment_config, 'environment_config', EnvironmentConfig)

    builder = PlanBuilder()

    for solid in solids_in_topological_order(pipeline_def):
        inputs_for_step = create_step_inputs(pipeline_def, environment_config, builder, solid)

        transform_step = create_transform_step(pipeline_def, solid, inputs_for_step)
        builder.steps.append(transform_step)

        for out_def in solid.definition.output_defs:
            output_subplan = create_subplan_for_output(
                pipeline_def, environment_config, solid, transform_step, out_def
            )
            builder.steps.extend(output_subplan.steps)

            # Map the solid's output handle to the subplan's terminal step output handle.
            solid_output_handle = solid.output_handle(out_def.name)
            builder.step_output_map[solid_output_handle] = (
                output_subplan.terminal_step_output_handle
            )

    return create_execution_plan_from_steps(pipeline_def, builder.steps, environment_config)
Пример #9
0
def create_execution_plan_core(execution_info, execution_metadata):
    '''Build the execution plan for ``execution_info.pipeline``.

    Solids are visited in the order yielded by ``solids_in_topological_order``.
    For every solid, one transform step is created and recorded, followed by
    the steps of one subplan per output definition. Step creation happens
    inside ``push_tags`` contexts of the builder state carrying the solid
    name, the solid definition name and, for outputs, the output name.

    Args:
        execution_info (ExecutionPlanInfo): Planning context holding the
            pipeline.
        execution_metadata (ExecutionMetadata): Supplies the initial tags
            handed to the builder state.

    Returns:
        Whatever ``create_execution_plan_from_steps`` builds from the
        accumulated list of steps.
    '''
    check.inst_param(execution_info, 'execution_info', ExecutionPlanInfo)
    check.inst_param(execution_metadata, 'execution_metadata', ExecutionMetadata)

    builder_state = StepBuilderState(
        pipeline_name=execution_info.pipeline.name,
        initial_tags=execution_metadata.tags,
    )

    for solid in solids_in_topological_order(execution_info.pipeline):
        solid_tags = builder_state.push_tags(
            solid=solid.name, solid_definition=solid.definition.name
        )
        with solid_tags:
            inputs_for_step = create_step_inputs(execution_info, builder_state, solid)
            user_config = get_solid_user_config(execution_info, solid)

            transform_step = create_transform_step(
                execution_info, builder_state, solid, inputs_for_step, user_config
            )
            builder_state.steps.append(transform_step)

            for out_def in solid.definition.output_defs:
                with builder_state.push_tags(output=out_def.name):
                    output_subplan = create_subplan_for_output(
                        execution_info, builder_state, solid, transform_step, out_def
                    )
                    builder_state.steps.extend(output_subplan.steps)

                    # Map the solid's output handle to the subplan's terminal
                    # step output handle.
                    solid_output_handle = solid.output_handle(out_def.name)
                    builder_state.step_output_map[solid_output_handle] = (
                        output_subplan.terminal_step_output_handle
                    )

    return create_execution_plan_from_steps(builder_state.steps)
Пример #10
0
    def _build_from_sorted_solids(self,
                                  solids,
                                  dependency_structure,
                                  parent_handle=None,
                                  parent_step_inputs=None):
        '''Add plan steps for ``solids``, descending into composite solids.

        For each solid, in the order given: the step inputs are collected, then
        either a compute step is added (``SolidDefinition``) or the method
        recurses into the contained solids (``CompositeSolidDefinition``), and
        finally each solid output handle is mapped to the output of the
        compute step it resolves to.

        Args:
            solids: Iterable of solids to process, in the order to plan them.
            dependency_structure: Passed to ``get_step_input`` when resolving
                the inputs of each solid.
            parent_handle: Handle of the enclosing composite solid, if any.
            parent_step_inputs: Step inputs of the enclosing composite solid,
                if any; passed to ``get_step_input``.
        '''
        for solid in solids:
            solid_def = solid.definition
            handle = SolidHandle(solid.name, solid_def.name, parent_handle)

            # --- 1. Inputs: gather a StepInput for every input that has one ---
            step_inputs = []
            for input_name, input_def in solid_def.input_dict.items():
                resolved_input = get_step_input(
                    self,
                    solid,
                    input_name,
                    input_def,
                    dependency_structure,
                    handle,
                    parent_step_inputs,
                )

                # None is returned for an input with runtime_type "Nothing" that
                # has no value; no StepInput is created in that case.
                if resolved_input is not None:
                    check.inst_param(resolved_input, 'step_input', StepInput)
                    step_inputs.append(resolved_input)

            # --- 2. Compute step for a plain solid, recursion for a composite ---
            if isinstance(solid_def, SolidDefinition):
                new_compute_step = create_compute_step(
                    self.pipeline_name,
                    self.environment_config,
                    solid,
                    step_inputs,
                    handle,
                )
                self.add_step(new_compute_step)
            elif isinstance(solid_def, CompositeSolidDefinition):
                self._build_from_sorted_solids(
                    solids_in_topological_order(solid_def),
                    solid_def.dependency_structure,
                    parent_handle=handle,
                    parent_step_inputs=step_inputs,
                )
            else:
                check.invariant(
                    False,
                    'Unexpected solid type {type} encountered during execution planning'.format(
                        type=type(solid_def)
                    ),
                )

            # --- 3. Outputs: register an output handle for every solid output ---
            for output_name, output_def in solid_def.output_dict.items():
                solid_output_handle = solid.output_handle(output_name)

                # Punch through layers of composition scope to reach the output
                # of the actual compute step.
                origin_output_def, origin_handle = solid_def.resolve_output_to_origin(
                    output_def.name, handle
                )
                origin_step = self.get_step_by_handle(origin_handle)
                self.set_output_handle(
                    solid_output_handle,
                    StepOutputHandle.from_step(origin_step, origin_output_def.name),
                )
Пример #11
0
    def _build_from_sorted_solids(
        self, solids, dependency_structure, parent_handle=None, parent_step_inputs=None
    ):
        '''Add plan steps for ``solids``, descending into composite solids.

        For each solid, in the order given: an input subplan is added for every
        input that has a source step output handle, then either a compute step
        is added (``SolidDefinition``) or the method recurses into the
        contained solids (``CompositeSolidDefinition``), and finally an output
        subplan is added for every output.

        For a composite solid, every output subplan is built on the step
        returned by the recursive call, i.e. the last compute step created
        inside that composite.

        Args:
            solids: Iterable of solids to process, in the order to plan them.
            dependency_structure: Passed to ``get_input_source_step_handles``.
            parent_handle: Handle of the enclosing composite solid, if any.
            parent_step_inputs: Step inputs of the enclosing composite solid,
                if any; passed to ``get_input_source_step_handles``.

        Returns:
            The compute step created most recently (directly or through
            recursion), or ``None`` when ``solids`` is empty.
        '''
        last_compute_step = None

        for solid in solids:
            solid_def = solid.definition
            handle = SolidHandle(solid.name, solid_def.name, parent_handle)

            # --- 1. Inputs: one subplan per input that has a source handle ---
            step_inputs = []
            for input_name, input_def in solid_def.input_dict.items():
                source_handle = get_input_source_step_handles(
                    self,
                    solid,
                    input_name,
                    input_def,
                    dependency_structure,
                    handle,
                    parent_step_inputs,
                )

                # A falsy handle is returned when the input def runtime type is
                # "Nothing"; such inputs get neither a subplan nor a StepInput.
                if source_handle:
                    input_subplan = create_subplan_for_input(
                        self.pipeline_name,
                        self.environment_config,
                        solid,
                        source_handle,
                        input_def,
                        handle,
                    )
                    self.add_steps(input_subplan.steps)

                    new_step_input = StepInput(
                        input_name,
                        input_def.runtime_type,
                        input_subplan.terminal_step_output_handle,
                    )
                    step_inputs.append(new_step_input)

            # --- 2. Compute step for a plain solid, recursion for a composite ---
            if isinstance(solid_def, SolidDefinition):
                new_compute_step = create_compute_step(
                    self.pipeline_name, self.environment_config, solid, step_inputs, handle
                )
                self.add_step(new_compute_step)
                last_compute_step = new_compute_step
            elif isinstance(solid_def, CompositeSolidDefinition):
                last_compute_step = self._build_from_sorted_solids(
                    solids_in_topological_order(solid_def),
                    solid_def.dependency_structure,
                    parent_handle=handle,
                    parent_step_inputs=step_inputs,
                )
            else:
                check.invariant(
                    False,
                    'Unexpected solid type {type} encountered during execution planning'.format(
                        type=type(solid_def)
                    ),
                )

            # --- 3. Outputs: one subplan (and output handle) per solid output ---
            for output_name, output_def in solid_def.output_dict.items():
                output_subplan = create_subplan_for_output(
                    self.pipeline_name,
                    self.environment_config,
                    solid,
                    last_compute_step,
                    output_def,
                    handle,
                )
                self.add_steps(output_subplan.steps)

                solid_output_handle = solid.output_handle(output_name)
                self.set_output_handle(
                    solid_output_handle, output_subplan.terminal_step_output_handle
                )

        return last_compute_step
Пример #12
0
    def _build_from_sorted_solids(self,
                                  solids,
                                  dependency_structure,
                                  parent_handle=None,
                                  parent_step_inputs=None):
        '''Add plan steps for ``solids``, descending into composite solids.

        For each solid, in the order given: the step inputs are collected, then
        either a compute step is added (``SolidDefinition``) or the method
        recurses into the contained solids (``CompositeSolidDefinition``), and
        finally an output subplan is added for every output.

        For a composite solid, every output subplan is built on the step
        returned by the recursive call, i.e. the last compute step created
        inside that composite.

        Args:
            solids: Iterable of solids to process, in the order to plan them.
            dependency_structure: Passed to ``get_step_input`` when resolving
                the inputs of each solid.
            parent_handle: Handle of the enclosing composite solid, if any.
            parent_step_inputs: Step inputs of the enclosing composite solid,
                if any; passed to ``get_step_input``.

        Returns:
            The compute step created most recently (directly or through
            recursion), or ``None`` when ``solids`` is empty.
        '''
        last_compute_step = None

        for solid in solids:
            solid_def = solid.definition
            handle = SolidHandle(solid.name, solid_def.name, parent_handle)

            # --- 1. Inputs: gather a StepInput for every input that has one ---
            step_inputs = []
            for input_name, input_def in solid_def.input_dict.items():
                resolved_input = get_step_input(
                    self,
                    solid,
                    input_name,
                    input_def,
                    dependency_structure,
                    handle,
                    parent_step_inputs,
                )

                # None is returned for an input with runtime_type "Nothing" that
                # has no value; no StepInput is created in that case.
                if resolved_input is not None:
                    check.inst_param(resolved_input, 'step_input', StepInput)
                    step_inputs.append(resolved_input)

            # --- 2. Compute step for a plain solid, recursion for a composite ---
            if isinstance(solid_def, SolidDefinition):
                new_compute_step = create_compute_step(
                    self.pipeline_name,
                    self.environment_config,
                    solid,
                    step_inputs,
                    handle,
                )
                self.add_step(new_compute_step)
                last_compute_step = new_compute_step
            elif isinstance(solid_def, CompositeSolidDefinition):
                last_compute_step = self._build_from_sorted_solids(
                    solids_in_topological_order(solid_def),
                    solid_def.dependency_structure,
                    parent_handle=handle,
                    parent_step_inputs=step_inputs,
                )
            else:
                check.invariant(
                    False,
                    'Unexpected solid type {type} encountered during execution planning'.format(
                        type=type(solid_def)
                    ),
                )

            # --- 3. Outputs: one subplan (and output handle) per solid output ---
            for output_name, output_def in solid_def.output_dict.items():
                output_subplan = create_subplan_for_output(
                    self.pipeline_name, solid, last_compute_step, output_def
                )
                self.add_steps(output_subplan.steps)

                solid_output_handle = solid.output_handle(output_name)
                self.set_output_handle(
                    solid_output_handle, output_subplan.terminal_step_output_handle
                )

        return last_compute_step