def create_execution_plan_core(execution_info):
    '''Assemble an execution plan for the pipeline carried by ``execution_info``.

    Solids are visited in the order produced by ``solids_in_topological_order``.
    For each solid a transform step is created and appended, followed by the
    steps of one output subplan per output definition. The terminal step output
    handle of every output subplan is recorded in the builder's step output map
    under the solid's output handle.

    Args:
        execution_info (ExecutionPlanInfo): Planning information; its
            ``pipeline`` attribute supplies the solids to plan.

    Returns:
        Whatever ``create_execution_plan_from_steps`` builds from the
        accumulated list of steps.
    '''
    check.inst_param(execution_info, 'execution_info', ExecutionPlanInfo)

    builder_state = StepBuilderState(steps=[], step_output_map=StepOutputMap())

    for current_solid in solids_in_topological_order(execution_info.pipeline):
        inputs_for_step = create_step_inputs(execution_info, builder_state, current_solid)
        user_config = get_solid_user_config(execution_info, current_solid)

        transform_step = create_transform_step(
            execution_info, current_solid, inputs_for_step, user_config
        )
        builder_state.steps.append(transform_step)

        for out_def in current_solid.definition.output_defs:
            output_subplan = create_subplan_for_output(
                execution_info, current_solid, transform_step, out_def
            )
            builder_state.steps.extend(output_subplan.steps)

            # Map the solid-level output to the last step of its output subplan.
            handle_for_output = current_solid.output_handle(out_def.name)
            builder_state.step_output_map[
                handle_for_output
            ] = output_subplan.terminal_step_output_handle

    return create_execution_plan_from_steps(builder_state.steps)
def build(self):
    '''Construct and return the ExecutionPlan for ``self.pipeline_def``.

    Steps are first registered by ``_build_from_sorted_solids`` starting from
    the root pipeline. A dependency dictionary (step key -> set of keys taken
    from each step input's ``dependency_keys``) is then derived from the
    registered steps.

    Returns:
        ExecutionPlan: built from the pipeline definition, the step dictionary,
        the dependency dictionary, the ``is_persistent`` flag of the system
        storage definition, the previous run id, and the step keys to execute
        (all step keys when the run config specifies none).
    '''
    # Populate self._steps by walking the root pipeline's solids.
    root_pipeline = self.pipeline_def
    self._build_from_sorted_solids(
        solids_in_topological_order(root_pipeline),
        root_pipeline.dependency_structure,
    )

    registered_steps = list(self._steps.values())

    # First pass: give every step key an empty dependency set.
    deps = {}
    for plan_step in registered_steps:
        deps[plan_step.key] = set()

    # Second pass: collect the dependency keys contributed by each step input.
    for plan_step in registered_steps:
        for an_input in plan_step.step_inputs:
            deps[plan_step.key].update(an_input.dependency_keys)

    step_dict = {}
    for plan_step in registered_steps:
        step_dict[plan_step.key] = plan_step

    system_storage_def = self.mode_definition.get_system_storage_def(
        self.environment_config.storage.system_storage_name
    )

    previous_run_id = self.run_config.previous_run_id

    # Fall back to every known step when no explicit selection was configured.
    step_keys_to_execute = self.run_config.step_keys_to_execute or list(step_dict.keys())

    return ExecutionPlan(
        self.pipeline_def,
        step_dict,
        deps,
        system_storage_def.is_persistent,
        previous_run_id,
        step_keys_to_execute,
    )
def test_diamond_toposort():
    '''The diamond pipeline's solids come back in the expected topological order.'''
    actual_order = [
        solid.name for solid in solids_in_topological_order(create_diamond_pipeline())
    ]
    expected_order = ['A_source', 'A', 'B', 'C', 'D']

    assert actual_order == expected_order
def create_execution_plan_core(execution_info, execution_metadata, subset_info=None,
                               added_outputs=None):
    '''Build the execution plan for ``execution_info.pipeline``.

    Solids are visited in the order returned by ``solids_in_topological_order``.
    While a solid is being planned, the plan builder carries ``solid`` and
    ``solid_definition`` tags; while one of its outputs is being planned it
    additionally carries an ``output`` tag.

    Args:
        execution_info (ExecutionPlanInfo): Planning information, including the
            pipeline to plan.
        execution_metadata (ExecutionMetadata): Supplies the initial tags for the
            plan builder.
        subset_info (Optional[ExecutionPlanSubsetInfo]): When truthy, the full
            plan is passed through ``_create_augmented_subplan``.
        added_outputs (Optional[ExecutionPlanAddedOutputs]): Forwarded to
            ``_create_augmented_subplan``; only consulted when ``subset_info``
            is truthy.

    Returns:
        The plan from ``create_execution_plan_from_steps``, or the result of
        ``_create_augmented_subplan`` when ``subset_info`` is provided.
    '''
    check.inst_param(execution_info, 'execution_info', ExecutionPlanInfo)
    check.inst_param(execution_metadata, 'execution_metadata', ExecutionMetadata)
    check.opt_inst_param(subset_info, 'subset_info', ExecutionPlanSubsetInfo)
    # The label must match the real parameter name so a failed type check
    # points at ``added_outputs`` (it previously said 'added_output').
    check.opt_inst_param(added_outputs, 'added_outputs', ExecutionPlanAddedOutputs)

    plan_builder = PlanBuilder(
        pipeline_name=execution_info.pipeline.name,
        initial_tags=execution_metadata.tags,
    )

    for solid in solids_in_topological_order(execution_info.pipeline):
        with plan_builder.push_tags(solid=solid.name, solid_definition=solid.definition.name):
            step_inputs = create_step_inputs(execution_info, plan_builder, solid)

            solid_transform_step = create_transform_step(
                execution_info,
                plan_builder,
                solid,
                step_inputs,
                get_solid_user_config(execution_info, solid),
            )

            plan_builder.steps.append(solid_transform_step)

            for output_def in solid.definition.output_defs:
                with plan_builder.push_tags(output=output_def.name):
                    subplan = create_subplan_for_output(
                        execution_info, plan_builder, solid, solid_transform_step, output_def
                    )
                    plan_builder.steps.extend(subplan.steps)

                    # Downstream inputs resolve this solid output to the last
                    # step of the output subplan.
                    output_handle = solid.output_handle(output_def.name)
                    plan_builder.step_output_map[
                        output_handle
                    ] = subplan.terminal_step_output_handle

    execution_plan = create_execution_plan_from_steps(plan_builder.steps)

    if subset_info:
        return _create_augmented_subplan(
            execution_info, plan_builder, execution_plan, subset_info, added_outputs
        )
    else:
        return execution_plan
def create_execution_plan_core(pipeline_context, execution_metadata, subset_info=None,
                               added_outputs=None):
    '''Build the execution plan for ``pipeline_context.pipeline_def``.

    Solids are visited in the order returned by ``solids_in_topological_order``;
    each contributes a transform step followed by the steps of one output
    subplan per output definition.

    Args:
        pipeline_context (PipelineExecutionContext): Context whose
            ``pipeline_def`` is planned.
        execution_metadata (ExecutionMetadata): Type-checked here; not otherwise
            read by this function.
        subset_info (Optional[ExecutionPlanSubsetInfo]): Forwarded to
            ``_create_augmented_subplan``.
        added_outputs (Optional[ExecutionPlanAddedOutputs]): Forwarded to
            ``_create_augmented_subplan``.

    Returns:
        The plan from ``create_execution_plan_from_steps``, or the result of
        ``_create_augmented_subplan`` when either ``subset_info`` or
        ``added_outputs`` is truthy.
    '''
    check.inst_param(pipeline_context, 'pipeline_context', PipelineExecutionContext)
    check.inst_param(execution_metadata, 'execution_metadata', ExecutionMetadata)
    check.opt_inst_param(subset_info, 'subset_info', ExecutionPlanSubsetInfo)
    # The label must match the real parameter name so a failed type check
    # points at ``added_outputs`` (it previously said 'added_output').
    check.opt_inst_param(added_outputs, 'added_outputs', ExecutionPlanAddedOutputs)

    plan_builder = PlanBuilder()

    for solid in solids_in_topological_order(pipeline_context.pipeline_def):
        step_inputs = create_step_inputs(pipeline_context, plan_builder, solid)

        solid_transform_step = create_transform_step(pipeline_context, solid, step_inputs)
        plan_builder.steps.append(solid_transform_step)

        for output_def in solid.definition.output_defs:
            subplan = create_subplan_for_output(
                pipeline_context, solid, solid_transform_step, output_def
            )
            plan_builder.steps.extend(subplan.steps)

            # Downstream inputs resolve this solid output to the last step of
            # the output subplan.
            output_handle = solid.output_handle(output_def.name)
            plan_builder.step_output_map[output_handle] = subplan.terminal_step_output_handle

    execution_plan = create_execution_plan_from_steps(pipeline_context, plan_builder.steps)

    if subset_info or added_outputs:
        return _create_augmented_subplan(
            pipeline_context, execution_plan, subset_info, added_outputs
        )
    else:
        return execution_plan
def execute_list_command(cli_args, print_fn):
    '''Print a listing of the repository's pipelines and their solids.

    Args:
        cli_args: Passed to ``handle_for_repo_cli_args`` to locate the repository.
        print_fn: Callable invoked once per output line.

    The repository title is printed first, underlined with asterisks. Each
    pipeline then gets a title (preceded by an asterisk separator for every
    pipeline after the first), an optional description, and its solids in the
    order returned by ``solids_in_topological_order``.
    '''
    repo_handle = handle_for_repo_cli_args(cli_args)
    repository = repo_handle.build_repository_definition()

    header = 'Repository {name}'.format(name=repository.name)
    print_fn(header)
    print_fn('*' * len(header))

    for position, pipeline in enumerate(repository.get_all_pipelines()):
        pipeline_header = 'Pipeline: {name}'.format(name=pipeline.name)

        # Separate consecutive pipelines; nothing is printed before the first one.
        if position > 0:
            print_fn('*' * len(pipeline_header))

        print_fn(pipeline_header)

        if pipeline.description:
            print_fn('Description:')
            print_fn(format_description(pipeline.description, indent=' ' * 4))

        print_fn('Solids: (Execution Order)')
        for ordered_solid in solids_in_topological_order(pipeline):
            print_fn(' ' + ordered_solid.name)
def build(self):
    '''Construct and return the ExecutionPlan for ``self.pipeline_def``.

    Steps are first registered by ``_build_from_sorted_solids`` starting from
    the root pipeline. Each step's dependencies are then the step keys of the
    previous output handles referenced by its step inputs.

    Returns:
        ExecutionPlan: built from the pipeline definition, the step dictionary,
        the dependency dictionary, and the ``is_persistent`` flag of the run
        storage constructed from the environment config.
    '''
    # Register all steps by walking the root pipeline's solids.
    self._build_from_sorted_solids(
        solids_in_topological_order(self.pipeline_def),
        self.pipeline_def.dependency_structure,
    )

    # First pass: every step key starts with an empty dependency set.
    deps = {}
    for plan_step in self.steps:
        deps[plan_step.key] = set()

    # Second pass: a step depends on whichever step produced each of its inputs.
    for plan_step in self.steps:
        for an_input in plan_step.step_inputs:
            upstream_key = an_input.prev_output_handle.step_key
            deps[plan_step.key].add(upstream_key)

    step_dict = {}
    for plan_step in self.steps:
        step_dict[plan_step.key] = plan_step

    return ExecutionPlan(
        self.pipeline_def,
        step_dict,
        deps,
        self.environment_config.storage.construct_run_storage().is_persistent,
    )
def create_execution_plan_core(pipeline_def, environment_config):
    '''Assemble an execution plan for ``pipeline_def`` under ``environment_config``.

    Solids are visited in the order produced by ``solids_in_topological_order``.
    For each solid a transform step is created and appended, followed by the
    steps of one output subplan per output definition.

    Args:
        pipeline_def (PipelineDefinition): The pipeline to plan.
        environment_config (EnvironmentConfig): Passed through to the step input,
            output subplan and plan construction helpers.

    Returns:
        Whatever ``create_execution_plan_from_steps`` builds from the
        accumulated list of steps.
    '''
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    check.inst_param(environment_config, 'environment_config', EnvironmentConfig)

    builder = PlanBuilder()

    for current_solid in solids_in_topological_order(pipeline_def):
        inputs_for_step = create_step_inputs(
            pipeline_def, environment_config, builder, current_solid
        )

        transform_step = create_transform_step(pipeline_def, current_solid, inputs_for_step)
        builder.steps.append(transform_step)

        for out_def in current_solid.definition.output_defs:
            output_subplan = create_subplan_for_output(
                pipeline_def, environment_config, current_solid, transform_step, out_def
            )
            builder.steps.extend(output_subplan.steps)

            # Map the solid-level output to the last step of its output subplan.
            handle_for_output = current_solid.output_handle(out_def.name)
            builder.step_output_map[
                handle_for_output
            ] = output_subplan.terminal_step_output_handle

    return create_execution_plan_from_steps(pipeline_def, builder.steps, environment_config)
def create_execution_plan_core(execution_info, execution_metadata):
    '''Assemble an execution plan for ``execution_info.pipeline``.

    Solids are visited in the order produced by ``solids_in_topological_order``.
    While a solid is being planned, the builder state carries ``solid`` and
    ``solid_definition`` tags; while one of its outputs is being planned it
    additionally carries an ``output`` tag.

    Args:
        execution_info (ExecutionPlanInfo): Planning information, including the
            pipeline to plan.
        execution_metadata (ExecutionMetadata): Supplies the initial tags for the
            builder state.

    Returns:
        Whatever ``create_execution_plan_from_steps`` builds from the
        accumulated list of steps.
    '''
    check.inst_param(execution_info, 'execution_info', ExecutionPlanInfo)
    check.inst_param(execution_metadata, 'execution_metadata', ExecutionMetadata)

    builder_state = StepBuilderState(
        pipeline_name=execution_info.pipeline.name,
        initial_tags=execution_metadata.tags,
    )

    for current_solid in solids_in_topological_order(execution_info.pipeline):
        with builder_state.push_tags(
            solid=current_solid.name, solid_definition=current_solid.definition.name
        ):
            inputs_for_step = create_step_inputs(execution_info, builder_state, current_solid)
            user_config = get_solid_user_config(execution_info, current_solid)

            transform_step = create_transform_step(
                execution_info, builder_state, current_solid, inputs_for_step, user_config
            )
            builder_state.steps.append(transform_step)

            for out_def in current_solid.definition.output_defs:
                with builder_state.push_tags(output=out_def.name):
                    output_subplan = create_subplan_for_output(
                        execution_info, builder_state, current_solid, transform_step, out_def
                    )
                    builder_state.steps.extend(output_subplan.steps)

                    # Map the solid-level output to the last step of its subplan.
                    handle_for_output = current_solid.output_handle(out_def.name)
                    builder_state.step_output_map[
                        handle_for_output
                    ] = output_subplan.terminal_step_output_handle

    return create_execution_plan_from_steps(builder_state.steps)
def _build_from_sorted_solids(self, solids, dependency_structure, parent_handle=None,
                              parent_step_inputs=None):
    '''Register execution steps and output handles for each solid in ``solids``.

    Args:
        solids: Iterable of solids to plan, processed in the order given.
        dependency_structure: Passed to ``get_step_input`` when resolving inputs.
        parent_handle: Handle of the enclosing composite solid, if any; used as
            the parent of each SolidHandle created here.
        parent_step_inputs: Step inputs of the enclosing composite solid, if any;
            passed to ``get_step_input``.

    For a SolidDefinition a compute step is created and added. For a
    CompositeSolidDefinition this method recurses over the composite's own
    solids. Any other definition type triggers a failing ``check.invariant``.
    Afterwards each output of the solid is mapped to the output of the compute
    step it resolves to.
    '''
    for current_solid in solids:
        solid_def = current_solid.definition
        handle = SolidHandle(current_solid.name, solid_def.name, parent_handle)

        ### 1. INPUTS
        # Gather the step inputs for this solid.
        collected_inputs = []
        for input_name, input_def in solid_def.input_dict.items():
            candidate = get_step_input(
                self,
                current_solid,
                input_name,
                input_def,
                dependency_structure,
                handle,
                parent_step_inputs,
            )

            # get_step_input yields None for an input of runtime type "Nothing"
            # that has no value; such inputs get no StepInput.
            if candidate is not None:
                check.inst_param(candidate, 'step_input', StepInput)
                collected_inputs.append(candidate)

        if isinstance(solid_def, SolidDefinition):
            ### 2a. COMPUTE FUNCTION
            # A plain solid contributes exactly one compute step.
            compute_step_for_solid = create_compute_step(
                self.pipeline_name, self.environment_config, current_solid, collected_inputs, handle
            )
            self.add_step(compute_step_for_solid)
        elif isinstance(solid_def, CompositeSolidDefinition):
            ### 2b. RECURSE
            # A composite solid is planned by planning the solids it contains.
            self._build_from_sorted_solids(
                solids_in_topological_order(solid_def),
                solid_def.dependency_structure,
                parent_handle=handle,
                parent_step_inputs=collected_inputs,
            )
        else:
            check.invariant(
                False,
                'Unexpected solid type {type} encountered during execution planning'
                .format(type=type(solid_def)),
            )

        ### 3. OUTPUTS
        # Register an output handle for every output of this solid.
        for output_name, output_def in solid_def.output_dict.items():
            solid_output_handle = current_solid.output_handle(output_name)

            # Resolve through any layers of composition to the definition and
            # handle that actually originate this output.
            origin_output_def, origin_handle = solid_def.resolve_output_to_origin(
                output_def.name, handle
            )
            origin_step = self.get_step_by_handle(origin_handle)

            self.set_output_handle(
                solid_output_handle,
                StepOutputHandle.from_step(origin_step, origin_output_def.name),
            )
def _build_from_sorted_solids(
    self, solids, dependency_structure, parent_handle=None, parent_step_inputs=None
):
    '''Register input, compute and output steps for each solid in ``solids``.

    Args:
        solids: Iterable of solids to plan, processed in the order given.
        dependency_structure: Passed to ``get_input_source_step_handles`` when
            resolving where each input comes from.
        parent_handle: Handle of the enclosing composite solid, if any; used as
            the parent of each SolidHandle created here.
        parent_step_inputs: Step inputs of the enclosing composite solid, if any.

    Returns:
        The compute step most recently created (directly, or by the recursive
        call for a composite solid), or None when ``solids`` is empty.
    '''
    last_transform_step = None

    for current_solid in solids:
        solid_def = current_solid.definition
        handle = SolidHandle(current_solid.name, solid_def.name, parent_handle)

        ### 1. INPUTS
        # Build an input subplan for every input that has a source.
        collected_inputs = []
        for input_name, input_def in solid_def.input_dict.items():
            source_output_handle = get_input_source_step_handles(
                self,
                current_solid,
                input_name,
                input_def,
                dependency_structure,
                handle,
                parent_step_inputs,
            )

            # A falsy handle means there is nothing to wire up for this input
            # (the case for input defs whose runtime type is "Nothing").
            if not source_output_handle:
                continue

            input_subplan = create_subplan_for_input(
                self.pipeline_name,
                self.environment_config,
                current_solid,
                source_output_handle,
                input_def,
                handle,
            )
            self.add_steps(input_subplan.steps)

            collected_inputs.append(
                StepInput(
                    input_name,
                    input_def.runtime_type,
                    input_subplan.terminal_step_output_handle,
                )
            )

        ### 2. COMPUTE FUNCTION OR RECURSE
        if isinstance(solid_def, SolidDefinition):
            # A plain solid contributes exactly one compute step.
            compute_step_for_solid = create_compute_step(
                self.pipeline_name, self.environment_config, current_solid, collected_inputs, handle
            )
            self.add_step(compute_step_for_solid)
            last_transform_step = compute_step_for_solid
        elif isinstance(solid_def, CompositeSolidDefinition):
            # A composite solid is planned by planning the solids it contains.
            last_transform_step = self._build_from_sorted_solids(
                solids_in_topological_order(solid_def),
                solid_def.dependency_structure,
                parent_handle=handle,
                parent_step_inputs=collected_inputs,
            )
        else:
            check.invariant(
                False,
                'Unexpected solid type {type} encountered during execution planning'.format(
                    type=type(solid_def)
                ),
            )

        ### 3. OUTPUTS
        # Build an output subplan for every output and register its handle.
        for output_name, output_def in solid_def.output_dict.items():
            output_subplan = create_subplan_for_output(
                self.pipeline_name,
                self.environment_config,
                current_solid,
                last_transform_step,
                output_def,
                handle,
            )
            self.add_steps(output_subplan.steps)

            solid_output_handle = current_solid.output_handle(output_name)
            self.set_output_handle(
                solid_output_handle, output_subplan.terminal_step_output_handle
            )

    return last_transform_step
def _build_from_sorted_solids(self, solids, dependency_structure, parent_handle=None,
                              parent_step_inputs=None):
    '''Register compute and output steps for each solid in ``solids``.

    Args:
        solids: Iterable of solids to plan, processed in the order given.
        dependency_structure: Passed to ``get_step_input`` when resolving inputs.
        parent_handle: Handle of the enclosing composite solid, if any; used as
            the parent of each SolidHandle created here.
        parent_step_inputs: Step inputs of the enclosing composite solid, if any;
            passed to ``get_step_input``.

    Returns:
        The compute step most recently created (directly, or by the recursive
        call for a composite solid), or None when ``solids`` is empty.
    '''
    last_compute_step = None

    for current_solid in solids:
        solid_def = current_solid.definition
        handle = SolidHandle(current_solid.name, solid_def.name, parent_handle)

        ### 1. INPUTS
        # Gather the step inputs for this solid.
        collected_inputs = []
        for input_name, input_def in solid_def.input_dict.items():
            candidate = get_step_input(
                self,
                current_solid,
                input_name,
                input_def,
                dependency_structure,
                handle,
                parent_step_inputs,
            )

            # get_step_input yields None for an input of runtime type "Nothing"
            # that has no value; such inputs get no StepInput.
            if candidate is not None:
                check.inst_param(candidate, 'step_input', StepInput)
                collected_inputs.append(candidate)

        ### 2. COMPUTE FUNCTION OR RECURSE
        if isinstance(solid_def, SolidDefinition):
            # A plain solid contributes exactly one compute step.
            compute_step_for_solid = create_compute_step(
                self.pipeline_name, self.environment_config, current_solid, collected_inputs, handle
            )
            self.add_step(compute_step_for_solid)
            last_compute_step = compute_step_for_solid
        elif isinstance(solid_def, CompositeSolidDefinition):
            # A composite solid is planned by planning the solids it contains.
            last_compute_step = self._build_from_sorted_solids(
                solids_in_topological_order(solid_def),
                solid_def.dependency_structure,
                parent_handle=handle,
                parent_step_inputs=collected_inputs,
            )
        else:
            check.invariant(
                False,
                'Unexpected solid type {type} encountered during execution planning'
                .format(type=type(solid_def)),
            )

        ### 3. OUTPUTS
        # Build an output subplan for every output and register its handle.
        for output_name, output_def in solid_def.output_dict.items():
            output_subplan = create_subplan_for_output(
                self.pipeline_name, current_solid, last_compute_step, output_def
            )
            self.add_steps(output_subplan.steps)

            solid_output_handle = current_solid.output_handle(output_name)
            self.set_output_handle(
                solid_output_handle, output_subplan.terminal_step_output_handle
            )

    return last_compute_step