def get_external_execution_plan(
    self,
    external_pipeline: ExternalPipeline,
    run_config: Dict[str, Any],
    mode: str,
    step_keys_to_execute: Optional[List[str]],
    known_state: Optional[KnownExecutionState],
    instance: Optional[DagsterInstance] = None,
) -> ExternalExecutionPlan:
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.dict_param(run_config, "run_config")
    check.str_param(mode, "mode")
    check.opt_nullable_list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)
    check.opt_inst_param(known_state, "known_state", KnownExecutionState)
    check.opt_inst_param(instance, "instance", DagsterInstance)

    execution_plan = create_execution_plan(
        pipeline=self.get_reconstructable_pipeline(
            external_pipeline.name
        ).subset_for_execution_from_existing_pipeline(external_pipeline.solids_to_execute),
        run_config=run_config,
        mode=mode,
        step_keys_to_execute=step_keys_to_execute,
        known_state=known_state,
        instance_ref=instance.get_ref() if instance and instance.is_persistent else None,
    )
    return ExternalExecutionPlan(
        execution_plan_snapshot=snapshot_from_execution_plan(
            execution_plan,
            external_pipeline.identifying_pipeline_snapshot_id,
        )
    )
def get_external_execution_plan_snapshot(recon_pipeline, args):
    check.inst_param(recon_pipeline, "recon_pipeline", ReconstructablePipeline)
    check.inst_param(args, "args", ExecutionPlanSnapshotArgs)

    try:
        pipeline = (
            recon_pipeline.subset_for_execution(args.solid_selection)
            if args.solid_selection
            else recon_pipeline
        )

        return snapshot_from_execution_plan(
            create_execution_plan(
                pipeline=pipeline,
                run_config=args.run_config,
                mode=args.mode,
                step_keys_to_execute=args.step_keys_to_execute,
                known_state=args.known_state,
            ),
            args.pipeline_snapshot_id,
        )
    except:  # pylint: disable=bare-except
        return ExecutionPlanSnapshotErrorData(
            error=serializable_error_info_from_exc_info(sys.exc_info())
        )
def test_create_execution_plan_snapshot():
    @solid
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    instance = DagsterInstance.local_temp()

    execution_plan = create_execution_plan(noop_pipeline)

    ep_snapshot = snapshot_from_execution_plan(
        execution_plan, noop_pipeline.get_pipeline_snapshot_id()
    )
    ep_snapshot_id = create_execution_plan_snapshot_id(ep_snapshot)

    result = execute_pipeline(noop_pipeline, instance=instance)
    assert result.success

    run = instance.get_run_by_id(result.run_id)

    assert run.execution_plan_snapshot_id == ep_snapshot_id
    assert run.execution_plan_snapshot_id == create_execution_plan_snapshot_id(ep_snapshot)
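# Hedged sketch, not from the source: the ids compared in the test above are
# derived from the serialized snapshot, so an id recomputed after a serdes
# round-trip should match the original. Uses dagster's serialize/deserialize
# helpers seen in other snippets here; `demo_pipeline` is illustrative.
def demo_snapshot_id_round_trip():
    @solid
    def emit_one(_):
        return 1

    @pipeline
    def demo_pipeline():
        emit_one()

    plan = create_execution_plan(demo_pipeline)
    snap = snapshot_from_execution_plan(plan, demo_pipeline.get_pipeline_snapshot_id())

    # Round-trip the snapshot through the serdes layer and recompute the id.
    round_tripped = deserialize_json_to_dagster_namedtuple(serialize_dagster_namedtuple(snap))
    assert create_execution_plan_snapshot_id(round_tripped) == create_execution_plan_snapshot_id(snap)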
def get_external_execution_plan(
    self, external_pipeline, run_config, mode, step_keys_to_execute
):
    check.inst_param(external_pipeline, 'external_pipeline', ExternalPipeline)
    check.dict_param(run_config, 'run_config')
    check.str_param(mode, 'mode')
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    return ExternalExecutionPlan(
        execution_plan_snapshot=snapshot_from_execution_plan(
            create_execution_plan(
                pipeline=self.get_reconstructable_pipeline(
                    external_pipeline.name
                ).subset_for_execution_from_existing_pipeline(
                    external_pipeline.solids_to_execute
                ),
                run_config=run_config,
                mode=mode,
                step_keys_to_execute=step_keys_to_execute,
            ),
            external_pipeline.identifying_pipeline_snapshot_id,
        ),
        represented_pipeline=external_pipeline,
    )
def ExecutionPlanSnapshot(self, request, _context):
    execution_plan_args = deserialize_json_to_dagster_namedtuple(
        request.serialized_execution_plan_snapshot_args
    )

    check.inst_param(execution_plan_args, 'execution_plan_args', ExecutionPlanSnapshotArgs)

    recon_pipeline = (
        recon_pipeline_from_origin(execution_plan_args.pipeline_origin).subset_for_execution(
            execution_plan_args.solid_selection
        )
        if execution_plan_args.solid_selection
        else recon_pipeline_from_origin(execution_plan_args.pipeline_origin)
    )

    execution_plan_snapshot = snapshot_from_execution_plan(
        create_execution_plan(
            pipeline=recon_pipeline,
            run_config=execution_plan_args.run_config,
            mode=execution_plan_args.mode,
            step_keys_to_execute=execution_plan_args.step_keys_to_execute,
        ),
        execution_plan_args.pipeline_snapshot_id,
    )

    return api_pb2.ExecutionPlanSnapshotReply(
        serialized_execution_plan_snapshot=serialize_dagster_namedtuple(execution_plan_snapshot)
    )
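# Hedged sketch of the calling side of the gRPC handler above. Only the field
# names shown in the handler are confirmed by this snippet; the request message
# name `ExecutionPlanSnapshotRequest` is assumed by symmetry with
# `ExecutionPlanSnapshotReply`, and `stub` stands in for an assumed client stub.
request = api_pb2.ExecutionPlanSnapshotRequest(
    serialized_execution_plan_snapshot_args=serialize_dagster_namedtuple(execution_plan_args)
)
reply = stub.ExecutionPlanSnapshot(request)  # assumed stub method, mirrors the handler name
execution_plan_snapshot = deserialize_json_to_dagster_namedtuple(
    reply.serialized_execution_plan_snapshot
)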
def get_external_execution_plan(
    self,
    external_pipeline: ExternalPipeline,
    run_config: Dict[str, Any],
    mode: str,
    step_keys_to_execute: Optional[List[str]],
    known_state: Optional[KnownExecutionState],
) -> ExternalExecutionPlan:
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.dict_param(run_config, "run_config")
    check.str_param(mode, "mode")
    check.opt_list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)
    check.opt_inst_param(known_state, "known_state", KnownExecutionState)

    return ExternalExecutionPlan(
        execution_plan_snapshot=snapshot_from_execution_plan(
            create_execution_plan(
                pipeline=self.get_reconstructable_pipeline(
                    external_pipeline.name
                ).subset_for_execution_from_existing_pipeline(
                    external_pipeline.solids_to_execute
                ),
                run_config=run_config,
                mode=mode,
                step_keys_to_execute=step_keys_to_execute,
                known_state=known_state,
            ),
            external_pipeline.identifying_pipeline_snapshot_id,
        ),
        represented_pipeline=external_pipeline,
    )
def build_flyte_sdk_workflow(self):
    ordered_step_dict = self.execution_plan.execution_deps()
    instance = DagsterInstance.ephemeral()
    pipeline_run = instance.create_run(
        pipeline_name=self.execution_plan.pipeline_def.display_name,
        run_id=self.execution_plan.pipeline_def.display_name,
        run_config=self.run_config,
        mode=None,
        solids_to_execute=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        pipeline_snapshot=self.execution_plan.pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=snapshot_from_execution_plan(
            self.execution_plan,
            self.execution_plan.pipeline_def.get_pipeline_snapshot_id(),
        ),
        parent_pipeline_snapshot=self.execution_plan.pipeline_def.get_parent_pipeline_snapshot(),
    )

    initialization_manager = PlanExecutionContextManager(
        Retries.disabled_mode(),
        self.execution_plan,
        self.run_config,
        instance.get_run_by_id(self.execution_plan.pipeline_def.display_name),
        instance,
    )
    list(initialization_manager.prepare_context())
    pipeline_context = initialization_manager.get_context()

    for step_key in ordered_step_dict:
        solid_name = self.execution_plan.get_step_by_key(step_key).solid_name
        self.sdk_node_dict[solid_name] = self.get_sdk_node(
            pipeline_context,
            instance,
            pipeline_run,
            step_key,
            storage_request=self.compute_dict[solid_name].get("storage_request", None),
            cpu_request=self.compute_dict[solid_name].get("cpu_request", None),
            memory_request=self.compute_dict[solid_name].get("memory_request", None),
            storage_limit=self.compute_dict[solid_name].get("storage_limit", None),
            cpu_limit=self.compute_dict[solid_name].get("cpu_limit", None),
            memory_limit=self.compute_dict[solid_name].get("memory_limit", None),
        )
def create_run_for_pipeline(
    self,
    pipeline,
    execution_plan=None,
    run_id=None,
    environment_dict=None,
    mode=None,
    selector=None,
    step_keys_to_execute=None,
    status=None,
    tags=None,
    root_run_id=None,
    parent_run_id=None,
):
    from dagster.core.execution.api import create_execution_plan
    from dagster.core.execution.plan.plan import ExecutionPlan
    from dagster.core.snap.execution_plan_snapshot import snapshot_from_execution_plan

    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    check.opt_inst_param(execution_plan, 'execution_plan', ExecutionPlan)

    if execution_plan is None:
        execution_plan = create_execution_plan(
            pipeline,
            environment_dict=environment_dict,
            mode=mode,
            step_keys_to_execute=step_keys_to_execute,
        )

    return self.get_or_create_run(
        pipeline_name=pipeline.name,
        run_id=run_id,
        environment_dict=environment_dict,
        mode=check.opt_str_param(mode, 'mode', default=pipeline.get_default_mode_name()),
        selector=check.opt_inst_param(
            selector,
            'selector',
            ExecutionSelector,
            default=ExecutionSelector(name=pipeline.name),
        ),
        step_keys_to_execute=step_keys_to_execute,
        status=status,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        pipeline_snapshot=pipeline.get_pipeline_snapshot(),
        execution_plan_snapshot=snapshot_from_execution_plan(
            execution_plan, pipeline.get_pipeline_snapshot_id()
        ),
    )
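# Hedged usage sketch for create_run_for_pipeline above: when no execution plan
# is passed, one is built and snapshotted onto the run automatically.
# `my_pipeline` is an illustrative PipelineDefinition, not from the source.
instance = DagsterInstance.ephemeral()
run = instance.create_run_for_pipeline(
    pipeline=my_pipeline,
    mode='default',
)
assert run.pipeline_name == my_pipeline.name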
def test_execution_plan_snapshot_backcompat():
    src_dir = file_relative_path(__file__, "test_execution_plan_snapshots/")
    snapshot_dirs = [
        f for f in os.listdir(src_dir) if not os.path.isfile(os.path.join(src_dir, f))
    ]

    for snapshot_dir_path in snapshot_dirs:
        print(f"Executing a saved run from {snapshot_dir_path}")  # pylint: disable=print-call

        with copy_directory(os.path.join(src_dir, snapshot_dir_path)) as test_dir:
            with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
                runs = instance.get_runs()
                assert len(runs) == 1

                run = runs[0]
                assert run.status == PipelineRunStatus.NOT_STARTED

                the_pipeline = InMemoryPipeline(dynamic_pipeline)

                # First create a brand new plan from the pipeline and validate it
                new_plan = create_execution_plan(the_pipeline, run_config=run.run_config)
                _validate_execution_plan(new_plan)

                # Create a snapshot and rebuild it, validate the rebuilt plan
                new_plan_snapshot = snapshot_from_execution_plan(
                    new_plan, run.pipeline_snapshot_id
                )
                rebuilt_plan = ExecutionPlan.rebuild_from_snapshot(
                    "dynamic_pipeline", new_plan_snapshot
                )
                _validate_execution_plan(rebuilt_plan)

                # Then validate the plan built from the historical snapshot on the run
                stored_snapshot = instance.get_execution_plan_snapshot(
                    run.execution_plan_snapshot_id
                )
                rebuilt_plan = ExecutionPlan.rebuild_from_snapshot(
                    "dynamic_pipeline", stored_snapshot
                )
                _validate_execution_plan(rebuilt_plan)

                # Finally, execute the run (using the historical execution plan snapshot)
                result = execute_run(the_pipeline, run, instance, raise_on_error=True)
                assert result.success
def _launch_pipeline_execution(graphene_info, execution_params, is_reexecuted=False):
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.inst_param(execution_params, 'execution_params', ExecutionParams)

    if is_reexecuted:
        # required fields for re-execution
        execution_metadata = check.inst_param(
            execution_params.execution_metadata, 'execution_metadata', ExecutionMetadata
        )
        check.str_param(execution_metadata.root_run_id, 'root_run_id')
        check.str_param(execution_metadata.parent_run_id, 'parent_run_id')

    instance = graphene_info.context.instance
    run_launcher = instance.run_launcher

    if run_launcher is None:
        return graphene_info.schema.type_named('RunLauncherNotDefinedError')()

    pipeline_def = get_pipeline_def_from_selector(graphene_info, execution_params.selector)

    get_validated_config(
        pipeline_def,
        environment_dict=execution_params.environment_dict,
        mode=execution_params.mode,
    )

    execution_plan = create_execution_plan(
        pipeline_def,
        execution_params.environment_dict,
        mode=execution_params.mode,
    )

    _check_start_pipeline_execution_errors(graphene_info, execution_params, execution_plan)

    pipeline_run = instance.create_run(
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=snapshot_from_execution_plan(
            execution_plan, pipeline_def.get_pipeline_snapshot_id()
        ),
        **pipeline_run_args_from_execution_params(
            execution_params,
            get_step_keys_to_execute(instance, pipeline_def, execution_params),
        )
    )

    run = instance.launch_run(pipeline_run.run_id)

    return graphene_info.schema.type_named('LaunchPipelineRunSuccess')(
        run=graphene_info.schema.type_named('PipelineRun')(run)
    )
def test_create_noop_execution_plan(snapshot):
    @solid
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    execution_plan = create_execution_plan(noop_pipeline)

    snapshot.assert_match(
        serialize_pp(
            snapshot_from_execution_plan(
                execution_plan,
                create_pipeline_snapshot_id(noop_pipeline.get_pipeline_snapshot()),
            )
        )
    )
def test_create_noop_execution_plan_with_tags(snapshot):
    @solid(tags={'foo': 'bar', 'bar': 'baaz'})
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    execution_plan = create_execution_plan(noop_pipeline)

    snapshot.assert_match(
        serialize_pp(
            snapshot_from_execution_plan(
                execution_plan,
                create_pipeline_snapshot_id(noop_pipeline.get_pipeline_snapshot()),
            )
        )
    )
def execution_plan_snapshot_command(args):
    check.inst_param(args, 'args', ExecutionPlanSnapshotArgs)

    recon_pipeline = (
        recon_pipeline_from_origin(args.pipeline_origin).subset_for_execution(
            args.solid_selection
        )
        if args.solid_selection
        else recon_pipeline_from_origin(args.pipeline_origin)
    )

    return snapshot_from_execution_plan(
        create_execution_plan(
            pipeline=recon_pipeline,
            run_config=args.run_config,
            mode=args.mode,
            step_keys_to_execute=args.step_keys_to_execute,
        ),
        args.pipeline_snapshot_id,
    )
def execution_plan_snapshot_command(args):
    check.inst_param(args, 'args', ExecutionPlanSnapshotArgs)

    recon_pipeline = (
        recon_pipeline_from_origin(args.pipeline_origin).subset_for_execution(
            args.solid_selection
        )
        if args.solid_selection
        else recon_pipeline_from_origin(args.pipeline_origin)
    )

    try:
        return snapshot_from_execution_plan(
            create_execution_plan(
                pipeline=recon_pipeline,
                run_config=args.run_config,
                mode=args.mode,
                step_keys_to_execute=args.step_keys_to_execute,
            ),
            args.pipeline_snapshot_id,
        )
    except:  # pylint: disable=bare-except
        return ExecutionPlanSnapshotErrorData(
            error=serializable_error_info_from_exc_info(sys.exc_info())
        )
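# Hedged sketch of how a caller can branch on the union returned by the command
# above: success yields an ExecutionPlanSnapshot, failure yields
# ExecutionPlanSnapshotErrorData wrapping a SerializableErrorInfo. The handling
# shown (raising a plain Exception) is illustrative, not from the source.
result = execution_plan_snapshot_command(args)
if isinstance(result, ExecutionPlanSnapshotErrorData):
    # the captured user-code failure travels with the result
    raise Exception(result.error.to_string())
snapshot_id = create_execution_plan_snapshot_id(result)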
def test_create_execution_plan_with_dep(snapshot):
    @solid
    def solid_one(_):
        return 1

    @solid
    def solid_two(_, num):
        return num + 1

    @pipeline
    def noop_pipeline():
        solid_two(solid_one())

    execution_plan = create_execution_plan(noop_pipeline)

    snapshot.assert_match(
        serialize_pp(
            snapshot_from_execution_plan(
                execution_plan,
                create_pipeline_snapshot_id(noop_pipeline.get_pipeline_snapshot()),
            )
        )
    )
def test_create_with_composite(snapshot):
    @solid(output_defs=[OutputDefinition(name='out_num', dagster_type=int)])
    def return_one(_):
        return 1

    @solid(
        input_defs=[InputDefinition(name='num', dagster_type=int)],
        output_defs=[OutputDefinition(int)],
    )
    def add_one(_, num):
        return num + 1

    @composite_solid(output_defs=[OutputDefinition(name='named_output', dagster_type=int)])
    def comp_1():
        return add_one(return_one())

    @composite_solid(output_defs=[OutputDefinition(name='named_output', dagster_type=int)])
    def comp_2():
        return add_one(return_one())

    @solid
    def add(_, num_one, num_two):
        return num_one + num_two

    @pipeline
    def do_comps():
        add(num_one=comp_1(), num_two=comp_2())

    execution_plan = create_execution_plan(do_comps)

    snapshot.assert_match(
        serialize_pp(
            snapshot_from_execution_plan(
                execution_plan,
                create_pipeline_snapshot_id(do_comps.get_pipeline_snapshot()),
            )
        )
    )
def execution_plan_snapshot_command(
    output_file, solid_selection, environment_dict, mode, step_keys_to_execute, snapshot_id, **kwargs
):
    recon_pipeline = recon_pipeline_for_cli_args(kwargs)

    environment_dict = json.loads(environment_dict)
    if step_keys_to_execute:
        step_keys_to_execute = json.loads(step_keys_to_execute)
    if solid_selection:
        solid_selection = json.loads(solid_selection)
        recon_pipeline = recon_pipeline.subset_for_execution(solid_selection)

    execution_plan_snapshot = snapshot_from_execution_plan(
        create_execution_plan(
            pipeline=recon_pipeline,
            environment_dict=environment_dict,
            mode=mode,
            step_keys_to_execute=step_keys_to_execute,
        ),
        snapshot_id,
    )

    ipc_write_unary_response(output_file, execution_plan_snapshot)
def _execute_schedule(graphene_info, pipeline_def, execution_params):
    instance = graphene_info.context.instance

    execution_plan = None
    if is_config_valid(pipeline_def, execution_params.environment_dict, execution_params.mode):
        execution_plan = create_execution_plan(
            pipeline_def,
            execution_params.environment_dict,
            mode=execution_params.mode,
        )

    execution_plan_snapshot = None
    if execution_plan:
        execution_plan_snapshot = snapshot_from_execution_plan(
            execution_plan, pipeline_def.get_pipeline_snapshot_id()
        )

    pipeline_run = instance.get_or_create_run(
        pipeline_name=pipeline_def.name,
        environment_dict=execution_params.environment_dict,
        mode=execution_params.mode,
        selector=execution_params.selector,
        tags=execution_params.execution_metadata.tags,
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=execution_plan_snapshot,
    )

    # Inject errors into event log at this point

    # Launch run if run launcher is defined
    run_launcher = graphene_info.context.instance.run_launcher
    if run_launcher:
        result = _launch_pipeline_execution_for_created_run(graphene_info, pipeline_run.run_id)
    else:
        result = _start_pipeline_execution_for_created_run(graphene_info, pipeline_run.run_id)

    return result
def _start_pipeline_execution(graphene_info, execution_params, is_reexecuted=False):
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.inst_param(execution_params, 'execution_params', ExecutionParams)

    if is_reexecuted:
        # required fields for re-execution
        execution_metadata = check.inst_param(
            execution_params.execution_metadata, 'execution_metadata', ExecutionMetadata
        )
        check.str_param(execution_metadata.root_run_id, 'root_run_id')
        check.str_param(execution_metadata.parent_run_id, 'parent_run_id')

    instance = graphene_info.context.instance

    execution_manager_settings = instance.dagit_settings.get('execution_manager')
    if execution_manager_settings and execution_manager_settings.get('disabled'):
        return graphene_info.schema.type_named('StartPipelineRunDisabledError')()

    pipeline_def = get_pipeline_def_from_selector(graphene_info, execution_params.selector)

    get_validated_config(
        pipeline_def,
        environment_dict=execution_params.environment_dict,
        mode=execution_params.mode,
    )

    execution_plan = create_execution_plan(
        pipeline_def,
        execution_params.environment_dict,
        mode=execution_params.mode,
    )

    _check_start_pipeline_execution_errors(graphene_info, execution_params, execution_plan)

    try:
        pipeline_run = instance.get_or_create_run(
            pipeline_name=pipeline_def.name,
            run_id=execution_params.execution_metadata.run_id
            if execution_params.execution_metadata.run_id
            else make_new_run_id(),
            selector=execution_params.selector or ExecutionSelector(name=pipeline_def.name),
            environment_dict=execution_params.environment_dict,
            mode=execution_params.mode,
            step_keys_to_execute=(
                get_step_keys_to_execute(instance, pipeline_def, execution_params)
                or execution_params.step_keys
            ),
            tags=merge_dicts(pipeline_def.tags, execution_params.execution_metadata.tags),
            status=PipelineRunStatus.NOT_STARTED,
            root_run_id=execution_params.execution_metadata.root_run_id,
            parent_run_id=execution_params.execution_metadata.parent_run_id,
            pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                execution_plan, pipeline_def.get_pipeline_snapshot_id()
            ),
        )
    except DagsterRunConflict as exc:
        return graphene_info.schema.type_named('PipelineRunConflict')(exc)

    graphene_info.context.execution_manager.execute_pipeline(
        graphene_info.context.get_handle(),
        pipeline_def,
        pipeline_run,
        instance=instance,
    )

    return graphene_info.schema.type_named('StartPipelineRunSuccess')(
        run=graphene_info.schema.type_named('PipelineRun')(pipeline_run)
    )
def _launch_scheduled_execution(instance, schedule_def, pipeline, tick, stream):
    pipeline_def = pipeline.get_definition()

    # Run should_execute and halt if it returns False
    schedule_context = ScheduleExecutionContext(instance)
    with user_code_error_boundary(
        ScheduleExecutionError,
        lambda: 'Error occurred during the execution of should_execute for schedule '
        '{schedule_name}'.format(schedule_name=schedule_def.name),
    ):
        should_execute = schedule_def.should_execute(schedule_context)

    if not should_execute:
        # Update tick to skipped state and return
        tick.update_with_status(ScheduleTickStatus.SKIPPED)
        stream.send(ScheduledExecutionSkipped())
        return

    errors = []

    run_config = {}
    schedule_tags = {}
    try:
        with user_code_error_boundary(
            ScheduleExecutionError,
            lambda: 'Error occurred during the execution of run_config_fn for schedule '
            '{schedule_name}'.format(schedule_name=schedule_def.name),
        ):
            run_config = schedule_def.get_run_config(schedule_context)
    except DagsterUserCodeExecutionError:
        error_data = serializable_error_info_from_exc_info(sys.exc_info())
        errors.append(error_data)

    try:
        with user_code_error_boundary(
            ScheduleExecutionError,
            lambda: 'Error occurred during the execution of tags_fn for schedule '
            '{schedule_name}'.format(schedule_name=schedule_def.name),
        ):
            schedule_tags = schedule_def.get_tags(schedule_context)
    except DagsterUserCodeExecutionError:
        error_data = serializable_error_info_from_exc_info(sys.exc_info())
        errors.append(error_data)

    pipeline_tags = pipeline_def.tags or {}
    check_tags(pipeline_tags, 'pipeline_tags')
    tags = merge_dicts(pipeline_tags, schedule_tags)

    mode = schedule_def.mode

    execution_plan_snapshot = None
    try:
        execution_plan = create_execution_plan(
            pipeline_def,
            run_config=run_config,
            mode=mode,
        )
        execution_plan_snapshot = snapshot_from_execution_plan(
            execution_plan, pipeline_def.get_pipeline_snapshot_id()
        )
    except DagsterInvalidConfigError:
        error_data = serializable_error_info_from_exc_info(sys.exc_info())
        errors.append(error_data)

    # Enter the run in the DB with the information we have
    possibly_invalid_pipeline_run = instance.create_run(
        pipeline_name=schedule_def.pipeline_name,
        run_id=None,
        run_config=run_config,
        mode=mode,
        solids_to_execute=pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=pipeline.solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=pipeline_def.get_parent_pipeline_snapshot(),
    )

    tick.update_with_status(
        ScheduleTickStatus.SUCCESS, run_id=possibly_invalid_pipeline_run.run_id
    )

    # If there were errors, inject them into the event log and fail the run
    if len(errors) > 0:
        for error in errors:
            instance.report_engine_event(
                error.message,
                possibly_invalid_pipeline_run,
                EngineEventData.engine_error(error),
            )
        instance.report_run_failed(possibly_invalid_pipeline_run)
        stream.send(
            ScheduledExecutionFailed(run_id=possibly_invalid_pipeline_run.run_id, errors=errors)
        )
        return

    # Otherwise the run should be valid so lets launch it

    # Need an ExternalPipeline to launch so make one here
    recon_repo = pipeline.get_reconstructable_repository()
    repo_location = InProcessRepositoryLocation(recon_repo)
    external_pipeline = repo_location.get_repository(
        recon_repo.get_definition().name
    ).get_full_external_pipeline(pipeline_def.name)

    try:
        launched_run = instance.launch_run(
            possibly_invalid_pipeline_run.run_id, external_pipeline
        )
    except DagsterLaunchFailedError:
        error = serializable_error_info_from_exc_info(sys.exc_info())
        instance.report_engine_event(
            error.message,
            possibly_invalid_pipeline_run,
            EngineEventData.engine_error(error),
        )
        instance.report_run_failed(possibly_invalid_pipeline_run)
        stream.send(
            ScheduledExecutionFailed(run_id=possibly_invalid_pipeline_run.run_id, errors=[error])
        )
        return

    stream.send(ScheduledExecutionSuccess(run_id=launched_run.run_id))
    return
def _make_airflow_dag(
    handle,
    pipeline_name,
    environment_dict=None,
    mode=None,
    instance=None,
    dag_id=None,
    dag_description=None,
    dag_kwargs=None,
    op_kwargs=None,
    operator=DagsterPythonOperator,
):
    check.inst_param(handle, 'handle', ExecutionTargetHandle)
    check.str_param(pipeline_name, 'pipeline_name')
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict', key_type=str)
    mode = check.opt_str_param(mode, 'mode')

    # Default to use the (persistent) system temp directory rather than a seven.TemporaryDirectory,
    # which would not be consistent between Airflow task invocations.
    instance = (
        check.inst_param(instance, 'instance', DagsterInstance)
        if instance
        else DagsterInstance.get(fallback_storage=seven.get_system_temp_directory())
    )

    # Only used for Airflow; internally we continue to use pipeline.name
    dag_id = check.opt_str_param(dag_id, 'dag_id', _rename_for_airflow(pipeline_name))

    dag_description = check.opt_str_param(
        dag_description, 'dag_description', _make_dag_description(pipeline_name)
    )
    check.subclass_param(operator, 'operator', BaseOperator)

    dag_kwargs = dict(
        {'default_args': DEFAULT_ARGS},
        **check.opt_dict_param(dag_kwargs, 'dag_kwargs', key_type=str)
    )

    op_kwargs = check.opt_dict_param(op_kwargs, 'op_kwargs', key_type=str)

    dag = DAG(dag_id=dag_id, description=dag_description, **dag_kwargs)

    pipeline = handle.build_pipeline_definition()

    if mode is None:
        mode = pipeline.get_default_mode_name()

    execution_plan = create_execution_plan(pipeline, environment_dict, mode=mode)

    tasks = {}

    coalesced_plan = coalesce_execution_steps(execution_plan)

    for solid_handle, solid_steps in coalesced_plan.items():
        step_keys = [step.key for step in solid_steps]

        operator_parameters = DagsterOperatorParameters(
            handle=handle,
            pipeline_name=pipeline_name,
            environment_dict=environment_dict,
            mode=mode,
            task_id=solid_handle,
            step_keys=step_keys,
            dag=dag,
            instance_ref=instance.get_ref(),
            op_kwargs=op_kwargs,
            pipeline_snapshot=pipeline.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                execution_plan, pipeline_snapshot_id=pipeline.get_pipeline_snapshot_id()
            ),
        )
        task = operator(operator_parameters)

        tasks[solid_handle] = task

        for solid_step in solid_steps:
            for step_input in solid_step.step_inputs:
                for key in step_input.dependency_keys:
                    prev_solid_handle = execution_plan.get_step_by_key(key).solid_handle.to_string()
                    if solid_handle != prev_solid_handle:
                        tasks[prev_solid_handle].set_downstream(task)

    return (dag, [tasks[solid_handle] for solid_handle in coalesced_plan.keys()])
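# Hedged usage sketch for _make_airflow_dag above: it returns the Airflow DAG
# plus its tasks in plan order. `my_handle` stands in for an ExecutionTargetHandle
# (not from the source); the config uses the filesystem storage seen in other
# snippets here.
dag, tasks = _make_airflow_dag(
    handle=my_handle,
    pipeline_name='my_pipeline',
    environment_dict={'storage': {'filesystem': {}}},
)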
@solid
def nonce_solid(_):
    return


@pipeline
def nonce_pipeline():
    return nonce_solid()


nonce_pipeline_snapshot = nonce_pipeline.get_pipeline_snapshot()

nonce_execution_plan_snapshot = snapshot_from_execution_plan(
    create_execution_plan(nonce_pipeline), nonce_pipeline.get_pipeline_snapshot_id()
)


def test_init_modified_docker_operator(dagster_docker_image):  # pylint: disable=redefined-outer-name
    dagster_operator_parameters = DagsterOperatorParameters(
        task_id='nonce',
        environment_dict={'storage': {'filesystem': {}}},
        pipeline_name='',
        mode='default',
        op_kwargs={
            'image': dagster_docker_image,
            'api_version': 'auto',
        },