def create_run_for_pipeline(
    self,
    pipeline_def,
    execution_plan=None,
    run_id=None,
    run_config=None,
    mode=None,
    solids_to_execute=None,
    step_keys_to_execute=None,
    status=None,
    tags=None,
    root_run_id=None,
    parent_run_id=None,
    solid_selection=None,
):
    """Construct and persist a PipelineRun for the given pipeline definition.

    If ``solids_to_execute`` is provided, the definition is narrowed to that
    subset (or validated against an already-subsetted definition).  An
    execution plan is built on demand when the caller does not supply one,
    and the run is recorded together with snapshots of both the pipeline and
    the plan.  ``solid_selection`` is the raw user query and is only threaded
    through to storage; it is never resolved here.
    """
    from dagster.core.execution.api import create_execution_plan
    from dagster.core.execution.plan.plan import ExecutionPlan
    from dagster.core.snap import snapshot_from_execution_plan

    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    check.opt_inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    # solids_to_execute is the frozenset form that actually drives subsetting;
    # solid_selection is the unresolved user query and is passed through as-is
    # (this function does not handle solid queries).
    check.opt_set_param(solids_to_execute, 'solids_to_execute', of_type=str)
    check.opt_list_param(solid_selection, 'solid_selection', of_type=str)

    if solids_to_execute:
        if not isinstance(pipeline_def, PipelineSubsetDefinition):
            # Direct-call path: narrow the full definition ourselves.
            pipeline_def = pipeline_def.get_pipeline_subset_def(
                solids_to_execute=solids_to_execute)
        else:
            # Already subsetted (via ExecutablePipeline / ExternalPipeline):
            # the argument must agree with the definition's own subset.
            check.invariant(
                solids_to_execute == pipeline_def.solids_to_execute,
                'Cannot create a PipelineRun from pipeline subset '
                '{pipeline_solids_to_execute} that conflicts with '
                'solids_to_execute arg {solids_to_execute}'.format(
                    pipeline_solids_to_execute=str_format_list(
                        pipeline_def.solids_to_execute),
                    solids_to_execute=str_format_list(solids_to_execute),
                ),
            )

    if execution_plan is None:
        execution_plan = create_execution_plan(
            pipeline_def,
            run_config=run_config,
            mode=mode,
            step_keys_to_execute=step_keys_to_execute,
        )

    # Fall back to the pipeline's default mode when none was requested.
    resolved_mode = check.opt_str_param(
        mode, 'mode', default=pipeline_def.get_default_mode_name())

    return self.create_run(
        pipeline_name=pipeline_def.name,
        run_id=run_id,
        run_config=run_config,
        mode=resolved_mode,
        solid_selection=solid_selection,
        solids_to_execute=solids_to_execute,
        step_keys_to_execute=step_keys_to_execute,
        status=status,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=snapshot_from_execution_plan(
            execution_plan, pipeline_def.get_pipeline_snapshot_id()),
        parent_pipeline_snapshot=pipeline_def.get_parent_pipeline_snapshot(),
    )
def _make_airflow_dag(
    recon_repo,
    pipeline_name,
    run_config=None,
    mode=None,
    instance=None,
    dag_id=None,
    dag_description=None,
    dag_kwargs=None,
    op_kwargs=None,
    operator=DagsterPythonOperator,
):
    """Build an Airflow DAG whose tasks execute the steps of a Dagster pipeline.

    Each coalesced group of execution steps (one group per solid) becomes one
    Airflow task; task dependencies are derived from the step inputs of the
    execution plan.  Returns a ``(dag, tasks)`` tuple where ``tasks`` follows
    the coalesced-plan ordering.
    """
    check.inst_param(recon_repo, 'recon_repo', ReconstructableRepository)
    check.str_param(pipeline_name, 'pipeline_name')
    run_config = check.opt_dict_param(run_config, 'run_config', key_type=str)
    mode = check.opt_str_param(mode, 'mode')

    # Default to the (persistent) system temp directory rather than a
    # seven.TemporaryDirectory, which would not be consistent between Airflow
    # task invocations.
    if instance:
        instance = check.inst_param(instance, 'instance', DagsterInstance)
    else:
        instance = DagsterInstance.get(
            fallback_storage=seven.get_system_temp_directory())

    # dag_id is only used for Airflow; internally we continue to use
    # pipeline.name.
    dag_id = check.opt_str_param(dag_id, 'dag_id',
                                 _rename_for_airflow(pipeline_name))
    dag_description = check.opt_str_param(dag_description, 'dag_description',
                                          _make_dag_description(pipeline_name))
    check.subclass_param(operator, 'operator', BaseOperator)

    dag_kwargs = dict(
        {'default_args': DEFAULT_ARGS},
        **check.opt_dict_param(dag_kwargs, 'dag_kwargs', key_type=str))
    op_kwargs = check.opt_dict_param(op_kwargs, 'op_kwargs', key_type=str)

    dag = DAG(dag_id=dag_id, description=dag_description, **dag_kwargs)
    pipeline = recon_repo.get_definition().get_pipeline(pipeline_name)

    if mode is None:
        mode = pipeline.get_default_mode_name()

    execution_plan = create_execution_plan(pipeline, run_config, mode=mode)

    task_map = {}
    coalesced_plan = coalesce_execution_steps(execution_plan)

    for handle, steps in coalesced_plan.items():
        operator_parameters = DagsterOperatorParameters(
            recon_repo=recon_repo,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode=mode,
            task_id=handle,
            step_keys=[step.key for step in steps],
            dag=dag,
            instance_ref=instance.get_ref(),
            op_kwargs=op_kwargs,
            pipeline_snapshot=pipeline.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                execution_plan,
                pipeline_snapshot_id=pipeline.get_pipeline_snapshot_id()),
        )
        task = operator(operator_parameters)
        task_map[handle] = task

        # Wire upstream edges: any step input produced by a different solid
        # makes that solid's task an upstream dependency of this one.
        for step in steps:
            for step_input in step.step_inputs:
                for dependency_key in step_input.dependency_keys:
                    upstream_handle = execution_plan.get_step_by_key(
                        dependency_key).solid_handle.to_string()
                    if handle != upstream_handle:
                        task_map[upstream_handle].set_downstream(task)

    return (dag, [task_map[handle] for handle in coalesced_plan.keys()])
@solid
def nonce_solid(_):
    # Deliberately trivial solid: exists only so the nonce pipeline has a step.
    return


@pipeline
def nonce_pipeline():
    # Single-solid fixture pipeline used by the snapshot-based tests below.
    return nonce_solid()


# Module-level snapshots of the nonce pipeline and its execution plan, shared
# across tests in this file.
nonce_pipeline_snapshot = nonce_pipeline.get_pipeline_snapshot()

nonce_execution_plan_snapshot = snapshot_from_execution_plan(
    create_execution_plan(nonce_pipeline),
    nonce_pipeline.get_pipeline_snapshot_id())


def test_init_modified_docker_operator(dagster_docker_image):
    # NOTE(review): this definition continues beyond the visible chunk; only
    # the prefix shown here is documented.
    with instance_for_test() as instance:
        dagster_operator_parameters = DagsterOperatorParameters(
            task_id="nonce",
            run_config={"intermediate_storage": {
                "filesystem": {}
            }},
            pipeline_name="",
            mode="default",
            op_kwargs={
                "image": dagster_docker_image,
                "api_version": "auto",
def create_run_for_pipeline(
    self,
    pipeline_def,
    execution_plan=None,
    run_id=None,
    run_config=None,
    mode=None,
    solids_to_execute=None,
    step_keys_to_execute=None,
    status=None,
    tags=None,
    root_run_id=None,
    parent_run_id=None,
    solid_selection=None,
):
    """Create and persist a PipelineRun for ``pipeline_def``.

    Subsets the pipeline when ``solids_to_execute`` is given, builds a full
    execution plan when the caller does not supply one, resolves unmemoized
    step keys when the run's tags mark it as memoized, and records snapshots
    of the pipeline and the (possibly step-subsetted) execution plan.
    """
    from dagster.core.execution.api import create_execution_plan
    from dagster.core.execution.plan.plan import ExecutionPlan
    from dagster.core.snap import snapshot_from_execution_plan

    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)
    check.opt_inst_param(execution_plan, "execution_plan", ExecutionPlan)
    # note that solids_to_execute is required to execute the solid subset, which is the
    # frozenset version of the previous solid_subset.
    # solid_selection is not required and will not be converted to solids_to_execute here.
    # i.e. this function doesn't handle solid queries.
    # solid_selection is only used to pass the user queries further down.
    check.opt_set_param(solids_to_execute, "solids_to_execute", of_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)

    if solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            # for the case when pipeline_def is created by IPipeline or ExternalPipeline:
            # the arg must agree with the definition's own subset.
            check.invariant(
                solids_to_execute == pipeline_def.solids_to_execute,
                "Cannot create a PipelineRun from pipeline subset {pipeline_solids_to_execute} "
                "that conflicts with solids_to_execute arg {solids_to_execute}"
                .format(
                    pipeline_solids_to_execute=str_format_list(
                        pipeline_def.solids_to_execute),
                    solids_to_execute=str_format_list(solids_to_execute),
                ),
            )
        else:
            # for cases when `create_run_for_pipeline` is directly called
            pipeline_def = pipeline_def.get_pipeline_subset_def(
                solids_to_execute=solids_to_execute)

    # Always start from the full plan; any step-key subsetting is applied
    # further down via build_subset_plan.
    full_execution_plan = execution_plan or create_execution_plan(
        pipeline_def,
        run_config=run_config,
        mode=mode,
    )
    # A caller-supplied plan must itself be unsubsetted.
    check.invariant(
        len(full_execution_plan.step_keys_to_execute) == len(
            full_execution_plan.steps))

    if _is_memoized_run(tags):
        # Memoized runs derive their own step subset; an explicit one would
        # conflict with it.
        if step_keys_to_execute:
            raise DagsterInvariantViolationError(
                "step_keys_to_execute parameter cannot be used in conjunction with memoized "
                "pipeline runs.")

        step_keys_to_execute = self.resolve_unmemoized_steps(
            full_execution_plan,
            run_config=run_config,
            mode=mode,
        )  # TODO: tighter integration with existing step_keys_to_execute functionality

    subsetted_execution_plan = (
        full_execution_plan.build_subset_plan(step_keys_to_execute)
        if step_keys_to_execute else full_execution_plan)

    return self.create_run(
        pipeline_name=pipeline_def.name,
        run_id=run_id,
        run_config=run_config,
        mode=check.opt_str_param(
            mode, "mode", default=pipeline_def.get_default_mode_name()),
        solid_selection=solid_selection,
        solids_to_execute=solids_to_execute,
        step_keys_to_execute=step_keys_to_execute,
        status=status,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        # Snapshot the subsetted plan so storage reflects what will actually run.
        execution_plan_snapshot=snapshot_from_execution_plan(
            subsetted_execution_plan,
            pipeline_def.get_pipeline_snapshot_id()),
        parent_pipeline_snapshot=pipeline_def.get_parent_pipeline_snapshot(
        ),
    )
def create_run_for_pipeline(
    self,
    pipeline_def,
    execution_plan=None,
    run_id=None,
    environment_dict=None,
    mode=None,
    solid_subset=None,
    step_keys_to_execute=None,
    status=None,
    tags=None,
    root_run_id=None,
    parent_run_id=None,
):
    """Create and persist a PipelineRun for ``pipeline_def``.

    When ``solid_subset`` is supplied the pipeline is narrowed to those
    solids (or checked for agreement when the definition is already a
    subset).  An execution plan is built on demand, and pipeline/plan
    snapshots are stored alongside the run.
    """
    from dagster.core.execution.api import create_execution_plan
    from dagster.core.execution.plan.plan import ExecutionPlan
    from dagster.core.snap import snapshot_from_execution_plan

    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    check.opt_inst_param(execution_plan, 'execution_plan', ExecutionPlan)

    if solid_subset:
        if not isinstance(pipeline_def, PipelineSubsetForExecution):
            # Narrow the full definition to the requested solids.
            pipeline_def = pipeline_def.subset_for_execution(
                solid_subset=solid_subset)
        else:
            # Definition is already a subset; it must match the argument.
            subsets_agree = (
                len(solid_subset) == len(pipeline_def.solid_subset)
                and set(solid_subset) == set(pipeline_def.solid_subset))
            check.invariant(
                subsets_agree,
                'Cannot create a PipelineRun from pipeline subset '
                '{pipeline_solid_subset} that conflicts with solid_subset '
                'arg {solid_subset}'.format(
                    pipeline_solid_subset=str_format_list(
                        pipeline_def.solid_subset),
                    solid_subset=str_format_list(solid_subset),
                ),
            )

    if execution_plan is None:
        execution_plan = create_execution_plan(
            pipeline_def,
            environment_dict=environment_dict,
            mode=mode,
            step_keys_to_execute=step_keys_to_execute,
        )

    # Fall back to the pipeline's default mode when none was requested.
    resolved_mode = check.opt_str_param(
        mode, 'mode', default=pipeline_def.get_default_mode_name())

    return self.create_run(
        pipeline_name=pipeline_def.name,
        run_id=run_id,
        environment_dict=environment_dict,
        mode=resolved_mode,
        solid_subset=solid_subset,
        step_keys_to_execute=step_keys_to_execute,
        status=status,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=snapshot_from_execution_plan(
            execution_plan, pipeline_def.get_pipeline_snapshot_id()),
    )
def _start_pipeline_execution(graphene_info, execution_params, is_reexecuted=False):
    """Validate, create, and launch a pipeline run from GraphQL execution params.

    Returns one of the schema's result types: ``StartPipelineRunDisabledError``
    when dagit's execution manager is disabled, ``PipelineRunConflict`` when
    the run id clashes with an existing run, or ``StartPipelineRunSuccess``
    wrapping the newly created run.
    """
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.inst_param(execution_params, 'execution_params', ExecutionParams)

    if is_reexecuted:
        # required fields for re-execution
        metadata = check.inst_param(execution_params.execution_metadata,
                                    'execution_metadata', ExecutionMetadata)
        check.str_param(metadata.root_run_id, 'root_run_id')
        check.str_param(metadata.parent_run_id, 'parent_run_id')

    instance = graphene_info.context.instance

    # Bail out early if dagit has been configured with execution disabled.
    manager_settings = instance.dagit_settings.get('execution_manager')
    if manager_settings and manager_settings.get('disabled'):
        return graphene_info.schema.type_named(
            'StartPipelineRunDisabledError')()

    pipeline_def = get_pipeline_def_from_selector(graphene_info,
                                                  execution_params.selector)

    get_validated_config(
        pipeline_def,
        environment_dict=execution_params.environment_dict,
        mode=execution_params.mode,
    )

    execution_plan = create_execution_plan(
        pipeline_def,
        execution_params.environment_dict,
        mode=execution_params.mode,
    )

    _check_start_pipeline_execution_errors(graphene_info, execution_params,
                                           execution_plan)

    execution_metadata = execution_params.execution_metadata
    selector = execution_params.selector

    try:
        pipeline_run = instance.create_run(
            pipeline_name=pipeline_def.name,
            run_id=(execution_metadata.run_id
                    if execution_metadata.run_id else make_new_run_id()),
            solid_subset=selector.solid_subset if selector else None,
            environment_dict=execution_params.environment_dict,
            mode=execution_params.mode,
            step_keys_to_execute=(
                get_step_keys_to_execute(instance, pipeline_def,
                                         execution_params)
                or execution_params.step_keys),
            tags=merge_dicts(pipeline_def.tags, execution_metadata.tags),
            status=PipelineRunStatus.NOT_STARTED,
            root_run_id=execution_metadata.root_run_id,
            parent_run_id=execution_metadata.parent_run_id,
            pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                execution_plan, pipeline_def.get_pipeline_snapshot_id()),
        )
    except DagsterRunConflict as exc:
        return graphene_info.schema.type_named('PipelineRunConflict')(exc)

    graphene_info.context.execution_manager.execute_pipeline(
        graphene_info.context.get_handle(),
        pipeline_def,
        pipeline_run,
        instance=instance,
    )

    success_cls = graphene_info.schema.type_named('StartPipelineRunSuccess')
    run_cls = graphene_info.schema.type_named('PipelineRun')
    return success_cls(run=run_cls(pipeline_run))