def scoped_pipeline_context(
    pipeline_def,
    environment_dict,
    run_config,
    system_storage_data=None,
    scoped_resources_builder_cm=create_resource_builder,
):
    """Yield a pipeline execution context, or an init-failure event on error.

    The arguments are type-checked, context creation data is built, and the
    object returned by ``construct_pipeline_execution_context`` is yielded
    from inside the ``scoped_resources_builder_cm`` context manager.

    When a ``DagsterError`` is caught it is re-raised if
    ``run_config.executor_config.raise_on_error`` is truthy; otherwise a
    ``DagsterEvent.pipeline_init_failure`` event is yielded.
    """
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    check.dict_param(environment_dict, 'environment_dict', key_type=str)
    check.inst_param(run_config, 'run_config', RunConfig)
    check.opt_inst_param(system_storage_data, 'system_storage_data', SystemStorageData)

    creation_data = create_context_creation_data(pipeline_def, environment_dict, run_config)

    # From here on, a DagsterError is turned into a pipeline init failure
    # event (unless raise_on_error is set on the executor config).
    try:
        log_manager = create_log_manager(creation_data)
        resources_cm = scoped_resources_builder_cm(
            creation_data.pipeline_def,
            creation_data.environment_config,
            creation_data.run_config,
            log_manager,
        )
        with resources_cm as resources_builder:
            storage_data = create_system_storage_data(
                creation_data, system_storage_data, resources_builder
            )
            pipeline_context = construct_pipeline_execution_context(
                context_creation_data=creation_data,
                scoped_resources_builder=resources_builder,
                system_storage_data=storage_data,
                log_manager=log_manager,
            )
            yield pipeline_context
    except DagsterError as err:
        # pylint does not know original_exc_info exists if is_user_code_error is true
        # pylint: disable=no-member
        if err.is_user_code_error:
            exc_info = err.original_exc_info
        else:
            exc_info = sys.exc_info()

        if run_config.executor_config.raise_on_error:
            raise err

        serialized_error = serializable_error_info_from_exc_info(exc_info)
        failure_log_manager = _create_context_free_log_manager(run_config, pipeline_def)
        yield DagsterEvent.pipeline_init_failure(
            pipeline_name=pipeline_def.name,
            failure_data=PipelineInitFailureData(error=serialized_error),
            log_manager=failure_log_manager,
        )
def orchestration_context_event_generator(
    pipeline: IPipeline,
    execution_plan: ExecutionPlan,
    run_config: Dict[str, Any],
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    raise_on_error: bool,
    get_executor_def_fn: Optional[Callable[[str], ExecutorDefinition]],
    output_capture: Optional[Dict["StepOutputHandle", Any]],
) -> Generator[Union[DagsterEvent, PlanOrchestrationContext], None, None]:
    """Yield a ``PlanOrchestrationContext``, or an init-failure event on error.

    ``get_executor_def_fn`` must be ``None`` (enforced by an invariant check).
    The log manager is created before the ``try`` block so that it can also be
    attached to the failure event.

    When a ``DagsterError`` is caught, a ``DagsterEvent.pipeline_init_failure``
    event is yielded and, if ``raise_on_error`` is truthy, the error is then
    re-raised.
    """
    check.invariant(get_executor_def_fn is None)

    creation_data = create_context_creation_data(
        pipeline, execution_plan, run_config, pipeline_run, instance
    )
    log_manager = create_log_manager(creation_data)

    try:
        executor = create_executor(creation_data)
        plan_data = create_plan_data(creation_data, raise_on_error, executor.retries)
        orchestration_context = PlanOrchestrationContext(
            plan_data=plan_data,
            log_manager=log_manager,
            executor=executor,
            output_capture=output_capture,
        )
        _validate_plan_with_context(orchestration_context, execution_plan)
        yield orchestration_context
    except DagsterError as caught:
        # The cast only informs the type checker; the object is unchanged.
        user_error = cast(DagsterUserCodeExecutionError, caught)

        # pylint does not know original_exc_info exists if is_user_code_error is true
        # pylint: disable=no-member
        if user_error.is_user_code_error:
            exc_info = user_error.original_exc_info
        else:
            exc_info = sys.exc_info()

        serialized_error = serializable_error_info_from_exc_info(exc_info)
        yield DagsterEvent.pipeline_init_failure(
            pipeline_name=pipeline_run.pipeline_name,
            failure_data=PipelineInitFailureData(error=serialized_error),
            log_manager=log_manager,
        )

        if raise_on_error:
            raise user_error
def orchestration_context_event_generator(
    pipeline,
    execution_plan,
    run_config,
    pipeline_run,
    instance,
    raise_on_error,
    get_executor_def_fn,
    output_capture,
):
    """Yield a ``PlanOrchestrationContext``, or an init-failure event on error.

    ``get_executor_def_fn`` must be ``None`` (enforced by an invariant check).
    The same log manager is used for the context and for the failure event.

    When a ``DagsterError`` is caught, a ``DagsterEvent.pipeline_init_failure``
    event is yielded first; the error is re-raised afterwards only if
    ``raise_on_error`` is truthy.
    """
    check.invariant(get_executor_def_fn is None)

    creation_data = create_context_creation_data(
        pipeline, execution_plan, run_config, pipeline_run, instance
    )
    log_manager = create_log_manager(creation_data)

    try:
        executor = create_executor(creation_data)
        plan_data = create_plan_data(creation_data, raise_on_error, executor.retries)
        orchestration_context = PlanOrchestrationContext(
            plan_data=plan_data,
            log_manager=log_manager,
            executor=executor,
            output_capture=output_capture,
        )
        _validate_plan_with_context(orchestration_context, execution_plan)
        yield orchestration_context
    except DagsterError as err:
        # pylint does not know original_exc_info exists if is_user_code_error is true
        # pylint: disable=no-member
        if err.is_user_code_error:
            exc_info = err.original_exc_info
        else:
            exc_info = sys.exc_info()

        failure_data = PipelineInitFailureData(
            error=serializable_error_info_from_exc_info(exc_info)
        )
        yield DagsterEvent.pipeline_init_failure(
            pipeline_name=pipeline_run.pipeline_name,
            failure_data=failure_data,
            log_manager=log_manager,
        )

        if raise_on_error:
            raise err
def scoped_pipeline_context(pipeline_def, environment_dict, run_config, intermediates_manager=None):
    """Yield a pipeline execution context, or an init-failure event on error.

    Builds the environment config, resolves the storage mode, constructs the
    intermediates manager (unless a truthy one was passed in) and the run
    storage, and writes the run metadata. Loggers and resources are then
    created inside the ``try`` block and the execution context is yielded.

    When a ``DagsterError`` is caught it is re-raised if
    ``run_config.executor_config.raise_on_error`` is truthy; otherwise a
    ``DagsterEvent.pipeline_init_failure`` event is yielded.
    """
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    check.dict_param(environment_dict, 'environment_dict', key_type=str)
    check.inst_param(run_config, 'run_config', RunConfig)

    environment_config = create_environment_config(
        pipeline_def, environment_dict, mode=run_config.mode
    )

    # The run storage mode will be provided by RunConfig or from the "storage" field in the user's
    # environment config, with preference given to the former if provided.
    storage_mode = run_config.storage_mode
    if not storage_mode:
        storage_mode = RunStorageMode.from_environment_config(
            environment_config.storage.storage_mode
        )

    if not intermediates_manager:
        intermediates_manager = construct_intermediates_manager(
            storage_mode, run_config.run_id, environment_config, pipeline_def
        )

    run_storage = construct_run_storage(storage_mode)
    run_meta = DagsterRunMeta(
        run_id=run_config.run_id, timestamp=time.time(), pipeline_name=pipeline_def.name
    )
    run_storage.write_dagster_run_meta(run_meta)

    try:
        mode_definition = pipeline_def.get_mode_definition(run_config.mode)
        loggers = _create_loggers(environment_config, run_config, pipeline_def, mode_definition)
        log_manager = DagsterLogManager(run_id=run_config.run_id, logging_tags={}, loggers=loggers)

        resources_cm = _create_resources(pipeline_def, environment_config, run_config, log_manager)
        with resources_cm as resources:
            pipeline_context = construct_pipeline_execution_context(
                run_config,
                pipeline_def,
                resources,
                environment_config,
                run_storage,
                intermediates_manager,
                log_manager,
            )
            yield pipeline_context
    except DagsterError as err:
        # pylint does not know original_exc_info exists if is_user_code_error is true
        # pylint: disable=no-member
        if err.is_user_code_error:
            exc_info = err.original_exc_info
        else:
            exc_info = sys.exc_info()

        if run_config.executor_config.raise_on_error:
            raise err

        serialized_error = serializable_error_info_from_exc_info(exc_info)
        failure_log_manager = _create_context_free_log_manager(run_config, pipeline_def)
        yield DagsterEvent.pipeline_init_failure(
            pipeline_name=pipeline_def.name,
            failure_data=PipelineInitFailureData(error=serialized_error),
            log_manager=failure_log_manager,
        )
def event_generator(
    self,
    execution_plan,
    run_config,
    pipeline_run,
    instance,
    scoped_resources_builder_cm,
    system_storage_data=None,
    intermediate_storage=None,
    raise_on_error=False,
):
    """Generate resource setup events, the execution context, and teardown events.

    Args:
        execution_plan: ``ExecutionPlan`` to build the context for.
        run_config: dict with ``str`` keys.
        pipeline_run: ``PipelineRun`` being executed.
        instance: ``DagsterInstance`` in use.
        scoped_resources_builder_cm: callable returning a resources manager
            that exposes ``generate_setup_events``, ``get_object`` and
            ``generate_teardown_events``.
        system_storage_data: optional ``SystemStorageData``.
        intermediate_storage: optional ``IntermediateStorage``.
        raise_on_error: whether to re-raise a caught ``DagsterError`` after
            the failure event has been yielded.

    Yields:
        The resources manager's setup events, then the execution context built
        by ``self.construct_context``, then the teardown events. If a
        ``DagsterError`` is caught before the context exists, a
        ``DagsterEvent.pipeline_init_failure`` event is yielded instead,
        followed by teardown events when a resources manager was created.

    Raises:
        DagsterError: re-raised when caught after the execution context was
            built, or when ``raise_on_error`` is truthy.
    """
    execution_plan = check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
    pipeline_def = execution_plan.pipeline.get_definition()
    run_config = check.dict_param(run_config, "run_config", key_type=str)
    pipeline_run = check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    instance = check.inst_param(instance, "instance", DagsterInstance)
    scoped_resources_builder_cm = check.callable_param(
        scoped_resources_builder_cm, "scoped_resources_builder_cm"
    )
    system_storage_data = check.opt_inst_param(
        system_storage_data, "system_storage_data", SystemStorageData
    )
    # The name passed to the check must match the actual parameter so a type
    # failure points at the right argument.
    intermediate_storage = check.opt_inst_param(
        intermediate_storage, "intermediate_storage", IntermediateStorage
    )
    raise_on_error = check.bool_param(raise_on_error, "raise_on_error")

    # Both stay None until created; the except block uses them to decide what
    # kind of failure it is handling.
    execution_context = None
    resources_manager = None

    try:
        context_creation_data = create_context_creation_data(
            execution_plan, run_config, pipeline_run, instance,
        )

        log_manager = create_log_manager(context_creation_data)
        resources_manager = scoped_resources_builder_cm(
            execution_plan,
            context_creation_data.environment_config,
            context_creation_data.pipeline_run,
            log_manager,
            context_creation_data.resource_keys_to_init,
        )
        for event in resources_manager.generate_setup_events():
            yield event
        scoped_resources_builder = check.inst(
            resources_manager.get_object(), ScopedResourcesBuilder
        )
        system_storage_data = create_system_storage_data(
            context_creation_data, system_storage_data, scoped_resources_builder
        )
        if intermediate_storage or context_creation_data.intermediate_storage_def:
            intermediate_storage = create_intermediate_storage(
                context_creation_data, intermediate_storage, scoped_resources_builder,
            )
        else:
            # remove this as part of https://github.com/dagster-io/dagster/issues/2705
            intermediate_storage = system_storage_data.intermediate_storage

        execution_context = self.construct_context(
            context_creation_data=context_creation_data,
            scoped_resources_builder=scoped_resources_builder,
            system_storage_data=system_storage_data,
            log_manager=log_manager,
            intermediate_storage=intermediate_storage,
            raise_on_error=raise_on_error,
        )

        _validate_plan_with_context(execution_context, execution_plan)

        yield execution_context
        for event in resources_manager.generate_teardown_events():
            yield event
    except DagsterError as dagster_error:
        if execution_context is None:
            user_facing_exc_info = (
                # pylint does not know original_exc_info exists if is_user_code_error is true
                # pylint: disable=no-member
                dagster_error.original_exc_info
                if dagster_error.is_user_code_error
                else sys.exc_info()
            )
            error_info = serializable_error_info_from_exc_info(user_facing_exc_info)

            yield DagsterEvent.pipeline_init_failure(
                pipeline_name=pipeline_def.name,
                failure_data=PipelineInitFailureData(error=error_info),
                log_manager=_create_context_free_log_manager(
                    instance, pipeline_run, pipeline_def
                ),
            )
            # Resources may have been partially set up before the failure.
            if resources_manager:
                for event in resources_manager.generate_teardown_events():
                    yield event
        else:
            # pipeline teardown failure
            raise dagster_error

        if raise_on_error:
            raise dagster_error
def scoped_pipeline_context(
    pipeline_def,
    environment_dict,
    pipeline_run,
    instance,
    execution_plan,
    system_storage_data=None,
    scoped_resources_builder_cm=create_resource_builder,
    raise_on_error=False,
):
    """Yield a pipeline execution context, or an init-failure event on error.

    The arguments are type-checked and context creation data is built. Inside
    the ``try`` block the executor config, log manager and scoped resources
    builder are created, and the constructed pipeline context is yielded.

    A ``DagsterError`` caught before the context exists produces a
    ``DagsterEvent.pipeline_init_failure`` event (and is then re-raised when
    ``raise_on_error`` is truthy). A ``DagsterError`` caught after the context
    was built is always re-raised.
    """
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    check.dict_param(environment_dict, 'environment_dict', key_type=str)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    check.inst_param(instance, 'instance', DagsterInstance)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    check.opt_inst_param(system_storage_data, 'system_storage_data', SystemStorageData)

    creation_data = create_context_creation_data(
        pipeline_def, environment_dict, pipeline_run, instance, execution_plan
    )

    # Remains None until the context has been constructed; the handler below
    # uses it to tell an init failure apart from a later failure.
    pipeline_context = None

    try:
        executor_config = create_executor_config(creation_data)
        log_manager = create_log_manager(creation_data)

        resources_cm = scoped_resources_builder_cm(
            creation_data.pipeline_def,
            creation_data.environment_config,
            creation_data.pipeline_run,
            log_manager,
            creation_data.resource_keys_to_init,
        )
        with resources_cm as resources_builder:
            storage_data = create_system_storage_data(
                creation_data, system_storage_data, resources_builder
            )
            pipeline_context = construct_pipeline_execution_context(
                context_creation_data=creation_data,
                scoped_resources_builder=resources_builder,
                system_storage_data=storage_data,
                log_manager=log_manager,
                executor_config=executor_config,
                raise_on_error=raise_on_error,
            )
            yield pipeline_context
    except DagsterError as err:
        # if we've caught an error after context init we're in a problematic state and should
        # just raise
        if pipeline_context is not None:
            raise err

        # only yield an init failure event if we haven't already yielded context
        # pylint does not know original_exc_info exists if is_user_code_error is true
        # pylint: disable=no-member
        if err.is_user_code_error:
            exc_info = err.original_exc_info
        else:
            exc_info = sys.exc_info()

        serialized_error = serializable_error_info_from_exc_info(exc_info)
        failure_log_manager = _create_context_free_log_manager(
            instance, pipeline_run, pipeline_def
        )
        yield DagsterEvent.pipeline_init_failure(
            pipeline_name=pipeline_def.name,
            failure_data=PipelineInitFailureData(error=serialized_error),
            log_manager=failure_log_manager,
        )

        if raise_on_error:
            raise err
def event_generator(
    self,
    execution_plan,
    run_config,
    pipeline_run,
    instance,
    scoped_resources_builder_cm,
    intermediate_storage=None,
    raise_on_error=False,
    resource_instances_to_override=None,
    output_capture=None,
):
    """Generate resource setup events, the execution context, and teardown events.

    Args:
        execution_plan: ``ExecutionPlan`` to build the context for.
        run_config: dict with ``str`` keys.
        pipeline_run: ``PipelineRun`` being executed.
        instance: ``DagsterInstance`` in use.
        scoped_resources_builder_cm: callable returning a resources manager
            that exposes ``generate_setup_events``, ``get_object`` and
            ``generate_teardown_events``.
        intermediate_storage: optional ``IntermediateStorage``.
        raise_on_error: whether to re-raise a caught ``DagsterError`` after
            the failure event has been yielded.
        resource_instances_to_override: optional dict forwarded to
            ``scoped_resources_builder_cm``.
        output_capture: forwarded unchanged to ``self.construct_context``.

    Yields:
        The resources manager's setup events, then the execution context built
        by ``self.construct_context``, then the teardown events. If a
        ``DagsterError`` is caught before the context exists, a
        ``DagsterEvent.pipeline_init_failure`` event is yielded instead,
        followed by teardown events when a resources manager was created.

    Raises:
        DagsterError: re-raised when caught after the execution context was
            built, or when ``raise_on_error`` is truthy.
    """
    execution_plan = check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
    pipeline_def = execution_plan.pipeline.get_definition()
    run_config = check.dict_param(run_config, "run_config", key_type=str)
    pipeline_run = check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    instance = check.inst_param(instance, "instance", DagsterInstance)
    scoped_resources_builder_cm = check.callable_param(
        scoped_resources_builder_cm, "scoped_resources_builder_cm"
    )
    # The name passed to the check must match the actual parameter so a type
    # failure points at the right argument.
    intermediate_storage = check.opt_inst_param(
        intermediate_storage, "intermediate_storage", IntermediateStorage
    )
    raise_on_error = check.bool_param(raise_on_error, "raise_on_error")
    resource_instances_to_override = check.opt_dict_param(
        resource_instances_to_override, "resource_instances_to_override"
    )

    # Both stay None until created; the except block uses them to decide what
    # kind of failure it is handling.
    execution_context = None
    resources_manager = None

    try:
        context_creation_data = create_context_creation_data(
            execution_plan, run_config, pipeline_run, instance,
        )

        log_manager = create_log_manager(context_creation_data)
        resource_defs = execution_plan.pipeline_def.get_mode_definition(
            context_creation_data.environment_config.mode
        ).resource_defs
        resources_manager = scoped_resources_builder_cm(
            resource_defs=resource_defs,
            resource_configs=context_creation_data.environment_config.resources,
            log_manager=log_manager,
            execution_plan=execution_plan,
            pipeline_run=context_creation_data.pipeline_run,
            resource_keys_to_init=context_creation_data.resource_keys_to_init,
            instance=instance,
            resource_instances_to_override=resource_instances_to_override,
            emit_persistent_events=True,
        )
        yield from resources_manager.generate_setup_events()
        scoped_resources_builder = check.inst(
            resources_manager.get_object(), ScopedResourcesBuilder
        )

        intermediate_storage = create_intermediate_storage(
            context_creation_data, intermediate_storage, scoped_resources_builder,
        )

        execution_context = self.construct_context(
            context_creation_data=context_creation_data,
            scoped_resources_builder=scoped_resources_builder,
            log_manager=log_manager,
            intermediate_storage=intermediate_storage,
            raise_on_error=raise_on_error,
            output_capture=output_capture,
        )

        _validate_plan_with_context(execution_context, execution_plan)

        yield execution_context
        yield from resources_manager.generate_teardown_events()
    except DagsterError as dagster_error:
        if execution_context is None:
            user_facing_exc_info = (
                # pylint does not know original_exc_info exists if is_user_code_error is true
                # pylint: disable=no-member
                dagster_error.original_exc_info
                if dagster_error.is_user_code_error
                else sys.exc_info()
            )
            error_info = serializable_error_info_from_exc_info(user_facing_exc_info)

            yield DagsterEvent.pipeline_init_failure(
                pipeline_name=pipeline_def.name,
                failure_data=PipelineInitFailureData(error=error_info),
                log_manager=_create_context_free_log_manager(
                    instance, pipeline_run, pipeline_def
                ),
            )
            # Resources may have been partially set up before the failure.
            if resources_manager:
                yield from resources_manager.generate_teardown_events()
        else:
            # pipeline teardown failure
            raise dagster_error

        if raise_on_error:
            raise dagster_error
def pipeline_initialization_event_generator(
    pipeline_def,
    environment_dict,
    pipeline_run,
    instance,
    execution_plan,
    scoped_resources_builder_cm,
    system_storage_data=None,
    raise_on_error=False,
):
    """Generate resource setup events, the pipeline context, and teardown events.

    On the success path this yields the resources manager's setup events, then
    the constructed pipeline context, then the teardown events.

    A ``DagsterError`` caught before the context exists produces a
    ``DagsterEvent.pipeline_init_failure`` event, followed by teardown events
    if a resources manager was created, and is re-raised when
    ``raise_on_error`` is truthy. A ``DagsterError`` caught after the context
    was built is always re-raised.
    """
    pipeline_def = check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    environment_dict = check.dict_param(environment_dict, 'environment_dict', key_type=str)
    pipeline_run = check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    instance = check.inst_param(instance, 'instance', DagsterInstance)
    execution_plan = check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    scoped_resources_builder_cm = check.callable_param(
        scoped_resources_builder_cm, 'scoped_resources_builder_cm'
    )
    system_storage_data = check.opt_inst_param(
        system_storage_data, 'system_storage_data', SystemStorageData
    )
    raise_on_error = check.bool_param(raise_on_error, 'raise_on_error')

    # Both remain None until created; the handler below inspects them.
    pipeline_context = None
    resources_manager = None

    try:
        creation_data = create_context_creation_data(
            pipeline_def, environment_dict, pipeline_run, instance, execution_plan
        )
        executor_config = create_executor_config(creation_data)
        log_manager = create_log_manager(creation_data)

        resources_manager = scoped_resources_builder_cm(
            execution_plan,
            creation_data.environment_config,
            creation_data.pipeline_run,
            log_manager,
            creation_data.resource_keys_to_init,
        )
        for setup_event in resources_manager.generate_setup_events():
            yield setup_event

        resources_builder = check.inst(resources_manager.get_object(), ScopedResourcesBuilder)
        storage_data = create_system_storage_data(
            creation_data, system_storage_data, resources_builder
        )
        pipeline_context = construct_pipeline_execution_context(
            context_creation_data=creation_data,
            scoped_resources_builder=resources_builder,
            system_storage_data=storage_data,
            log_manager=log_manager,
            executor_config=executor_config,
            raise_on_error=raise_on_error,
        )
        _validate_plan_with_context(pipeline_context, execution_plan)

        yield pipeline_context

        for teardown_event in resources_manager.generate_teardown_events():
            yield teardown_event
    except DagsterError as err:
        if pipeline_context is not None:
            # pipeline teardown failure
            raise err

        # pylint does not know original_exc_info exists if is_user_code_error is true
        # pylint: disable=no-member
        if err.is_user_code_error:
            exc_info = err.original_exc_info
        else:
            exc_info = sys.exc_info()

        serialized_error = serializable_error_info_from_exc_info(exc_info)
        failure_log_manager = _create_context_free_log_manager(
            instance, pipeline_run, pipeline_def
        )
        yield DagsterEvent.pipeline_init_failure(
            pipeline_name=pipeline_def.name,
            failure_data=PipelineInitFailureData(error=serialized_error),
            log_manager=failure_log_manager,
        )

        if resources_manager:
            for teardown_event in resources_manager.generate_teardown_events():
                yield teardown_event

        if raise_on_error:
            raise err
def host_mode_execution_context_event_generator(
    pipeline,
    execution_plan,
    run_config,
    pipeline_run,
    instance,
    raise_on_error,
    get_executor_def_fn,
    output_capture,
):
    """Yield a host-mode ``PlanOrchestrationContext``, or an init-failure event.

    ``output_capture`` must be ``None`` (enforced by an invariant check). The
    log manager is assembled from the default system loggers plus the
    instance's logger, without a pipeline definition.

    A ``DagsterError`` caught before the context exists produces a
    ``DagsterEvent.pipeline_init_failure`` event and is re-raised when
    ``raise_on_error`` is truthy. A ``DagsterError`` caught after the context
    was built is always re-raised.
    """
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
    check.inst_param(pipeline, "pipeline", ReconstructablePipeline)
    check.dict_param(run_config, "run_config", key_type=str)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    get_executor_def_fn = check.opt_callable_param(
        get_executor_def_fn, "get_executor_def_fn", _default_get_executor_def_fn
    )
    check.bool_param(raise_on_error, "raise_on_error")
    check.invariant(output_capture is None)

    execution_context = None

    loggers = [
        logger_def.logger_fn(
            InitLoggerContext(
                logger_config,
                pipeline_def=None,
                logger_def=logger_def,
                run_id=pipeline_run.run_id,
            )
        )
        for logger_def, logger_config in default_system_loggers()
    ]
    loggers.append(instance.get_logger())

    log_manager = DagsterLogManager(
        run_id=pipeline_run.run_id,
        logging_tags=get_logging_tags(pipeline_run),
        loggers=loggers,
    )

    try:
        executor = _get_host_mode_executor(pipeline, run_config, get_executor_def_fn, instance)
        plan_data = PlanData(
            pipeline=pipeline,
            pipeline_run=pipeline_run,
            instance=instance,
            execution_plan=execution_plan,
            raise_on_error=raise_on_error,
            retry_mode=executor.retries,
        )
        execution_context = PlanOrchestrationContext(
            plan_data=plan_data,
            log_manager=log_manager,
            executor=executor,
            output_capture=None,
        )

        yield execution_context
    except DagsterError as err:
        if execution_context is not None:
            # pipeline teardown failure
            raise err

        # pylint does not know original_exc_info exists if is_user_code_error is true
        # pylint: disable=no-member
        if err.is_user_code_error:
            exc_info = err.original_exc_info
        else:
            exc_info = sys.exc_info()

        serialized_error = serializable_error_info_from_exc_info(exc_info)
        yield DagsterEvent.pipeline_init_failure(
            pipeline_name=pipeline_run.pipeline_name,
            failure_data=PipelineInitFailureData(error=serialized_error),
            log_manager=log_manager,
        )

        if raise_on_error:
            raise err
def scoped_pipeline_context(
    pipeline_def,
    environment_dict,
    run_config,
    instance,
    system_storage_data=None,
    scoped_resources_builder_cm=create_resource_builder,
):
    """Yield a pipeline execution context, or an init-failure event on error.

    Before the ``try`` block this builds the context creation data and the
    executor config and checks the persistent-storage requirement. Inside it,
    a ``PipelineRun`` with status ``NOT_STARTED`` is created on ``instance``,
    the event sink (if any) is notified of pipeline init, and the execution
    context is yielded from inside the resource-builder context manager.

    When a ``DagsterError`` is caught it is re-raised if
    ``executor_config.raise_on_error`` is truthy; otherwise a
    ``DagsterEvent.pipeline_init_failure`` event is yielded. The event sink
    (if any) is always notified of pipeline teardown.
    """
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    check.dict_param(environment_dict, 'environment_dict', key_type=str)
    check.inst_param(run_config, 'run_config', RunConfig)
    check.inst_param(instance, 'instance', DagsterInstance)
    check.opt_inst_param(system_storage_data, 'system_storage_data', SystemStorageData)

    creation_data = create_context_creation_data(
        pipeline_def, environment_dict, run_config, instance
    )
    executor_config = create_executor_config(creation_data)

    check_persistent_storage_requirement(
        pipeline_def, creation_data.system_storage_def, executor_config
    )

    # From here on, a DagsterError is turned into a pipeline init failure
    # event (unless raise_on_error is set on the executor config).
    try:
        log_manager = create_log_manager(creation_data)

        from .api import ExecutionSelector

        new_run = PipelineRun(
            pipeline_name=pipeline_def.name,
            run_id=run_config.run_id,
            environment_dict=environment_dict,
            mode=creation_data.mode_def.name,
            # https://github.com/dagster-io/dagster/issues/1709
            # ExecutionSelector should be threaded all the way
            # down from the top
            selector=ExecutionSelector(pipeline_def.name),
            reexecution_config=run_config.reexecution_config,
            step_keys_to_execute=run_config.step_keys_to_execute,
            status=PipelineRunStatus.NOT_STARTED,
        )
        instance.create_run(new_run)

        if run_config.event_sink:
            run_config.event_sink.on_pipeline_init()

        resources_cm = scoped_resources_builder_cm(
            creation_data.pipeline_def,
            creation_data.environment_config,
            creation_data.run_config,
            log_manager,
        )
        with resources_cm as resources_builder:
            storage_data = create_system_storage_data(
                creation_data, system_storage_data, resources_builder
            )
            pipeline_context = construct_pipeline_execution_context(
                context_creation_data=creation_data,
                scoped_resources_builder=resources_builder,
                system_storage_data=storage_data,
                log_manager=log_manager,
                executor_config=executor_config,
            )
            yield pipeline_context
    except DagsterError as err:
        # pylint does not know original_exc_info exists if is_user_code_error is true
        # pylint: disable=no-member
        if err.is_user_code_error:
            exc_info = err.original_exc_info
        else:
            exc_info = sys.exc_info()

        if executor_config.raise_on_error:
            raise err

        serialized_error = serializable_error_info_from_exc_info(exc_info)
        failure_log_manager = _create_context_free_log_manager(run_config, pipeline_def)
        yield DagsterEvent.pipeline_init_failure(
            pipeline_name=pipeline_def.name,
            failure_data=PipelineInitFailureData(error=serialized_error),
            log_manager=failure_log_manager,
        )
    finally:
        if run_config.event_sink:
            run_config.event_sink.on_pipeline_teardown()
def host_mode_execution_context_event_generator(
    execution_plan, recon_pipeline, run_config, pipeline_run, instance, executor, raise_on_error
):
    """Yield a ``HostModeRunWorkerExecutionContext``, or an init-failure event.

    The log manager is assembled from the default system loggers plus the
    instance's logger, without a pipeline definition. The already-constructed
    ``executor`` is attached to the context and supplies the retry mode.

    A ``DagsterError`` caught before the context exists produces a
    ``DagsterEvent.pipeline_init_failure`` event and is re-raised when
    ``raise_on_error`` is truthy. A ``DagsterError`` caught after the context
    was built is always re-raised.
    """
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
    check.inst_param(recon_pipeline, "recon_pipeline", ReconstructablePipeline)
    check.dict_param(run_config, "run_config", key_type=str)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(executor, "executor", Executor)
    check.bool_param(raise_on_error, "raise_on_error")

    execution_context = None

    # Should these be configurable from the run config (without loading
    # the full EnvironmentConfig??)
    loggers = [
        logger_def.logger_fn(
            InitLoggerContext(
                logger_config,
                pipeline_def=None,
                logger_def=logger_def,
                run_id=pipeline_run.run_id,
            )
        )
        for logger_def, logger_config in default_system_loggers()
    ]
    loggers.append(instance.get_logger())

    log_manager = DagsterLogManager(
        run_id=pipeline_run.run_id,
        logging_tags=_get_logging_tags(pipeline_run),
        loggers=loggers,
    )

    # Create an executor (again how do we pull config from run_config
    # without going through the full EnvironmentConfig.build flow)
    try:
        context_data = HostModeExecutionContextData(
            pipeline_run=pipeline_run,
            recon_pipeline=recon_pipeline,
            execution_plan=execution_plan,
            instance=instance,
            raise_on_error=raise_on_error,
            retry_mode=executor.retries,
        )
        execution_context = HostModeRunWorkerExecutionContext(
            execution_context_data=context_data,
            log_manager=log_manager,
            executor=executor,
        )

        yield execution_context
    except DagsterError as err:
        if execution_context is not None:
            # pipeline teardown failure
            raise err

        # pylint does not know original_exc_info exists if is_user_code_error is true
        # pylint: disable=no-member
        if err.is_user_code_error:
            exc_info = err.original_exc_info
        else:
            exc_info = sys.exc_info()

        serialized_error = serializable_error_info_from_exc_info(exc_info)
        yield DagsterEvent.pipeline_init_failure(
            pipeline_name=pipeline_run.pipeline_name,
            failure_data=PipelineInitFailureData(error=serialized_error),
            log_manager=log_manager,
        )

        if raise_on_error:
            raise err