def test_compile():
    run_config = RunConfig()
    environment_config = EnvironmentConfig.build(
        composition,
        {'solids': {'add_four': {'inputs': {'num': {'value': 1}}}}},
        run_config=run_config,
    )
    plan = ExecutionPlan.build(composition, environment_config, run_config)

    res = coalesce_execution_steps(plan)

    assert set(res.keys()) == {
        'add_four.add_two.add_one',
        'add_four.add_two.add_one_2',
        'add_four.add_two_2.add_one',
        'add_four.add_two_2.add_one_2',
        'div_four.div_two',
        'div_four.div_two_2',
    }

def test_solid_dictionary_some_no_config():
    @solid(name='int_config_solid', config=Int, input_defs=[], output_defs=[])
    def int_config_solid(_):
        return None

    @solid(name='no_config_solid', input_defs=[], output_defs=[])
    def no_config_solid(_):
        return None

    @pipeline
    def pipeline_def():
        int_config_solid()
        no_config_solid()

    env = EnvironmentConfig.build(pipeline_def, {'solids': {'int_config_solid': {'config': 1}}})

    assert {'int_config_solid', 'no_config_solid'} == set(env.solids.keys())
    assert env.solids == {'int_config_solid': SolidConfig(1), 'no_config_solid': SolidConfig()}

def test_clean_event_generator_exit():
    '''Testing for generator cleanup
    (see https://amir.rachum.com/blog/2017/03/03/generator-cleanup/)
    '''
    from dagster.core.execution.context.init import InitResourceContext

    pipeline_def = gen_basic_resource_pipeline()
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline_def)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan
    )
    log_manager = DagsterLogManager(run_id=pipeline_run.run_id, logging_tags={}, loggers=[])
    environment_config = EnvironmentConfig.build(pipeline_def)
    resource_name, resource_def = next(iter(pipeline_def.get_default_mode().resource_defs.items()))
    resource_context = InitResourceContext(
        pipeline_def=pipeline_def,
        resource_def=resource_def,
        resource_config=None,
        run_id=make_new_run_id(),
    )

    generator = single_resource_event_generator(resource_context, resource_name, resource_def)
    next(generator)
    generator.close()

    generator = resource_initialization_event_generator(
        execution_plan, environment_config, pipeline_run, log_manager, {'a'}
    )
    next(generator)
    generator.close()

    generator = pipeline_initialization_event_generator(
        execution_plan,
        {},
        pipeline_run,
        instance,
        resource_initialization_manager,
    )
    next(generator)
    generator.close()

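# A minimal, dagster-independent sketch of the cleanup contract the test above
# exercises: generator.close() raises GeneratorExit at the suspended yield, so
# the generator's finally block (where the event generators above tear down
# resources) still runs.
def demo_generator_cleanup():
    events = []

    def gen():
        try:
            yield 'resource ready'
        finally:
            events.append('cleaned up')  # teardown runs even on early close

    g = gen()
    next(g)    # advance to the yield
    g.close()  # GeneratorExit is raised inside gen()
    assert events == ['cleaned up']
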
def create_execution_plan(
    pipeline: Union[IPipeline, PipelineDefinition],
    run_config: Optional[dict] = None,
    mode: Optional[str] = None,
    step_keys_to_execute: Optional[List[str]] = None,
) -> ExecutionPlan:
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)

    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    mode = check.opt_str_param(mode, "mode", default=pipeline_def.get_default_mode_name())
    check.opt_list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)

    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode=mode)

    return ExecutionPlan.build(
        pipeline, environment_config, mode=mode, step_keys_to_execute=step_keys_to_execute
    )

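# Hedged usage sketch for the typed create_execution_plan above. The @solid /
# @pipeline definitions here are hypothetical, invented for illustration; the
# sketch assumes the standard dagster decorators are importable.
@solid
def do_nothing(_):
    return None

@pipeline
def sketch_pipeline():
    do_nothing()

plan = create_execution_plan(sketch_pipeline, run_config={}, mode=None)
# `plan` is an ExecutionPlan; subset plans can be derived from it as in the
# tests elsewhere in this section (execution_plan.build_subset_plan(...)).
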
def test_resolve_step_output_versions_no_external_dependencies():
    versioned_pipeline = versioned_pipeline_factory()
    speculative_execution_plan = create_execution_plan(
        versioned_pipeline, run_config={}, mode="main"
    )
    environment_config = EnvironmentConfig.build(versioned_pipeline, run_config={}, mode="main")
    versions = resolve_step_output_versions(
        versioned_pipeline, speculative_execution_plan, environment_config
    )

    assert (
        versions[StepOutputHandle("versioned_solid_no_input", "result")]
        == versioned_pipeline_expected_step1_output_version()
    )
    assert (
        versions[StepOutputHandle("versioned_solid_takes_input", "result")]
        == versioned_pipeline_expected_step2_output_version()
    )

def test_whole_environment():
    pipeline_def = PipelineDefinition(
        name="some_pipeline",
        mode_defs=[
            ModeDefinition(
                name="test_mode",
                resource_defs={
                    "test_resource": ResourceDefinition(
                        resource_fn=lambda _: None, config_schema=Any
                    )
                },
            )
        ],
        solid_defs=[
            SolidDefinition(
                name="int_config_solid",
                config_schema=Int,
                input_defs=[],
                output_defs=[OutputDefinition()],
                required_resource_keys={"test_resource"},
                compute_fn=lambda *args: None,
            ),
            SolidDefinition(
                name="no_config_solid", input_defs=[], output_defs=[], compute_fn=lambda *args: None
            ),
        ],
    )

    env = EnvironmentConfig.build(
        pipeline_def,
        {
            "resources": {"test_resource": {"config": 1}},
            "solids": {"int_config_solid": {"config": 123}},
        },
    )

    assert isinstance(env, EnvironmentConfig)
    assert env.solids == {
        "int_config_solid": SolidConfig.from_dict({"config": 123}),
        "no_config_solid": SolidConfig.from_dict({}),
    }
    assert env.resources == {
        "test_resource": ResourceConfig(1),
        "io_manager": ResourceConfig(None),
    }

def execute_list_versions_command(instance, kwargs):
    check.inst_param(instance, "instance", DagsterInstance)

    config = list(check.opt_tuple_param(kwargs.get("config"), "config", default=(), of_type=str))
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")

    if preset and config:
        raise click.UsageError("Can not use --preset with --config.")

    pipeline_origin = get_pipeline_python_origin_from_kwargs(kwargs)
    pipeline = recon_pipeline_from_origin(pipeline_origin)
    run_config = get_run_config_from_file_list(config)

    environment_config = EnvironmentConfig.build(pipeline.get_definition(), run_config, mode=mode)
    execution_plan = ExecutionPlan.build(pipeline, environment_config)

    step_output_versions = resolve_step_output_versions(
        pipeline.get_definition(), execution_plan, environment_config
    )
    memoized_plan = resolve_memoized_execution_plan(
        execution_plan, pipeline.get_definition(), run_config, instance, environment_config
    )
    # The step keys that we need to execute are those which do not have their inputs populated.
    step_keys_not_stored = set(memoized_plan.step_keys_to_execute)
    table = []
    for step_output_handle, version in step_output_versions.items():
        table.append(
            [
                "{key}.{output}".format(
                    key=step_output_handle.step_key, output=step_output_handle.output_name
                ),
                version,
                "stored"
                if step_output_handle.step_key not in step_keys_not_stored
                else "to-be-recomputed",
            ]
        )
    table_str = tabulate(
        table, headers=["Step Output", "Version", "Status of Output"], tablefmt="github"
    )
    click.echo(table_str)

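# Standalone illustration of the tabulate call that renders the version table
# above. The tabulate signature matches the usage in the source; the rows and
# version strings here are made up purely for display.
from tabulate import tabulate

rows = [
    ["return_one.compute", "3f8a...", "stored"],
    ["add_one.compute", "9c2b...", "to-be-recomputed"],
]
print(tabulate(rows, headers=["Step Output", "Version", "Status of Output"], tablefmt="github"))
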
def test_execute_step_wrong_step_key():
    pipeline = define_inty_pipeline()
    instance = DagsterInstance.ephemeral()
    environment_config = EnvironmentConfig.build(pipeline)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline), environment_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            execution_plan.build_subset_plan(["nope.compute"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )

    assert exc_info.value.step_keys == ["nope.compute"]

    assert str(exc_info.value) == "Can not build subset plan from unknown step: nope.compute"

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            execution_plan.build_subset_plan(
                ["nope.compute", "nuh_uh.compute"], pipeline, environment_config
            ),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )

    assert exc_info.value.step_keys == ["nope.compute", "nuh_uh.compute"]

    assert (
        str(exc_info.value)
        == "Can not build subset plan from unknown steps: nope.compute, nuh_uh.compute"
    )

def test_resolve_step_versions_default_value():
    speculative_execution_plan = create_execution_plan(versioned_pipeline_default_value)
    environment_config = EnvironmentConfig.build(versioned_pipeline_default_value)
    versions = resolve_step_versions(
        versioned_pipeline_default_value, speculative_execution_plan, environment_config
    )

    input_version = join_and_hash(repr("DEFAULTVAL"))

    solid_def_version = versioned_solid_default_value.version
    solid_config_version = resolve_config_version(None)
    solid_resources_version = join_and_hash()
    solid_version = join_and_hash(solid_def_version, solid_config_version, solid_resources_version)

    step_version = join_and_hash(input_version, solid_version)

    assert versions["versioned_solid_default_value"] == step_version

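# The test above treats join_and_hash as an opaque combiner of version
# strings. A hypothetical stand-in (NOT dagster's implementation) with the
# two properties the assertions rely on: it is insensitive to argument order,
# and any unversioned (None) input poisons the combined version.
import hashlib

def fake_join_and_hash(*args):
    if any(arg is None for arg in args):
        return None
    return hashlib.sha1("|".join(sorted(str(a) for a in args)).encode()).hexdigest()
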
def test_compile():
    run_config = RunConfig()
    environment_config = EnvironmentConfig.build(
        composition,
        {'solids': {'add_four': {'inputs': {'num': {'value': 1}}}}},
        run_config=run_config,
    )
    plan = ExecutionPlan.build(
        composition, environment_config, composition.get_mode_definition(run_config.mode)
    )

    res = coalesce_execution_steps(plan)

    assert set(res.keys()) == {
        'add_four.adder_1.adder_1',
        'add_four.adder_1.adder_2',
        'add_four.adder_2.adder_1',
        'add_four.adder_2.adder_2',
        'div_four.div_1',
        'div_four.div_2',
    }

def test_compile():
    environment_config = EnvironmentConfig.build(
        composition,
        {"solids": {"add_four": {"inputs": {"num": {"value": 1}}}}},
    )
    plan = ExecutionPlan.build(InMemoryPipeline(composition), environment_config)

    res = coalesce_execution_steps(plan)

    assert set(res.keys()) == {
        "add_four.add",
        "div_four.div_two",
        "div_four.div_two_2",
        "add_four.emit_two.emit_one_2",
        "add_four.emit_two_2.add",
        "int_to_float",
        "add_four.emit_two_2.emit_one_2",
        "add_four.emit_two.add",
        "add_four.emit_two_2.emit_one",
        "add_four.emit_two.emit_one",
    }

def create_context_creation_data(pipeline_def, environment_dict, run_config, instance):
    environment_config = EnvironmentConfig.build(pipeline_def, environment_dict, run_config)
    mode_def = pipeline_def.get_mode_definition(run_config.mode)
    system_storage_def = system_storage_def_from_config(mode_def, environment_config)
    executor_def = executor_def_from_config(mode_def, environment_config)

    execution_target_handle, _ = ExecutionTargetHandle.get_handle(pipeline_def)

    return ContextCreationData(
        pipeline_def=pipeline_def,
        environment_config=environment_config,
        run_config=run_config,
        mode_def=mode_def,
        system_storage_def=system_storage_def,
        execution_target_handle=execution_target_handle,
        executor_def=executor_def,
        instance=instance,
    )

def test_solid_dictionary_some_no_config():
    @solid(name="int_config_solid", config_schema=Int, input_defs=[], output_defs=[])
    def int_config_solid(_):
        return None

    @solid(name="no_config_solid", input_defs=[], output_defs=[])
    def no_config_solid(_):
        return None

    @pipeline
    def pipeline_def():
        int_config_solid()
        no_config_solid()

    env = EnvironmentConfig.build(pipeline_def, {"solids": {"int_config_solid": {"config": 1}}})

    assert {"int_config_solid", "no_config_solid"} == set(env.solids.keys())
    assert env.solids == {
        "int_config_solid": SolidConfig.from_dict({"config": 1}),
        "no_config_solid": SolidConfig.from_dict({}),
    }

def test_using_intermediates_to_override():
    pipeline = define_inty_pipeline()

    run_config = {"storage": {"filesystem": {}}, "intermediate_storage": {"in_memory": {}}}

    instance = DagsterInstance.ephemeral()
    environment_config = EnvironmentConfig.build(pipeline, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline), environment_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )
    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
    )

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id
    )
    assert get_step_output(return_one_step_events, "return_one")
    assert not intermediate_storage.has_intermediate(None, StepOutputHandle("return_one"))

def test_provided_default_on_resources_config():
    @solid(
        name="some_solid", input_defs=[], output_defs=[], required_resource_keys={"some_resource"}
    )
    def some_solid(_):
        return None

    @pipeline(
        mode_defs=[
            ModeDefinition(
                name="some_mode",
                resource_defs={
                    "some_resource": ResourceDefinition(
                        resource_fn=lambda _: None,
                        config_schema={
                            "with_default_int": Field(Int, is_required=False, default_value=23434)
                        },
                    )
                },
            )
        ]
    )
    def pipeline_def():
        some_solid()

    env_type = create_environment_type(pipeline_def)
    some_resource_field = env_type.fields["resources"].config_type.fields["some_resource"]
    assert some_resource_field.is_required is False

    some_resource_config_field = some_resource_field.config_type.fields["config"]
    assert some_resource_config_field.is_required is False
    assert some_resource_config_field.default_value == {"with_default_int": 23434}

    assert some_resource_field.default_value == {"config": {"with_default_int": 23434}}

    value = EnvironmentConfig.build(pipeline_def, {})
    assert value.resources == {
        "some_resource": ResourceConfig({"with_default_int": 23434}),
        "io_manager": ResourceConfig(None),
    }

def create_context_creation_data(execution_plan, run_config, pipeline_run, instance):
    pipeline_def = execution_plan.pipeline.get_definition()
    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode=pipeline_run.mode)

    mode_def = pipeline_def.get_mode_definition(pipeline_run.mode)
    system_storage_def = system_storage_def_from_config(mode_def, environment_config)
    executor_def = executor_def_from_config(mode_def, environment_config)

    return ContextCreationData(
        pipeline=execution_plan.pipeline,
        environment_config=environment_config,
        pipeline_run=pipeline_run,
        mode_def=mode_def,
        system_storage_def=system_storage_def,
        executor_def=executor_def,
        instance=instance,
        resource_keys_to_init=get_required_resource_keys_to_init(
            execution_plan, system_storage_def
        ),
    )

def test_whole_environment():
    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        mode_defs=[
            ModeDefinition(
                name='test_mode',
                resource_defs={
                    'test_resource': ResourceDefinition(
                        resource_fn=lambda _: None, config_schema=Any
                    )
                },
            )
        ],
        solid_defs=[
            SolidDefinition(
                name='int_config_solid',
                config_schema=Int,
                input_defs=[],
                output_defs=[],
                compute_fn=lambda *args: None,
            ),
            SolidDefinition(
                name='no_config_solid', input_defs=[], output_defs=[], compute_fn=lambda *args: None
            ),
        ],
    )

    env = EnvironmentConfig.build(
        pipeline_def,
        {
            'resources': {'test_resource': {'config': 1}},
            'solids': {'int_config_solid': {'config': 123}},
        },
    )

    assert isinstance(env, EnvironmentConfig)
    assert env.solids == {'int_config_solid': SolidConfig(123), 'no_config_solid': SolidConfig()}
    assert env.resources == {'test_resource': {'config': 1}}

def test_required_solid_with_required_subfield():
    pipeline_def = PipelineDefinition(
        name="some_pipeline",
        solid_defs=[
            SolidDefinition(
                name="int_config_solid",
                config_schema={"required_field": String},
                input_defs=[],
                output_defs=[],
                compute_fn=lambda *_args: None,
            )
        ],
    )

    env_type = create_environment_type(pipeline_def)

    assert env_type.fields["solids"].is_required is True
    assert env_type.fields["solids"].config_type

    solids_type = env_type.fields["solids"].config_type
    assert solids_type.fields["int_config_solid"].is_required is True
    int_config_solid_type = solids_type.fields["int_config_solid"].config_type
    assert int_config_solid_type.fields["config"].is_required is True

    assert env_type.fields["execution"].is_required is False

    env_obj = EnvironmentConfig.build(
        pipeline_def,
        {"solids": {"int_config_solid": {"config": {"required_field": "foobar"}}}},
    )

    assert env_obj.solids["int_config_solid"].config["required_field"] == "foobar"

    res = process_config(env_type, {"solids": {}})
    assert not res.success

    res = process_config(env_type, {})
    assert not res.success

def test_resource_versions():
    run_config = {
        "resources": {
            "test_resource": {"config": {"input_str": "apple"}},
            "test_resource_no_version": {"config": {"input_str": "banana"}},
        }
    }

    environment_config = EnvironmentConfig.build(modes_pipeline, run_config, mode="fakemode")

    resource_versions_by_key = resolve_resource_versions(environment_config, modes_pipeline)

    assert resource_versions_by_key["test_resource"] == join_and_hash(
        resolve_config_version({"input_str": "apple"}), test_resource.version
    )

    assert resource_versions_by_key["test_resource_no_version"] is None

    assert resource_versions_by_key["test_resource_no_config"] == join_and_hash(
        join_and_hash(), "42"
    )

def create_execution_plan(pipeline, environment_dict=None, mode=None, step_keys_to_execute=None):
    pipeline, pipeline_def = _check_pipeline(pipeline)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict', key_type=str)
    mode = check.opt_str_param(mode, 'mode', default=pipeline_def.get_default_mode_name())
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    environment_config = EnvironmentConfig.build(pipeline_def, environment_dict, mode=mode)

    return ExecutionPlan.build(
        pipeline, environment_config, mode=mode, step_keys_to_execute=step_keys_to_execute
    )

def test_step_versions_with_resources():
    run_config = {"resources": {"test_resource": {"config": {"input_str": "apple"}}}}

    speculative_execution_plan = create_execution_plan(
        versioned_modes_pipeline, run_config=run_config,
    )

    versions = resolve_step_versions(
        speculative_execution_plan, run_config=run_config, mode="fakemode"
    )

    solid_def_version = fake_solid_resources_versioned.version
    solid_config_version = resolve_config_version(None)

    environment_config = EnvironmentConfig.build(
        versioned_modes_pipeline, mode="fakemode", run_config=run_config
    )
    resource_versions_by_key = resolve_resource_versions(
        environment_config, versioned_modes_pipeline.get_mode_definition("fakemode")
    )
    solid_resources_version = join_and_hash(
        *[
            resource_versions_by_key[resource_key]
            for resource_key in fake_solid_resources_versioned.required_resource_keys
        ]
    )
    solid_version = join_and_hash(solid_def_version, solid_config_version, solid_resources_version)

    step_version = join_and_hash(solid_version)

    assert versions["fake_solid_resources_versioned.compute"] == step_version

def test_required_solid_with_required_subfield():
    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        solid_defs=[
            SolidDefinition(
                name='int_config_solid',
                config_schema={'required_field': String},
                input_defs=[],
                output_defs=[],
                compute_fn=lambda *_args: None,
            )
        ],
    )

    env_type = create_environment_type(pipeline_def)

    assert env_type.fields['solids'].is_required is True
    assert env_type.fields['solids'].config_type

    solids_type = env_type.fields['solids'].config_type
    assert solids_type.fields['int_config_solid'].is_required is True
    int_config_solid_type = solids_type.fields['int_config_solid'].config_type
    assert int_config_solid_type.fields['config'].is_required is True

    assert env_type.fields['execution'].is_required is False

    env_obj = EnvironmentConfig.build(
        pipeline_def,
        {'solids': {'int_config_solid': {'config': {'required_field': 'foobar'}}}},
    )

    assert env_obj.solids['int_config_solid'].config['required_field'] == 'foobar'

    res = process_config(env_type, {'solids': {}})
    assert not res.success

    res = process_config(env_type, {})
    assert not res.success

def test_required_solid_with_required_subfield():
    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        solid_defs=[
            SolidDefinition(
                name='int_config_solid',
                config={'required_field': String},
                input_defs=[],
                output_defs=[],
                compute_fn=lambda *_args: None,
            )
        ],
    )

    env_type = create_environment_type(pipeline_def)

    assert env_type.fields['solids'].is_required is True
    assert env_type.fields['solids'].config_type

    solids_type = env_type.fields['solids'].config_type
    assert solids_type.fields['int_config_solid'].is_required is True
    int_config_solid_type = solids_type.fields['int_config_solid'].config_type
    assert int_config_solid_type.fields['config'].is_required is True

    assert env_type.fields['execution'].is_required is False

    env_obj = EnvironmentConfig.build(
        pipeline_def,
        {'solids': {'int_config_solid': {'config': {'required_field': 'foobar'}}}},
    )

    assert env_obj.solids['int_config_solid'].config['required_field'] == 'foobar'

    with pytest.raises(DagsterEvaluateConfigValueError):
        throwing_validate_config_value(env_type, {'solids': {}})

    with pytest.raises(DagsterEvaluateConfigValueError):
        throwing_validate_config_value(env_type, {})

def test_solid_dictionary_type():
    pipeline_def = define_test_solids_config_pipeline()

    solid_dict_type = define_solid_dictionary_cls(
        pipeline_def.solids, pipeline_def.dependency_structure
    )

    env_obj = EnvironmentConfig.build(
        pipeline_def,
        {
            'solids': {
                'int_config_solid': {'config': 1},
                'string_config_solid': {'config': 'bar'},
            },
        },
    )

    value = env_obj.solids

    assert set(['int_config_solid', 'string_config_solid']) == set(value.keys())
    assert value == {'int_config_solid': SolidConfig(1), 'string_config_solid': SolidConfig('bar')}

    assert solid_dict_type.type_attributes.is_system_config

    for specific_solid_config_field in solid_dict_type.fields.values():
        specific_solid_config_type = specific_solid_config_field.config_type
        assert specific_solid_config_type.type_attributes.is_system_config
        user_config_field = specific_solid_config_field.config_type.fields['config']
        assert user_config_field.config_type.type_attributes.is_system_config is False

def create_context_creation_data(
    pipeline_def, environment_dict, pipeline_run, instance, execution_plan
):
    environment_config = EnvironmentConfig.build(pipeline_def, environment_dict, pipeline_run)
    mode_def = pipeline_def.get_mode_definition(pipeline_run.mode)
    system_storage_def = system_storage_def_from_config(mode_def, environment_config)
    executor_def = executor_def_from_config(mode_def, environment_config)

    execution_target_handle, _ = ExecutionTargetHandle.get_handle(pipeline_def)

    return ContextCreationData(
        pipeline_def=pipeline_def,
        environment_config=environment_config,
        pipeline_run=pipeline_run,
        mode_def=mode_def,
        system_storage_def=system_storage_def,
        execution_target_handle=execution_target_handle,
        executor_def=executor_def,
        instance=instance,
        resource_keys_to_init=get_required_resource_keys_to_init(
            execution_plan, system_storage_def
        ),
    )

def resolve_memoized_execution_plan(self, execution_plan, run_config, mode):
    """
    Returns:
        ExecutionPlan: Execution plan configured to only run unmemoized steps.
    """
    pipeline_def = execution_plan.pipeline.get_definition()
    pipeline_name = pipeline_def.name

    step_output_versions = resolve_step_output_versions(
        execution_plan,
        EnvironmentConfig.build(pipeline_def, run_config, mode),
        pipeline_def.get_mode_definition(mode),
    )
    if all(version is None for version in step_output_versions.values()):
        raise DagsterInvariantViolationError(
            "While creating a memoized pipeline run, no steps have versions. At least one step "
            "must have a version."
        )

    step_output_addresses = self.get_addresses_for_step_output_versions(
        {
            (pipeline_name, step_output_handle): version
            for step_output_handle, version in step_output_versions.items()
            if version
        }
    )

    step_keys_to_execute = list(
        {
            step_output_handle.step_key
            for step_output_handle in step_output_versions.keys()
            if (pipeline_name, step_output_handle) not in step_output_addresses
        }
    )

    return execution_plan.build_memoized_plan(step_keys_to_execute, step_output_addresses)

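# Illustrative extraction of the selection logic above as a pure function over
# plain data. The Handle namedtuple is a stand-in for StepOutputHandle, and
# unmemoized_step_keys is a hypothetical helper, not a dagster API.
from collections import namedtuple

Handle = namedtuple("Handle", "step_key output_name")

def unmemoized_step_keys(step_output_versions, stored_addresses, pipeline_name):
    # A step must re-execute if any of its versioned outputs has no stored address.
    return {
        handle.step_key
        for handle in step_output_versions
        if (pipeline_name, handle) not in stored_addresses
    }

assert unmemoized_step_keys(
    {Handle("solid_a", "result"): "v1", Handle("solid_b", "result"): "v2"},
    {("my_pipeline", Handle("solid_a", "result")): "/addr/a"},
    "my_pipeline",
) == {"solid_b"}
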
def create_execution_plan(pipeline, run_config=None, mode=None, step_keys_to_execute=None):
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)

    run_config = check.opt_dict_param(run_config, 'run_config', key_type=str)
    mode = check.opt_str_param(mode, 'mode', default=pipeline_def.get_default_mode_name())
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode=mode)

    return ExecutionPlan.build(
        pipeline, environment_config, mode=mode, step_keys_to_execute=step_keys_to_execute
    )

def get_context(self, solid_config=None, mode_def=None, run_config=None):
    """Get a dagstermill execution context for interactive exploration and development.

    Args:
        solid_config (Optional[Any]): If specified, this value will be made available on the
            context as its ``solid_config`` property.
        mode_def (Optional[:class:`dagster.ModeDefinition`]): If specified, defines the mode to
            use to construct the context. Specify this if you would like a context constructed
            with specific ``resource_defs`` or ``logger_defs``. By default, an ephemeral mode
            with a console logger will be constructed.
        run_config (Optional[dict]): The environment config dict with which to construct the
            context.

    Returns:
        :py:class:`~dagstermill.DagstermillExecutionContext`
    """
    check.opt_inst_param(mode_def, "mode_def", ModeDefinition)
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)

    # If we are running non-interactively, and there is already a context reconstituted, return
    # that context rather than overwriting it.
    if self.context is not None and isinstance(self.context, DagstermillRuntimeExecutionContext):
        return self.context

    if not mode_def:
        mode_def = ModeDefinition(logger_defs={"dagstermill": colored_console_logger})
        run_config["loggers"] = {"dagstermill": {}}

    solid_def = SolidDefinition(
        name="this_solid",
        input_defs=[],
        compute_fn=lambda *args, **kwargs: None,
        output_defs=[],
        description="Ephemeral solid constructed by dagstermill.get_context()",
        required_resource_keys=mode_def.resource_key_set,
    )

    pipeline_def = PipelineDefinition(
        [solid_def], mode_defs=[mode_def], name="ephemeral_dagstermill_pipeline"
    )

    run_id = make_new_run_id()

    # construct stubbed PipelineRun for notebook exploration...
    # The actual pipeline run during pipeline execution will be serialized and reconstituted
    # in the `reconstitute_pipeline_context` call
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name,
        run_id=run_id,
        run_config=run_config,
        mode=mode_def.name,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
        tags=None,
    )

    self.in_pipeline = False
    self.solid_def = solid_def
    self.pipeline = pipeline_def

    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode=mode_def.name)

    pipeline = InMemoryPipeline(pipeline_def)
    execution_plan = ExecutionPlan.build(pipeline, environment_config)

    with scoped_pipeline_context(
        execution_plan,
        pipeline,
        run_config,
        pipeline_run,
        DagsterInstance.ephemeral(),
        scoped_resources_builder_cm=self._setup_resources,
    ) as pipeline_context:
        self.context = DagstermillExecutionContext(
            pipeline_context=pipeline_context,
            pipeline_def=pipeline_def,
            solid_config=solid_config,
            resource_keys_to_init=get_required_resource_keys_to_init(
                execution_plan,
                pipeline_def,
                environment_config,
                pipeline_context.intermediate_storage_def,
            ),
            solid_name=solid_def.name,
        )

    return self.context

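# Typical interactive use in a notebook's `parameters` cell, per the docstring
# above. Hedged sketch: it assumes the module-level dagstermill.get_context
# wrapper over this method, and the solid_config value is illustrative.
import dagstermill

context = dagstermill.get_context(solid_config={"date": "2020-01-01"})
context.log.info("exploring interactively with solid_config: {}".format(context.solid_config))
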
def reconstitute_pipeline_context(
    self,
    output_log_path=None,
    marshal_dir=None,
    run_config=None,
    executable_dict=None,
    pipeline_run_dict=None,
    solid_handle_kwargs=None,
    instance_ref_dict=None,
):
    """Reconstitutes a context for dagstermill-managed execution.

    You'll see this function called to reconstruct a pipeline context within the ``injected
    parameters`` cell of a dagstermill output notebook. Users should not call this function
    interactively except when debugging output notebooks.

    Use :func:`dagstermill.get_context` in the ``parameters`` cell of your notebook to define a
    context for interactive exploration and development. This call will be replaced by one to
    :func:`dagstermill.reconstitute_pipeline_context` when the notebook is executed by
    dagstermill.
    """
    check.opt_str_param(output_log_path, "output_log_path")
    check.opt_str_param(marshal_dir, "marshal_dir")
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    check.dict_param(pipeline_run_dict, "pipeline_run_dict")
    check.dict_param(executable_dict, "executable_dict")
    check.dict_param(solid_handle_kwargs, "solid_handle_kwargs")
    check.dict_param(instance_ref_dict, "instance_ref_dict")

    pipeline = ReconstructablePipeline.from_dict(executable_dict)
    pipeline_def = pipeline.get_definition()

    try:
        instance_ref = unpack_value(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
    except Exception as err:  # pylint: disable=broad-except
        raise DagstermillError(
            "Error when attempting to resolve DagsterInstance from serialized InstanceRef"
        ) from err

    pipeline_run = unpack_value(pipeline_run_dict)

    solid_handle = SolidHandle.from_dict(solid_handle_kwargs)
    solid_def = pipeline_def.get_solid(solid_handle).definition

    self.marshal_dir = marshal_dir
    self.in_pipeline = True
    self.solid_def = solid_def
    self.pipeline = pipeline

    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode=pipeline_run.mode)

    execution_plan = ExecutionPlan.build(
        self.pipeline,
        environment_config,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    with scoped_pipeline_context(
        execution_plan,
        pipeline,
        run_config,
        pipeline_run,
        instance,
        scoped_resources_builder_cm=self._setup_resources,
        # Set this flag even though we're not in test for clearer error reporting
        raise_on_error=True,
    ) as pipeline_context:
        self.context = DagstermillRuntimeExecutionContext(
            pipeline_context=pipeline_context,
            pipeline_def=pipeline_def,
            solid_config=run_config.get("solids", {}).get(solid_def.name, {}).get("config"),
            resource_keys_to_init=get_required_resource_keys_to_init(
                execution_plan,
                pipeline_def,
                environment_config,
                pipeline_context.intermediate_storage_def,
            ),
            solid_name=solid_def.name,
        )

    return self.context

def test_execution_plan_reexecution():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = env_with_fs({"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}})
    result = execute_pipeline(pipeline_def, run_config=run_config, instance=instance)

    assert result.success

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id
    )
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 4
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_two")).obj == 6

    # re-execute add_two
    environment_config = EnvironmentConfig.build(pipeline_def, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def), environment_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        execution_plan=execution_plan,
        run_config=run_config,
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )

    step_events = execute_plan(
        execution_plan.build_subset_plan(["add_two"], pipeline_def, environment_config),
        InMemoryPipeline(pipeline_def),
        run_config=run_config,
        pipeline_run=pipeline_run,
        instance=instance,
    )

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id
    )
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 4
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_two")).obj == 6

    assert not get_step_output_event(step_events, "add_one")
    assert get_step_output_event(step_events, "add_two")