def test_resolve_memoized_execution_plan_yes_stored_results():
    """A step whose output version is already stored in the IO manager is skipped."""
    io_manager = VersionedInMemoryIOManager()
    pipeline_def = versioned_pipeline_factory(io_manager)
    plan = create_execution_plan(pipeline_def)
    env_config = EnvironmentConfig.build(pipeline_def)

    handle = StepOutputHandle("versioned_solid_no_input", "result")
    version = resolve_step_output_versions(pipeline_def, plan, env_config)[handle]
    # Pre-populate the IO manager so the producing step appears memoized.
    io_manager.values[(handle.step_key, handle.output_name, version)] = 4

    with DagsterInstance.ephemeral() as instance:
        memoized_plan = resolve_memoized_execution_plan(
            plan, pipeline_def, {}, instance, env_config
        )
        assert memoized_plan.step_keys_to_execute == ["versioned_solid_takes_input"]

        expected_handle = StepOutputHandle(
            step_key="versioned_solid_no_input", output_name="result"
        )
        # NOTE(review): "intput" presumably matches a misspelled input name
        # declared on the solid in versioned_pipeline_factory — confirm there.
        step = memoized_plan.get_step_by_key("versioned_solid_takes_input")
        assert step.step_input_dict["intput"].source.step_output_handle == expected_handle
def check_io_manager_intermediate_storage(
    mode_def: ModeDefinition, environment_config: EnvironmentConfig
) -> None:
    """Only one of io_manager and intermediate_storage should be set.

    Raises:
        DagsterInvariantViolationError: If the mode has both a non-default
            intermediate storage and a non-default IO manager configured.
    """
    # pylint: disable=comparison-with-callable
    from dagster.core.storage.system_storage import mem_intermediate_storage

    intermediate_storage_def = environment_config.intermediate_storage_def_for_mode(mode_def)
    intermediate_storage_is_default = (
        intermediate_storage_def is None or intermediate_storage_def == mem_intermediate_storage
    )

    io_manager = mode_def.resource_defs["io_manager"]
    io_manager_is_default = io_manager == mem_io_manager

    if not intermediate_storage_is_default and not io_manager_is_default:
        raise DagsterInvariantViolationError(
            # BUGFIX: the original message was missing a space between
            # "to your" and "ModeDefinition" (implicit string concatenation).
            'You have specified an intermediate storage, "{intermediate_storage_name}", and have '
            "also specified a default IO manager. You must specify only one. To avoid specifying "
            "an intermediate storage, omit the intermediate_storage_defs argument to your "
            'ModeDefinition and omit "intermediate_storage" in your run config. To avoid '
            'specifying a default IO manager, omit the "io_manager" key from the '
            "resource_defs argument to your ModeDefinition.".format(
                intermediate_storage_name=intermediate_storage_def.name
            )
        )
def test_optional_solid_with_optional_scalar_config():
    """An optional scalar config field yields an optional env type and a None config."""
    def _assert_config_none(context, value):
        assert context.solid_config is value

    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        solid_defs=[
            SolidDefinition(
                name='int_config_solid',
                config_schema=Field(Int, is_required=False),
                input_defs=[],
                output_defs=[],
                compute_fn=lambda context, _inputs: _assert_config_none(context, 234),
            )
        ],
    )

    environment_type = create_environment_type(pipeline_def)
    solids_field = environment_type.fields['solids']
    assert solids_field.is_required is False
    assert solids_field.config_type.fields['int_config_solid'].is_required is False

    env_obj = EnvironmentConfig.build(pipeline_def, {})
    assert env_obj.solids['int_config_solid'].config is None
def test_step_versions_with_resources():
    """Step version combines solid def version, config version, and resource versions."""
    run_config = {"resources": {"test_resource": {"config": {"input_str": "apple"}}}}
    plan = create_execution_plan(versioned_modes_pipeline, run_config=run_config)
    versions = resolve_step_versions_for_test(plan, run_config=run_config, mode="fakemode")

    env_config = EnvironmentConfig.build(
        versioned_modes_pipeline, mode="fakemode", run_config=run_config
    )
    resource_versions = resolve_resource_versions(
        env_config, versioned_modes_pipeline.get_mode_definition("fakemode")
    )
    # Hash of the versions of every resource the solid requires.
    resources_version = join_and_hash(
        *[
            resource_versions[key]
            for key in fake_solid_resources_versioned.required_resource_keys
        ]
    )
    solid_version = join_and_hash(
        fake_solid_resources_versioned.version,
        resolve_config_version(None),
        resources_version,
    )
    assert versions["fake_solid_resources_versioned.compute"] == join_and_hash(solid_version)
def test_basic_json_default_output_config_schema():
    """Output materializer config round-trips into type_materializer_specs."""
    env = EnvironmentConfig.build(
        single_int_output_pipeline(),
        {"solids": {"return_one": {"outputs": [{"result": {"json": {"path": "foo"}}}]}}},
    )
    assert env.solids["return_one"]
    assert env.solids["return_one"].outputs.type_materializer_specs == [
        {"result": {"json": {"path": "foo"}}}
    ]
def test_compile():
    """Execution steps coalesce under composite-qualified solid keys."""
    run_config = RunConfig()
    # NOTE(review): run_config=None is passed to EnvironmentConfig.build while the
    # local RunConfig is only used for its mode — preserved as-is; confirm intent.
    environment_config = EnvironmentConfig.build(
        composition,
        {'solids': {'add_four': {'inputs': {'num': {'value': 1}}}}},
        run_config=None,
    )
    plan = ExecutionPlan.build(
        composition, environment_config, composition.get_mode_definition(run_config.mode)
    )

    step_map = coalesce_execution_steps(plan)
    assert set(step_map.keys()) == {
        'add_four.add_two.add_one',
        'add_four.add_two.add_one_2',
        'add_four.add_two_2.add_one',
        'add_four.add_two_2.add_one_2',
        'div_four.div_two',
        'div_four.div_two_2',
    }
def test_solid_dictionary_some_no_config():
    """Solids without config still appear in env.solids with an empty SolidConfig."""
    @solid(name='int_config_solid', config_schema=Int, input_defs=[], output_defs=[])
    def int_config_solid(_):
        return None

    @solid(name='no_config_solid', input_defs=[], output_defs=[])
    def no_config_solid(_):
        return None

    @pipeline
    def pipeline_def():
        int_config_solid()
        no_config_solid()

    env = EnvironmentConfig.build(pipeline_def, {'solids': {'int_config_solid': {'config': 1}}})
    assert set(env.solids.keys()) == {'int_config_solid', 'no_config_solid'}
    assert env.solids == {
        'int_config_solid': SolidConfig(1),
        'no_config_solid': SolidConfig(),
    }
def test_execution_plan_reexecution_with_in_memory():
    """Re-executing a subset against in-memory intermediates must fail loudly."""
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = {"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}}

    parent_result = execute_pipeline(pipeline_def, run_config=run_config, instance=instance)
    assert parent_result.success

    # Re-execute only add_two as a child of the first run.
    environment_config = EnvironmentConfig.build(pipeline_def, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def), environment_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        execution_plan=execution_plan,
        run_config=run_config,
        parent_run_id=parent_result.run_id,
        root_run_id=parent_result.run_id,
    )

    # The parent run's in-memory intermediates are gone, so this must raise.
    with pytest.raises(DagsterInvariantViolationError):
        execute_plan(
            execution_plan.build_subset_plan(["add_two"], pipeline_def, environment_config),
            InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
def test_compile():
    """Execution steps coalesce under composite-qualified solid keys."""
    environment_config = EnvironmentConfig.build(
        composition,
        {'solids': {'add_four': {'inputs': {'num': {'value': 1}}}}},
    )
    plan = ExecutionPlan.build(InMemoryExecutablePipeline(composition), environment_config)

    step_map = coalesce_execution_steps(plan)
    assert set(step_map.keys()) == {
        'add_four.add_two.add_one',
        'add_four.add_two.add_one_2',
        'add_four.add_two_2.add_one',
        'add_four.add_two_2.add_one_2',
        'div_four.div_two',
        'div_four.div_two_2',
        'int_to_float',
    }
def test_solid_dictionary_some_no_config():
    """Solids without config still appear in env.solids with an empty SolidConfig."""
    @solid(name="int_config_solid", config_schema=Int, input_defs=[], output_defs=[])
    def int_config_solid(_):
        return None

    @solid(name="no_config_solid", input_defs=[], output_defs=[])
    def no_config_solid(_):
        return None

    @pipeline
    def pipeline_def():
        int_config_solid()
        no_config_solid()

    env = EnvironmentConfig.build(pipeline_def, {"solids": {"int_config_solid": {"config": 1}}})
    assert set(env.solids.keys()) == {"int_config_solid", "no_config_solid"}
    assert env.solids == {
        "int_config_solid": SolidConfig.from_dict({"config": 1}),
        "no_config_solid": SolidConfig.from_dict({}),
    }
def create_execution_plan(pipeline, environment_dict=None, mode=None, step_keys_to_execute=None):
    """Build an ExecutionPlan for a pipeline.

    Args:
        pipeline (Union[PipelineDefinition, ExecutablePipeline]): The pipeline to plan.
            A bare PipelineDefinition is wrapped for backwards compatibility.
        environment_dict (Optional[dict]): Environment config; defaults to empty.
        mode (Optional[str]): Mode to plan for; defaults to the pipeline's default mode.
        step_keys_to_execute (Optional[List[str]]): Restrict the plan to these steps.

    Returns:
        ExecutionPlan: The plan built against the resolved environment config.
    """
    # backcompat
    if isinstance(pipeline, PipelineDefinition):
        pipeline = InMemoryExecutablePipeline(pipeline)

    # Validate/normalize arguments before building anything.
    check.inst_param(pipeline, 'pipeline', ExecutablePipeline)
    pipeline_def = pipeline.get_definition()
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict', key_type=str)
    mode = check.opt_str_param(mode, 'mode', default=pipeline_def.get_default_mode_name())
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    environment_config = EnvironmentConfig.build(pipeline_def, environment_dict, mode=mode)
    return ExecutionPlan.build(
        pipeline, environment_config, mode=mode, step_keys_to_execute=step_keys_to_execute
    )
def test_solid_dictionary_type():
    """Per-solid config blobs resolve into a dict of SolidConfig objects."""
    pipeline_def = define_test_solids_config_pipeline()
    env_obj = EnvironmentConfig.build(
        pipeline_def,
        {
            "solids": {
                "int_config_solid": {"config": 1},
                "string_config_solid": {"config": "bar"},
            },
        },
    )

    solids = env_obj.solids
    assert set(solids.keys()) == {"int_config_solid", "string_config_solid"}
    assert solids == {
        "int_config_solid": SolidConfig.from_dict({"config": 1}),
        "string_config_solid": SolidConfig.from_dict({"config": "bar"}),
    }
def test_step_versions_composite_solid():
    """A solid without a declared version resolves to a None step version, even inside a composite."""
    @solid(config_schema=Field(String, is_required=False))
    def scalar_config_solid(context):
        yield Output(context.solid_config)

    @composite_solid(
        config_schema={"override_str": Field(String)},
        config_fn=lambda cfg: {"scalar_config_solid": {"config": cfg["override_str"]}},
    )
    def wrap():
        return scalar_config_solid()

    @pipeline
    def wrap_pipeline():
        wrap.alias("do_stuff")()

    run_config = {
        "solids": {"do_stuff": {"config": {"override_str": "override"}}},
        "loggers": {"console": {"config": {"log_level": "ERROR"}}},
    }
    speculative_execution_plan = create_execution_plan(wrap_pipeline, run_config=run_config)
    environment_config = EnvironmentConfig.build(wrap_pipeline, run_config=run_config)
    versions = resolve_step_versions(wrap_pipeline, speculative_execution_plan, environment_config)
    # FIX: compare to None with `is`, not `==` (PEP 8).
    assert versions["do_stuff.scalar_config_solid"] is None
def test_resolve_memoized_execution_plan_partial_versioning():
    """Unversioned steps always re-execute even when versioned outputs are memoized."""
    io_manager = VersionedInMemoryIOManager()
    pipeline_def = partially_versioned_pipeline_factory(io_manager)
    plan = create_execution_plan(pipeline_def)
    env_config = EnvironmentConfig.build(pipeline_def)

    handle = StepOutputHandle("versioned_solid_no_input", "result")
    version = resolve_step_output_versions(pipeline_def, plan, env_config)[handle]
    # Pre-populate the IO manager so the versioned producing step is memoized.
    io_manager.values[(handle.step_key, handle.output_name, version)] = 4

    with DagsterInstance.ephemeral() as instance:
        memoized_plan = resolve_memoized_execution_plan(
            plan, pipeline_def, {}, instance, env_config
        )
        assert memoized_plan.step_keys_to_execute == ["solid_takes_input"]
def test_compile():
    """Execution steps coalesce under composite-qualified solid keys."""
    environment_config = EnvironmentConfig.build(
        composition,
        {"solids": {"add_four": {"inputs": {"num": {"value": 1}}}}},
    )
    plan = ExecutionPlan.build(InMemoryPipeline(composition), environment_config)

    step_map = coalesce_execution_steps(plan)
    assert set(step_map.keys()) == {
        "add_four.add_two.add_one",
        "add_four.add_two.add_one_2",
        "add_four.add_two_2.add_one",
        "add_four.add_two_2.add_one_2",
        "div_four.div_two",
        "div_four.div_two_2",
        "int_to_float",
    }
def resolve_step_versions_for_test(execution_plan, run_config=None, mode=None):
    """Test helper: resolve step versions, building the environment config on the fly."""
    pipeline_def = execution_plan.pipeline_def
    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode)
    return resolve_step_versions(
        execution_plan=execution_plan,
        environment_config=environment_config,
        mode_def=pipeline_def.get_mode_definition(mode),
    )
def test_basic_json_default_output_config_schema():
    """Output materializer config is carried through to env.solids[...].outputs."""
    env = EnvironmentConfig.build(
        single_int_output_pipeline(),
        {'solids': {'return_one': {'outputs': [{'result': {'json': {'path': 'foo'}}}]}}},
    )
    assert env.solids['return_one']
    assert env.solids['return_one'].outputs == [{'result': {'json': {'path': 'foo'}}}]
def test_resource_versions():
    """Resource versions hash config + declared version; unversioned resources resolve to None."""
    run_config = {
        "resources": {
            "test_resource": {"config": {"input_str": "apple"}},
            "test_resource_no_version": {"config": {"input_str": "banana"}},
        }
    }
    environment_config = EnvironmentConfig.build(
        modes_pipeline, run_config=run_config, mode="fakemode"
    )
    resource_versions_by_key = resolve_resource_versions(
        environment_config, modes_pipeline.get_mode_definition("fakemode")
    )

    # Versioned resource: hash of its config version joined with its declared version.
    assert resource_versions_by_key["test_resource"] == join_and_hash(
        resolve_config_version({"config": {"input_str": "apple"}}), test_resource.version
    )
    # FIX: compare to None with `is`, not `==` (PEP 8).
    assert resource_versions_by_key["test_resource_no_version"] is None
    assert resource_versions_by_key["test_resource_no_config"] == join_and_hash(
        join_and_hash(), "42"
    )
def test_solid_dictionary_type():
    """Per-solid config blobs resolve into a dict of SolidConfig objects."""
    pipeline_def = define_test_solids_config_pipeline()
    env_obj = EnvironmentConfig.build(
        pipeline_def,
        {
            'solids': {
                'int_config_solid': {'config': 1},
                'string_config_solid': {'config': 'bar'},
            },
        },
    )

    solids = env_obj.solids
    assert set(solids.keys()) == {'int_config_solid', 'string_config_solid'}
    assert solids == {
        'int_config_solid': SolidConfig(1),
        'string_config_solid': SolidConfig('bar'),
    }
def resolve_memoized_execution_plan(self, execution_plan, run_config, mode):
    """Build an execution plan that skips steps whose outputs are already stored.

    Args:
        execution_plan: The speculative (full) execution plan.
        run_config: Run config used to resolve step output versions.
        mode: Mode name used to resolve the mode definition and env config.

    Returns:
        ExecutionPlan: Execution plan configured to only run unmemoized steps.

    Raises:
        DagsterInvariantViolationError: If no step has a version at all.
    """
    pipeline_def = execution_plan.pipeline.get_definition()
    pipeline_name = pipeline_def.name
    # Version for every step output, keyed by StepOutputHandle.
    step_output_versions = resolve_step_output_versions(
        execution_plan,
        EnvironmentConfig.build(pipeline_def, run_config, mode),
        pipeline_def.get_mode_definition(mode),
    )
    if all(version is None for version in step_output_versions.values()):
        raise DagsterInvariantViolationError(
            "While creating a memoized pipeline run, no steps have versions. At least one step "
            "must have a version.")
    # Addresses of outputs already stored for these versions; only versioned
    # outputs (truthy version) are eligible for lookup.
    step_output_addresses = self.get_addresses_for_step_output_versions({
        (pipeline_name, step_output_handle): version
        for step_output_handle, version in step_output_versions.items() if version
    })
    # A step must execute if any of its outputs has no stored address.
    step_keys_to_execute = list({
        step_output_handle.step_key
        for step_output_handle in step_output_versions.keys()
        if (pipeline_name, step_output_handle) not in step_output_addresses
    })
    return execution_plan.build_memoized_plan(step_keys_to_execute, step_output_addresses)
def create_context_creation_data(pipeline_def, environment_dict, pipeline_run, instance,
                                 execution_plan):
    # Assembles the bundle of objects needed to construct an execution context.
    # NOTE(review): pipeline_run is passed as the third positional argument to
    # EnvironmentConfig.build — confirm that this API version expects a run
    # object (rather than a mode) in that slot.
    environment_config = EnvironmentConfig.build(pipeline_def, environment_dict, pipeline_run)
    mode_def = pipeline_def.get_mode_definition(pipeline_run.mode)
    # Storage and executor definitions are both derived from the resolved config.
    system_storage_def = system_storage_def_from_config(
        mode_def, environment_config)
    executor_def = executor_def_from_config(mode_def, environment_config)
    execution_target_handle, _ = ExecutionTargetHandle.get_handle(pipeline_def)
    return ContextCreationData(
        pipeline_def=pipeline_def,
        environment_config=environment_config,
        pipeline_run=pipeline_run,
        mode_def=mode_def,
        system_storage_def=system_storage_def,
        execution_target_handle=execution_target_handle,
        executor_def=executor_def,
        instance=instance,
        resource_keys_to_init=get_required_resource_keys_to_init(
            execution_plan, system_storage_def),
    )
def test_addresses_for_version(version_storing_context):
    """Stored output addresses are retrievable by (pipeline, handle) and version."""
    @solid(version="abc")
    def solid1(_):
        yield Output(5, address="some_address")

    @solid(version="123")
    def solid2(_, _input1):
        pass

    @pipeline
    def my_pipeline():
        solid2(solid1())

    with version_storing_context() as ctx:
        instance, _ = ctx
        execute_pipeline(instance=instance, pipeline=my_pipeline)

        handle = StepOutputHandle("solid1.compute", "result")
        output_version = resolve_step_output_versions(
            create_execution_plan(my_pipeline),
            EnvironmentConfig.build(my_pipeline, {}, "default"),
            my_pipeline.get_mode_definition("default"),
        )[handle]

        addresses = instance.get_addresses_for_step_output_versions(
            {("my_pipeline", handle): output_version}
        )
        assert addresses == {("my_pipeline", handle): "some_address"}
def create_execution_plan(
    pipeline: Union[IPipeline, PipelineDefinition],
    run_config: Optional[dict] = None,
    mode: Optional[str] = None,
    step_keys_to_execute: Optional[List[str]] = None,
    # Annotation fixed: the default is None, so the type must be Optional.
    known_state: Optional[KnownExecutionState] = None,
) -> ExecutionPlan:
    """Build an ExecutionPlan for a pipeline.

    Args:
        pipeline: The pipeline (or bare definition) to plan.
        run_config: Run config; defaults to empty.
        mode: Mode to plan for; defaults to the pipeline's default mode.
        step_keys_to_execute: Restrict the plan to these steps.
        known_state: Prior execution state to build against, if any.

    Returns:
        ExecutionPlan: The plan built against the resolved environment config.
    """
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    # Validate/normalize arguments before building anything.
    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    mode = check.opt_str_param(mode, "mode", default=pipeline_def.get_default_mode_name())
    check.opt_list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)

    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode=mode)

    return ExecutionPlan.build(
        pipeline,
        environment_config,
        mode=mode,
        step_keys_to_execute=step_keys_to_execute,
        known_state=known_state,
    )
def validate_run_config(
    pipeline_def: PipelineDefinition,
    run_config: Optional[Dict[str, Any]] = None,
    mode: Optional[str] = None,
) -> Dict[str, Any]:
    """Validate a run config blob against a given pipeline and mode.

    If validation succeeds, returns a dictionary representation of the
    validated config actually used during execution.

    Args:
        pipeline_def (PipelineDefinition): The pipeline definition to validate run config against
        run_config (Optional[Dict[str, Any]]): The run config to validate
        mode (str): The mode of the pipeline to validate against (different modes may require
            different config)

    Returns:
        Dict[str, Any]: A dictionary representation of the validated config.
    """
    experimental_fn_warning("validate_run_config")

    # Normalize/validate arguments, then let EnvironmentConfig.build do the real work.
    pipeline_def = check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    mode = check.opt_str_param(mode, "mode", default=pipeline_def.get_default_mode_name())

    validated = EnvironmentConfig.build(pipeline_def, run_config, mode=mode)
    return validated.to_dict()
def create_context_creation_data(
    execution_plan,
    run_config,
    pipeline_run,
    instance,
):
    """Assemble the inputs needed to construct an execution context for this run."""
    pipeline_def = execution_plan.pipeline.get_definition()
    mode = pipeline_run.mode

    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode=mode)
    mode_def = pipeline_def.get_mode_definition(mode)
    intermediate_storage_def = environment_config.intermediate_storage_def_for_mode(mode_def)

    return ContextCreationData(
        pipeline=execution_plan.pipeline,
        environment_config=environment_config,
        pipeline_run=pipeline_run,
        mode_def=mode_def,
        intermediate_storage_def=intermediate_storage_def,
        executor_def=executor_def_from_config(mode_def, environment_config),
        instance=instance,
        resource_keys_to_init=get_required_resource_keys_to_init(
            execution_plan, intermediate_storage_def
        ),
        execution_plan=execution_plan,
    )
def test_whole_environment():
    """Environment type names and the resulting EnvironmentConfig are built correctly."""
    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        mode_definitions=[
            ModeDefinition(
                name='test_mode',
                resources={
                    'test_resource': ResourceDefinition(
                        resource_fn=lambda: None, config_field=Field(Any)
                    )
                },
            )
        ],
        solid_defs=[
            SolidDefinition(
                name='int_config_solid',
                config_field=Field(Int),
                inputs=[],
                outputs=[],
                compute_fn=lambda *args: None,
            ),
            SolidDefinition(
                name='no_config_solid', inputs=[], outputs=[], compute_fn=lambda *args: None
            ),
        ],
    )

    environment_type = create_environment_type(pipeline_def)
    assert (
        environment_type.fields['resources'].config_type.name
        == 'SomePipeline.Mode.TestMode.Resources'
    )
    solids_type = environment_type.fields['solids'].config_type
    assert solids_type.name == 'SomePipeline.SolidsConfigDictionary'
    assert (
        solids_type.fields['int_config_solid'].config_type.name
        == 'SomePipeline.SolidConfig.IntConfigSolid'
    )
    assert (
        environment_type.fields['expectations'].config_type.name
        == 'SomePipeline.ExpectationsConfig'
    )

    env = EnvironmentConfig.from_dict(
        throwing_evaluate_config_value(
            environment_type,
            {
                'resources': {'test_resource': {'config': 1}},
                'solids': {'int_config_solid': {'config': 123}},
            },
        )
    )

    assert isinstance(env, EnvironmentConfig)
    assert env.solids == {'int_config_solid': SolidConfig(123)}
    assert env.expectations == ExpectationsConfig(evaluate=True)
    assert env.resources == {'test_resource': {'config': 1}}
def _object_manager(init_context):
    # Adapts an intermediate storage definition to the object/IO manager API.
    # NOTE(review): `intermediate_storage_def` is not defined in this function —
    # presumably it is closed over from an enclosing factory not visible here;
    # confirm against the surrounding code.
    pipeline_run = init_context.pipeline_run
    instance = init_context.instance_for_backwards_compat
    pipeline_def = init_context.pipeline_def_for_backwards_compat
    # depend on InitResourceContext.instance_for_backwards_compat and pipeline_def_for_backwards_compat
    environment_config = EnvironmentConfig.build(pipeline_def, pipeline_run.run_config,
                                                 mode=pipeline_run.mode)
    mode_def = pipeline_def.get_mode_definition(pipeline_run.mode)
    # Build the init context the intermediate storage creation fn expects.
    intermediate_storage_context = InitIntermediateStorageContext(
        pipeline_def=pipeline_def,
        mode_def=mode_def,
        intermediate_storage_def=intermediate_storage_def,
        pipeline_run=pipeline_run,
        instance=instance,
        environment_config=environment_config,
        type_storage_plugin_registry=construct_type_storage_plugin_registry(
            pipeline_def, intermediate_storage_def),
        resources=init_context.resources,
        intermediate_storage_config=environment_config.
        intermediate_storage.intermediate_storage_config,
    )
    intermediate_storage = intermediate_storage_def.intermediate_storage_creation_fn(
        intermediate_storage_context)
    return IntermediateStorageAdapter(intermediate_storage)
def test_optional_solid_with_optional_scalar_config():
    """An optional scalar config field yields optional env fields and a None config."""
    def _assert_config_none(context, value):
        assert context.solid_config is value

    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        solid_defs=[
            SolidDefinition(
                name='int_config_solid',
                config_field=Field(Int, is_optional=True),
                inputs=[],
                outputs=[],
                compute_fn=lambda context, _inputs: _assert_config_none(context, 234),
            )
        ],
    )

    env_type = create_environment_type(pipeline_def)
    assert env_type.fields['solids'].is_optional is True

    solids_type = env_type.fields['solids'].config_type
    assert solids_type.fields['int_config_solid'].is_optional is True

    solids_default_obj = construct_solid_dictionary(
        throwing_evaluate_config_value(solids_type, {})
    )
    assert solids_default_obj['int_config_solid'].config is None

    env_obj = EnvironmentConfig.from_dict(throwing_evaluate_config_value(env_type, {}))
    assert env_obj.solids['int_config_solid'].config is None
def test_provided_default_on_resources_config():
    """Resource config defaults propagate up through the environment type and config."""
    @solid(name='some_solid', input_defs=[], output_defs=[])
    def some_solid(_):
        return None

    @pipeline(
        mode_defs=[
            ModeDefinition(
                name='some_mode',
                resource_defs={
                    'some_resource': ResourceDefinition(
                        resource_fn=lambda _: None,
                        config_schema={
                            'with_default_int': Field(Int, is_required=False, default_value=23434)
                        },
                    )
                },
            )
        ]
    )
    def pipeline_def():
        some_solid()

    env_type = create_environment_type(pipeline_def)
    resource_field = env_type.fields['resources'].config_type.fields['some_resource']
    assert resource_field.is_required is False

    resource_config_field = resource_field.config_type.fields['config']
    assert resource_config_field.is_required is False
    assert resource_config_field.default_value == {'with_default_int': 23434}
    assert resource_field.default_value == {'config': {'with_default_int': 23434}}

    value = EnvironmentConfig.build(pipeline_def, {})
    assert value.resources == {'some_resource': {'config': {'with_default_int': 23434}}}
def basic_resource_versions():
    """Resource versions hash config + declared version; unversioned resources resolve to None."""
    run_config = {
        "resources": {
            "basic_resource": {"config": {"input_str": "apple"}},
            "resource_no_version": {"config": {"input_str": "banana"}},
        }
    }
    environment_config = EnvironmentConfig.build(modes_pipeline, run_config, mode="fakemode")
    # NOTE(review): the second argument here is the pipeline itself, whereas other
    # call sites pass a mode definition — confirm which this API version expects.
    resource_versions_by_key = resolve_resource_versions(environment_config, modes_pipeline)

    assert resource_versions_by_key["basic_resource"] == join_and_hash(
        resolve_config_version({"input_str": "apple"}), basic_resource.version
    )
    # FIX: compare to None with `is`, not `==` (PEP 8).
    assert resource_versions_by_key["resource_no_version"] is None
    assert resource_versions_by_key["resource_no_config"] == join_and_hash(join_and_hash(), "42")