def test_using_file_system_for_subplan_missing_input():
    pipeline = define_inty_pipeline()
    run_config = {"storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()

    environment_config = EnvironmentConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    events = execute_plan(
        execution_plan.build_subset_plan(["add_one"], pipeline, environment_config),
        InMemoryPipeline(pipeline),
        instance,
        run_config=run_config,
        pipeline_run=pipeline_run,
    )
    failures = [event for event in events if event.event_type_value == "STEP_FAILURE"]
    assert len(failures) == 1
    assert failures[0].step_key == "add_one"
    assert "DagsterStepOutputNotFoundError" in failures[0].event_specific_data.error.message

def test_compile():
    run_config = RunConfig()
    environment_config = EnvironmentConfig.build(
        composition,
        {'solids': {'add_four': {'inputs': {'num': {'value': 1}}}}},
        run_config=None)

    plan = ExecutionPlan.build(
        composition, environment_config, composition.get_mode_definition(run_config.mode))

    res = coalesce_execution_steps(plan)

    assert set(res.keys()) == {
        'add_four.add_two.add_one',
        'add_four.add_two.add_one_2',
        'add_four.add_two_2.add_one',
        'add_four.add_two_2.add_one_2',
        'div_four.div_two',
        'div_four.div_two_2',
    }

def create_execution_plan(
    pipeline: Union[IPipeline, PipelineDefinition],
    run_config: Optional[dict] = None,
    mode: Optional[str] = None,
    step_keys_to_execute: Optional[List[str]] = None,
) -> ExecutionPlan:
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)

    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    mode = check.opt_str_param(mode, "mode", default=pipeline_def.get_default_mode_name())
    check.opt_list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)

    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode=mode)

    return ExecutionPlan.build(
        pipeline, environment_config, mode=mode, step_keys_to_execute=step_keys_to_execute)

def test_using_file_system_for_subplan_invalid_step():
    pipeline = define_inty_pipeline()

    run_config = {"storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()

    environment_config = EnvironmentConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    with pytest.raises(DagsterExecutionStepNotFoundError):
        execute_plan(
            execution_plan.build_subset_plan(["nope.compute"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )

def create_execution_plan(
    pipeline: Union[IPipeline, PipelineDefinition],
    run_config: Optional[dict] = None,
    mode: Optional[str] = None,
    step_keys_to_execute: Optional[List[str]] = None,
    known_state: Optional[KnownExecutionState] = None,
) -> ExecutionPlan:
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)

    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    mode = check.opt_str_param(mode, "mode", default=pipeline_def.get_default_mode_name())
    check.opt_nullable_list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)

    resolved_run_config = ResolvedRunConfig.build(pipeline_def, run_config, mode=mode)

    return ExecutionPlan.build(
        pipeline,
        resolved_run_config,
        step_keys_to_execute=step_keys_to_execute,
        known_state=known_state,
    )

def test_compile(): environment_config = EnvironmentConfig.build( composition, {"solids": { "add_four": { "inputs": { "num": { "value": 1 } } } }}, ) plan = ExecutionPlan.build(InMemoryPipeline(composition), environment_config) res = coalesce_execution_steps(plan) assert set(res.keys()) == { "add_four.add_two.add_one", "add_four.add_two.add_one_2", "add_four.add_two_2.add_one", "add_four.add_two_2.add_one_2", "div_four.div_two", "div_four.div_two_2", "int_to_float", }
def test_using_file_system_for_subplan_missing_input():
    pipeline = define_inty_pipeline(using_file_system=True)
    instance = DagsterInstance.ephemeral()

    resolved_run_config = ResolvedRunConfig.build(pipeline)
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        resolved_run_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    events = execute_plan(
        execution_plan.build_subset_plan(["add_one"], pipeline, resolved_run_config),
        InMemoryPipeline(pipeline),
        instance,
        pipeline_run=pipeline_run,
    )
    failures = [event for event in events if event.event_type_value == "STEP_FAILURE"]
    assert len(failures) == 1
    assert failures[0].step_key == "add_one"
    assert "DagsterExecutionLoadInputError" in failures[0].event_specific_data.error.message

def create_execution_plan(pipeline, environment_dict=None, mode=None, step_keys_to_execute=None):
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict', key_type=str)
    mode = check.opt_str_param(mode, 'mode', default=pipeline_def.get_default_mode_name())
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    environment_config = EnvironmentConfig.build(pipeline_def, environment_dict, mode=mode)

    return ExecutionPlan.build(
        pipeline, environment_config, mode=mode, step_keys_to_execute=step_keys_to_execute)

def test_compile():
    # TODO: remove dependency on legacy_examples
    # https://github.com/dagster-io/dagster/issues/2653
    environment_config = EnvironmentConfig.build(
        composition,
        {'solids': {'add_four': {'inputs': {'num': {'value': 1}}}}},
    )

    plan = ExecutionPlan.build(InMemoryExecutablePipeline(composition), environment_config)

    res = coalesce_execution_steps(plan)

    assert set(res.keys()) == {
        'add_four.add_two.add_one',
        'add_four.add_two.add_one_2',
        'add_four.add_two_2.add_one',
        'add_four.add_two_2.add_one_2',
        'div_four.div_two',
        'div_four.div_two_2',
        'int_to_float',
    }

def test_compile():
    environment_config = EnvironmentConfig.build(
        composition,
        {'solids': {'add_four': {'inputs': {'num': {'value': 1}}}}},
    )

    plan = ExecutionPlan.build(InMemoryExecutablePipeline(composition), environment_config)

    res = coalesce_execution_steps(plan)

    assert set(res.keys()) == {
        'add_four.add_two.add_one',
        'add_four.add_two.add_one_2',
        'add_four.add_two_2.add_one',
        'add_four.add_two_2.add_one_2',
        'div_four.div_two',
        'div_four.div_two_2',
        'int_to_float',
    }

def test_execution_plan_reexecution_with_in_memory():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = {"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}}
    result = execute_pipeline(pipeline_def, run_config=run_config, instance=instance)

    assert result.success

    ## re-execute add_two

    environment_config = EnvironmentConfig.build(pipeline_def, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def), environment_config)

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        execution_plan=execution_plan,
        run_config=run_config,
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )

    with pytest.raises(DagsterInvariantViolationError):
        execute_plan(
            execution_plan.build_subset_plan(["add_two"], pipeline_def, environment_config),
            InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )

def create_execution_plan(pipeline, environment_dict=None, mode=None):
    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict', key_type=str)
    check.opt_str_param(mode, 'mode')

    environment_config = create_environment_config(pipeline, environment_dict, mode)

    return ExecutionPlan.build(pipeline, environment_config)

def test_using_intermediate_file_system_for_subplan_multiprocessing():
    with instance_for_test() as instance:
        run_config = {"intermediate_storage": {"filesystem": {}}}

        pipeline = reconstructable(define_inty_pipeline)

        environment_config = EnvironmentConfig.build(
            pipeline.get_definition(),
            run_config=run_config,
        )
        execution_plan = ExecutionPlan.build(
            pipeline,
            environment_config,
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline.get_definition(), execution_plan=execution_plan)

        assert execution_plan.get_step_by_key("return_one")

        return_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(
                    ["return_one"], pipeline.get_definition(), environment_config),
                pipeline,
                instance,
                run_config=dict(run_config, execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            ))

        intermediate_storage = build_fs_intermediate_storage(
            instance.intermediates_directory, pipeline_run.run_id)

        assert get_step_output(return_one_step_events, "return_one")
        assert intermediate_storage.has_intermediate(None, StepOutputHandle("return_one"))
        assert intermediate_storage.get_intermediate(
            None, Int, StepOutputHandle("return_one")).obj == 1

        add_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(
                    ["add_one"], pipeline.get_definition(), environment_config),
                pipeline,
                instance,
                run_config=dict(run_config, execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            ))

        assert get_step_output(add_one_step_events, "add_one")
        assert intermediate_storage.has_intermediate(None, StepOutputHandle("add_one"))
        assert intermediate_storage.get_intermediate(
            None, Int, StepOutputHandle("add_one")).obj == 2

def _execute_pipeline_iterator(context_or_failure_event):
    # Due to use of context managers, if the user land code in context or resource init fails
    # we can get either a pipeline_context or the failure event here.
    if (isinstance(context_or_failure_event, DagsterEvent) and
            context_or_failure_event.event_type == DagsterEventType.PIPELINE_INIT_FAILURE):
        yield context_or_failure_event
        return

    pipeline_context = context_or_failure_event
    check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
    yield DagsterEvent.pipeline_start(pipeline_context)

    execution_plan = ExecutionPlan.build(
        pipeline_context.pipeline_def,
        pipeline_context.environment_config,
        pipeline_context.mode_def,
    )

    steps = execution_plan.topological_steps()
    if not steps:
        pipeline_context.log.debug(
            'Pipeline {pipeline} has no nodes and no execution will happen'.format(
                pipeline=pipeline_context.pipeline_def.display_name))
        yield DagsterEvent.pipeline_success(pipeline_context)
        return

    _setup_reexecution(pipeline_context.run_config, pipeline_context, execution_plan)

    pipeline_context.log.debug(
        'About to execute the compute node graph in the following order {order}'.format(
            order=[step.key for step in steps]))

    check.invariant(
        len([
            step_input for step_input in steps[0].step_inputs if step_input.is_from_output
        ]) == 0)

    pipeline_success = True

    try:
        for event in invoke_executor_on_plan(
                pipeline_context, execution_plan,
                pipeline_context.run_config.step_keys_to_execute):
            if event.is_step_failure:
                pipeline_success = False
            yield event
    finally:
        if pipeline_success:
            yield DagsterEvent.pipeline_success(pipeline_context)
        else:
            yield DagsterEvent.pipeline_failure(pipeline_context)

def create_execution_plan(pipeline, environment_dict=None, run_config=None):
    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict', key_type=str)
    run_config = check.opt_inst_param(run_config, 'run_config', RunConfig, RunConfig())

    environment_config = EnvironmentConfig.build(pipeline, environment_dict, run_config)

    return ExecutionPlan.build(
        pipeline, environment_config, pipeline.get_mode_definition(run_config.mode)
    )

def test_using_file_system_for_subplan_multiprocessing():
    with instance_for_test() as instance:
        pipeline = reconstructable(define_reconstructable_inty_pipeline)

        resolved_run_config = ResolvedRunConfig.build(pipeline.get_definition())
        execution_plan = ExecutionPlan.build(
            pipeline,
            resolved_run_config,
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline.get_definition(), execution_plan=execution_plan)

        assert execution_plan.get_step_by_key("return_one")

        return_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(
                    ["return_one"], pipeline.get_definition(), resolved_run_config),
                pipeline,
                instance,
                run_config=dict(execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            ))

        assert get_step_output(return_one_step_events, "return_one")
        with open(
                os.path.join(instance.storage_directory(), pipeline_run.run_id, "return_one",
                             "result"),
                "rb",
        ) as read_obj:
            assert pickle.load(read_obj) == 1

        add_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(
                    ["add_one"], pipeline.get_definition(), resolved_run_config),
                pipeline,
                instance,
                run_config=dict(execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            ))

        assert get_step_output(add_one_step_events, "add_one")
        with open(
                os.path.join(instance.storage_directory(), pipeline_run.run_id, "add_one",
                             "result"),
                "rb",
        ) as read_obj:
            assert pickle.load(read_obj) == 2

def test_using_intermediates_file_system_for_subplan():
    pipeline = define_inty_pipeline()

    run_config = {"intermediate_storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()

    environment_config = EnvironmentConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id)

    assert get_step_output(return_one_step_events, "return_one")
    assert intermediate_storage.has_intermediate(None, StepOutputHandle("return_one"))
    assert intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("return_one")).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    assert get_step_output(add_one_step_events, "add_one")
    assert intermediate_storage.has_intermediate(None, StepOutputHandle("add_one"))
    assert intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("add_one")).obj == 2

def test_using_file_system_for_subplan():
    pipeline = define_inty_pipeline(using_file_system=True)

    instance = DagsterInstance.ephemeral()

    resolved_run_config = ResolvedRunConfig.build(pipeline)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline), resolved_run_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline, resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        ))

    assert get_step_output(return_one_step_events, "return_one")
    with open(
            os.path.join(instance.storage_directory(), pipeline_run.run_id, "return_one",
                         "result"),
            "rb",
    ) as read_obj:
        assert pickle.load(read_obj) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline, resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        ))

    assert get_step_output(add_one_step_events, "add_one")
    with open(
            os.path.join(instance.storage_directory(), pipeline_run.run_id, "add_one", "result"),
            "rb",
    ) as read_obj:
        assert pickle.load(read_obj) == 2

def execute_list_versions_command(instance, kwargs):
    check.inst_param(instance, "instance", DagsterInstance)

    config = list(check.opt_tuple_param(kwargs.get("config"), "config", default=(), of_type=str))
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")

    if preset and config:
        raise click.UsageError("Can not use --preset with --config.")

    pipeline_origin = get_pipeline_python_origin_from_kwargs(kwargs)
    pipeline = recon_pipeline_from_origin(pipeline_origin)
    run_config = get_run_config_from_file_list(config)

    environment_config = EnvironmentConfig.build(pipeline.get_definition(), run_config, mode=mode)
    execution_plan = ExecutionPlan.build(pipeline, environment_config)

    step_output_versions = resolve_step_output_versions(
        pipeline.get_definition(), execution_plan, environment_config)
    memoized_plan = resolve_memoized_execution_plan(
        execution_plan, pipeline.get_definition(), run_config, instance, environment_config)
    # the step keys that we need to execute are those which do not have their inputs populated.
    step_keys_not_stored = set(memoized_plan.step_keys_to_execute)
    table = []
    for step_output_handle, version in step_output_versions.items():
        table.append([
            "{key}.{output}".format(
                key=step_output_handle.step_key, output=step_output_handle.output_name),
            version,
            "stored"
            if step_output_handle.step_key not in step_keys_not_stored else "to-be-recomputed",
        ])
    table_str = tabulate(
        table, headers=["Step Output", "Version", "Status of Output"], tablefmt="github")
    click.echo(table_str)

def test_execute_step_wrong_step_key():
    pipeline = define_inty_pipeline()
    instance = DagsterInstance.ephemeral()

    environment_config = EnvironmentConfig.build(pipeline)
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            execution_plan.build_subset_plan(["nope.compute"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )

    assert exc_info.value.step_keys == ["nope.compute"]

    assert str(exc_info.value) == "Can not build subset plan from unknown step: nope.compute"

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            execution_plan.build_subset_plan(
                ["nope.compute", "nuh_uh.compute"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )

    assert exc_info.value.step_keys == ["nope.compute", "nuh_uh.compute"]

    assert (
        str(exc_info.value) ==
        "Can not build subset plan from unknown steps: nope.compute, nuh_uh.compute"
    )

def execute(self):
    from dagster.core.execution.api import scoped_pipeline_context

    check.inst(self.run_config.executor_config, MultiprocessExecutorConfig)
    pipeline = self.run_config.executor_config.handle.build_pipeline_definition()

    with scoped_pipeline_context(
            pipeline, self.environment_dict,
            self.run_config.with_tags(pid=str(os.getpid()))) as pipeline_context:
        execution_plan = ExecutionPlan.build(
            pipeline_context.pipeline_def, pipeline_context.environment_config)

        for step_event in InProcessEngine.execute(
                pipeline_context, execution_plan, step_keys_to_execute=[self.step_key]):
            yield step_event

def test_using_intermediates_to_override():
    pipeline = define_inty_pipeline()

    run_config = {"storage": {"filesystem": {}}, "intermediate_storage": {"in_memory": {}}}

    instance = DagsterInstance.ephemeral()

    resolved_run_config = ResolvedRunConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        resolved_run_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline, resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id)

    assert get_step_output(return_one_step_events, "return_one")
    assert not intermediate_storage.has_intermediate(None, StepOutputHandle("return_one"))

def test_using_file_system_for_subplan_invalid_step():
    pipeline = define_inty_pipeline(using_file_system=True)

    instance = DagsterInstance.ephemeral()

    resolved_run_config = ResolvedRunConfig.build(pipeline)
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        resolved_run_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    with pytest.raises(DagsterExecutionStepNotFoundError):
        execute_plan(
            execution_plan.build_subset_plan(["nope.compute"], pipeline, resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            pipeline_run=pipeline_run,
        )

def reconstitute_pipeline_context(
    self,
    output_log_path=None,
    marshal_dir=None,
    run_config=None,
    executable_dict=None,
    pipeline_run_dict=None,
    solid_handle_kwargs=None,
    instance_ref_dict=None,
):
    """Reconstitutes a context for dagstermill-managed execution.

    You'll see this function called to reconstruct a pipeline context within the
    ``injected parameters`` cell of a dagstermill output notebook. Users should not call this
    function interactively except when debugging output notebooks.

    Use :func:`dagstermill.get_context` in the ``parameters`` cell of your notebook to define a
    context for interactive exploration and development. This call will be replaced by one to
    :func:`dagstermill.reconstitute_pipeline_context` when the notebook is executed by
    dagstermill.
    """
    check.opt_str_param(output_log_path, "output_log_path")
    check.opt_str_param(marshal_dir, "marshal_dir")
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    check.dict_param(pipeline_run_dict, "pipeline_run_dict")
    check.dict_param(executable_dict, "executable_dict")
    check.dict_param(solid_handle_kwargs, "solid_handle_kwargs")
    check.dict_param(instance_ref_dict, "instance_ref_dict")

    pipeline = ReconstructablePipeline.from_dict(executable_dict)
    pipeline_def = pipeline.get_definition()

    try:
        instance_ref = unpack_value(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
    except Exception as err:  # pylint: disable=broad-except
        raise DagstermillError(
            "Error when attempting to resolve DagsterInstance from serialized InstanceRef"
        ) from err

    pipeline_run = unpack_value(pipeline_run_dict)

    solid_handle = SolidHandle.from_dict(solid_handle_kwargs)
    solid_def = pipeline_def.get_solid(solid_handle).definition

    self.marshal_dir = marshal_dir
    self.in_pipeline = True
    self.solid_def = solid_def
    self.pipeline = pipeline

    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode=pipeline_run.mode)

    execution_plan = ExecutionPlan.build(
        self.pipeline,
        environment_config,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )

    with scoped_pipeline_context(
        execution_plan,
        pipeline,
        run_config,
        pipeline_run,
        instance,
        scoped_resources_builder_cm=self._setup_resources,
        # Set this flag even though we're not in test for clearer error reporting
        raise_on_error=True,
    ) as pipeline_context:
        self.context = DagstermillRuntimeExecutionContext(
            pipeline_context=pipeline_context,
            pipeline_def=pipeline_def,
            solid_config=run_config.get("solids", {}).get(solid_def.name, {}).get("config"),
            resource_keys_to_init=get_required_resource_keys_to_init(
                execution_plan,
                pipeline_def,
                environment_config,
                pipeline_context.intermediate_storage_def,
            ),
            solid_name=solid_def.name,
        )

    return self.context

def get_context(self, solid_config=None, mode_def=None, run_config=None):
    """Get a dagstermill execution context for interactive exploration and development.

    Args:
        solid_config (Optional[Any]): If specified, this value will be made available on the
            context as its ``solid_config`` property.
        mode_def (Optional[:class:`dagster.ModeDefinition`]): If specified, defines the mode to
            use to construct the context. Specify this if you would like a context constructed
            with specific ``resource_defs`` or ``logger_defs``. By default, an ephemeral mode
            with a console logger will be constructed.
        run_config(Optional[dict]): The environment config dict with which to construct
            the context.

    Returns:
        :py:class:`~dagstermill.DagstermillExecutionContext`
    """
    check.opt_inst_param(mode_def, "mode_def", ModeDefinition)
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)

    # If we are running non-interactively, and there is already a context reconstituted, return
    # that context rather than overwriting it.
    if self.context is not None and isinstance(self.context, DagstermillRuntimeExecutionContext):
        return self.context

    if not mode_def:
        mode_def = ModeDefinition(logger_defs={"dagstermill": colored_console_logger})
        run_config["loggers"] = {"dagstermill": {}}

    solid_def = SolidDefinition(
        name="this_solid",
        input_defs=[],
        compute_fn=lambda *args, **kwargs: None,
        output_defs=[],
        description="Ephemeral solid constructed by dagstermill.get_context()",
        required_resource_keys=mode_def.resource_key_set,
    )

    pipeline_def = PipelineDefinition(
        [solid_def], mode_defs=[mode_def], name="ephemeral_dagstermill_pipeline")

    run_id = make_new_run_id()

    # construct stubbed PipelineRun for notebook exploration...
    # The actual pipeline run during pipeline execution will be serialized and reconstituted
    # in the `reconstitute_pipeline_context` call
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name,
        run_id=run_id,
        run_config=run_config,
        mode=mode_def.name,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
        tags=None,
    )

    self.in_pipeline = False
    self.solid_def = solid_def
    self.pipeline = pipeline_def

    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode=mode_def.name)

    pipeline = InMemoryPipeline(pipeline_def)
    execution_plan = ExecutionPlan.build(pipeline, environment_config)

    with scoped_pipeline_context(
        execution_plan,
        pipeline,
        run_config,
        pipeline_run,
        DagsterInstance.ephemeral(),
        scoped_resources_builder_cm=self._setup_resources,
    ) as pipeline_context:
        self.context = DagstermillExecutionContext(
            pipeline_context=pipeline_context,
            pipeline_def=pipeline_def,
            solid_config=solid_config,
            resource_keys_to_init=get_required_resource_keys_to_init(
                execution_plan,
                pipeline_def,
                environment_config,
                pipeline_context.intermediate_storage_def,
            ),
            solid_name=solid_def.name,
        )

    return self.context

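# The docstring above points interactive users at dagstermill.get_context. A minimal sketch of
# that workflow, assuming the module-level dagstermill API in a notebook "parameters" cell; the
# solid_config value is purely illustrative and not taken from the snippets above.
import dagstermill

# Build an ephemeral context (default mode with a console logger) for exploration.
context = dagstermill.get_context(solid_config={"threshold": 0.5})
print(context.solid_config)  # -> {"threshold": 0.5}
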
def test_gcs_pickle_io_manager_execution(gcs_bucket):
    inty_job = define_inty_job()

    run_config = {"resources": {"io_manager": {"config": {"gcs_bucket": gcs_bucket}}}}

    run_id = make_new_run_id()

    resolved_run_config = ResolvedRunConfig.build(inty_job, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(inty_job), resolved_run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name=inty_job.name, run_id=run_id, run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys, inty_job, resolved_run_config),
            pipeline=InMemoryPipeline(inty_job),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(return_one_step_events, "return_one")

    io_manager = PickledObjectGCSIOManager(gcs_bucket, storage.Client())
    step_output_handle = StepOutputHandle("return_one")
    context = build_input_context(upstream_output=build_output_context(
        step_key=step_output_handle.step_key,
        name=step_output_handle.output_name,
        run_id=run_id,
    ))
    assert io_manager.load_input(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], inty_job, resolved_run_config),
            pipeline=InMemoryPipeline(inty_job),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    step_output_handle = StepOutputHandle("add_one")
    context = build_input_context(upstream_output=build_output_context(
        step_key=step_output_handle.step_key,
        name=step_output_handle.output_name,
        run_id=run_id,
    ))

    assert get_step_output(add_one_step_events, "add_one")
    assert io_manager.load_input(context) == 2

def test_using_gcs_for_subplan(gcs_bucket):
    pipeline_def = define_inty_pipeline()

    run_config = {"intermediate_storage": {"gcs": {"config": {"gcs_bucket": gcs_bucket}}}}

    run_id = make_new_run_id()

    environment_config = EnvironmentConfig.build(pipeline_def, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def), environment_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name, run_id=run_id, run_config=run_config
    )

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys, pipeline_def, environment_config),
            pipeline=InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, "return_one")
    with scoped_pipeline_context(
        execution_plan.build_subset_plan(["return_one"], pipeline_def, environment_config),
        InMemoryPipeline(pipeline_def),
        run_config,
        pipeline_run,
        instance,
    ) as context:
        intermediate_storage = GCSIntermediateStorage(
            gcs_bucket,
            run_id,
            client=context.scoped_resources_builder.build(
                required_resource_keys={"gcs"},
            ).gcs,
        )
        assert intermediate_storage.has_intermediate(context, StepOutputHandle("return_one"))
        assert (
            intermediate_storage.get_intermediate(context, Int, StepOutputHandle("return_one")).obj
            == 1
        )

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline_def, environment_config),
            pipeline=InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(add_one_step_events, "add_one")
    with scoped_pipeline_context(
        execution_plan.build_subset_plan(["return_one"], pipeline_def, environment_config),
        InMemoryPipeline(pipeline_def),
        run_config,
        pipeline_run,
        instance,
    ) as context:
        assert intermediate_storage.has_intermediate(context, StepOutputHandle("add_one"))
        assert (
            intermediate_storage.get_intermediate(context, Int, StepOutputHandle("add_one")).obj
            == 2
        )

def test_using_s3_for_subplan(mock_s3_bucket):
    pipeline_def = define_inty_pipeline()

    run_config = {"intermediate_storage": {"s3": {"config": {"s3_bucket": mock_s3_bucket.name}}}}

    run_id = make_new_run_id()

    environment_config = EnvironmentConfig.build(pipeline_def, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def), environment_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name, run_id=run_id, run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys, pipeline_def, environment_config),
            pipeline=InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(return_one_step_events, "return_one")
    with scoped_pipeline_context(
            execution_plan.build_subset_plan(["return_one"], pipeline_def, environment_config),
            InMemoryPipeline(pipeline_def),
            run_config,
            pipeline_run,
            instance,
    ) as context:
        intermediates_manager = S3IntermediateStorage(
            mock_s3_bucket.name,
            run_id,
            s3_session=context.scoped_resources_builder.build(
                required_resource_keys={"s3"},
            ).s3,
        )
        step_output_handle = StepOutputHandle("return_one")
        assert intermediates_manager.has_intermediate(context, step_output_handle)
        assert intermediates_manager.get_intermediate(context, Int, step_output_handle).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline_def, environment_config),
            pipeline=InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(add_one_step_events, "add_one")
    with scoped_pipeline_context(
            execution_plan.build_subset_plan(["add_one"], pipeline_def, environment_config),
            InMemoryPipeline(pipeline_def),
            run_config,
            pipeline_run,
            instance,
    ) as context:
        step_output_handle = StepOutputHandle("add_one")
        assert intermediates_manager.has_intermediate(context, step_output_handle)
        assert intermediates_manager.get_intermediate(context, Int, step_output_handle).obj == 2

def test_execution_plan_reexecution():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = env_with_fs({"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}})
    result = execute_pipeline(
        pipeline_def,
        run_config=run_config,
        instance=instance,
    )

    assert result.success

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id)
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 4
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_two")).obj == 6

    ## re-execute add_two

    environment_config = EnvironmentConfig.build(
        pipeline_def,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline_def),
        environment_config,
    )

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        execution_plan=execution_plan,
        run_config=run_config,
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )

    step_events = execute_plan(
        execution_plan.build_subset_plan(["add_two"], pipeline_def, environment_config),
        InMemoryPipeline(pipeline_def),
        run_config=run_config,
        pipeline_run=pipeline_run,
        instance=instance,
    )

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id)
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 4
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_two")).obj == 6

    assert not get_step_output_event(step_events, "add_one")
    assert get_step_output_event(step_events, "add_two")

def test_s3_pickle_io_manager_execution(mock_s3_bucket):
    pipeline_def = define_inty_pipeline()

    run_config = {"resources": {"io_manager": {"config": {"s3_bucket": mock_s3_bucket.name}}}}

    run_id = make_new_run_id()

    resolved_run_config = ResolvedRunConfig.build(pipeline_def, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def), resolved_run_config)

    assert execution_plan.get_step_by_key("return_one")

    step_keys = ["return_one"]
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name, run_id=run_id, run_config=run_config)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys, pipeline_def, resolved_run_config),
            pipeline=InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(return_one_step_events, "return_one")

    io_manager = PickledObjectS3IOManager(
        mock_s3_bucket.name, construct_s3_client(max_attempts=5), s3_prefix="dagster")
    step_output_handle = StepOutputHandle("return_one")
    context = build_input_context(upstream_output=build_output_context(
        step_key=step_output_handle.step_key,
        name=step_output_handle.output_name,
        run_id=run_id,
    ))
    assert io_manager.load_input(context) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline_def, resolved_run_config),
            pipeline=InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    step_output_handle = StepOutputHandle("add_one")
    context = build_input_context(upstream_output=build_output_context(
        step_key=step_output_handle.step_key,
        name=step_output_handle.output_name,
        run_id=run_id,
    ))

    assert get_step_output(add_one_step_events, "add_one")
    assert io_manager.load_input(context) == 2