def test_failure_propagation():
    """
      B =========== C
     //              \\
    A                  F (skipped)
     \\              //
      D (fails) == E (skipped)
    """
    solid_a = create_root_success_solid("A")

    def fail_fn(_context, inputs):
        check.failed("user error")
        return inputs

    def success_fn(_context, inputs):
        return inputs

    solid_b = single_output_solid(
        name="B",
        input_defs=[InputDefinition(name="A")],
        compute_fn=success_fn,
        output_def=OutputDefinition(),
    )
    solid_c = single_output_solid(
        name="C",
        input_defs=[InputDefinition(name="B")],
        compute_fn=success_fn,
        output_def=OutputDefinition(),
    )
    solid_d = single_output_solid(
        name="D",
        input_defs=[InputDefinition(name="A")],
        compute_fn=fail_fn,
        output_def=OutputDefinition(),
    )
    solid_e = single_output_solid(
        name="E",
        input_defs=[InputDefinition(name="D")],
        compute_fn=success_fn,
        output_def=OutputDefinition(),
    )
    solid_f = single_output_solid(
        name="F",
        input_defs=[InputDefinition(name="C"), InputDefinition(name="E")],
        compute_fn=success_fn,
        output_def=OutputDefinition(),
    )

    pipeline_def = PipelineDefinition(
        solid_defs=[solid_a, solid_b, solid_c, solid_d, solid_e, solid_f],
        dependencies={
            "B": {"A": DependencyDefinition(solid_a.name)},
            "D": {"A": DependencyDefinition(solid_a.name)},
            "C": {"B": DependencyDefinition(solid_b.name)},
            "E": {"D": DependencyDefinition(solid_d.name)},
            "F": {
                "C": DependencyDefinition(solid_c.name),
                "E": DependencyDefinition(solid_e.name),
            },
        },
    )

    pipeline_result = execute_pipeline(pipeline_def, raise_on_error=False)

    assert pipeline_result.result_for_solid("A").success
    assert pipeline_result.result_for_solid("B").success
    assert pipeline_result.result_for_solid("C").success
    assert not pipeline_result.result_for_solid("D").success
    assert pipeline_result.result_for_solid("D").failure_data.error.cls_name == "CheckError"
    assert not pipeline_result.result_for_solid("E").success
    assert pipeline_result.result_for_solid("E").skipped
    assert not pipeline_result.result_for_solid("F").success
    assert pipeline_result.result_for_solid("F").skipped
def define_hello_world_explicit_yield_pipeline():
    return PipelineDefinition(
        name='hello_world_explicit_yield_pipeline',
        solid_defs=[define_hello_world_explicit_yield()],
    )
def define_test_all_scalars_pipeline():
    @lambda_solid(input_defs=[InputDefinition('num', Int)])
    def take_int(num):
        return num

    @lambda_solid(output_def=OutputDefinition(Int))
    def produce_int():
        return 2

    @lambda_solid(input_defs=[InputDefinition('string', String)])
    def take_string(string):
        return string

    @lambda_solid(output_def=OutputDefinition(String))
    def produce_string():
        return 'foo'

    @lambda_solid(input_defs=[InputDefinition('path', Path)])
    def take_path(path):
        return path

    @lambda_solid(output_def=OutputDefinition(Path))
    def produce_path():
        return '/path/to/foo'

    @lambda_solid(input_defs=[InputDefinition('float_number', Float)])
    def take_float(float_number):
        return float_number

    @lambda_solid(output_def=OutputDefinition(Float))
    def produce_float():
        return 3.14

    @lambda_solid(input_defs=[InputDefinition('bool_value', Bool)])
    def take_bool(bool_value):
        return bool_value

    @lambda_solid(output_def=OutputDefinition(Bool))
    def produce_bool():
        return True

    @lambda_solid(input_defs=[InputDefinition('any_value', Any)])
    def take_any(any_value):
        return any_value

    @lambda_solid(output_def=OutputDefinition(Any))
    def produce_any():
        return True

    @lambda_solid(input_defs=[InputDefinition('string_list', List[String])])
    def take_string_list(string_list):
        return string_list

    @lambda_solid(input_defs=[InputDefinition('nullable_string', Optional[String])])
    def take_nullable_string(nullable_string):
        return nullable_string

    return PipelineDefinition(
        name='test_all_scalars_pipeline',
        solid_defs=[
            produce_any,
            produce_bool,
            produce_float,
            produce_int,
            produce_path,
            produce_string,
            take_any,
            take_bool,
            take_float,
            take_int,
            take_nullable_string,
            take_path,
            take_string,
            take_string_list,
        ],
    )
def define_hello_world_config_pipeline():
    return PipelineDefinition(
        name='hello_world_config_pipeline', solid_defs=[define_hello_world_config_solid()]
    )
def create_diamond_pipeline():
    return PipelineDefinition(
        name="diamond_pipeline", solid_defs=create_diamond_solids(), dependencies=diamond_deps()
    )
def test_empty_pipeline_execution():
    result = execute_pipeline(PipelineDefinition(solid_defs=[]))

    assert result.success
def _sum_only_pipeline():
    return PipelineDefinition(solid_defs=[sum_table, sum_sq_table], dependencies={})
def test_nameless():
    noname = PipelineDefinition([return_one])
    assert noname.name.startswith("__pipeline")
    assert noname.display_name.startswith("__pipeline")
def define_empty_pipeline():
    return PipelineDefinition(name="empty_pipeline", solid_defs=[])
def define_hello_world_pipeline():
    return PipelineDefinition(name="hello_world_pipeline", solid_defs=[define_hello_world_solid()])
def single_string_output_pipeline():
    @lambda_solid(output_def=OutputDefinition(String))
    def return_foo():
        return 'foo'

    return PipelineDefinition(name='single_string_output_pipeline', solid_defs=[return_foo])
def single_int_output_pipeline():
    @lambda_solid(output_def=OutputDefinition(Int))
    def return_one():
        return 1

    return PipelineDefinition(name='single_int_output_pipeline', solid_defs=[return_one])
def define_configurable_hello_pipeline():
    return PipelineDefinition(name='configurable_hello_pipeline', solid_defs=[configurable_hello])
def get_context(self, solid_config=None, mode_def=None, environment_dict=None):
    '''Get a dagstermill execution context for interactive exploration and development.

    Args:
        solid_config (Optional[Any]): If specified, this value will be made available on the
            context as its ``solid_config`` property.
        mode_def (Optional[:class:`dagster.ModeDefinition`]): If specified, defines the mode to
            use to construct the context. Specify this if you would like a context constructed
            with specific ``resource_defs`` or ``logger_defs``. By default, an ephemeral mode
            with a console logger will be constructed.
        environment_dict (Optional[dict]): The environment config dict with which to construct
            the context.

    Returns:
        :py:class:`~dagstermill.DagstermillExecutionContext`
    '''
    check.opt_inst_param(mode_def, 'mode_def', ModeDefinition)
    environment_dict = check.opt_dict_param(environment_dict, 'environment_dict', key_type=str)

    # If we are running non-interactively, and there is already a context reconstituted, return
    # that context rather than overwriting it.
    if self.context is not None and isinstance(self.context, DagstermillRuntimeExecutionContext):
        return self.context

    if not mode_def:
        mode_def = ModeDefinition(logger_defs={'dagstermill': colored_console_logger})
        environment_dict['loggers'] = {'dagstermill': {}}

    solid_def = SolidDefinition(
        name='this_solid',
        input_defs=[],
        compute_fn=lambda *args, **kwargs: None,
        output_defs=[],
        description='Ephemeral solid constructed by dagstermill.get_context()',
        required_resource_keys=mode_def.resource_key_set,
    )

    pipeline_def = PipelineDefinition(
        [solid_def], mode_defs=[mode_def], name='ephemeral_dagstermill_pipeline'
    )

    run_id = make_new_run_id()

    # Construct a stubbed PipelineRun for notebook exploration... the actual pipeline run during
    # pipeline execution will be serialized and reconstituted in the
    # `reconstitute_pipeline_context` call.
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name,
        run_id=run_id,
        environment_dict=environment_dict,
        mode=mode_def.name,
        selector=None,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
        tags=None,
    )

    self.in_pipeline = False
    self.solid_def = solid_def
    self.pipeline_def = pipeline_def

    execution_plan = create_execution_plan(self.pipeline_def, environment_dict, pipeline_run)
    with scoped_pipeline_context(
        self.pipeline_def,
        environment_dict,
        pipeline_run,
        DagsterInstance.ephemeral(),
        execution_plan,
        scoped_resources_builder_cm=self._setup_resources,
    ) as pipeline_context:
        self.context = DagstermillExecutionContext(
            pipeline_context=pipeline_context,
            solid_config=solid_config,
            resource_keys_to_init=get_required_resource_keys_to_init(
                execution_plan, pipeline_context.system_storage_def
            ),
        )

    return self.context
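# Usage sketch for the method above (illustrative, not from the source): in a notebook cell, the
# public dagstermill.get_context entry point is assumed to delegate to this manager method. The
# solid_config value shown here is hypothetical.
import dagstermill

context = dagstermill.get_context(solid_config={'date': '2019-01-01'})
context.log.info('parameterized with {}'.format(context.solid_config))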
def test_solid_def():
    @lambda_solid
    def produce_string():
        return "foo"

    @solid(
        input_defs=[InputDefinition("input_one", String)],
        output_defs=[OutputDefinition(Any)],
        config_schema={"another_field": Int},
    )
    def solid_one(_context, input_one):
        raise Exception("should not execute")

    pipeline_def = PipelineDefinition(
        solid_defs=[produce_string, solid_one],
        dependencies={"solid_one": {"input_one": DependencyDefinition("produce_string")}},
    )

    assert len(pipeline_def.solids[0].output_handles()) == 1

    assert isinstance(pipeline_def.solid_named("solid_one"), Solid)

    solid_one_solid = pipeline_def.solid_named("solid_one")

    assert solid_one_solid.has_input("input_one")
    assert isinstance(solid_one_solid.input_def_named("input_one"), InputDefinition)

    assert len(solid_one_solid.input_dict) == 1
    assert len(solid_one_solid.output_dict) == 1

    assert str(solid_one_solid.input_handle("input_one")) == (
        'SolidInputHandle(input_name="input_one", solid_name="solid_one")'
    )
    assert repr(solid_one_solid.input_handle("input_one")) == (
        'SolidInputHandle(input_name="input_one", solid_name="solid_one")'
    )
    assert str(solid_one_solid.output_handle("result")) == (
        'SolidOutputHandle(output_name="result", solid_name="solid_one")'
    )
    assert repr(solid_one_solid.output_handle("result")) == (
        'SolidOutputHandle(output_name="result", solid_name="solid_one")'
    )

    assert solid_one_solid.output_handle("result") == SolidOutputHandle(
        solid_one_solid, solid_one_solid.output_dict["result"]
    )

    assert (
        len(pipeline_def.dependency_structure.input_to_upstream_outputs_for_solid("solid_one"))
        == 1
    )
    assert (
        len(
            pipeline_def.dependency_structure.output_to_downstream_inputs_for_solid(
                "produce_string"
            )
        )
        == 1
    )
    assert len(pipeline_def.dependency_structure.input_handles()) == 1
    assert len(pipeline_def.dependency_structure.items()) == 1
def define_pipeline():
    return PipelineDefinition(name='hello_world_pipeline', solid_defs=[hello_world])
def test_pipeline_execution_graph_diamond():
    pipeline = PipelineDefinition(solid_defs=create_diamond_solids(), dependencies=diamond_deps())
    return _do_test(pipeline, lambda: execute_pipeline_iterator(pipeline))
def test_pipeline_execution_graph_diamond():
    pipe = PipelineDefinition(solid_defs=create_diamond_solids(), dependencies=diamond_deps())
    return _do_test(pipe)
def test_create_pipeline_with_empty_solids_list():
    single_solid_pipeline = PipelineDefinition(solid_defs=[], dependencies={})

    result = execute_pipeline(single_solid_pipeline)
    assert result.success
def test_reexecution_fs_storage_with_solid_selection():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={"add_one": {"num": DependencyDefinition("return_one")}},
    )
    run_config = {"storage": {"filesystem": {}}}
    instance = DagsterInstance.ephemeral()

    # Case 1: re-execute a part of a pipeline when the original pipeline doesn't have solid
    # selection.
    pipeline_result = execute_pipeline(pipeline_def, run_config, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid("add_one").output_value() == 2

    # This is how this is actually done in dagster_graphql.implementation.pipeline_execution_manager
    reexecution_result_no_solid_selection = reexecute_pipeline(
        pipeline_def,
        parent_run_id=pipeline_result.run_id,
        run_config=run_config,
        step_selection=["return_one.compute"],
        instance=instance,
    )
    assert reexecution_result_no_solid_selection.success
    assert len(reexecution_result_no_solid_selection.solid_result_list) == 2
    assert reexecution_result_no_solid_selection.result_for_solid("add_one").skipped
    assert reexecution_result_no_solid_selection.result_for_solid("return_one").output_value() == 1

    # Case 2: re-execute a pipeline when the original pipeline has solid selection.
    pipeline_result_solid_selection = execute_pipeline(
        pipeline_def,
        run_config=run_config,
        instance=instance,
        solid_selection=["return_one"],
    )
    assert pipeline_result_solid_selection.success
    assert len(pipeline_result_solid_selection.solid_result_list) == 1
    with pytest.raises(DagsterInvariantViolationError):
        pipeline_result_solid_selection.result_for_solid("add_one")
    assert pipeline_result_solid_selection.result_for_solid("return_one").output_value() == 1

    reexecution_result_solid_selection = reexecute_pipeline(
        pipeline_def,
        parent_run_id=pipeline_result_solid_selection.run_id,
        run_config=run_config,
        instance=instance,
    )
    assert reexecution_result_solid_selection.success
    assert len(reexecution_result_solid_selection.solid_result_list) == 1
    with pytest.raises(DagsterInvariantViolationError):
        pipeline_result_solid_selection.result_for_solid("add_one")
    assert reexecution_result_solid_selection.result_for_solid("return_one").output_value() == 1

    # Case 3: re-execute a pipeline partially when the original pipeline has solid selection and
    # re-execute a step which hasn't been included in the original pipeline.
    with pytest.raises(
        DagsterInvalidSubsetError,
        match=re.escape("No qualified steps to execute found for step_selection"),
    ):
        reexecute_pipeline(
            pipeline_def,
            parent_run_id=pipeline_result_solid_selection.run_id,
            run_config=run_config,
            step_selection=["add_one.compute"],
            instance=instance,
        )

    # Case 4: re-execute a pipeline partially when the original pipeline has solid selection and
    # re-execute a step which has been included in the original pipeline.
    re_reexecution_result = reexecute_pipeline(
        pipeline_def,
        parent_run_id=reexecution_result_solid_selection.run_id,
        run_config=run_config,
        instance=instance,
        step_selection=["return_one.compute"],
    )
    assert re_reexecution_result.success
    assert len(re_reexecution_result.solid_result_list) == 1
    assert re_reexecution_result.result_for_solid("return_one").output_value() == 1
def test_singleton_pipeline():
    stub_solid = define_stub_solid('stub', [{'a key': 'a value'}])
    single_solid_pipeline = PipelineDefinition(solid_defs=[stub_solid], dependencies={})

    result = execute_pipeline(single_solid_pipeline)
    assert result.success
def define_bad_kernel_pipeline():
    return PipelineDefinition(name='bad_kernel_pipeline', solid_defs=[bad_kernel_solid])
def execute_isolated_solid(solid_def, environment_dict=None):
    return execute_pipeline(
        PipelineDefinition(name='test', solid_defs=[solid_def]),
        environment_dict=environment_dict,
    )
def define_hello_world_with_output_pipeline():
    return PipelineDefinition(
        name='hello_world_with_output_pipeline', solid_defs=[define_hello_world_with_output()]
    )
def test_optional_and_required_context():
    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        solid_defs=[],
        mode_defs=[
            ModeDefinition(
                name='mixed',
                resource_defs={
                    'optional_resource': ResourceDefinition(
                        lambda: None,
                        config_schema={'optional_field': Field(String, is_required=False)},
                    ),
                    'required_resource': ResourceDefinition(
                        lambda: None,
                        config_schema={'required_field': String},
                    ),
                },
            )
        ],
    )

    env_type = create_environment_type(pipeline_def)
    assert env_type.fields['solids'].is_required is False

    assert env_type.fields['execution'].is_required is False

    assert nested_field(env_type, 'resources').is_required
    assert nested_field(env_type, 'resources', 'optional_resource').is_required is False
    assert nested_field(env_type, 'resources', 'optional_resource', 'config').is_required is False
    assert (
        nested_field(
            env_type, 'resources', 'optional_resource', 'config', 'optional_field'
        ).is_required
        is False
    )

    assert nested_field(env_type, 'resources', 'required_resource').is_required
    assert nested_field(env_type, 'resources', 'required_resource', 'config').is_required
    assert nested_field(
        env_type, 'resources', 'required_resource', 'config', 'required_field'
    ).is_required

    env_obj = EnvironmentConfig.build(
        pipeline_def,
        {'resources': {'required_resource': {'config': {'required_field': 'foo'}}}},
    )

    assert env_obj.resources == {
        'optional_resource': {'config': {}},
        'required_resource': {'config': {'required_field': 'foo'}},
    }
def define_hello_logging_pipeline():
    return PipelineDefinition(
        name='hello_logging_pipeline', solid_defs=[define_hello_logging_solid()]
    )
def test_execute_isolated_solids_with_bad_solid_names():
    with pytest.raises(DagsterInvariantViolationError, match='but that solid was not found'):
        execute_solids_within_pipeline(PipelineDefinition([]), [], {'foo': {'bar': 'baz'}})
def execute_solid(
    solid_def,
    mode_def=None,
    input_values=None,
    tags=None,
    run_config=None,
    raise_on_error=True,
    environment_dict=None,
):
    '''Execute a single solid in an ephemeral pipeline.

    Intended to support unit tests. Input values may be passed directly, and no pipeline need be
    specified -- an ephemeral pipeline will be constructed.

    Args:
        solid_def (SolidDefinition): The solid to execute.
        mode_def (Optional[ModeDefinition]): The mode within which to execute the solid. Use this
            if, e.g., custom resources, loggers, or executors are desired.
        input_values (Optional[Dict[str, Any]]): A dict of input names to input values, used to
            pass inputs to the solid directly. You may also use the ``run_config`` to configure
            any inputs that are configurable.
        tags (Optional[Dict[str, Any]]): Arbitrary key-value pairs that will be added to pipeline
            logs.
        run_config (Optional[dict]): The environment configuration that parameterized this
            execution, as a dict.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``True``, since this is the most useful behavior in tests.

    Returns:
        Union[CompositeSolidExecutionResult, SolidExecutionResult]: The result of executing the
        solid.
    '''
    check.inst_param(solid_def, 'solid_def', ISolidDefinition)
    check.opt_inst_param(mode_def, 'mode_def', ModeDefinition)
    input_values = check.opt_dict_param(input_values, 'input_values', key_type=str)

    # backcompat
    run_config = canonicalize_run_config(run_config, environment_dict)

    solid_defs = [solid_def]

    def create_value_solid(input_name, input_value):
        @lambda_solid(name=input_name)
        def input_solid():
            return input_value

        return input_solid

    dependencies = defaultdict(dict)

    for input_name, input_value in input_values.items():
        dependencies[solid_def.name][input_name] = DependencyDefinition(input_name)
        solid_defs.append(create_value_solid(input_name, input_value))

    result = execute_pipeline(
        PipelineDefinition(
            name='ephemeral_{}_solid_pipeline'.format(solid_def.name),
            solid_defs=solid_defs,
            dependencies=dependencies,
            mode_defs=[mode_def] if mode_def else None,
        ),
        run_config=run_config,
        mode=mode_def.name if mode_def else None,
        tags=tags,
        raise_on_error=raise_on_error,
    )
    return result.result_for_handle(solid_def.name)
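# Usage sketch for execute_solid above (illustrative, not from the source): pass an input value
# directly and read the output from the ephemeral pipeline result. The add_one solid is a
# hypothetical example solid, not part of the original module.
@lambda_solid
def add_one(num):
    return num + 1


result = execute_solid(add_one, input_values={'num': 5})
assert result.success
assert result.output_value() == 6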
def define_bad_pipeline():
    # Intentionally invalid config: the default value 'number' does not satisfy the Int field.
    @solid(config_field=Field(Int, default_value='number'))
    def bad_context():
        pass

    return PipelineDefinition(name='bad', solids=[bad_context])