def test_multi_output():
    """A solid can mix two dynamic outputs with a regular output inside a pipeline."""

    @solid(output_defs=[
        DynamicOutputDefinition(int, "numbers"),
        DynamicOutputDefinition(str, "letters"),
        OutputDefinition(str, "wildcard"),
    ])
    def multiout(_):
        # Two dynamic ints, three dynamic strings, one plain output.
        yield DynamicOutput(1, output_name="numbers", mapping_key="1")
        yield DynamicOutput(2, output_name="numbers", mapping_key="2")
        yield DynamicOutput("a", output_name="letters", mapping_key="a")
        yield DynamicOutput("b", output_name="letters", mapping_key="b")
        yield DynamicOutput("c", output_name="letters", mapping_key="c")
        yield Output("*", "wildcard")

    @solid
    def double(n):
        return n * 2

    @pipeline
    def multi_dyn():
        numbers, _, _ = multiout()
        numbers.map(double)

    run_result = execute_pipeline(multi_dyn)
    assert run_result.success

    solid_result = run_result.result_for_solid("multiout")
    assert len(solid_result.get_output_events_for_compute("numbers")) == 2
    assert len(solid_result.get_output_events_for_compute("letters")) == 3
    assert solid_result.get_output_event_for_compute("wildcard")
    assert len(solid_result.compute_output_events_dict["numbers"]) == 2
    assert len(solid_result.compute_output_events_dict["letters"]) == 3
    assert len(solid_result.compute_output_events_dict["wildcard"]) == 1
    assert solid_result.output_values == {
        "numbers": {"1": 1, "2": 2},
        "letters": {"a": "a", "b": "b", "c": "c"},
        "wildcard": "*",
    }
    assert solid_result.output_value("numbers") == {"1": 1, "2": 2}
    assert solid_result.output_value("letters") == {"a": "a", "b": "b", "c": "c"}
    assert solid_result.output_value("wildcard") == "*"
    assert run_result.output_for_solid("double") == {"1": 2, "2": 4}
def test_context_mapping_key():
    """The context exposes the mapping key for mapped invocations and None otherwise."""
    _observed = []

    @solid
    def observe_key(context, _dep=None):
        _observed.append(context.get_mapping_key())

    @solid(output_defs=[DynamicOutputDefinition()])
    def emit():
        yield DynamicOutput(1, mapping_key="key_1")
        yield DynamicOutput(2, mapping_key="key_2")

    @pipeline
    def test():
        observe_key()
        emit().map(observe_key)

    res = execute_pipeline(test)
    assert res.success
    assert _observed == [None, "key_1", "key_2"]

    # test standalone doesn't throw as well
    _observed = []
    observe_key(build_solid_context())
    assert _observed == [None]
def dynamic_pipeline():
    # Pipeline body: fan out over emit(), multiply each mapped value, collect
    # the results, sum them, then double the total via an aliased solid.
    @solid
    def multiply_by_two(context, y):
        context.log.info("multiply_by_two is returning " + str(y * 2))
        return y * 2

    @solid
    def multiply_inputs(context, y, ten, should_fail):
        # Deliberately fails on y == 2 during a first-time run only; a
        # re-execution has a parent_run_id and therefore succeeds.
        current_run = context.instance.get_run_by_id(context.run_id)
        if should_fail:
            if y == 2 and current_run.parent_run_id is None:
                raise Exception()
        context.log.info("multiply_inputs is returning " + str(y * ten))
        return y * ten

    @solid
    def emit_ten(_):
        return 10

    @solid(output_defs=[DynamicOutputDefinition()])
    def emit(_):
        # Three dynamic outputs keyed "0", "1", "2".
        for i in range(3):
            yield DynamicOutput(value=i, mapping_key=str(i))

    @solid
    def sum_numbers(_, nums):
        return sum(nums)

    # pylint: disable=no-member
    multiply_by_two.alias("double_total")(sum_numbers(
        emit().map(
            lambda n: multiply_by_two(multiply_inputs(n, emit_ten())),
        ).collect(),
    ))
def test_fails_with_wrong_output():
    """A dynamic output def rejects both a plain Output yield and a bare return."""

    @solid(output_defs=[DynamicOutputDefinition()])
    def should_fail(_):
        yield Output(1)

    with pytest.raises(DagsterInvariantViolationError, match="must yield DynamicOutput"):
        execute_solid(should_fail)

    @solid(output_defs=[DynamicOutputDefinition()])
    def should_also_fail(_):
        return 1

    with pytest.raises(DagsterInvariantViolationError, match="must yield DynamicOutput"):
        execute_solid(should_also_fail)
def test_dynamic(gcs_bucket):
    """Dynamic fan-out works end to end with the GCS pickle IO manager."""

    @solid(output_defs=[DynamicOutputDefinition()])
    def numbers(_):
        for i in range(3):
            yield DynamicOutput(i, mapping_key=str(i))

    @solid
    def echo(_, x):
        return x

    mode = ModeDefinition(
        resource_defs={"io_manager": gcs_pickle_io_manager, "gcs": gcs_resource}
    )

    @pipeline(mode_defs=[mode])
    def dynamic():
        numbers().map(echo)

    run_config = {"resources": {"io_manager": {"config": {"gcs_bucket": gcs_bucket}}}}
    result = execute_pipeline(dynamic, run_config=run_config)
    assert result.success
def test_dynamic_output_async_gen():
    """An async generator solid may interleave DynamicOutputs, awaits, and an Output."""

    @solid(output_defs=[
        DynamicOutputDefinition(name="a", is_required=False),
        OutputDefinition(name="b", is_required=False),
    ])
    async def aio_gen():
        yield DynamicOutput(value=1, mapping_key="1", output_name="a")
        yield DynamicOutput(value=2, mapping_key="2", output_name="a")
        await asyncio.sleep(0.01)
        yield Output(value="foo", output_name="b")

    async def collect_outputs():
        return [output async for output in aio_gen()]

    loop = asyncio.get_event_loop()
    first, second, regular = loop.run_until_complete(collect_outputs())

    assert first.value == 1
    assert first.mapping_key == "1"
    assert second.value == 2
    assert second.mapping_key == "2"
    assert regular.value == "foo"
def test_multi_out_map():
    """Mapping over a multi-output solid destructures into per-output dynamic handles."""

    @solid(output_defs=[DynamicOutputDefinition()])
    def emit():
        for num in (1, 2, 3):
            yield DynamicOutput(num, mapping_key=str(num))

    @solid(output_defs=[
        OutputDefinition(name="a", is_required=False),
        OutputDefinition(name="b", is_required=False),
        OutputDefinition(name="c", is_required=False),
    ])
    def multiout(inp: int):
        # Route 1 to "a", everything else to "b"; "c" is never emitted.
        target = "a" if inp == 1 else "b"
        yield Output(inp, output_name=target)

    @solid
    def echo(a):
        return a

    @pipeline
    def destructure():
        a, b, c = emit().map(multiout)
        echo.alias("echo_a")(a.collect())
        echo.alias("echo_b")(b.collect())
        echo.alias("echo_c")(c.collect())

    result = execute_pipeline(destructure)
    assert result.result_for_solid("echo_a").output_value() == [1]
    assert result.result_for_solid("echo_b").output_value() == [2, 3]
    # all fanned in inputs skipped -> solid skips
    assert result.result_for_solid("echo_c").skipped
def test_solid_outputs_access():
    # Hook contexts must expose solid_output_values for successful, failed,
    # and dynamically mapped steps alike.
    called = {}

    @success_hook
    def my_success_hook(context):
        called[context.step_key] = context.solid_output_values

    @failure_hook
    def my_failure_hook(context):
        called[context.step_key] = context.solid_output_values

    @solid(output_defs=[
        OutputDefinition(name="one"),
        OutputDefinition(name="two"),
        OutputDefinition(name="three"),
    ])
    def a_solid(_):
        yield Output(1, "one")
        yield Output(2, "two")
        yield Output(3, "three")

    @solid(output_defs=[
        OutputDefinition(name="one"),
        OutputDefinition(name="two"),
    ])
    def failed_solid(_):
        # Emits "one" then raises, so the failure hook should only see "one".
        yield Output(1, "one")
        raise SomeUserException()
        yield Output(3, "two")  # pylint: disable=unreachable

    @solid(output_defs=[DynamicOutputDefinition()])
    def dynamic_solid(_):
        yield DynamicOutput(1, mapping_key="mapping_1")
        yield DynamicOutput(2, mapping_key="mapping_2")

    @solid
    def echo(_, x):
        return x

    @my_success_hook
    @my_failure_hook
    @pipeline
    def a_pipeline():
        a_solid()
        failed_solid()
        dynamic_solid().map(echo)

    result = execute_pipeline(a_pipeline, raise_on_error=False)
    assert not result.success
    assert called.get("a_solid") == {"one": 1, "two": 2, "three": 3}
    assert called.get("failed_solid") == {"one": 1}
    # The dynamic solid's hook sees all mapping keys under the "result" output.
    assert called.get("dynamic_solid") == {
        "result": {"mapping_1": 1, "mapping_2": 2}
    }
    # Each mapped downstream step gets its own hook invocation.
    assert called.get("echo[mapping_1]") == {"result": 1}
    assert called.get("echo[mapping_2]") == {"result": 2}
def test_fails_dupe_keys():
    """Reusing a mapping key on the same dynamic output is an invariant violation."""

    @solid(output_defs=[DynamicOutputDefinition()])
    def should_fail(_):
        for _attempt in range(2):
            yield DynamicOutput(True, mapping_key="dunk")

    with pytest.raises(DagsterInvariantViolationError, match='mapping_key "dunk" multiple times'):
        execute_solid(should_fail)
def test_dynamic_output_solid():
    """Dynamic outputs surface as a mapping-key-indexed dict in output_values."""

    @solid(output_defs=[DynamicOutputDefinition()])
    def should_work(_):
        for value in (1, 2):
            yield DynamicOutput(value, mapping_key=str(value))

    result = execute_in_process(should_work)
    assert result.success
    outputs = result.output_values["result"]
    assert outputs["1"] == 1
    assert outputs["2"] == 2
def test_dynamic_output_definition_single_partition_materialization():
    # Verifies asset materializations: one static asset from solid1 plus one
    # per dynamic output of solid2, whose asset keys derive from mapping keys.
    entry1 = EventMetadataEntry.int(123, "nrows")
    entry2 = EventMetadataEntry.float(3.21, "some value")

    @solid(output_defs=[
        OutputDefinition(name="output1", asset_key=AssetKey("table1"))
    ])
    def solid1(_):
        return Output(None, "output1", metadata_entries=[entry1])

    @solid(output_defs=[
        DynamicOutputDefinition(
            name="output2",
            asset_key=lambda context: AssetKey(context.mapping_key))
    ])
    def solid2(_, _input1):
        for i in range(4):
            yield DynamicOutput(
                7,
                mapping_key=str(i),
                output_name="output2",
                metadata_entries=[entry2],
            )

    @solid
    def do_nothing(_, _input1):
        pass

    @pipeline
    def my_pipeline():
        solid2(solid1()).map(do_nothing)

    result = execute_pipeline(my_pipeline)
    events = result.step_event_list
    materializations = [
        event for event in events
        if event.event_type_value == "ASSET_MATERIALIZATION"
    ]
    # 1 materialization from solid1 + 4 from solid2's dynamic outputs.
    assert len(materializations) == 5

    check_materialization(materializations[0], AssetKey(["table1"]),
                          metadata_entries=[entry1])
    seen_paths = set()
    for i in range(1, 5):
        path = materializations[i].asset_key.path
        seen_paths.add(tuple(path))
        check_materialization(
            materializations[i],
            AssetKey(path),
            metadata_entries=[entry2],
            parent_assets=[AssetLineageInfo(AssetKey(["table1"]))],
        )
    # Each dynamic output produced a distinct asset key.
    assert len(seen_paths) == 4
def test_dynamic():
    # The output description is parsed at runtime from the solid docstring's
    # "Returns:" section, and the output def reports itself as dynamic.
    # NOTE(review): the docstring layout below is load-bearing — it must parse
    # to the description "numbers"; confirm formatting against the assertion.
    @solid(output_defs=[DynamicOutputDefinition(dagster_type=int)])
    def dyn_desc(_) -> Iterator[DynamicOutput]:
        """
        Returns:
            numbers
        """
        yield DynamicOutput(4, "4")

    assert dyn_desc.output_defs[0].description == "numbers"
    assert dyn_desc.output_defs[0].is_dynamic
def test_dynamic_output_non_gen():
    """Returning (rather than yielding) a DynamicOutput from a solid raises."""

    @solid(output_defs=[DynamicOutputDefinition(name="a", is_required=False)])
    def should_not_work():
        return DynamicOutput(value=1, mapping_key="1", output_name="a")

    expected = (
        "Attempted to return a DynamicOutput from solid. DynamicOuts are only supported "
        "using yield syntax."
    )
    with pytest.raises(DagsterInvariantViolationError, match=expected):
        should_not_work()
def test_must_unpack_composite():
    """A composite returning a dynamic output without map/collect is invalid."""
    with pytest.raises(
            DagsterInvalidDefinitionError,
            match="Dynamic output must be unpacked by invoking map or collect",
    ):

        @composite_solid(output_defs=[DynamicOutputDefinition()])
        def composed():
            return dynamic_numbers()

        @pipeline
        def _should_fail():
            echo(composed())
def test_dynamic_output_async_non_gen():
    """Returning (not yielding) a DynamicOutput from an async solid raises.

    Fix: the solid must be an async coroutine (``async def`` with ``await``)
    so that ``loop.run_until_complete`` receives an awaitable. The original
    plain ``def`` called ``asyncio.sleep(0.01)`` without awaiting it (creating
    a never-awaited coroutine) and handed a non-awaitable to the event loop.
    """

    @solid(output_defs=[DynamicOutputDefinition(name="a", is_required=False)])
    async def should_not_work():
        await asyncio.sleep(0.01)
        return DynamicOutput(value=1, mapping_key="1", output_name="a")

    loop = asyncio.get_event_loop()
    with pytest.raises(
            DagsterInvariantViolationError,
            match="Attempted to return a DynamicOutput from solid. DynamicOuts are only supported "
            "using yield syntax.",
    ):
        loop.run_until_complete(should_not_work())
def test_basic():
    """Smoke test: dynamic outputs are keyed by mapping_key in the solid result."""

    @solid(output_defs=[DynamicOutputDefinition()])
    def should_work(_):
        for value in (1, 2):
            yield DynamicOutput(value, mapping_key=str(value))

    result = execute_solid(should_work)
    assert result.success
    assert len(result.get_output_events_for_compute()) == 2
    assert len(result.compute_output_events_dict["result"]) == 2
    assert result.output_values == {"result": {"1": 1, "2": 2}}
    assert result.output_value() == {"1": 1, "2": 2}
def test_multi_output():
    """A solid can mix multiple dynamic outputs with a regular output."""

    @solid(output_defs=[
        DynamicOutputDefinition(int, "numbers"),
        DynamicOutputDefinition(str, "letters"),
        OutputDefinition(str, "wildcard"),
    ])
    def should_work(_):
        for num in (1, 2):
            yield DynamicOutput(num, output_name="numbers", mapping_key=str(num))
        for letter in ("a", "b", "c"):
            yield DynamicOutput(letter, output_name="letters", mapping_key=letter)
        yield Output("*", "wildcard")

    result = execute_solid(should_work)
    assert result.success
    assert len(result.get_output_events_for_compute("numbers")) == 2
    assert len(result.get_output_events_for_compute("letters")) == 3
    assert result.get_output_event_for_compute("wildcard")
    assert len(result.compute_output_events_dict["numbers"]) == 2
    assert len(result.compute_output_events_dict["letters"]) == 3
    assert len(result.compute_output_events_dict["wildcard"]) == 1
    assert result.output_values == {
        "numbers": {"1": 1, "2": 2},
        "letters": {"a": "a", "b": "b", "c": "c"},
        "wildcard": "*",
    }
    assert result.output_value("numbers") == {"1": 1, "2": 2}
    assert result.output_value("letters") == {"a": "a", "b": "b", "c": "c"}
    assert result.output_value("wildcard") == "*"
def test_dynamic_with_op():
    """Dynamic outputs also work with the op/graph APIs."""

    @op
    def passthrough(_ctx, _dep=None):
        pass

    @op(output_defs=[DynamicOutputDefinition()])
    def emit():
        for idx, value in enumerate((1, 2), start=1):
            yield DynamicOutput(value, mapping_key=f"key_{idx}")

    @graph
    def test_graph():
        emit().map(passthrough)

    assert test_graph.execute_in_process().success
def test_multi_composite_out():
    """A composite's dynamic output cannot be mapped inside another dynamic mapping."""
    with pytest.raises(
            DagsterInvalidDefinitionError,
            match="cannot be downstream of more than one dynamic output",
    ):

        @composite_solid(output_defs=[DynamicOutputDefinition()])
        def composed_echo():
            return dynamic_solid().map(echo)

        @pipeline
        def _should_fail():
            def _complex(item):
                composed_echo().map(lambda y: add(y, item))

            dynamic_solid().map(_complex)
def test_dynamic_output_gen():
    """Directly invoking a generator solid yields its outputs in declaration order."""

    @solid(output_defs=[
        DynamicOutputDefinition(name="a", is_required=False),
        OutputDefinition(name="b", is_required=False),
    ])
    def my_dynamic():
        yield DynamicOutput(value=1, mapping_key="1", output_name="a")
        yield DynamicOutput(value=2, mapping_key="2", output_name="a")
        yield Output(value="foo", output_name="b")

    first, second, regular = my_dynamic()
    assert first.value == 1
    assert first.mapping_key == "1"
    assert second.value == 2
    assert second.mapping_key == "2"
    assert regular.value == "foo"
def test_composite_multi_out():
    """A composite can return both a regular and a dynamic output."""

    @composite_solid(output_defs=[
        OutputDefinition(Any, "one"),
        DynamicOutputDefinition(Any, "numbers"),
    ])
    def multi_out():
        # emit_one() is invoked before dynamic_numbers(), as in the original.
        one = emit_one()
        numbers = dynamic_numbers()
        return {"one": one, "numbers": numbers}

    @pipeline
    def composite_multi():
        one, numbers = multi_out()
        echo(one)
        numbers.map(echo)

    run_result = execute_pipeline(composite_multi)
    assert run_result.success
def define_inty_job():
    """Build a simple job whose dynamic step uses the ADLS2 pickle IO manager."""

    @op(output_defs=[OutputDefinition(Int)])
    def return_one():
        return 1

    @op(
        input_defs=[InputDefinition("num", Int)],
        output_defs=[DynamicOutputDefinition(Int)],
    )
    def add_one(num):
        for key in ("foo", "bar"):
            yield DynamicOutput(num + 1, key)

    @graph
    def basic_external_plan_execution():
        add_one(return_one())

    return basic_external_plan_execution.to_job(
        resource_defs={"io_manager": adls2_pickle_io_manager, "adls2": adls2_resource}
    )
def test_fan_in_skips():
    """Fan-in totals succeed even when an upstream branch emitted nothing or skipped."""

    @solid(output_defs=[
        OutputDefinition(name="nums"),
        OutputDefinition(name="empty"),
        OutputDefinition(name="skip", is_required=False),
    ])
    def fork_logic():
        # "skip" is deliberately never yielded.
        yield Output([1, 2, 3], output_name="nums")
        yield Output([], output_name="empty")

    @solid(output_defs=[DynamicOutputDefinition(int)])
    def emit_dyn(vector):
        for value in vector:
            yield DynamicOutput(value=value, mapping_key=f"input_{value}")

    @solid
    def total(items):
        return sum(items)

    @pipeline
    def dyn_fork():
        nums, empty, skip = fork_logic()
        total.alias("grand_total")([
            total.alias("nums_total")(emit_dyn(nums).map(echo).collect()),
            total.alias("empty_total")(emit_dyn(empty).map(echo).collect()),
            total.alias("skip_total")(emit_dyn(skip).map(echo).collect()),
        ])

    result = execute_pipeline(dyn_fork)
    assert result.success
    assert result.result_for_solid("nums_total").success
    assert result.result_for_solid("empty_total").success
    # arguably should be skip
    assert result.result_for_solid("skip_total").success
    assert result.result_for_solid("grand_total").success
    assert result.result_for_solid("grand_total").output_value() == 6
def test_direct_dep():
    # Exercises which dependency shapes on dynamic outputs are legal: a single
    # dynamic upstream is fine, but nesting a second .map inside a mapped
    # function (two dynamic ancestors) must raise at definition time.
    @solid(output_defs=[DynamicOutputDefinition()])
    def dynamic_add(_, x):
        yield DynamicOutput(x + 1, mapping_key="1")
        yield DynamicOutput(x + 2, mapping_key="2")

    @pipeline
    def _is_fine():
        def _add(item):
            dynamic_add(item)

        dynamic_solid().map(_add)

    with pytest.raises(
        DagsterInvalidDefinitionError,
        match="cannot be downstream of more than one dynamic output",
    ):

        @pipeline
        def _should_fail():
            def _add_echo(item):
                dynamic_add(item).map(echo)

            dynamic_solid().map(_add_echo)

    # Rebinds _is_fine: mapping one dynamic solid over another is allowed.
    @pipeline
    def _is_fine():
        dynamic_solid().map(dynamic_add)

    with pytest.raises(
        DagsterInvalidDefinitionError,
        match="cannot be downstream of more than one dynamic output",
    ):

        @pipeline
        def _should_fail():
            echo(dynamic_solid().map(dynamic_add).collect())
@solid def echo(_, x: int) -> int: return x @solid(config_schema={ "range": Field(int, is_required=False, default_value=3), }) def num_range(context) -> int: return context.solid_config["range"] @solid( output_defs=[DynamicOutputDefinition()], config_schema={ "fail": Field(bool, is_required=False, default_value=False), }, tags={"first": "1"}, ) def emit(context, num: int = 3): if context.solid_config["fail"]: raise Exception("FAILURE") for i in range(num): yield DynamicOutput(value=i, mapping_key=str(i)) @solid def sum_numbers(_, nums):
# pylint: disable=unused-argument, no-value-for-parameter # start_marker import os from typing import List from dagster import DynamicOutput, DynamicOutputDefinition, Field, pipeline, solid from dagster.utils import file_relative_path @solid( config_schema={ "path": Field(str, default_value=file_relative_path(__file__, "sample")) }, output_defs=[DynamicOutputDefinition(str)], ) def files_in_directory(context): path = context.solid_config["path"] dirname, _, filenames = next(os.walk(path)) for file in filenames: yield DynamicOutput( value=os.path.join(dirname, file), # create a mapping key from the file name mapping_key=file.replace(".", "_").replace("-", "_"), ) @solid def process_file(path: str) -> int: # simple example of calculating size
""" id_range, metadata_entries = _id_range_for_time( context.resources.partition_start, context.resources.partition_end, context.resources.hn_client, ) yield Output(id_range, metadata_entries=metadata_entries) @solid( config_schema={"batch_size": Field(int, is_required=False)}, required_resource_keys={"hn_client", "partition_start", "partition_end"}, output_defs=[ DynamicOutputDefinition( Tuple[int, int], description= "A dynamic set of id ranges that cover the range for the partition, divided by batch_size config if provided.", ) ], ) def dynamic_id_ranges_for_time(context): """ For the configured partition start/end, searches for the range of ids that were created in that time """ id_range, metadata_entries = _id_range_for_time( context.resources.partition_start, context.resources.partition_end, context.resources.hn_client, ) start_id, end_id = id_range
def test_tags_to_dynamic_plan():
    # User-defined k8s config tags on solids must survive into the execution
    # plan, including dynamically mapped steps resolved via KnownExecutionState.
    @solid(
        tags={
            USER_DEFINED_K8S_CONFIG_KEY: {
                "container_config": {
                    "resources": {
                        "requests": {"cpu": "500m", "memory": "128Mi"},
                        "limits": {"cpu": "1000m", "memory": "1Gi"},
                    }
                }
            }
        })
    def multiply_inputs(_, x):
        return 2 * x

    @solid(
        tags={
            USER_DEFINED_K8S_CONFIG_KEY: {
                "container_config": {
                    "resources": {
                        "requests": {"cpu": "250m", "memory": "64Mi"},
                        "limits": {"cpu": "500m", "memory": "2560Mi"},
                    }
                }
            }
        },
        output_defs=[DynamicOutputDefinition()],
    )
    def emit(_):
        for i in range(3):
            yield DynamicOutput(value=i, mapping_key=str(i))

    @pipeline
    def k8s_ready():
        return emit().map(multiply_inputs)

    # Pre-resolved mapping keys let the plan expand the dynamic steps up front.
    known_state = KnownExecutionState(
        {},
        {
            emit.name: {
                "result": ["0", "1", "2"]
            },
        },
    )
    plan = create_execution_plan(k8s_ready, known_state=known_state)

    # The emitting step carries its own tag config.
    emit_step = plan.get_step_by_key(emit.name)
    user_defined_k8s_config = get_user_defined_k8s_config(emit_step.tags)
    assert user_defined_k8s_config.container_config
    assert user_defined_k8s_config.container_config["resources"]
    resources = user_defined_k8s_config.container_config["resources"]
    assert resources["requests"]["cpu"] == "250m"
    assert resources["requests"]["memory"] == "64Mi"
    assert resources["limits"]["cpu"] == "500m"
    assert resources["limits"]["memory"] == "2560Mi"

    # Each resolved mapped step carries the downstream solid's tag config.
    for mapping_key in range(3):
        multiply_inputs_step = plan.get_step_by_key(
            f"{multiply_inputs.name}[{mapping_key}]")
        dynamic_step_user_defined_k8s_config = get_user_defined_k8s_config(
            multiply_inputs_step.tags)
        assert dynamic_step_user_defined_k8s_config.container_config
        assert dynamic_step_user_defined_k8s_config.container_config[
            "resources"]
        resources = dynamic_step_user_defined_k8s_config.container_config[
            "resources"]
        assert resources["requests"]["cpu"] == "500m"
        assert resources["requests"]["memory"] == "128Mi"
        assert resources["limits"]["cpu"] == "1000m"
        assert resources["limits"]["memory"] == "1Gi"