Пример #1
0
def test_materialize_with_selection():
    """Materialize an asset group with the selection string ``"*follows_o2"``.

    The group holds a source asset, a two-output multi-asset fed by it, and one
    downstream asset per multi-asset output. The run must succeed and the
    selected nodes must all produce ``"foo"``.
    """

    @asset
    def start_asset():
        return "foo"

    @multi_asset(
        outs={
            "o1": Out(asset_key=AssetKey("o1")),
            "o2": Out(asset_key=AssetKey("o2")),
        }
    )
    def middle_asset(start_asset):
        return (start_asset, start_asset)

    @asset
    def follows_o1(o1):
        return o1

    @asset
    def follows_o2(o2):
        return o2

    # Only the IO manager definition is used; the first element is ignored.
    _unused, io_manager_def = asset_aware_io_manager()
    all_assets = [start_asset, middle_asset, follows_o1, follows_o2]
    asset_group = AssetGroup(
        all_assets,
        resource_defs={"io_manager": io_manager_def},
    )

    materialization = asset_group.materialize(selection="*follows_o2")

    assert materialization.success
    assert materialization.output_for_node("middle_asset", "o1") == "foo"
    assert materialization.output_for_node("follows_o2") == "foo"
    assert materialization.output_for_node("start_asset") == "foo"
Пример #2
0
def test_multi_asset_internal_asset_deps_metadata():
    """Check that ``internal_asset_deps`` ends up in the matching output's metadata.

    The output that declares internal deps must carry them under
    ``ASSET_DEPENDENCY_METADATA_KEY`` next to its own metadata; the other output
    keeps only the metadata it was given.
    """

    @multi_asset(
        outs={
            "my_out_name": Out(metadata={"foo": "bar"}),
            "my_other_out_name": Out(metadata={"bar": "foo"}),
        },
        internal_asset_deps={
            "my_out_name": {AssetKey("my_other_out_name"), AssetKey("my_in_name")},
        },
    )
    def my_asset(my_in_name):  # pylint: disable=unused-argument
        yield Output(1, "my_out_name")
        yield Output(2, "my_other_out_name")

    expected_keys = {AssetKey("my_out_name"), AssetKey("my_other_out_name")}
    assert my_asset.asset_keys == expected_keys

    # Built independently of the decorator argument so the comparison is not
    # against the very same set object.
    expected_first_metadata = {
        "foo": "bar",
        ASSET_DEPENDENCY_METADATA_KEY: {
            AssetKey("my_other_out_name"),
            AssetKey("my_in_name"),
        },
    }
    first_out_def = my_asset.op.output_def_named("my_out_name")
    assert first_out_def.metadata == expected_first_metadata

    second_out_def = my_asset.op.output_def_named("my_other_out_name")
    assert second_out_def.metadata == {"bar": "foo"}
Пример #3
0
def test_multi_asset_asset_materialization_planned_events():
    """Check the planned-materialization bookkeeping for a two-output multi-asset.

    After an in-process run, the first ASSET_MATERIALIZATION_PLANNED record for
    ``my_asset_name`` must carry the run's id, and both asset keys must map to
    exactly that run.
    """

    @multi_asset(
        outs={
            "my_out_name": Out(asset_key=AssetKey("my_asset_name")),
            "my_other_out_name": Out(asset_key=AssetKey("my_other_asset")),
        }
    )
    def my_asset():
        yield Output(1, "my_out_name")
        yield Output(2, "my_other_out_name")

    job_def = build_assets_job("assets_job", [my_asset])

    with instance_for_test() as instance:
        execution = job_def.execute_in_process(instance=instance)

        planned_filter = EventRecordsFilter(
            DagsterEventType.ASSET_MATERIALIZATION_PLANNED, AssetKey("my_asset_name")
        )
        planned_records = instance.get_event_records(planned_filter)
        assert execution.run_id == planned_records[0].event_log_entry.run_id

        run_id = execution.run_id
        for key in (AssetKey("my_asset_name"), AssetKey("my_other_asset")):
            assert instance.run_ids_for_asset_key(key) == [run_id]
Пример #4
0
def test_multi_out():
    """Check a two-output op: inferred ``outs``, output definitions, and direct invocation."""

    @op(out={"a": Out(metadata={"x": 1}), "b": Out(metadata={"y": 2})})
    def my_op() -> Tuple[int, str]:
        return 1, "q"

    assert len(my_op.output_defs) == 2

    # The Dagster types are expected to come from the Tuple[int, str] annotation.
    expected_outs = {
        "a": Out(
            metadata={"x": 1},
            dagster_type=Int,
            is_required=True,
            io_manager_key="io_manager",
        ),
        "b": Out(
            metadata={"y": 2},
            dagster_type=String,
            is_required=True,
            io_manager_key="io_manager",
        ),
    }
    assert my_op.outs == expected_outs

    first_def = my_op.output_defs[0]
    assert first_def.metadata == {"x": 1}
    assert first_def.name == "a"

    second_def = my_op.output_defs[1]
    assert second_def.metadata == {"y": 2}
    assert second_def.name == "b"

    assert my_op() == (1, "q")
Пример #5
0
def test_op_multiout_base():
    """A two-output op returns its tuple when called directly and splits it per output in a graph."""

    @op(out={"a": Out(), "b": Out()})
    def basic_multiout() -> Tuple[int, str]:
        return (5, "foo")

    # Direct invocation yields the raw tuple.
    assert basic_multiout() == (5, "foo")

    # Executed in a graph, each tuple element is routed to its named output.
    graph_result = execute_op_in_graph(basic_multiout)
    assert graph_result.output_for_node("basic_multiout", "a") == 5
    assert graph_result.output_for_node("basic_multiout", "b") == "foo"
Пример #6
0
def test_multiout_dagster_type():
    """A multi-output op with explicit ``dagster_type`` on each output can be invoked directly."""
    typed_outs = {
        "a": Out(dagster_type=even_type),
        "b": Out(dagster_type=even_type),
    }

    @op(out=typed_outs)
    def basic_multi() -> Tuple[int, int]:
        return 6, 6

    assert basic_multi() == (6, 6)
Пример #7
0
def test_multi_asset_infer_from_empty_asset_key():
    """Without explicit asset keys, a multi-asset's keys are taken from its output names."""

    @multi_asset(outs={"my_out_name": Out(), "my_other_out_name": Out()})
    def my_asset():
        yield Output(1, "my_out_name")
        yield Output(2, "my_other_out_name")

    inferred_keys = {AssetKey("my_out_name"), AssetKey("my_other_out_name")}
    assert my_asset.asset_keys == inferred_keys
Пример #8
0
def test_op_multiout_incorrect_annotation():
    """Defining a multi-output op with a non-tuple return annotation must be rejected."""
    expected_message = (
        "Expected Tuple annotation for multiple outputs, but received non-tuple annotation."
    )

    # The error is raised at decoration time, so defining the op is the whole test.
    with pytest.raises(DagsterInvariantViolationError, match=expected_message):

        @op(out={"a": Out(), "b": Out()})
        def _incorrect_annotation_op() -> int:
            pass
Пример #9
0
def test_op_multiout_size_mismatch():
    """A Tuple annotation whose arity differs from the number of outputs must be rejected."""
    expected_message = (
        "Expected Tuple annotation to have number of entries matching the number of outputs "
        "for more than one output. Expected 2 outputs but annotation has 3."
    )

    # Two outputs are declared while the annotation lists three entries; the
    # error is raised at decoration time.
    with pytest.raises(DagsterInvariantViolationError, match=expected_message):

        @op(out={"a": Out(), "b": Out()})
        def _basic_multiout_wrong_annotation() -> Tuple[int, int, int]:
            pass
Пример #10
0
def test_log_event_multi_output():
    """Events logged before, between and after yielded outputs are all recorded for the op."""

    @op(out={"out1": Out(), "out2": Out()})
    def the_op(context):
        # One materialization before the first output, one between the two
        # outputs, and one after the last output.
        context.log_event(AssetMaterialization("foo"))
        yield Output(value=1, output_name="out1")
        context.log_event(AssetMaterialization("bar"))
        yield Output(value=2, output_name="out2")
        context.log_event(AssetMaterialization("baz"))

    run = execute_op_in_graph(the_op)
    assert run.success

    materializations = run.asset_materializations_for_node("the_op")
    assert len(materializations) == 3
Пример #11
0
def test_multi_out():
    """Check output definitions (order, names, metadata) and direct invocation of a two-output op."""

    @op(out={"a": Out(metadata={"x": 1}), "b": Out(metadata={"y": 2})})
    def my_op() -> Tuple[int, str]:
        return 1, "q"

    output_defs = my_op.output_defs
    assert len(output_defs) == 2

    first_def = my_op.output_defs[0]
    assert first_def.metadata == {"x": 1}
    assert first_def.name == "a"

    second_def = my_op.output_defs[1]
    assert second_def.metadata == {"y": 2}
    assert second_def.name == "b"

    assert my_op() == (1, "q")
Пример #12
0
def test_multi_out_optional():
    """An op may skip an output declared with ``is_required=False``."""

    @op(
        out={
            "a": Out(metadata={"x": 1}, is_required=False),
            "b": Out(metadata={"y": 2}),
        }
    )
    def my_op():
        # Only "b" is emitted; the optional "a" is never yielded.
        yield Output(output_name="b", value=2)

    graph_result = execute_op_in_graph(my_op)
    assert graph_result.output_for_node("my_op", "b") == 2

    # Direct invocation yields Output objects; collect their values.
    yielded_values = [emitted.value for emitted in my_op()]
    assert yielded_values == [2]
Пример #13
0
def test_add_output_metadata():
    """Metadata attached per output name during direct invocation is readable from the context."""

    @op(out={"out1": Out(), "out2": Out()})
    def the_op(context):
        context.add_output_metadata({"foo": "bar"}, output_name="out1")
        yield Output(value=1, output_name="out1")
        context.add_output_metadata({"bar": "baz"}, output_name="out2")
        yield Output(value=2, output_name="out2")

    op_context = build_op_context()

    # Exhaust the generator so both add_output_metadata calls have run.
    emitted = list(the_op(op_context))
    assert len(emitted) == 2

    assert op_context.get_output_metadata("out1") == {"foo": "bar"}
    assert op_context.get_output_metadata("out2") == {"bar": "baz"}
Пример #14
0
def test_output_values():
    """Outputs of a two-output op inside a graph can be retrieved by node and output name."""

    @op(out={"a": Out(), "b": Out()})
    def two_outs():
        return 1, 2

    @graph
    def a():
        two_outs()

    execution = a.execute_in_process()
    assert execution.success

    expected_by_output = {"a": 1, "b": 2}
    for output_name, expected_value in expected_by_output.items():
        assert execution.output_for_node("two_outs", output_name) == expected_value
Пример #15
0
def test_multi_asset_out_name_diff_from_asset_key():
    """Explicit ``asset_key`` values on outputs take the place of the output names."""
    outs_with_explicit_keys = {
        "my_out_name": Out(asset_key=AssetKey("my_asset_name")),
        "my_other_out_name": Out(asset_key=AssetKey("my_other_asset")),
    }

    @multi_asset(outs=outs_with_explicit_keys)
    def my_asset():
        yield Output(1, "my_out_name")
        yield Output(2, "my_other_out_name")

    expected_keys = {AssetKey("my_asset_name"), AssetKey("my_other_asset")}
    assert my_asset.asset_keys == expected_keys
Пример #16
0
def test_multi_out_yields():
    """A two-output op that yields ``Output`` objects works both in a graph and when called directly."""

    @op(out={"a": Out(metadata={"x": 1}), "b": Out(metadata={"y": 2})})
    def my_op():
        yield Output(output_name="a", value=1)
        yield Output(output_name="b", value=2)

    first_def = my_op.output_defs[0]
    assert first_def.metadata == {"x": 1}
    assert first_def.name == "a"

    second_def = my_op.output_defs[1]
    assert second_def.metadata == {"y": 2}
    assert second_def.name == "b"

    graph_result = execute_op_in_graph(my_op)
    assert graph_result.output_for_node("my_op", "a") == 1
    assert graph_result.output_for_node("my_op", "b") == 2

    # Direct invocation yields Output objects in the order they were emitted.
    yielded_values = [emitted.value for emitted in my_op()]
    assert yielded_values == [1, 2]
Пример #17
0
def test_out():
    """A single ``Out`` is exposed under the name ``"result"`` with its metadata and inferred type."""

    @op(out=Out(metadata={"x": 1}))
    def my_op() -> int:
        return 1

    expected_outs = {
        "result": Out(
            metadata={"x": 1},
            dagster_type=Int,
            is_required=True,
            io_manager_key="io_manager",
        ),
    }
    assert my_op.outs == expected_outs

    only_def = my_op.output_defs[0]
    assert only_def.metadata == {"x": 1}
    assert only_def.name == "result"

    assert my_op() == 1
Пример #18
0
def define_multiple_output_job():
    """Build and return a job whose single op emits two ``Int`` outputs.

    The job is wired with ``s3_pickle_io_manager`` as its ``io_manager``
    resource and ``s3_test_resource`` as its ``s3`` resource.

    Returns:
        The ``output_prefix_execution_plan`` job definition.
    """
    int_outs = {
        "foo": Out(Int),
        "foobar": Out(Int),
    }

    @op(out=int_outs)
    def return_two_outputs():
        # Outputs are yielded in the opposite order from their declaration.
        yield Output(10, "foobar")
        yield Output(5, "foo")

    s3_resources = {"io_manager": s3_pickle_io_manager, "s3": s3_test_resource}

    @job(resource_defs=s3_resources)
    def output_prefix_execution_plan():
        return_two_outputs()

    return output_prefix_execution_plan
Пример #19
0
def test_multi_asset_with_compute_kind():
    """``compute_kind`` on a multi-asset shows up as the ``kind`` tag of the underlying op."""

    @multi_asset(
        outs={"o1": Out(asset_key=AssetKey("o1"))},
        compute_kind="sql",
    )
    def my_asset(arg1):
        return arg1

    op_tags = my_asset.op.tags
    assert op_tags == {"kind": "sql"}
Пример #20
0
def test_out_dagster_type():
    """An explicit ``dagster_type`` on ``Out`` is kept on the output definition."""

    @op(out=Out(dagster_type=even_type))
    def basic() -> int:
        return 6

    only_def = basic.output_defs[0]
    assert only_def.dagster_type == even_type

    assert basic() == 6
Пример #21
0
    def build_for_materialization(materialization):
        """Run a one-op graph through an in-memory IO manager that yields ``materialization``.

        Args:
            materialization: The object yielded last from ``handle_output``.

        Returns:
            The result of ``the_graph.execute_in_process`` with a fresh
            ``DummyIOManager`` as the ``io_manager`` resource.
        """
        class DummyIOManager(IOManager):
            # In-memory IO manager: stores values in a dict keyed by output identifier.
            def __init__(self):
                self.values = {}

            def handle_output(self, context, obj):
                # Store the object under a hashable (tuple) form of the output identifier.
                keys = tuple(context.get_output_identifier())
                self.values[keys] = obj

                # Interleave context-attached metadata with yielded events:
                # metadata is added both before and after the MetadataEntry is
                # yielded, and the caller-supplied materialization comes last.
                context.add_output_metadata({"foo": "bar"})
                yield MetadataEntry("baz", value="baz")
                context.add_output_metadata({"bar": "bar"})
                yield materialization

            def load_input(self, context):
                # Look the value up by the identifier of the upstream output.
                keys = tuple(context.upstream_output.get_output_identifier())
                return self.values[keys]

        # The op's single output is associated with the asset key "key_on_out".
        @op(out=Out(asset_key=AssetKey("key_on_out")))
        def the_op():
            return 5

        @graph
        def the_graph():
            the_op()

        return the_graph.execute_in_process(
            resources={"io_manager": DummyIOManager()})
Пример #22
0
def test_dataframe_csv_missing_input_collision():
    """Input config for an input that is already wired to an upstream output is rejected.

    ``df_as_input.df`` is fed by ``df_as_output`` in the graph, so supplying
    ``inputs`` config for it must raise ``DagsterInvalidConfigError`` before any
    op body runs.
    """
    called = {}

    @op(out=Out(DataFrame))
    def df_as_output(_context):
        return pd.DataFrame()

    @op(ins={"df": In(DataFrame)})
    def df_as_input(_context, df):  # pylint: disable=W0613
        called["yup"] = True

    @graph
    def overlapping():
        return df_as_input(df_as_output())

    with pytest.raises(DagsterInvalidConfigError) as exc_info:
        csv_path = file_relative_path(__file__, "num.csv")
        colliding_config = {
            "ops": {
                "df_as_input": {
                    "inputs": {"df": {"csv": {"path": csv_path}}},
                },
            },
        }
        overlapping.execute_in_process(run_config=colliding_config)

    expected_fragment = (
        'Error 1: Received unexpected config entry "inputs" at path root:ops:df_as_input.'
    )
    assert expected_fragment in str(exc_info.value)

    # The downstream op body must never have executed.
    assert "yup" not in called
Пример #23
0
def test_basic_multi_asset():
    """A ten-output multi-asset produces one external asset node per output.

    Each node must have no dependencies in either direction, carry the
    output's name and description, and have no op description.
    """
    output_count = 10

    @multi_asset(
        outs={
            f"out{i}": Out(description=f"foo: {i}", asset_key=AssetKey(f"asset{i}"))
            for i in range(output_count)
        }
    )
    def assets():
        pass

    assets_job = build_assets_job("assets_job", [assets])

    actual_nodes = external_asset_graph_from_defs([assets_job], source_assets_by_key={})

    expected_nodes = []
    for i in range(output_count):
        expected_nodes.append(
            ExternalAssetNode(
                asset_key=AssetKey(f"asset{i}"),
                dependencies=[],
                depended_by=[],
                op_name="assets",
                op_description=None,
                job_names=["assets_job"],
                output_name=f"out{i}",
                output_description=f"foo: {i}",
            )
        )

    assert actual_nodes == expected_nodes
Пример #24
0
def nonce_op(name, n_inputs, n_outputs):
    """Create an op with the given number of (meaningless) inputs and outputs.

    When executed, the op loops 200 times, sleeping 0.02 seconds per iteration
    and logging one message per iteration at a level chosen from the sequence
    number. It then yields the string ``"foo"`` for every output.

    Args:
        name: Name given to the op; also interpolated into every log message.
        n_inputs: Number of inputs to declare, named ``input_0`` .. ``input_{n-1}``.
        n_outputs: Number of outputs to declare, named ``output_0`` .. ``output_{n-1}``.

    Returns:
        The op definition produced by the ``@op`` decorator.
    """

    @op(
        name=name,
        ins={f"input_{i}": In() for i in range(n_inputs)},
        out={f"output_{i}": Out() for i in range(n_outputs)},
    )
    def op_fn(context, **_kwargs):
        for i in range(200):
            time.sleep(0.02)
            # NOTE(review): with only 200 iterations ``i % 1000 == 420`` can never
            # be true, so the error branch is unreachable. Kept as-is to preserve
            # the existing log output -- confirm whether it was meant to fire.
            if i % 1000 == 420:
                context.log.error(f"Error message seq={i} from op {name}")
            elif i % 100 == 0:
                context.log.warning(f"Warning message seq={i} from op {name}")
            elif i % 10 == 0:
                context.log.info(f"Info message seq={i} from op {name}")
            else:
                context.log.debug(f"Debug message seq={i} from op {name}")

        for i in range(n_outputs):
            yield Output(value="foo", output_name=f"output_{i}")

    return op_fn
Пример #25
0
def test_asset_key():
    """Asset keys declared on ``In``/``Out`` are visible on the IO manager contexts."""
    in_asset_key = AssetKey(["a", "b"])
    out_asset_key = AssetKey(["c", "d"])

    @op(out=Out(asset_key=out_asset_key))
    def before():
        pass

    @op(ins={"a": In(asset_key=in_asset_key)}, out={})
    def after(a):
        assert a

    class MyIOManager(IOManager):
        """IO manager that asserts on the asset keys it is handed."""

        def handle_output(self, context, obj):
            assert context.asset_key == out_asset_key

        def load_input(self, context):
            # The input context carries the input's key, while its upstream
            # output context carries the producing output's key.
            assert context.asset_key == in_asset_key
            assert context.upstream_output.asset_key == out_asset_key
            return 1

    @graph
    def my_graph():
        after(before())

    checking_io_manager = IOManagerDefinition.hardcoded_io_manager(MyIOManager())
    job_def = my_graph.to_job(resource_defs={"io_manager": checking_io_manager})

    execution = job_def.execute_in_process()
    assert execution.success
Пример #26
0
def test_hello_world():
    """Load a DataFrame input from CSV via run config and add a ``sum`` column."""

    @op(ins={"num_csv": In(DataFrame)}, out=Out(DataFrame))
    def hello_world_op(num_csv):
        num_csv["sum"] = num_csv["num1"] + num_csv["num2"]
        return num_csv

    @graph
    def hello_world():
        hello_world_op()

    # The op's input is unconnected in the graph, so it is supplied through config.
    csv_path = file_relative_path(__file__, "num.csv")
    input_config = {"num_csv": {"csv": {"path": csv_path}}}
    run_config = {"ops": {"hello_world_op": {"inputs": input_config}}}

    execution = hello_world.execute_in_process(run_config=run_config)
    assert execution.success

    produced_frame = execution.output_for_node("hello_world_op")
    assert produced_frame.to_dict("list") == {
        "num1": [1, 3],
        "num2": [2, 4],
        "sum": [3, 7],
    }
Пример #27
0
def test_date_column():
    """An op typed as ``DataFrame`` can return a frame whose column label is a ``datetime.date``."""

    @op(out=Out(DataFrame))
    def dataframe_constant():
        return pd.DataFrame([{datetime.date(2019, 1, 1): 0}])

    frame = dataframe_constant()
    assert isinstance(frame, pd.DataFrame)
Пример #28
0
def test_dataframe_pickle_materialization():
    """Materialize a DataFrame output to a pickle file via run config and read it back.

    The pickle is written to the path handed out by ``get_temp_file_name()``.
    The path is no longer overridden with a hard-coded ``/tmp/num.pickle``,
    which was non-portable, bypassed the temp-file helper, and left a stray
    file behind.
    """

    @op(out=Out(DataFrame))
    def return_df(_context):
        return pd.DataFrame({"num1": [1, 3], "num2": [2, 4]})

    @graph
    def return_df_graph():
        return_df()

    with get_temp_file_name() as filename:
        result = return_df_graph.execute_in_process(
            run_config={
                "ops": {
                    "return_df": {
                        "outputs": [{"result": {"pickle": {"path": filename}}}],
                    }
                }
            },
        )

        assert result.success

        # Read back while the temp file is still inside the context manager.
        df = pd.read_pickle(filename)
        assert df.to_dict("list") == {"num1": [1, 3], "num2": [2, 4]}
Пример #29
0
def test_basic_pipeline_with_pandas_dataframe_dagster_type():
    """A custom pandas DataFrame type attaches ``event_metadata_fn`` output to type checks.

    Every ``STEP_OUTPUT`` event must carry exactly one type-check metadata
    entry, and one of the entries must be labelled ``max_pid``.
    """

    def compute_event_metadata(dataframe):
        return {"max_pid": str(max(dataframe["pid"]))}

    BasicDF = create_dagster_pandas_dataframe_type(
        name="BasicDF",
        columns=[
            PandasColumn.integer_column("pid", non_nullable=True),
            PandasColumn.string_column("names"),
        ],
        event_metadata_fn=compute_event_metadata,
    )

    @op(out={"basic_dataframe": Out(dagster_type=BasicDF)})
    def create_dataframe(_):
        yield Output(
            DataFrame({"pid": [1, 2, 3], "names": ["foo", "bar", "baz"]}),
            output_name="basic_dataframe",
        )

    @graph
    def basic_graph():
        return create_dataframe()

    result = basic_graph.execute_in_process()
    assert result.success

    for event in result.all_node_events:
        # Only step-output events carry the type-check data inspected here.
        if event.event_type_value != "STEP_OUTPUT":
            continue

        metadata_entries = event.event_specific_data.type_check_data.metadata_entries
        assert len(metadata_entries) == 1
        # A generator expression avoids building a throwaway list for any().
        assert any(entry.label == "max_pid" for entry in metadata_entries)
Пример #30
0
def test_op_typing_annotations():
    """Ops annotated with ``typing`` generics (``Dict``, nested ``Tuple``) run directly and in a graph."""

    @op
    def my_dict_op() -> Dict[str, int]:
        return {"foo": 5}

    assert my_dict_op() == {"foo": 5}

    expected_dict = {"foo": 5}
    expected_tuple = ("foo",)

    @op(out={"a": Out(), "b": Out()})
    def my_dict_multiout() -> Tuple[Dict[str, int], Tuple[str]]:
        return {"foo": 5}, ("foo", )

    # Direct invocation returns both values as one tuple.
    assert my_dict_multiout() == (expected_dict, expected_tuple)

    # Executed in a graph, each element is routed to its own named output.
    graph_result = execute_op_in_graph(my_dict_multiout)
    assert graph_result.output_for_node("my_dict_multiout", "a") == expected_dict
    assert graph_result.output_for_node("my_dict_multiout", "b") == expected_tuple