Пример #1
0
def test_source_asset_conflicts_with_asset():
    """A source asset and an ``@asset`` that share the key ``bar`` must be rejected.

    Asserts that ``external_asset_graph_from_defs`` raises
    ``DagsterInvariantViolationError`` when given both.
    """

    @asset
    def bar():
        pass

    jobs = [build_assets_job("job1", [bar])]
    source_assets = {AssetKey("bar"): SourceAsset(key=AssetKey("bar"), description="def")}

    with pytest.raises(DagsterInvariantViolationError):
        external_asset_graph_from_defs(jobs, source_assets_by_key=source_assets)
Пример #2
0
def test_foreign_asset_conflicts_with_asset():
    """A foreign asset and an ``@asset`` that share the key ``bar`` must be rejected.

    Asserts that ``external_asset_graph_from_defs`` raises
    ``DagsterInvariantViolationError`` when given both.
    """

    @asset
    def bar():
        pass

    @job
    def job1():
        bar()  # pylint: disable=no-value-for-parameter

    foreign_assets = {AssetKey("bar"): ForeignAsset(key=AssetKey("bar"), description="def")}

    with pytest.raises(DagsterInvariantViolationError):
        external_asset_graph_from_defs([job1], foreign_assets_by_key=foreign_assets)
Пример #3
0
def test_input_name_matches_output_name():
    """Check the graph when an asset's input is named ``result`` but maps to key ``not_result``.

    The expected ``something`` node carries ``output_name="result"`` and depends on
    ``not_result`` through the input called ``result``.
    """

    @asset(ins={"result": AssetIn(asset_key=AssetKey("not_result"))})
    def something(result):  # pylint: disable=unused-argument
        pass

    upstream_source = SourceAsset(key=AssetKey("not_result"), description=None)
    job_def = build_assets_job("assets_job", [something], source_assets=[upstream_source])
    actual_nodes = external_asset_graph_from_defs([job_def], source_assets_by_key={})

    expected_source_node = ExternalAssetNode(
        asset_key=AssetKey("not_result"),
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=AssetKey("something"), input_name="result")
        ],
        job_names=[],
    )
    expected_asset_node = ExternalAssetNode(
        asset_key=AssetKey("something"),
        dependencies=[
            ExternalAssetDependency(upstream_asset_key=AssetKey("not_result"), input_name="result")
        ],
        depended_by=[],
        op_name="something",
        output_name="result",
        job_names=["assets_job"],
    )

    assert actual_nodes == [expected_source_node, expected_asset_node]
Пример #4
0
def test_source_asset_with_op():
    """Check the graph for asset ``bar`` whose input ``foo`` is a source asset.

    ``foo`` is passed to the job through ``source_assets``; the expected ``foo`` node
    has no job names and lists ``bar`` as depending on it.
    """

    @asset
    def bar(foo):  # pylint: disable=unused-argument
        pass

    foo_source = SourceAsset(key=AssetKey("foo"), description=None)
    job_def = build_assets_job("assets_job", [bar], source_assets=[foo_source])
    actual_nodes = external_asset_graph_from_defs([job_def], source_assets_by_key={})

    expected_foo = ExternalAssetNode(
        asset_key=AssetKey("foo"),
        op_description=None,
        dependencies=[],
        depended_by=[ExternalAssetDependedBy(AssetKey("bar"), input_name="foo")],
        job_names=[],
    )
    expected_bar = ExternalAssetNode(
        asset_key=AssetKey("bar"),
        op_name="bar",
        op_description=None,
        dependencies=[ExternalAssetDependency(AssetKey("foo"), input_name="foo")],
        depended_by=[],
        job_names=["assets_job"],
        output_name="result",
    )

    assert actual_nodes == [expected_foo, expected_bar]
Пример #5
0
def test_same_asset_in_multiple_pipelines():
    """One asset invoked from two pipelines is expected to yield a single node.

    The expected node lists both pipeline names, ``graph1`` and ``graph2``, in
    ``job_names``.
    """

    @asset
    def asset1():
        return 1

    @pipeline
    def graph1():
        asset1()

    @pipeline
    def graph2():
        asset1()

    pipelines = [graph1, graph2]
    actual_nodes = external_asset_graph_from_defs(pipelines, foreign_assets_by_key={})

    expected_node = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[],
        op_name="asset1",
        op_description=None,
        job_names=["graph1", "graph2"],
    )

    assert actual_nodes == [expected_node]
Пример #6
0
def test_basic_multi_asset():
    """Check the graph for a ``@multi_asset`` with ten outputs ``out0`` .. ``out9``.

    Each output ``out{i}`` is keyed ``asset{i}`` and described as ``foo: {i}``; the
    expected graph has one node per output, in index order.
    """
    out_defs = {
        f"out{i}": Out(description=f"foo: {i}", asset_key=AssetKey(f"asset{i}"))
        for i in range(10)
    }

    @multi_asset(outs=out_defs)
    def assets():
        pass

    def _expected_node(index):
        # Expected node for the output with the given index.
        return ExternalAssetNode(
            asset_key=AssetKey(f"asset{index}"),
            dependencies=[],
            depended_by=[],
            op_name="assets",
            op_description=None,
            job_names=["assets_job"],
            output_name=f"out{index}",
            output_description=f"foo: {index}",
        )

    job_def = build_assets_job("assets_job", [assets])
    actual_nodes = external_asset_graph_from_defs([job_def], source_assets_by_key={})

    assert actual_nodes == [_expected_node(index) for index in range(10)]
Пример #7
0
def test_unused_foreign_asset():
    """Foreign assets passed with no jobs are expected to appear as standalone nodes.

    Each expected node carries the foreign asset's description as ``op_description``
    and has no dependencies, dependents, or job names.
    """
    foreign_assets = {
        AssetKey("foo"): ForeignAsset(key=AssetKey("foo"), description="abc"),
        AssetKey("bar"): ForeignAsset(key=AssetKey("bar"), description="def"),
    }

    actual_nodes = external_asset_graph_from_defs([], foreign_assets_by_key=foreign_assets)

    expected_foo = ExternalAssetNode(
        asset_key=AssetKey("foo"),
        op_description="abc",
        dependencies=[],
        depended_by=[],
        job_names=[],
    )
    expected_bar = ExternalAssetNode(
        asset_key=AssetKey("bar"),
        op_description="def",
        dependencies=[],
        depended_by=[],
        job_names=[],
    )

    assert actual_nodes == [expected_foo, expected_bar]
Пример #8
0
def test_used_source_asset():
    """Check the graph when source asset ``bar`` is consumed by asset ``foo``.

    The same source asset object is given both to the job and to
    ``source_assets_by_key``; the expected ``bar`` node carries its description
    and lists ``foo`` as a dependent.
    """

    @asset
    def foo(bar):
        assert bar

    bar_source = SourceAsset(key=AssetKey("bar"), description="def")
    job_def = build_assets_job("job1", [foo], source_assets=[bar_source])
    source_assets = {AssetKey("bar"): bar_source}

    actual_nodes = external_asset_graph_from_defs([job_def], source_assets_by_key=source_assets)

    expected_bar = ExternalAssetNode(
        asset_key=AssetKey("bar"),
        op_description="def",
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=AssetKey(["foo"]), input_name="bar")
        ],
        job_names=[],
    )
    expected_foo = ExternalAssetNode(
        asset_key=AssetKey("foo"),
        op_name="foo",
        op_description=None,
        dependencies=[
            ExternalAssetDependency(upstream_asset_key=AssetKey(["bar"]), input_name="bar")
        ],
        depended_by=[],
        job_names=["job1"],
        output_name="result",
        output_description=None,
    )

    assert actual_nodes == [expected_bar, expected_foo]
Пример #9
0
def test_two_downstream_assets_job():
    """Check the graph for one asset with two downstream assets in a single job.

    ``asset2_a`` and ``asset2_b`` both take ``asset1`` as input; the expected
    ``asset1`` node lists both as dependents.
    """

    @asset
    def asset1():
        return 1

    @asset
    def asset2_a(asset1):
        assert asset1 == 1

    @asset
    def asset2_b(asset1):
        assert asset1 == 1

    job_def = build_assets_job("assets_job", [asset1, asset2_a, asset2_b])
    actual_nodes = external_asset_graph_from_defs([job_def], source_assets_by_key={})

    expected_root = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=AssetKey("asset2_a"), input_name="asset1"),
            ExternalAssetDependedBy(downstream_asset_key=AssetKey("asset2_b"), input_name="asset1"),
        ],
        op_name="asset1",
        op_description=None,
        job_names=["assets_job"],
        output_name="result",
        output_description=None,
    )
    expected_branch_a = ExternalAssetNode(
        asset_key=AssetKey("asset2_a"),
        dependencies=[
            ExternalAssetDependency(upstream_asset_key=AssetKey("asset1"), input_name="asset1")
        ],
        depended_by=[],
        op_name="asset2_a",
        op_description=None,
        job_names=["assets_job"],
        output_name="result",
        output_description=None,
    )
    expected_branch_b = ExternalAssetNode(
        asset_key=AssetKey("asset2_b"),
        dependencies=[
            ExternalAssetDependency(upstream_asset_key=AssetKey("asset1"), input_name="asset1")
        ],
        depended_by=[],
        op_name="asset2_b",
        op_description=None,
        job_names=["assets_job"],
        output_name="result",
        output_description=None,
    )

    assert actual_nodes == [expected_root, expected_branch_a, expected_branch_b]
Пример #10
0
def test_cross_pipeline_asset_dependency():
    """Check the graph when an asset and its upstream asset live in different pipelines.

    ``asset1`` runs in ``asset1_graph`` and ``asset2`` (which takes ``asset1`` as
    input) runs in ``asset2_graph``; the expected nodes are linked to each other
    while each lists only its own pipeline in ``job_names``.
    """

    @asset
    def asset1():
        return 1

    @asset
    def asset2(asset1):
        assert asset1 == 1

    @pipeline
    def asset1_graph():
        asset1()

    @pipeline
    def asset2_graph():
        asset2()  # pylint: disable=no-value-for-parameter

    pipelines = [asset1_graph, asset2_graph]
    actual_nodes = external_asset_graph_from_defs(pipelines, foreign_assets_by_key={})

    expected_upstream = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=AssetKey("asset2"), input_name="asset1")
        ],
        op_name="asset1",
        op_description=None,
        job_names=["asset1_graph"],
    )
    expected_downstream = ExternalAssetNode(
        asset_key=AssetKey("asset2"),
        dependencies=[
            ExternalAssetDependency(upstream_asset_key=AssetKey("asset1"), input_name="asset1")
        ],
        depended_by=[],
        op_name="asset2",
        op_description=None,
        job_names=["asset2_graph"],
    )

    assert actual_nodes == [expected_upstream, expected_downstream]
Пример #11
0
def test_cross_job_asset_dependency():
    """Check the graph when an asset and its upstream asset are built by different jobs.

    ``asset1`` is built by ``assets_job1``; ``assets_job2`` builds ``asset2`` and
    receives ``asset1`` through ``source_assets``. The expected nodes are linked
    to each other while each lists only its own job in ``job_names``.
    """

    @asset
    def asset1():
        return 1

    @asset
    def asset2(asset1):
        assert asset1 == 1

    upstream_job = build_assets_job("assets_job1", [asset1])
    downstream_job = build_assets_job("assets_job2", [asset2], source_assets=[asset1])
    actual_nodes = external_asset_graph_from_defs(
        [upstream_job, downstream_job], source_assets_by_key={}
    )

    expected_upstream = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=AssetKey("asset2"), input_name="asset1")
        ],
        op_name="asset1",
        op_description=None,
        job_names=["assets_job1"],
        output_name="result",
        output_description=None,
    )
    expected_downstream = ExternalAssetNode(
        asset_key=AssetKey("asset2"),
        dependencies=[
            ExternalAssetDependency(upstream_asset_key=AssetKey("asset1"), input_name="asset1")
        ],
        depended_by=[],
        op_name="asset2",
        op_description=None,
        job_names=["assets_job2"],
        output_name="result",
        output_description=None,
    )

    assert actual_nodes == [expected_upstream, expected_downstream]
Пример #12
0
def test_single_asset_job():
    """Check the graph for a job built from a single asset with no inputs.

    The expected graph is one node with no dependencies or dependents and
    ``output_name="result"``.
    """

    @asset
    def asset1():
        return 1

    job_def = build_assets_job("assets_job", [asset1])
    actual_nodes = external_asset_graph_from_defs([job_def], source_assets_by_key={})

    expected_node = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[],
        op_name="asset1",
        op_description=None,
        job_names=["assets_job"],
        output_name="result",
        output_description=None,
    )

    assert actual_nodes == [expected_node]
Пример #13
0
def test_two_asset_pipeline():
    """Check the graph for a pipeline that wires ``asset1`` into ``asset2``.

    Both expected nodes list ``my_graph`` in ``job_names`` and are linked through
    the input named ``asset1``.
    """

    @asset
    def asset1():
        return 1

    @asset
    def asset2(asset1):
        assert asset1 == 1

    @pipeline
    def my_graph():
        asset2(asset1())

    actual_nodes = external_asset_graph_from_defs([my_graph], foreign_assets_by_key={})

    expected_upstream = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=AssetKey("asset2"), input_name="asset1")
        ],
        op_name="asset1",
        op_description=None,
        job_names=["my_graph"],
    )
    expected_downstream = ExternalAssetNode(
        asset_key=AssetKey("asset2"),
        dependencies=[
            ExternalAssetDependency(upstream_asset_key=AssetKey("asset1"), input_name="asset1")
        ],
        depended_by=[],
        op_name="asset2",
        op_description=None,
        job_names=["my_graph"],
    )

    assert actual_nodes == [expected_upstream, expected_downstream]
Пример #14
0
def test_single_asset_pipeline():
    """Check the graph for a pipeline that invokes a single asset with no inputs.

    The expected graph is one node with no dependencies or dependents, listing
    ``my_graph`` in ``job_names``.
    """

    @asset
    def asset1():
        return 1

    @pipeline
    def my_graph():
        asset1()

    actual_nodes = external_asset_graph_from_defs([my_graph], foreign_assets_by_key={})

    expected_node = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[],
        op_name="asset1",
        op_description=None,
        job_names=["my_graph"],
    )

    assert actual_nodes == [expected_node]
Пример #15
0
def test_same_asset_in_multiple_pipelines():
    """One asset built by two jobs is expected to yield a single node.

    The expected node lists both job names, ``job1`` and ``job2``, in ``job_names``.
    """

    @asset
    def asset1():
        return 1

    jobs = [build_assets_job(job_name, [asset1]) for job_name in ("job1", "job2")]
    actual_nodes = external_asset_graph_from_defs(jobs, source_assets_by_key={})

    expected_node = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[],
        op_name="asset1",
        op_description=None,
        job_names=["job1", "job2"],
        output_name="result",
        output_description=None,
    )

    assert actual_nodes == [expected_node]
Пример #16
0
def test_used_foreign_asset():
    """Check the graph when foreign asset ``bar`` is consumed by asset ``foo``.

    The expected ``bar`` node carries the foreign asset's description and lists
    ``foo`` as a dependent; the expected ``foo`` node lists ``job1`` in ``job_names``.
    """

    @asset
    def foo(bar):
        assert bar

    @job
    def job1():
        foo()  # pylint: disable=no-value-for-parameter

    foreign_assets = {AssetKey("bar"): ForeignAsset(key=AssetKey("bar"), description="def")}
    actual_nodes = external_asset_graph_from_defs([job1], foreign_assets_by_key=foreign_assets)

    expected_bar = ExternalAssetNode(
        asset_key=AssetKey("bar"),
        op_description="def",
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=AssetKey(["foo"]), input_name="bar")
        ],
        job_names=[],
    )
    expected_foo = ExternalAssetNode(
        asset_key=AssetKey("foo"),
        op_name="foo",
        op_description=None,
        dependencies=[
            ExternalAssetDependency(upstream_asset_key=AssetKey(["bar"]), input_name="bar")
        ],
        depended_by=[],
        job_names=["job1"],
    )

    assert actual_nodes == [expected_bar, expected_foo]
Пример #17
0
def test_inter_op_dependency():
    """Check the graph for a ``@multi_asset`` whose outputs depend on one another.

    The multi-asset ``assets`` takes ``in1`` and ``in2`` and declares
    ``internal_asset_deps`` for its ``mixed`` and ``only_out`` outputs; a further
    asset ``downstream`` consumes all three outputs. Before comparing, the
    returned nodes and each node's edge lists are sorted by asset key.
    """

    @asset
    def in1():
        pass

    @asset
    def in2():
        pass

    @asset
    def downstream(only_in, mixed, only_out):  # pylint: disable=unused-argument
        pass

    @multi_asset(
        outs={"only_in": Out(), "mixed": Out(), "only_out": Out()},
        internal_asset_deps={
            "mixed": {AssetKey("in1"), AssetKey("only_in")},
            "only_out": {AssetKey("only_in"), AssetKey("mixed")},
        },
    )
    def assets(in1, in2):  # pylint: disable=unused-argument
        pass

    def _with_sorted_edges(node):
        # Copy of the node with both edge lists ordered by the key at the other end.
        return node._replace(
            dependencies=sorted(node.dependencies, key=lambda edge: edge.upstream_asset_key),
            depended_by=sorted(node.depended_by, key=lambda edge: edge.downstream_asset_key),
        )

    def _upstream(name, **edge_kwargs):
        # Expected dependency edge pointing at the upstream asset `name`.
        return ExternalAssetDependency(upstream_asset_key=AssetKey([name]), **edge_kwargs)

    def _dependent(name, **edge_kwargs):
        # Expected depended-by edge pointing at the downstream asset `name`.
        return ExternalAssetDependedBy(downstream_asset_key=AssetKey([name]), **edge_kwargs)

    def _asset_deps_metadata():
        # Metadata entry expected on the outputs that declare internal asset deps.
        return [
            MetadataEntry(
                label=".dagster/asset_deps",
                description=None,
                entry_data=MetadataValue.text("[set] (unserializable)"),
            )
        ]

    def _expected_node(name, op, output, deps, dependents, metadata):
        # Expected node; every node in this test belongs to "assets_job".
        return ExternalAssetNode(
            asset_key=AssetKey([name]),
            dependencies=deps,
            depended_by=dependents,
            op_name=op,
            op_description=None,
            job_names=["assets_job"],
            output_name=output,
            metadata_entries=metadata,
        )

    job_def = build_assets_job("assets_job", [in1, in2, assets, downstream])
    raw_nodes = external_asset_graph_from_defs([job_def], source_assets_by_key={})

    # sort so that test is deterministic
    actual_nodes = sorted(
        [_with_sorted_edges(node) for node in raw_nodes],
        key=lambda node: node.asset_key,
    )

    expected_nodes = [
        _expected_node(
            "downstream",
            op="downstream",
            output="result",
            deps=[
                _upstream("mixed", input_name="mixed"),
                _upstream("only_in", input_name="only_in"),
                _upstream("only_out", input_name="only_out"),
            ],
            dependents=[],
            metadata=[],
        ),
        _expected_node(
            "in1",
            op="in1",
            output="result",
            deps=[],
            dependents=[
                _dependent("mixed", input_name="in1"),
                _dependent("only_in", input_name="in1"),
            ],
            metadata=[],
        ),
        _expected_node(
            "in2",
            op="in2",
            output="result",
            deps=[],
            dependents=[_dependent("only_in", input_name="in2")],
            metadata=[],
        ),
        _expected_node(
            "mixed",
            op="assets",
            output="mixed",
            deps=[
                _upstream("in1", input_name="in1"),
                _upstream("only_in", output_name="only_in"),
            ],
            dependents=[
                _dependent("downstream", input_name="mixed"),
                _dependent("only_out", output_name="mixed"),
            ],
            metadata=_asset_deps_metadata(),
        ),
        _expected_node(
            "only_in",
            op="assets",
            output="only_in",
            deps=[
                _upstream("in1", input_name="in1"),
                _upstream("in2", input_name="in2"),
            ],
            dependents=[
                _dependent("downstream", input_name="only_in"),
                _dependent("mixed", output_name="only_in"),
                _dependent("only_out", output_name="only_in"),
            ],
            metadata=[],
        ),
        _expected_node(
            "only_out",
            op="assets",
            output="only_out",
            deps=[
                _upstream("mixed", output_name="mixed"),
                _upstream("only_in", output_name="only_in"),
            ],
            dependents=[_dependent("downstream", input_name="only_out")],
            metadata=_asset_deps_metadata(),
        ),
    ]

    assert actual_nodes == expected_nodes