def test_input_name_matches_output_name():
    """Graph for an asset whose input is named ``result`` but maps to ``not_result``.

    The expected derived node reports ``output_name="result"`` while its only
    dependency is keyed by ``not_result`` with ``input_name="result"``.
    """
    upstream_source = SourceAsset(key=AssetKey("not_result"), description=None)

    @asset(ins={"result": AssetIn(asset_key=AssetKey("not_result"))})
    def something(result):  # pylint: disable=unused-argument
        pass

    job_def = build_assets_job("assets_job", [something], source_assets=[upstream_source])
    actual_nodes = external_asset_graph_from_defs([job_def], source_assets_by_key={})

    # Source side: no op, no jobs, one downstream consumer.
    expected_source_node = ExternalAssetNode(
        asset_key=AssetKey("not_result"),
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(
                downstream_asset_key=AssetKey("something"),
                input_name="result",
            )
        ],
        job_names=[],
    )
    # Derived side: produced by op ``something`` inside ``assets_job``.
    expected_derived_node = ExternalAssetNode(
        asset_key=AssetKey("something"),
        dependencies=[
            ExternalAssetDependency(
                upstream_asset_key=AssetKey("not_result"),
                input_name="result",
            )
        ],
        depended_by=[],
        op_name="something",
        output_name="result",
        job_names=["assets_job"],
    )

    assert actual_nodes == [expected_source_node, expected_derived_node]
def test_unused_foreign_asset():
    """Foreign assets that no job consumes still produce one edge-less node each."""
    # Asset name -> description; insertion order is the expected node order.
    descriptions = {"foo": "abc", "bar": "def"}

    foreign_assets = {}
    for name, text in descriptions.items():
        foreign_assets[AssetKey(name)] = ForeignAsset(key=AssetKey(name), description=text)

    actual_nodes = external_asset_graph_from_defs([], foreign_assets_by_key=foreign_assets)

    expected_nodes = [
        ExternalAssetNode(
            asset_key=AssetKey(name),
            op_description=text,
            dependencies=[],
            depended_by=[],
            job_names=[],
        )
        for name, text in descriptions.items()
    ]
    assert actual_nodes == expected_nodes
def test_source_asset_with_op():
    """A source asset passed to the job (but not to ``source_assets_by_key``) still yields a node."""
    foo_source = SourceAsset(key=AssetKey("foo"), description=None)

    @asset
    def bar(foo):  # pylint: disable=unused-argument
        pass

    job_def = build_assets_job("assets_job", [bar], source_assets=[foo_source])
    actual_nodes = external_asset_graph_from_defs([job_def], source_assets_by_key={})

    expected_foo = ExternalAssetNode(
        asset_key=AssetKey("foo"),
        op_description=None,
        dependencies=[],
        depended_by=[ExternalAssetDependedBy(AssetKey("bar"), input_name="foo")],
        job_names=[],
    )
    expected_bar = ExternalAssetNode(
        asset_key=AssetKey("bar"),
        op_name="bar",
        op_description=None,
        dependencies=[ExternalAssetDependency(AssetKey("foo"), input_name="foo")],
        depended_by=[],
        job_names=["assets_job"],
        output_name="result",
    )

    assert actual_nodes == [expected_foo, expected_bar]
def test_used_source_asset():
    """A described source asset consumed by one asset job keeps its description on its node."""
    bar_source = SourceAsset(key=AssetKey("bar"), description="def")

    @asset
    def foo(bar):
        assert bar

    job1 = build_assets_job("job1", [foo], source_assets=[bar_source])
    source_lookup = {AssetKey("bar"): bar_source}
    actual_nodes = external_asset_graph_from_defs([job1], source_assets_by_key=source_lookup)

    consumer_edge = ExternalAssetDependedBy(
        downstream_asset_key=AssetKey(["foo"]),
        input_name="bar",
    )
    producer_edge = ExternalAssetDependency(
        upstream_asset_key=AssetKey(["bar"]),
        input_name="bar",
    )

    expected_nodes = [
        ExternalAssetNode(
            asset_key=AssetKey("bar"),
            op_description="def",
            dependencies=[],
            depended_by=[consumer_edge],
            job_names=[],
        ),
        ExternalAssetNode(
            asset_key=AssetKey("foo"),
            op_name="foo",
            op_description=None,
            dependencies=[producer_edge],
            depended_by=[],
            job_names=["job1"],
            output_name="result",
            output_description=None,
        ),
    ]
    assert actual_nodes == expected_nodes
def test_two_downstream_assets_job():
    """One upstream asset feeding two downstream assets within a single assets job."""

    @asset
    def asset1():
        return 1

    @asset
    def asset2_a(asset1):
        assert asset1 == 1

    @asset
    def asset2_b(asset1):
        assert asset1 == 1

    job_def = build_assets_job("assets_job", [asset1, asset2_a, asset2_b])
    actual_nodes = external_asset_graph_from_defs([job_def], source_assets_by_key={})

    downstream_names = ("asset2_a", "asset2_b")

    # The upstream node lists both consumers, in definition order.
    expected_upstream = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(
                downstream_asset_key=AssetKey(consumer),
                input_name="asset1",
            )
            for consumer in downstream_names
        ],
        op_name="asset1",
        op_description=None,
        job_names=["assets_job"],
        output_name="result",
        output_description=None,
    )
    # Both downstream nodes are identical apart from their own name.
    expected_downstream = [
        ExternalAssetNode(
            asset_key=AssetKey(consumer),
            dependencies=[
                ExternalAssetDependency(
                    upstream_asset_key=AssetKey("asset1"),
                    input_name="asset1",
                )
            ],
            depended_by=[],
            op_name=consumer,
            op_description=None,
            job_names=["assets_job"],
            output_name="result",
            output_description=None,
        )
        for consumer in downstream_names
    ]

    assert actual_nodes == [expected_upstream] + expected_downstream
def test_same_asset_in_multiple_pipelines():
    """An asset invoked from two pipelines yields one node listing both pipeline names."""
    # NOTE(review): a second function with this exact name is defined later in
    # this file; at import time the later definition replaces this one, so this
    # variant is presumably never collected by the test runner -- confirm.

    @asset
    def asset1():
        return 1

    @pipeline
    def graph1():
        asset1()

    @pipeline
    def graph2():
        asset1()

    pipelines = [graph1, graph2]
    actual_nodes = external_asset_graph_from_defs(pipelines, foreign_assets_by_key={})

    expected_node = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[],
        op_name="asset1",
        op_description=None,
        job_names=["graph1", "graph2"],
    )
    assert actual_nodes == [expected_node]
def test_basic_multi_asset():
    """A ten-output ``multi_asset`` produces ten independent nodes, one per output."""
    output_defs = {}
    for i in range(10):
        output_defs[f"out{i}"] = Out(description=f"foo: {i}", asset_key=AssetKey(f"asset{i}"))

    @multi_asset(outs=output_defs)
    def assets():
        pass

    job_def = build_assets_job("assets_job", [assets])
    actual_nodes = external_asset_graph_from_defs([job_def], source_assets_by_key={})

    expected_nodes = []
    for i in range(10):
        expected_nodes.append(
            ExternalAssetNode(
                asset_key=AssetKey(f"asset{i}"),
                dependencies=[],
                depended_by=[],
                op_name="assets",
                op_description=None,
                job_names=["assets_job"],
                output_name=f"out{i}",
                output_description=f"foo: {i}",
            )
        )
    assert actual_nodes == expected_nodes
def test_cross_pipeline_asset_dependency():
    """Upstream and downstream assets living in different pipelines are still linked."""

    @asset
    def asset1():
        return 1

    @asset
    def asset2(asset1):
        assert asset1 == 1

    @pipeline
    def asset1_graph():
        asset1()

    @pipeline
    def asset2_graph():
        asset2()  # pylint: disable=no-value-for-parameter

    actual_nodes = external_asset_graph_from_defs(
        [asset1_graph, asset2_graph],
        foreign_assets_by_key={},
    )

    expected_upstream = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(
                downstream_asset_key=AssetKey("asset2"),
                input_name="asset1",
            )
        ],
        op_name="asset1",
        op_description=None,
        job_names=["asset1_graph"],
    )
    expected_downstream = ExternalAssetNode(
        asset_key=AssetKey("asset2"),
        dependencies=[
            ExternalAssetDependency(
                upstream_asset_key=AssetKey("asset1"),
                input_name="asset1",
            )
        ],
        depended_by=[],
        op_name="asset2",
        op_description=None,
        job_names=["asset2_graph"],
    )

    assert actual_nodes == [expected_upstream, expected_downstream]
def test_cross_job_asset_dependency():
    """An asset built in one assets job and consumed as a source in another is linked across jobs."""

    @asset
    def asset1():
        return 1

    @asset
    def asset2(asset1):
        assert asset1 == 1

    producer_job = build_assets_job("assets_job1", [asset1])
    # ``asset1`` is handed to the second job only as a source asset.
    consumer_job = build_assets_job("assets_job2", [asset2], source_assets=[asset1])

    actual_nodes = external_asset_graph_from_defs(
        [producer_job, consumer_job],
        source_assets_by_key={},
    )

    expected_upstream = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(
                downstream_asset_key=AssetKey("asset2"),
                input_name="asset1",
            )
        ],
        op_name="asset1",
        op_description=None,
        job_names=["assets_job1"],
        output_name="result",
        output_description=None,
    )
    expected_downstream = ExternalAssetNode(
        asset_key=AssetKey("asset2"),
        dependencies=[
            ExternalAssetDependency(
                upstream_asset_key=AssetKey("asset1"),
                input_name="asset1",
            )
        ],
        depended_by=[],
        op_name="asset2",
        op_description=None,
        job_names=["assets_job2"],
        output_name="result",
        output_description=None,
    )

    assert actual_nodes == [expected_upstream, expected_downstream]
def test_two_asset_pipeline():
    """Two chained assets inside a single pipeline produce two linked nodes."""

    @asset
    def asset1():
        return 1

    @asset
    def asset2(asset1):
        assert asset1 == 1

    @pipeline
    def my_graph():
        asset2(asset1())

    actual_nodes = external_asset_graph_from_defs([my_graph], foreign_assets_by_key={})

    forward_edge = ExternalAssetDependedBy(
        downstream_asset_key=AssetKey("asset2"),
        input_name="asset1",
    )
    backward_edge = ExternalAssetDependency(
        upstream_asset_key=AssetKey("asset1"),
        input_name="asset1",
    )

    expected_nodes = [
        ExternalAssetNode(
            asset_key=AssetKey("asset1"),
            dependencies=[],
            depended_by=[forward_edge],
            op_name="asset1",
            op_description=None,
            job_names=["my_graph"],
        ),
        ExternalAssetNode(
            asset_key=AssetKey("asset2"),
            dependencies=[backward_edge],
            depended_by=[],
            op_name="asset2",
            op_description=None,
            job_names=["my_graph"],
        ),
    ]
    assert actual_nodes == expected_nodes
def test_used_foreign_asset():
    """A foreign asset consumed by an asset in a job gets a node with a downstream edge."""
    bar_foreign = ForeignAsset(key=AssetKey("bar"), description="def")

    @asset
    def foo(bar):
        assert bar

    @job
    def job1():
        foo()  # pylint: disable=no-value-for-parameter

    foreign_lookup = {AssetKey("bar"): bar_foreign}
    actual_nodes = external_asset_graph_from_defs([job1], foreign_assets_by_key=foreign_lookup)

    expected_foreign = ExternalAssetNode(
        asset_key=AssetKey("bar"),
        op_description="def",
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=AssetKey(["foo"]), input_name="bar")
        ],
        job_names=[],
    )
    expected_derived = ExternalAssetNode(
        asset_key=AssetKey("foo"),
        op_name="foo",
        op_description=None,
        dependencies=[
            ExternalAssetDependency(upstream_asset_key=AssetKey(["bar"]), input_name="bar")
        ],
        depended_by=[],
        job_names=["job1"],
    )

    assert actual_nodes == [expected_foreign, expected_derived]
def test_single_asset_job():
    """A single-asset assets job yields exactly one node with no edges."""

    @asset
    def asset1():
        return 1

    job_def = build_assets_job("assets_job", [asset1])
    actual_nodes = external_asset_graph_from_defs([job_def], source_assets_by_key={})

    only_node = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[],
        op_name="asset1",
        op_description=None,
        job_names=["assets_job"],
        output_name="result",
        output_description=None,
    )
    assert actual_nodes == [only_node]
def test_single_asset_pipeline():
    """A pipeline invoking a single asset yields exactly one node with no edges."""

    @asset
    def asset1():
        return 1

    @pipeline
    def my_graph():
        asset1()

    actual_nodes = external_asset_graph_from_defs([my_graph], foreign_assets_by_key={})

    only_node = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[],
        op_name="asset1",
        op_description=None,
        job_names=["my_graph"],
    )
    assert actual_nodes == [only_node]
def test_same_asset_in_multiple_pipelines():
    """An asset included in two assets jobs yields one node listing both job names."""
    # NOTE(review): an earlier function in this file carries this exact name;
    # this later definition replaces it at import time, so the earlier variant
    # is presumably never collected by the test runner -- confirm.

    @asset
    def asset1():
        return 1

    jobs = [
        build_assets_job("job1", [asset1]),
        build_assets_job("job2", [asset1]),
    ]
    actual_nodes = external_asset_graph_from_defs(jobs, source_assets_by_key={})

    shared_node = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[],
        op_name="asset1",
        op_description=None,
        job_names=["job1", "job2"],
        output_name="result",
        output_description=None,
    )
    assert actual_nodes == [shared_node]
def test_inter_op_dependency():
    """Dependencies declared between outputs of one ``multi_asset`` show up as graph edges.

    Edges that cross ops are expected to carry ``input_name``; edges declared
    via ``internal_asset_deps`` inside the multi-asset op are expected to carry
    ``output_name`` instead.
    """

    @asset
    def in1():
        pass

    @asset
    def in2():
        pass

    @asset
    def downstream(only_in, mixed, only_out):  # pylint: disable=unused-argument
        pass

    @multi_asset(
        outs={"only_in": Out(), "mixed": Out(), "only_out": Out()},
        internal_asset_deps={
            "mixed": {AssetKey("in1"), AssetKey("only_in")},
            "only_out": {AssetKey("only_in"), AssetKey("mixed")},
        },
    )
    def assets(in1, in2):  # pylint: disable=unused-argument
        pass

    job_def = build_assets_job("assets_job", [in1, in2, assets, downstream])
    actual_nodes = external_asset_graph_from_defs([job_def], source_assets_by_key={})

    def _with_sorted_edges(node):
        # Order both edge lists by the key on the far end of each edge.
        return node._replace(
            dependencies=sorted(node.dependencies, key=lambda edge: edge.upstream_asset_key),
            depended_by=sorted(node.depended_by, key=lambda edge: edge.downstream_asset_key),
        )

    # sort so that test is deterministic
    normalized = [_with_sorted_edges(node) for node in actual_nodes]
    normalized.sort(key=lambda node: node.asset_key)

    def _upstream(name, **edge_names):
        # Expected edge pointing at the asset this node reads from.
        return ExternalAssetDependency(upstream_asset_key=AssetKey([name]), **edge_names)

    def _consumer(name, **edge_names):
        # Expected edge pointing at an asset that reads from this node.
        return ExternalAssetDependedBy(downstream_asset_key=AssetKey([name]), **edge_names)

    def _asset_deps_entry():
        # Metadata entry expected on outputs listed in ``internal_asset_deps``.
        return MetadataEntry(
            label=".dagster/asset_deps",
            description=None,
            entry_data=MetadataValue.text("[set] (unserializable)"),
        )

    def _node(name, op_name, output_name, dependencies, depended_by, metadata_entries):
        # Every expected node shares the job name and a ``None`` op description.
        return ExternalAssetNode(
            asset_key=AssetKey([name]),
            dependencies=dependencies,
            depended_by=depended_by,
            op_name=op_name,
            op_description=None,
            job_names=["assets_job"],
            output_name=output_name,
            metadata_entries=metadata_entries,
        )

    expected_nodes = [
        _node(
            "downstream",
            op_name="downstream",
            output_name="result",
            dependencies=[
                _upstream("mixed", input_name="mixed"),
                _upstream("only_in", input_name="only_in"),
                _upstream("only_out", input_name="only_out"),
            ],
            depended_by=[],
            metadata_entries=[],
        ),
        _node(
            "in1",
            op_name="in1",
            output_name="result",
            dependencies=[],
            depended_by=[
                _consumer("mixed", input_name="in1"),
                _consumer("only_in", input_name="in1"),
            ],
            metadata_entries=[],
        ),
        _node(
            "in2",
            op_name="in2",
            output_name="result",
            dependencies=[],
            depended_by=[_consumer("only_in", input_name="in2")],
            metadata_entries=[],
        ),
        _node(
            "mixed",
            op_name="assets",
            output_name="mixed",
            dependencies=[
                _upstream("in1", input_name="in1"),
                _upstream("only_in", output_name="only_in"),
            ],
            depended_by=[
                _consumer("downstream", input_name="mixed"),
                _consumer("only_out", output_name="mixed"),
            ],
            metadata_entries=[_asset_deps_entry()],
        ),
        _node(
            "only_in",
            op_name="assets",
            output_name="only_in",
            dependencies=[
                _upstream("in1", input_name="in1"),
                _upstream("in2", input_name="in2"),
            ],
            depended_by=[
                _consumer("downstream", input_name="only_in"),
                _consumer("mixed", output_name="only_in"),
                _consumer("only_out", output_name="only_in"),
            ],
            metadata_entries=[],
        ),
        _node(
            "only_out",
            op_name="assets",
            output_name="only_out",
            dependencies=[
                _upstream("mixed", output_name="mixed"),
                _upstream("only_in", output_name="only_in"),
            ],
            depended_by=[_consumer("downstream", input_name="only_out")],
            metadata_entries=[_asset_deps_entry()],
        ),
    ]

    assert normalized == expected_nodes
def _build_cross_repo_deps(
    self,
) -> Tuple[
    Dict[AssetKey, ExternalAssetNode],
    Dict[Tuple[str, str], Dict[AssetKey, List[ExternalAssetDependedBy]]],
]:
    """
    This method constructs a sink asset as an ExternalAssetNode for every asset immediately
    downstream of a source asset that is defined in another repository as a derived asset.
    In Dagit, sink assets will display as ForeignAssets, which are external from the repository.

    This method also stores a mapping from source asset key to ExternalAssetDependedBy nodes
    that depend on the asset with that key. When get_cross_repo_dependent_assets is called with
    a derived asset's asset key and its location, all dependent ExternalAssetDependedBy nodes
    are returned.

    Returns:
        A 2-tuple ``(sink_assets, external_asset_deps)``:

        * ``sink_assets`` maps each downstream asset key to an ExternalAssetNode that has no
          definition data and whose ``dependencies`` list holds one entry per cross-repo
          source asset it consumes.
        * ``external_asset_deps`` is a nested dict keyed first by
          ``(repo_location.name, repo_name)`` of the repository defining the asset, then by
          asset key, holding the ExternalAssetDependedBy entries collected for that asset.
    """
    # Source asset key -> every depended-by edge reported for it in any repository.
    depended_by_assets_by_source_asset: Dict[AssetKey, List[ExternalAssetDependedBy]] = {}
    # Derived asset key -> (location_name, repo_name) of the repository that defines it.
    map_defined_asset_to_location: Dict[AssetKey, Tuple[str, str]] = {}

    for location in self._context.repository_locations:
        for repo_name, external_repo in location.get_repositories().items():
            for asset_node in external_repo.get_external_asset_nodes():
                if not asset_node.op_name:  # is source asset
                    depended_by_assets_by_source_asset.setdefault(
                        asset_node.asset_key, []
                    ).extend(asset_node.depended_by)
                else:
                    map_defined_asset_to_location[asset_node.asset_key] = (
                        location.name,
                        repo_name,
                    )

    # Downstream asset key -> {upstream source asset key -> dependency edge}. Keying by the
    # upstream asset keeps one edge per cross-repo source asset, so a downstream asset that
    # consumes several of them retains all of its dependencies instead of only the last one.
    sink_dependencies: Dict[AssetKey, Dict[AssetKey, ExternalAssetDependency]] = {}

    # nested dict that maps dependedby assets by asset key by location tuple
    # (repo_location.name, repo_name)
    external_asset_deps: Dict[
        Tuple[str, str], Dict[AssetKey, List[ExternalAssetDependedBy]]
    ] = {}

    for source_asset, depended_by_assets in depended_by_assets_by_source_asset.items():
        asset_def_location = map_defined_asset_to_location.get(source_asset)
        if asset_def_location is None:
            # Not defined as a derived asset in any loaded repository: nothing to link.
            continue

        # source asset is defined as asset in another repository
        external_asset_deps.setdefault(asset_def_location, {}).setdefault(
            source_asset, []
        ).extend(depended_by_assets)

        for asset in depended_by_assets:
            sink_dependencies.setdefault(asset.downstream_asset_key, {})[
                source_asset
            ] = ExternalAssetDependency(
                upstream_asset_key=source_asset,
                input_name=asset.input_name,
                output_name=asset.output_name,
            )

    # SourceAssets defined as ExternalAssetNodes contain no definition data (e.g.
    # no output or partition definition data) and no job_names. Dagit displays
    # all ExternalAssetNodes with no job_names as foreign assets, so sink assets
    # are defined as ExternalAssetNodes with no definition data.
    sink_assets: Dict[AssetKey, ExternalAssetNode] = {
        downstream_key: ExternalAssetNode(
            asset_key=downstream_key,
            dependencies=list(dependencies_by_upstream.values()),
            depended_by=[],
        )
        for downstream_key, dependencies_by_upstream in sink_dependencies.items()
    }

    return sink_assets, external_asset_deps