def test_asset_group_source_asset():
    """Run the base job of a group whose only asset reads from a SourceAsset.

    The source asset is loaded through a custom IO manager registered under
    the ``the_manager`` resource key.
    """

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            pass

        def load_input(self, context):
            return 5

    @io_manager
    def the_manager():
        return MyIOManager()

    @asset
    def asset_depends_on_source(foo):
        return foo

    upstream_source = SourceAsset(key=AssetKey("foo"), io_manager_key="the_manager")
    group = AssetGroup(
        assets=[asset_depends_on_source],
        source_assets=[upstream_source],
        resource_defs={"the_manager": the_manager},
    )

    @repository
    def the_repo():
        return [group]

    # The repository exposes the group's base job first.
    base_job = the_repo.get_all_jobs()[0]
    assert AssetGroup.is_base_job_name(base_job.name)

    assert base_job.execute_in_process().success
def test_materialize_with_selection():
    """Materialize only ``follows_o2`` and everything upstream of it."""

    @asset
    def start_asset():
        return "foo"

    @multi_asset(outs={out_name: Out(asset_key=AssetKey(out_name)) for out_name in ("o1", "o2")})
    def middle_asset(start_asset):
        return (start_asset, start_asset)

    @asset
    def follows_o1(o1):
        return o1

    @asset
    def follows_o2(o2):
        return o2

    _unused, asset_io_manager = asset_aware_io_manager()
    group = AssetGroup(
        [start_asset, middle_asset, follows_o1, follows_o2],
        resource_defs={"io_manager": asset_io_manager},
    )

    outcome = group.materialize(selection="*follows_o2")
    assert outcome.success

    # Each selected node is expected to have passed "foo" along.
    for node_args in (("middle_asset", "o1"), ("follows_o2",), ("start_asset",)):
        assert outcome.output_for_node(*node_args) == "foo"
def test_asset_group_from_modules(monkeypatch):
    """Check ``AssetGroup.from_modules`` is repeatable and rejects duplicate keys."""
    from . import asset_package
    from .asset_package import module_with_assets

    def _load_names():
        # Build a fresh group and return (op names, source asset keys).
        loaded = AssetGroup.from_modules([asset_package, module_with_assets])
        op_names = [asset_def.op.name for asset_def in loaded.assets]
        source_keys = [source.key for source in loaded.source_assets]
        return op_names, source_keys

    first_ops, first_sources = _load_names()
    second_ops, second_sources = _load_names()

    assert first_ops == second_ops
    assert first_sources == second_sources

    expected_error = re.escape(
        "Asset key AssetKey(['little_richard']) is defined multiple times. "
        "Definitions found in modules: dagster_tests.core_tests.asset_defs_tests.asset_package."
    )

    with monkeypatch.context() as patcher:

        @asset
        def little_richard():
            pass

        # Register a second definition of the same asset key on the package.
        patcher.setattr(asset_package, "little_richard_dup", little_richard, raising=False)

        with pytest.raises(DagsterInvalidDefinitionError, match=expected_error):
            AssetGroup.from_modules([asset_package, module_with_assets])
def test_asset_group_from_list():
    """Build a group from an explicit asset list and run its single base job."""

    @asset
    def asset_foo():
        return "foo"

    @asset
    def asset_bar():
        return "bar"

    # The input named "asset_bar" is remapped onto the "asset_foo" asset key.
    @asset(ins={"asset_bar": AssetIn(asset_key=AssetKey("asset_foo"))})
    def last_asset(asset_bar):
        return asset_bar

    group = AssetGroup(assets=[asset_foo, asset_bar, last_asset])

    @repository
    def the_repo():
        return [group]

    assert len(the_repo.get_all_jobs()) == 1

    base_job = the_repo.get_all_jobs()[0]
    assert AssetGroup.is_base_job_name(base_job.name)

    assert base_job.execute_in_process().success
def test_materialize():
    """Materializing a one-asset group succeeds."""

    @asset
    def asset_foo():
        return "foo"

    single_asset_group = AssetGroup(assets=[asset_foo])
    assert single_asset_group.materialize().success
def test_materialize_with_out_of_process_executor():
    """``materialize`` raises when the group is configured with the multiprocess executor."""

    @asset
    def asset_foo():
        return "foo"

    expected_message = (
        "'materialize' can only be invoked on AssetGroups which have no executor or have "
        "the in_process_executor, but the AssetGroup had executor 'multiprocess'"
    )
    group = AssetGroup(assets=[asset_foo], executor_def=multiprocess_executor)

    with pytest.raises(DagsterUnmetExecutorRequirementsError, match=expected_message):
        group.materialize()
def test_asset_group_from_current_module():
    """``from_current_module`` yields exactly one asset and one source asset."""
    group = AssetGroup.from_current_module()

    op_names = {asset_def.op.name for asset_def in group.assets}
    assert op_names == {"asset_in_current_module"}
    assert len(group.assets) == 1

    source_keys = {source.key for source in group.source_assets}
    assert source_keys == {AssetKey("source_asset_in_current_module")}
    assert len(group.source_assets) == 1
def test_asset_group_from_package_module():
    """Loading the package twice gives six assets each time, in the same order."""
    from . import asset_package

    def _load_names():
        # Load the package, check the asset count, return (op names, source keys).
        loaded = AssetGroup.from_package_module(asset_package)
        assert len(loaded.assets) == 6
        op_names = [asset_def.op.name for asset_def in loaded.assets]
        source_keys = [source.key for source in loaded.source_assets]
        return op_names, source_keys

    first_ops, first_sources = _load_names()
    second_ops, second_sources = _load_names()

    assert first_ops == second_ops
    assert first_sources == second_sources
def test_multiple_partitions_defs():
    """A group mixing partitions definitions produces one base job per definition.

    The unpartitioned asset is expected to appear in every one of those jobs.
    """

    @asset(partitions_def=DailyPartitionsDefinition(start_date="2021-05-05"))
    def daily_asset():
        ...

    @asset(partitions_def=DailyPartitionsDefinition(start_date="2021-05-05"))
    def daily_asset2():
        ...

    @asset(partitions_def=DailyPartitionsDefinition(start_date="2020-05-05"))
    def daily_asset_different_start_date():
        ...

    @asset(partitions_def=HourlyPartitionsDefinition(start_date="2021-05-05-00:00"))
    def hourly_asset():
        ...

    @asset
    def unpartitioned_asset():
        ...

    all_assets = [
        daily_asset,
        daily_asset2,
        daily_asset_different_start_date,
        hourly_asset,
        unpartitioned_asset,
    ]
    base_jobs = AssetGroup(all_assets).get_base_jobs()

    assert len(base_jobs) == 3

    job_names = {base_job.name for base_job in base_jobs}
    assert job_names == {
        "__ASSET_GROUP_0",
        "__ASSET_GROUP_1",
        "__ASSET_GROUP_2",
    }

    nodes_per_job = {
        frozenset(node_def.name for node_def in base_job.all_node_defs) for base_job in base_jobs
    }
    assert nodes_per_job == {
        frozenset(["daily_asset", "daily_asset2", "unpartitioned_asset"]),
        frozenset(["hourly_asset", "unpartitioned_asset"]),
        frozenset(["daily_asset_different_start_date", "unpartitioned_asset"]),
    }
def test_asset_group_missing_resources():
    """Constructing a group fails when a required resource or IO manager key is absent."""

    @asset(required_resource_keys={"foo"})
    def asset_foo(context):
        return context.resources.foo

    missing_resource_pattern = r"AssetGroup is missing required resource keys for asset 'asset_foo'. Missing resource keys: \['foo'\]"
    with pytest.raises(DagsterInvalidDefinitionError, match=missing_resource_pattern):
        AssetGroup([asset_foo])

    source_asset_io_req = SourceAsset(key=AssetKey("foo"), io_manager_key="foo")

    missing_io_manager_pattern = r"SourceAsset with key AssetKey\(\['foo'\]\) requires io manager with key 'foo', which was not provided on AssetGroup. Provided keys: \['io_manager', 'root_manager'\]"
    with pytest.raises(DagsterInvalidDefinitionError, match=missing_io_manager_pattern):
        AssetGroup([], source_assets=[source_asset_io_req])
def test_default_io_manager():
    """A group built without resources gets ``fs_asset_io_manager`` as its io_manager."""

    @asset
    def asset_foo():
        return "foo"

    configured_io_manager = AssetGroup(assets=[asset_foo]).resource_defs["io_manager"]
    assert configured_io_manager == fs_asset_io_manager  # pylint: disable=comparison-with-callable
def test_asset_group_with_resources():
    """An asset can read a resource supplied through the group's ``resource_defs``."""

    @resource
    def the_resource():
        return "foo"

    @asset(required_resource_keys={"foo"})
    def asset_foo(context):
        return context.resources.foo

    group = AssetGroup([asset_foo], resource_defs={"foo": the_resource})

    @repository
    def the_repo():
        return [group]

    base_job = the_repo.get_all_jobs()[0]
    assert AssetGroup.is_base_job_name(base_job.name)

    run_result = base_job.execute_in_process()
    assert run_result.success
    # The asset returns whatever the "foo" resource produced.
    assert run_result.output_for_node("asset_foo") == "foo"
def test_asset_group_requires_root_manager():
    """Construction fails when an asset names an IO manager key the group lacks."""

    @asset(io_manager_key="blah")
    def asset_foo():
        pass

    expected_pattern = (
        r"Output 'result' with AssetKey 'AssetKey\(\['asset_foo'\]\)' "
        r"requires io manager 'blah' but was not provided on asset group. "
        r"Provided resources: \['io_manager', 'root_manager'\]"
    )

    with pytest.raises(DagsterInvalidDefinitionError, match=expected_pattern):
        AssetGroup([asset_foo])
def _load_target_from_module(module: ModuleType, fn_name: str, error_suffix: str) -> object:
    """Resolve ``fn_name`` to a loadable object taken from ``module``.

    Args:
        module: The already-imported module to look in.
        fn_name: Attribute name to fetch, or the ``LOAD_ALL_ASSETS`` sentinel.
        error_suffix: Text appended to the error message when the attribute
            is missing.

    Returns:
        An ``AssetGroup`` built from the module when ``fn_name`` is
        ``LOAD_ALL_ASSETS``; otherwise the attribute named ``fn_name``.

    Raises:
        DagsterInvariantViolationError: If ``module`` has no attribute
            ``fn_name``.
    """
    from dagster.core.asset_defs import AssetGroup
    from dagster.core.workspace.autodiscovery import LOAD_ALL_ASSETS

    # LOAD_ALL_ASSETS is a special symbol meaning "load every asset in the
    # module" rather than one particular attribute.
    if fn_name == LOAD_ALL_ASSETS:
        return AssetGroup.from_modules([module])

    if hasattr(module, fn_name):
        return getattr(module, fn_name)

    raise DagsterInvariantViolationError(f"{fn_name} not found {error_suffix}")
def test_asset_group_from_package_module():
    """The package yields four named assets and one source asset."""
    from . import asset_package

    loaded = AssetGroup.from_package_module(asset_package)
    assert len(loaded.assets) == 4

    op_names = {asset_def.op.name for asset_def in loaded.assets}
    assert op_names == {
        "little_richard",
        "miles_davis",
        "chuck_berry",
        "bb_king",
    }

    source_keys = {source.key for source in loaded.source_assets}
    assert source_keys == {AssetKey("elvis_presley")}
def test_resource_override():
    """``root_manager`` cannot be overridden, while ``io_manager`` can."""

    @resource
    def the_resource():
        pass

    reserved_key_message = (
        "Resource dictionary included resource with key 'root_manager', "
        "which is a reserved resource keyword in Dagster. Please change this "
        "key, and then change all places that require this key to a new value."
    )
    with pytest.raises(DagsterInvalidDefinitionError, match=reserved_key_message):
        AssetGroup([], resource_defs={"root_manager": the_resource})

    @repository
    def the_repo():
        return [AssetGroup([], resource_defs={"io_manager": mem_io_manager})]

    base_job = the_repo.get_all_jobs()[0]
    job_io_manager = base_job.resource_defs["io_manager"]
    assert job_io_manager == mem_io_manager  # pylint: disable=comparison-with-callable
def loadable_targets_from_loaded_module(module: ModuleType) -> Sequence[LoadableTarget]:
    """Pick the loadable target(s) defined in an already-imported module.

    Candidates are tried in this order, and the first kind that is present
    wins: repositories, pipelines/jobs, graphs, asset groups, and finally the
    loose assets found in the module.

    Args:
        module: The imported module to inspect.

    Returns:
        All repositories when any exist; otherwise the single pipeline/job,
        graph or asset group; otherwise one ``LOAD_ALL_ASSETS`` target wrapping
        an ``AssetGroup`` built from the module.

    Raises:
        DagsterInvariantViolationError: If more than one pipeline/job, graph
            or asset group is found (with no higher-priority kind present),
            or if nothing loadable is found at all.
    """
    repo_targets = _loadable_targets_of_type(module, RepositoryDefinition)
    if repo_targets:
        return repo_targets

    pipeline_targets = _loadable_targets_of_type(module, PipelineDefinition)
    job_targets = _loadable_targets_of_type(module, JobDefinition)

    if len(pipeline_targets) == 1:
        return pipeline_targets
    if len(pipeline_targets) > 1:
        # The job count only decides which word the error message uses.
        kind = "job" if len(job_targets) > 1 else "pipeline"
        pipeline_names = repr([target.attribute for target in pipeline_targets])
        message_template = (
            'No repository and more than one {target_type} found in "{module_name}". If you load '
            "a file or module directly it must have only one {target_type} "
            "in scope. Found {target_type}s defined in variables or decorated "
            "functions: {pipeline_symbols}."
        )
        raise DagsterInvariantViolationError(
            message_template.format(
                module_name=module.__name__,
                pipeline_symbols=pipeline_names,
                target_type=kind,
            )
        )

    graph_targets = _loadable_targets_of_type(module, GraphDefinition)

    if len(graph_targets) == 1:
        return graph_targets
    if len(graph_targets) > 1:
        graph_names = repr([target.attribute for target in graph_targets])
        message_template = (
            'More than one graph found in "{module_name}". '
            "If you load a file or module directly and it has no repositories, jobs, or "
            "pipelines in scope, it must have no more than one graph in scope. "
            "Found graphs defined in variables or decorated functions: {graph_symbols}."
        )
        raise DagsterInvariantViolationError(
            message_template.format(
                module_name=module.__name__,
                graph_symbols=graph_names,
            )
        )

    asset_group_targets = _loadable_targets_of_type(module, AssetGroup)

    if len(asset_group_targets) == 1:
        return asset_group_targets
    if len(asset_group_targets) > 1:
        var_names = repr([target.attribute for target in asset_group_targets])
        raise DagsterInvariantViolationError(
            (
                f'More than one asset group found in "{module.__name__}". '
                "If you load a file or module directly and it has no repositories, jobs, "
                "pipeline, or graphs in scope, it must have no more than one asset group in scope. "
                f"Found asset groups defined in variables: {var_names}."
            )
        )

    # Last resort: gather any assets / source assets defined in the module.
    module_asset_group = AssetGroup.from_modules([module])

    if len(module_asset_group.assets) == 0 and len(module_asset_group.source_assets) == 0:
        raise DagsterInvariantViolationError(
            "No repositories, jobs, pipelines, graphs, asset groups, or asset definitions found in "
            f'"{module.__name__}".'
        )

    return [LoadableTarget(LOAD_ALL_ASSETS, module_asset_group)]
def test_asset_group_build_subset_job():
    """Exercise ``AssetGroup.build_job`` with a range of asset selections.

    Covers the full selection, a single asset, upstream/downstream star
    selections, ``+`` selections, overlapping selections, and three invalid
    selections that must raise ``DagsterInvalidDefinitionError``.
    """

    @asset
    def start_asset():
        return "foo"

    @multi_asset(outs={out_name: Out(asset_key=AssetKey(out_name)) for out_name in ("o1", "o2")})
    def middle_asset(start_asset):
        return (start_asset, start_asset)

    @asset
    def follows_o1(o1):
        return o1

    @asset
    def follows_o2(o2):
        return o2

    def _node_names(job_def):
        # Names of every node definition contained in the job.
        return {node.name for node in job_def.all_node_defs}

    _unused, asset_io_manager = asset_aware_io_manager()
    group = AssetGroup(
        [start_asset, middle_asset, follows_o1, follows_o2],
        resource_defs={"io_manager": asset_io_manager},
    )

    # Everything selected.
    everything_job = group.build_job("full", selection="*")
    outcome = everything_job.execute_in_process()
    assert outcome.success
    assert outcome.output_for_node("follows_o1") == "foo"
    assert outcome.output_for_node("follows_o2") == "foo"

    # A single asset on its own.
    single_job = group.build_job(name="test_single", selection="follows_o2")
    assert len(single_job.all_node_defs) == 1
    assert single_job.all_node_defs[0].name == "follows_o2"
    outcome = single_job.execute_in_process()
    assert outcome.success
    assert outcome.output_for_node("follows_o2") == "foo"

    # An asset plus all of its ancestors.
    upstream_job = group.build_job(name="test_up_star", selection="*follows_o2")
    assert len(upstream_job.all_node_defs) == 3
    assert _node_names(upstream_job) == {
        "follows_o2",
        "middle_asset",
        "start_asset",
    }
    outcome = upstream_job.execute_in_process()
    assert outcome.success
    assert outcome.output_for_node("middle_asset", "o1") == "foo"
    assert outcome.output_for_node("follows_o2") == "foo"
    assert outcome.output_for_node("start_asset") == "foo"

    # An asset plus all of its descendants.
    downstream_job = group.build_job(name="test_down_star", selection="start_asset*")
    assert len(downstream_job.all_node_defs) == 4
    assert _node_names(downstream_job) == {
        "follows_o2",
        "middle_asset",
        "start_asset",
        "follows_o1",
    }
    outcome = downstream_job.execute_in_process()
    assert outcome.success
    assert outcome.output_for_node("follows_o2") == "foo"

    # One layer up and down from o1, together with o2.
    plus_job = group.build_job(name="test_both_plus", selection=["+o1+", "o2"])
    assert len(plus_job.all_node_defs) == 4
    assert _node_names(plus_job) == {
        "follows_o1",
        "follows_o2",
        "middle_asset",
        "start_asset",
    }
    outcome = plus_job.execute_in_process()
    assert outcome.success
    assert outcome.output_for_node("follows_o2") == "foo"

    # Two clauses that both resolve to the same multi-asset.
    overlap_job = group.build_job(
        name="test_multi_asset_multi_selection", selection=["o1", "o2+"]
    )
    assert len(overlap_job.all_node_defs) == 3
    assert _node_names(overlap_job) == {
        "follows_o1",
        "follows_o2",
        "middle_asset",
    }
    outcome = overlap_job.execute_in_process()
    assert outcome.success
    assert outcome.output_for_node("follows_o2") == "foo"

    unknown_key_pattern = (
        r"When attempting to create job 'bad_subset', the clause "
        r"'doesnt_exist' within the asset key selection did not match any asset "
        r"keys. Present asset keys: \['start_asset', 'o1', 'o2', 'follows_o1', 'follows_o2'\]"
    )
    with pytest.raises(DagsterInvalidDefinitionError, match=unknown_key_pattern):
        group.build_job(name="bad_subset", selection="doesnt_exist")

    bad_syntax_pattern = (
        r"When attempting to create job 'bad_query_arguments', the clause "
        r"follows_o1= within the asset key selection was invalid. Please review "
        r"the selection syntax here: "
        r"https://docs.dagster.io/concepts/ops-jobs-graphs/job-execution#op-selection-syntax."
    )
    with pytest.raises(DagsterInvalidDefinitionError, match=bad_syntax_pattern):
        group.build_job(name="bad_query_arguments", selection="follows_o1=")

    partial_multi_asset_pattern = (
        r"When building job 'test_subselect_only_one_key', the asset "
        r"'middle_asset' contains asset keys \['o1', 'o2'\], but attempted to "
        r"select only \['o1'\]. Selecting only some of the asset keys for a "
        r"particular asset is not yet supported behavior. Please select all "
        r"asset keys produced by a given asset when subsetting."
    )
    with pytest.raises(DagsterInvalidDefinitionError, match=partial_multi_asset_pattern):
        group.build_job(name="test_subselect_only_one_key", selection="o1")
def the_repo():
    # One asset-less group whose "io_manager" resource is mem_io_manager.
    empty_group = AssetGroup([], resource_defs={"io_manager": mem_io_manager})
    return [empty_group]
def the_repo():
    # One group holding the_asset, explicitly using in_process_executor.
    in_process_group = AssetGroup([the_asset], executor_def=in_process_executor)
    return [in_process_group]
def the_repo():  # pylint: disable=unused-variable
    # Two separately constructed, empty asset groups returned in one list.
    return [AssetGroup(assets=[]) for _ in range(2)]