def test_resource_requirements_pass():
    called = {}

    @system_storage(required_resource_keys={'yup'})
    def storage_with_req(init_context):
        assert hasattr(init_context.resources, 'yup')
        assert not hasattr(init_context.resources, 'not_required')
        assert not hasattr(init_context.resources, 'kjdkfjdkfje')
        called['called'] = True
        return create_mem_system_storage_data(init_context)

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    'yup': ResourceDefinition.none_resource(),
                    'not_required': ResourceDefinition.none_resource(),
                },
                system_storage_defs=[storage_with_req],
            )
        ]
    )
    def resource_req_pass_pipeline():
        pass

    assert execute_pipeline(
        resource_req_pass_pipeline,
        environment_dict={'storage': {'storage_with_req': None}},
    ).success
    assert called['called']
def test_resource_requirements_pass():
    called = {}

    @system_storage(required_resource_keys={'yup'})
    def storage_with_req(init_context):
        assert hasattr(init_context.resources, 'yup')
        assert not hasattr(init_context.resources, 'not_required')
        assert not hasattr(init_context.resources, 'kjdkfjdkfje')
        called['called'] = True
        return create_mem_system_storage_data(init_context)

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    'yup': ResourceDefinition.none_resource(),
                    'not_required': ResourceDefinition.none_resource(),
                },
                system_storage_defs=[storage_with_req],
            )
        ]
    )
    def resource_req_pass_pipeline():
        pass

    assert execute_pipeline(
        resource_req_pass_pipeline,
        environment_dict={'storage': {'storage_with_req': {}}},
    ).success
    assert called['called']

    # This also works with None because storage is a selector and you can indicate your
    # "selection" by the presence of the key
    assert execute_pipeline(
        resource_req_pass_pipeline,
        environment_dict={'storage': {'storage_with_req': None}},
    ).success
def test_resource_requirements_pass():
    called = {}

    @system_storage(required_resource_keys={"yup"})
    def storage_with_req(init_context):
        assert hasattr(init_context.resources, "yup")
        assert not hasattr(init_context.resources, "not_required")
        assert not hasattr(init_context.resources, "kjdkfjdkfje")
        called["called"] = True
        return create_mem_system_storage_data(init_context)

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "yup": ResourceDefinition.none_resource(),
                    "not_required": ResourceDefinition.none_resource(),
                },
                system_storage_defs=[storage_with_req],
            )
        ]
    )
    def resource_req_pass_pipeline():
        pass

    assert execute_pipeline(
        resource_req_pass_pipeline, run_config={"storage": {"storage_with_req": {}}}
    ).success
    assert called["called"]

    # This also works with None because storage is a selector and you can indicate your
    # "selection" by the presence of the key
    assert execute_pipeline(
        resource_req_pass_pipeline, run_config={"storage": {"storage_with_req": None}}
    ).success
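The selector behavior the comment above relies on can be seen more directly on a solid's own config. A rough sketch, assuming Dagster's Selector config type and a hypothetical load_events solid (not from the tests above): whichever key appears under "config" is the selection, which is why a branch needing no further config can be selected with just its key.

from dagster import Field, Selector, execute_pipeline, pipeline, solid


@solid(
    config_schema=Selector(
        {
            "csv": {"sep": Field(str, default_value=",")},
            "parquet": {"compression": Field(str, default_value="snappy")},
        }
    )
)
def load_events(context):
    # Exactly one branch is present in solid_config; the key that is present *is* the selection.
    (selected,) = context.solid_config.keys()
    return selected


@pipeline
def selector_pipeline():
    load_events()


assert execute_pipeline(
    selector_pipeline,
    run_config={"solids": {"load_events": {"config": {"csv": {"sep": "|"}}}}},
).success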
def test_none_resource():
    called = {}

    @solid(required_resource_keys={"test_null"})
    def solid_test_null(context):
        assert context.resources.test_null is None
        called["yup"] = True

    pipeline = PipelineDefinition(
        name="test_none_resource",
        solid_defs=[solid_test_null],
        mode_defs=[
            ModeDefinition(resource_defs={"test_null": ResourceDefinition.none_resource()})
        ],
    )

    result = execute_pipeline(pipeline)
    assert result.success
    assert called["yup"]
def assert_assets_match_project(assets):
    assert len(assets) == 4
    assets_by_name = {asset.op.name: asset for asset in assets}
    assert assets_by_name.keys() == {
        "sort_hot_cereals_by_calories",
        "sort_by_calories",
        "least_caloric",
        "sort_cold_cereals_by_calories",
    }
    for name, asset in assets_by_name.items():
        assert name == asset.op.name
        assert len(asset.op.output_defs) == 1
        # each op's single output is keyed by a one-component asset key matching the op name
        assert f'["{name}"]' == asset.op.output_defs[0].hardcoded_asset_key.to_string()
        assert asset.op.tags == {"kind": "dbt"}

    job = build_assets_job(
        "jarb", assets, resource_defs={"dbt": ResourceDefinition.none_resource()}
    )
    assert job.dependencies == {
        "least_caloric": {
            "sort_by_calories": DependencyDefinition(
                solid="sort_by_calories",
                output="result",
            )
        },
        "sort_by_calories": {},
        "sort_cold_cereals_by_calories": {
            "sort_by_calories": DependencyDefinition(
                solid="sort_by_calories",
                output="result",
            )
        },
        "sort_hot_cereals_by_calories": {
            "sort_by_calories": DependencyDefinition(
                solid="sort_by_calories",
                output="result",
            )
        },
    }
def test_resource_requirements_fail():
    @system_storage(required_resource_keys={'yup'})
    def storage_with_req(init_context):
        return create_mem_system_storage_data(init_context)

    with pytest.raises(DagsterInvalidDefinitionError) as exc_info:

        @pipeline(
            mode_defs=[
                ModeDefinition(
                    resource_defs={'nope': ResourceDefinition.none_resource()},
                    system_storage_defs=[storage_with_req],
                )
            ]
        )
        def _resource_req_pass_pipeline():
            pass

    assert str(exc_info.value) == (
        'Resource "yup" is required by system storagestorage_with_req, but '
        'is not provided by mode "default".'
    )
def test_resource_requirements_fail():
    @intermediate_storage(required_resource_keys={"yup"})
    def storage_with_req(init_context):
        return create_mem_system_intermediate_store(init_context)

    with pytest.raises(
        DagsterInvalidDefinitionError,
        match=r'"yup" is required by intermediate storage "storage_with_req"',
    ):

        @pipeline(
            mode_defs=[
                ModeDefinition(
                    resource_defs={"nope": ResourceDefinition.none_resource()},
                    intermediate_storage_defs=[storage_with_req],
                )
            ]
        )
        def _resource_req_pass_pipeline():
            pass
def test_none_resource():
    called = {}

    @solid(resources={'test_null'})
    def solid_test_null(context):
        assert context.resources.test_null is None
        called['yup'] = True

    pipeline = PipelineDefinition(
        name='test_none_resource',
        solids=[solid_test_null],
        mode_definitions=[
            ModeDefinition(resources={'test_null': ResourceDefinition.none_resource()})
        ],
    )

    result = execute_pipeline(pipeline)
    assert result.success
    assert called['yup']
def test_resource_requirements_fail():
    @intermediate_storage(required_resource_keys={"yup"})
    def storage_with_req(init_context):
        return create_mem_system_intermediate_store(init_context)

    with pytest.raises(DagsterInvalidDefinitionError) as exc_info:

        @pipeline(
            mode_defs=[
                ModeDefinition(
                    resource_defs={"nope": ResourceDefinition.none_resource()},
                    intermediate_storage_defs=[storage_with_req],
                )
            ]
        )
        def _resource_req_pass_pipeline():
            pass

    assert str(exc_info.value) == (
        "Resource 'yup' is required by intermediate storage 'storage_with_req', but "
        "is not provided by mode 'default'."
    )
def test_none_resource():
    called = {}

    @solid
    def solid_test_null(info):
        assert info.context.resources.test_null is None
        called['yup'] = True

    pipeline = PipelineDefinition(
        name='test_none_resource',
        solids=[solid_test_null],
        context_definitions={
            'default': PipelineContextDefinition(
                resources={'test_null': ResourceDefinition.none_resource()}
            )
        },
    )

    result = execute_pipeline(pipeline)
    assert result.success
    assert called['yup']
def test_download():
    with tempfile.TemporaryDirectory() as temp_dir:
        test_job = AssetGroup.from_package_name(
            "hacker_news_assets.assets",
            resource_defs={
                "io_manager": fs_io_manager,
                "partition_start": ResourceDefinition.string_resource(),
                "partition_end": ResourceDefinition.string_resource(),
                "parquet_io_manager": local_partitioned_parquet_io_manager.configured(
                    {"base_path": temp_dir}
                ),
                "warehouse_io_manager": mem_io_manager,
                "pyspark": pyspark_resource,
                "hn_client": hn_snapshot_client,
                "dbt": ResourceDefinition.none_resource(),
            },
        ).build_job(
            "test_job",
            selection=["*comments", "*stories"],
        )

        result = test_job.execute_in_process(partition_key="2020-12-30-00:00")
        assert result.success
    raw_events = make_raw_events()
    clean_events(raw_events)


@schedule(job=event_tables, cron_schedule="0 0 * * *")
def event_tables_schedule(_):
    return {}


@graph
def event_reports():
    make_event_reports = make_solid("make_event_reports", required_resource_keys={"mode"})
    make_event_reports()


@sensor(job=event_reports.to_job(resource_defs={"mode": ResourceDefinition.none_resource()}))
def event_reports_sensor():
    pass


event_reports_dev = event_reports.to_job(
    resource_defs={"mode": ResourceDefinition.none_resource()}
)


@graph
def crm_ingest():
    """A graph with multiple production jobs"""
    ingest_users = make_solid("ingest_users", required_resource_keys={"crm"})
    ingest_interactions = make_solid("ingest_interactions", required_resource_keys={"crm"})
    ingest_users()
    ingest_interactions()
""" Trains a collaborative filtering model that can recommend HN stories to users based on what stories they've commented on in the past. """ comment_stories = build_comment_stories() user_story_matrix = build_user_story_matrix(comment_stories) recommender_model = build_recommender_model(user_story_matrix) model_perf_notebook(recommender_model) build_component_top_stories(recommender_model, user_story_matrix) build_user_top_recommended_stories(recommender_model, user_story_matrix) story_recommender_prod_job = story_recommender.to_job(resource_defs={ **RESOURCES_PROD, **{ "partition_bounds": ResourceDefinition.none_resource() }, }) story_recommender_staging_job = story_recommender.to_job( resource_defs={ **RESOURCES_STAGING, **{ "partition_bounds": ResourceDefinition.none_resource() }, }) story_recommender_local_job = story_recommender.to_job( resource_defs={ **RESOURCES_LOCAL, **{
@schedule(job=event_tables, cron_schedule="0 0 * * *")
def event_tables_schedule(_):
    return {}


@graph
def event_reports():
    make_event_reports = make_solid("make_event_reports", required_resource_keys={"mode"})
    make_event_reports()


@sensor(job=event_reports.to_job(resource_defs={"mode": ResourceDefinition.none_resource()}))
def event_reports_sensor():
    pass


event_reports_dev = event_reports.to_job(
    resource_defs={"mode": ResourceDefinition.none_resource()}
)


@graph
def crm_ingest():
    """A graph with multiple production jobs"""
    ingest_users = make_solid("ingest_users", required_resource_keys={"crm"})
    ingest_interactions = make_solid("ingest_interactions", required_resource_keys={"crm"})
"dbt": dbt_prod_resource, # this is an alternative pattern to the configured() api. If you know that you won't want to # further configure this resource per pipeline run, this can be a bit more convenient than # defining an @resource with a config schema. "dbt_assets": ResourceDefinition.hardcoded_resource( SnowflakeQueryDbtAssetResource( { **{ "database": "DEMO_DB" }, **SHARED_SNOWFLAKE_CONF }, "hackernews")), "partition_bounds": ResourceDefinition.none_resource(), }, }) dbt_staging_job = dbt_metrics.to_job( resource_defs={ **RESOURCES_STAGING, **{ "dbt": dbt_staging_resource, "dbt_assets": ResourceDefinition.hardcoded_resource( SnowflakeQueryDbtAssetResource( { **{ "database": "DEMO_DB_STAGING"