def scope_dbt_cli_profile_modes():
    """Doc snippet: build dev and prod jobs from one graph by swapping dbt profiles."""
    # start_marker_dbt_cli_profile_modes
    from dagster_dbt import dbt_cli_resource, dbt_run_op

    from dagster import graph

    @graph
    def my_dbt():
        dbt_run_op()

    # Same graph, two jobs: each binds a dbt resource configured for a
    # different dbt profile.
    my_dbt_graph_dev = my_dbt.to_job(
        resource_defs={
            "dbt": dbt_cli_resource.configured(
                {"project_dir": "path/to/dbt/project", "profile": "dev"}
            )
        }
    )

    my_dbt_graph_prod = my_dbt.to_job(
        resource_defs={
            "dbt": dbt_cli_resource.configured(
                {"project_dir": "path/to/dbt/project", "profile": "prod"}
            )
        }
    )
def scope_dbt_cli_profile_modes():
    """Doc snippet: one pipeline with dev and prod modes selecting different dbt profiles."""
    # start_marker_dbt_cli_profile_modes
    from dagster import pipeline, solid, ModeDefinition
    from dagster_dbt import dbt_cli_resource

    @solid(required_resource_keys={"dbt"})
    def run_all_models(context):
        context.resources.dbt.run()

    @pipeline(
        mode_defs=[
            # "dev" mode targets the dev dbt profile.
            ModeDefinition(
                "dev",
                resource_defs={
                    "dbt": dbt_cli_resource.configured(
                        {"project_dir": "path/to/dbt/project", "profile": "dev"}
                    )
                },
            ),
            # "prod" mode targets the prod dbt profile.
            ModeDefinition(
                "prod",
                resource_defs={
                    "dbt": dbt_cli_resource.configured(
                        {"project_dir": "path/to/dbt/project", "profile": "prod"}
                    )
                },
            ),
        ]
    )
    def my_dbt_pipeline():
        run_all_models()
def get_dbt_resource(project_dir, profiles_dir, **kwargs):
    """Return a ``dbt_cli_resource`` configured for the given dbt project.

    Args:
        project_dir: Path to the dbt project directory.
        profiles_dir: Path to the dbt profiles directory.
        **kwargs: Additional ``dbt_cli_resource`` config entries, merged on top
            of (and able to override) the two directory settings.

    Returns:
        The configured resource definition.
    """
    # Fix: dropped the no-op ``kwargs = kwargs or {}`` — ``**kwargs`` always
    # binds a dict (possibly empty), never None.
    return dbt_cli_resource.configured(
        {
            "project_dir": project_dir,
            "profiles_dir": profiles_dir,
            **kwargs,
        }
    )
def test_seed_op(conn_string, test_project_dir, dbt_config_dir):  # pylint: disable=unused-argument
    """Running dbt_seed_op against the test project reports exactly one seed result."""
    dbt = dbt_cli_resource.configured(
        {"project_dir": test_project_dir, "profiles_dir": dbt_config_dir}
    )
    context = build_op_context(resources={"dbt": dbt})
    seed_output = dbt_seed_op(context)
    assert len(seed_output.result["results"]) == 1
def test_select_from_project(dbt_seed, conn_string, test_project_dir, dbt_config_dir):  # pylint: disable=unused-argument
    """A dbt ``select`` expression limits the loaded assets to the chosen models."""
    dbt_assets = load_assets_from_dbt_project(
        test_project_dir,
        dbt_config_dir,
        select="sort_by_calories subdir.least_caloric",
    )
    # The generated op name encodes the project and a selection hash.
    assert dbt_assets[0].op.name == "run_dbt_dagster_dbt_test_project_e4753"

    dbt_resource = dbt_cli_resource.configured(
        {"project_dir": test_project_dir, "profiles_dir": dbt_config_dir}
    )
    result = build_assets_job(
        "test_job", dbt_assets, resource_defs={"dbt": dbt_resource}
    ).execute_in_process()
    assert result.success

    # Only the two selected models should materialize.
    materializations = []
    for event in result.events_for_node(dbt_assets[0].op.name):
        if event.event_type_value == "ASSET_MATERIALIZATION":
            materializations.append(event.event_specific_data.materialization)
    assert len(materializations) == 2
def test_node_info_to_asset_key(dbt_seed, conn_string, test_project_dir, dbt_config_dir):  # pylint: disable=unused-argument
    """A custom ``node_info_to_asset_key`` prefixes every dbt asset key with "foo"."""
    dbt_assets = load_assets_from_dbt_project(
        test_project_dir,
        dbt_config_dir,
        node_info_to_asset_key=lambda node_info: AssetKey(["foo", node_info["name"]]),
    )

    assets_job = build_assets_job(
        "test_job",
        dbt_assets,
        resource_defs={
            "dbt": dbt_cli_resource.configured(
                {"project_dir": test_project_dir, "profiles_dir": dbt_config_dir}
            )
        },
    )
    result = assets_job.execute_in_process()
    assert result.success

    materializations = []
    for event in result.events_for_node(dbt_assets[0].op.name):
        if event.event_type_value == "ASSET_MATERIALIZATION":
            materializations.append(event.event_specific_data.materialization)
    # All four models materialize, each under the remapped ["foo", <name>] key.
    assert len(materializations) == 4
    assert materializations[0].asset_key == AssetKey(["foo", "sort_by_calories"])
def test_select_from_manifest(dbt_seed, conn_string, test_project_dir, dbt_config_dir):  # pylint: disable=unused-argument
    """Loading assets from a manifest with ``selected_unique_ids`` picks exactly those models."""
    with open(file_relative_path(__file__, "sample_manifest.json"), "r") as f:
        manifest_json = json.load(f)

    selected = {
        "model.dagster_dbt_test_project.sort_by_calories",
        "model.dagster_dbt_test_project.least_caloric",
    }
    dbt_assets = load_assets_from_dbt_manifest(manifest_json, selected_unique_ids=selected)

    result = build_assets_job(
        "test_job",
        dbt_assets,
        resource_defs={
            "dbt": dbt_cli_resource.configured(
                {"project_dir": test_project_dir, "profiles_dir": dbt_config_dir}
            )
        },
    ).execute_in_process()
    assert result.success

    materializations = []
    for event in result.events_for_node(dbt_assets[0].op.name):
        if event.event_type_value == "ASSET_MATERIALIZATION":
            materializations.append(event.event_specific_data.materialization)
    # One materialization per selected model.
    assert len(materializations) == 2
def scope_dbt_cli_resource_config():
    """Doc snippet: the minimal dbt CLI resource configuration."""
    # start_marker_dbt_cli_resource_config
    from dagster_dbt import dbt_cli_resource

    my_dbt_resource = dbt_cli_resource.configured(
        {
            "project_dir": "path/to/dbt/project",
            "profiles_dir": "path/to/dbt/profiles",
        }
    )
def scope_dbt_cli_run():
    """Doc snippet: run all dbt models from a job with a preconfigured resource."""
    # start_marker_dbt_cli_run_preconfig
    from dagster import job
    from dagster_dbt import dbt_cli_resource, dbt_run_op

    my_dbt_resource = dbt_cli_resource.configured({"project_dir": "path/to/dbt/project"})

    @job(resource_defs={"dbt": my_dbt_resource})
    def my_dbt_job():
        dbt_run_op()
def scope_dbt_cli_run_after_another_op():
    """Doc snippet: sequence dbt test after dbt run via an explicit dependency."""
    # start_marker_dbt_cli_run_after_another_op
    from dagster_dbt import dbt_cli_resource, dbt_run_op, dbt_test_op

    from dagster import job

    my_dbt_resource = dbt_cli_resource.configured({"project_dir": "path/to/dbt/project"})

    @job(resource_defs={"dbt": my_dbt_resource})
    def my_dbt_job():
        # start_after wires dbt_test_op to run only once dbt_run_op finishes.
        dbt_test_op(start_after=dbt_run_op())
def scope_dbt_cli_config_vars():
    """Doc snippet: pass dbt ``vars`` through the CLI resource config."""
    # start_marker_dbt_cli_config_vars
    from dagster import job
    from dagster_dbt import dbt_cli_resource

    config = {"vars": {"key": "value"}}

    @job(resource_defs={"dbt": dbt_cli_resource.configured(config)})
    def my_job():
        # ...
        # end_marker_dbt_cli_config_vars
        pass
def scope_dbt_cli_config_exclude_models():
    """Doc snippet: exclude models by selector, path, or tag via resource config."""
    # start_marker_dbt_cli_config_exclude_models
    from dagster import job
    from dagster_dbt import dbt_cli_resource

    config = {"exclude": ["my_dbt_model+", "path.to.models", "tag:nightly"]}

    @job(resource_defs={"dbt": dbt_cli_resource.configured(config)})
    def my_job():
        # ...
        # end_marker_dbt_cli_config_exclude_models
        pass
def scope_dbt_cli_config_executable():
    """Doc snippet: point the resource at a specific dbt executable."""
    # start_marker_dbt_cli_config_executable
    from dagster import job
    from dagster_dbt import dbt_cli_resource

    config = {"dbt_executable": "path/to/dbt/executable"}

    @job(resource_defs={"dbt": dbt_cli_resource.configured(config)})
    def my_job():
        # ...
        # end_marker_dbt_cli_config_executable
        pass
def scope_dbt_cli_run_specific_models():
    """Doc snippet: restrict dbt run to a model selection via resource config."""
    # start_marker_dbt_cli_run_specific_models_preconfig
    from dagster import job
    from dagster_dbt import dbt_cli_resource, dbt_run_op

    my_dbt_resource = dbt_cli_resource.configured(
        {"project_dir": "path/to/dbt/project", "models": ["tag:staging"]}
    )

    @job(resource_defs={"dbt": my_dbt_resource})
    def my_dbt_job():
        dbt_run_op()
def test_run_op(
    dbt_seed, conn_string, test_project_dir, dbt_config_dir
):  # pylint: disable=unused-argument
    """dbt_run_op emits asset materializations plus a final output with four run results."""
    dbt = dbt_cli_resource.configured(
        {"project_dir": test_project_dir, "profiles_dir": dbt_config_dir}
    )
    events = list(dbt_run_op(build_op_context(resources={"dbt": dbt})))
    # 4 asset materializations + the op's Output = 5 events.
    assert len(events) == 5
    assert len(events[-1].value.result["results"]) == 4
def scope_dbt_cli_config_profile_and_target():
    """Doc snippet: select a dbt profile and target via resource config."""
    PROFILE_NAME, TARGET_NAME = "", ""
    # start_marker_dbt_cli_config_profile_and_target
    from dagster import job
    from dagster_dbt import dbt_cli_resource

    config = {"profile": PROFILE_NAME, "target": TARGET_NAME}

    @job(resource_defs={"dbt": dbt_cli_resource.configured(config)})
    def my_job():
        # ...
        # end_marker_dbt_cli_config_profile_and_target
        pass
def scope_dbt_cli_config_exclude_models():
    """Doc snippet (pipeline API): exclude models by selector, path, or tag."""
    # start_marker_dbt_cli_config_exclude_models
    from dagster import pipeline, ModeDefinition
    from dagster_dbt import dbt_cli_resource

    config = {"exclude": ["my_dbt_model+", "path.to.models", "tag:nightly"]}

    @pipeline(
        mode_defs=[ModeDefinition(resource_defs={"dbt": dbt_cli_resource.configured(config)})]
    )
    def my_pipeline():
        # ...
        # end_marker_dbt_cli_config_exclude_models
        pass
def scope_dbt_cli_config_vars():
    """Doc snippet (pipeline API): pass dbt ``vars`` through the CLI resource config."""
    # start_marker_dbt_cli_config_vars
    from dagster import pipeline, ModeDefinition
    from dagster_dbt import dbt_cli_resource

    config = {"vars": {"key": "value"}}

    @pipeline(
        mode_defs=[ModeDefinition(resource_defs={"dbt": dbt_cli_resource.configured(config)})]
    )
    def my_pipeline():
        # ...
        # end_marker_dbt_cli_config_vars
        pass
def scope_dbt_cli_config_executable():
    """Doc snippet (pipeline API): point the resource at a specific dbt executable."""
    # start_marker_dbt_cli_config_executable
    from dagster import pipeline, ModeDefinition
    from dagster_dbt import dbt_cli_resource

    config = {"dbt_executable": "path/to/dbt/executable"}

    @pipeline(
        mode_defs=[ModeDefinition(resource_defs={"dbt": dbt_cli_resource.configured(config)})]
    )
    def my_pipeline():
        # ...
        # end_marker_dbt_cli_config_executable
        pass
def scope_dbt_cli_run():
    """Doc snippet (pipeline API): run all dbt models with a preconfigured resource."""
    # start_marker_dbt_cli_run_preconfig
    from dagster import pipeline, solid, ModeDefinition
    from dagster_dbt import dbt_cli_resource

    my_dbt_resource = dbt_cli_resource.configured({"project_dir": "path/to/dbt/project"})

    @solid(required_resource_keys={"dbt"})
    def run_all_models(context):
        context.resources.dbt.run()

    @pipeline(mode_defs=[ModeDefinition(resource_defs={"dbt": my_dbt_resource})])
    def my_dbt_pipeline():
        run_all_models()
def scope_dbt_cli_config_profile_and_target():
    """Doc snippet (pipeline API): select a dbt profile and target via resource config."""
    PROFILE_NAME, TARGET_NAME = "", ""
    # start_marker_dbt_cli_config_profile_and_target
    from dagster import pipeline, ModeDefinition
    from dagster_dbt import dbt_cli_resource

    config = {"profile": PROFILE_NAME, "target": TARGET_NAME}

    @pipeline(
        mode_defs=[ModeDefinition(resource_defs={"dbt": dbt_cli_resource.configured(config)})]
    )
    def my_pipeline():
        # ...
        # end_marker_dbt_cli_config_profile_and_target
        pass
def test_run_test_job(dbt_seed, conn_string, test_project_dir, dbt_config_dir):  # pylint: disable=unused-argument
    """dbt run followed by dbt test executes 4 models and 15 tests."""
    dbt = dbt_cli_resource.configured(
        {"project_dir": test_project_dir, "profiles_dir": dbt_config_dir}
    )

    @job(resource_defs={"dbt": dbt})
    def run_test_job():
        dbt_test_op(start_after=dbt_run_op())

    result = run_test_job.execute_in_process()
    run_output = result.output_for_node("dbt_run_op")
    test_output = result.output_for_node("dbt_test_op")
    assert len(run_output.result["results"]) == 4
    assert len(test_output.result["results"]) == 15
def scope_dbt_cli_run_after_another_solid():
    """Doc snippet (pipeline API): test dbt models only after running them."""
    # start_marker_dbt_cli_run_after_another_solid
    from dagster import pipeline, solid, ModeDefinition
    from dagster_dbt import dbt_cli_resource, DbtCliOutput

    my_dbt_resource = dbt_cli_resource.configured({"project_dir": "path/to/dbt/project"})

    @solid(required_resource_keys={"dbt"})
    def run_models(context) -> DbtCliOutput:
        return context.resources.dbt.run()

    @solid(required_resource_keys={"dbt"})
    def test_models(context, run_result: DbtCliOutput):
        context.log.info(f"testing result of `{run_result.command}`!")
        context.resources.dbt.test()

    @pipeline(mode_defs=[ModeDefinition(resource_defs={"dbt": my_dbt_resource})])
    def my_dbt_pipeline():
        # Passing run_models' output into test_models creates the ordering edge.
        run_result = run_models()
        test_models(run_result)
SHARED_SNOWFLAKE_CONF, connect_snowflake, snowflake_io_manager_dev, snowflake_io_manager_prod, ) DBT_PROJECT_DIR = file_relative_path(__file__, "../../hacker_news_dbt") DBT_PROFILES_DIR = DBT_PROJECT_DIR + "/config" # We define two sets of resources, one for the prod mode, which writes to production schemas and # one for dev mode, which writes to alternate schemas PROD_RESOURCES = { "dbt": dbt_cli_resource.configured({ "profiles_dir": DBT_PROFILES_DIR, "project_dir": DBT_PROJECT_DIR, "target": "prod" }), "warehouse_io_manager": snowflake_io_manager_prod, # "parquet_io_manager": parquet_io_manager.configured({"base_path": get_system_temp_directory()}), "pyspark": pyspark_resource, } DEV_RESOURCES = { "dbt": dbt_cli_resource.configured({ "profiles-dir": DBT_PROFILES_DIR, "project-dir": DBT_PROJECT_DIR, "target": "dev"
) from ops.gdelt_mining_ops import enhance_articles, materialize_gdelt_mining_asset, materialize_enhanced_articles_asset from ops.ml_enrichment_ops import classify_protest_relevancy, get_ml_enrichment_files, store_ml_enrichment_files from resources.novacene_ml_resource import novacene_ml_api_client # Resources ################# DBT_PROFILES_DIR = file_relative_path(__file__, "./dw") DBT_PROJECT_DIR = file_relative_path(__file__, "./dw") snowflake_env_variables = config_from_files(['environments/snowflake_env_variables.yaml']) novacene_env_variables = config_from_files(['environments/novacene_env_variables.yaml']) my_dbt_resource = dbt_cli_resource.configured({ "profiles_dir": DBT_PROFILES_DIR, "project_dir": DBT_PROJECT_DIR}) my_novacene_client_client = novacene_ml_api_client.configured(novacene_env_variables) #Jobs ################ @job( resource_defs = { 'snowflake': snowflake_resource }, config = snowflake_env_variables ) def mine_gdelt_data(): # Mine data from GDELT
optimize.curve_fit(f=model_func, xdata=df.order_date.astype(np.int64), ydata=df.num_orders, p0=[10, 100])[0]) @asset(compute_kind="python", io_manager_key="pandas_io_manager") def predicted_orders( daily_order_summary: pd.DataFrame, order_forecast_model: Tuple[float, float]) -> pd.DataFrame: """Predicted orders for the next 30 days based on the fit paramters""" a, b = order_forecast_model start_date = daily_order_summary.order_date.max() future_dates = pd.date_range(start=start_date, end=start_date + pd.DateOffset(days=30)) predicted_data = model_func(x=future_dates.astype(np.int64), a=a, b=b) return pd.DataFrame({ "order_date": future_dates, "num_orders": predicted_data }) analytics_assets = AssetGroup( airbyte_assets + dbt_assets + [order_forecast_model, predicted_orders], resource_defs={ "airbyte": airbyte_resource.configured(AIRBYTE_CONFIG), "dbt": dbt_cli_resource.configured(DBT_CONFIG), "pandas_io_manager": pandas_io_manager.configured(PANDAS_IO_CONFIG), }, ).build_job("Assets")
import pandas as pd from dagster import MetadataValue, build_assets_job from dagster.utils import file_relative_path from dagster_dbt import dbt_cli_resource from dagster_dbt.asset_defs import load_assets_from_dbt_manifest from hacker_news_assets.resources import RESOURCES_PROD, RESOURCES_STAGING from hacker_news_assets.resources.snowflake_io_manager import ( SHARED_SNOWFLAKE_CONF, connect_snowflake, ) DBT_PROJECT_DIR = file_relative_path(__file__, "../../hacker_news_dbt") DBT_PROFILES_DIR = DBT_PROJECT_DIR + "/config" dbt_staging_resource = dbt_cli_resource.configured({ "profiles-dir": DBT_PROFILES_DIR, "project-dir": DBT_PROJECT_DIR, "target": "staging" }) dbt_prod_resource = dbt_cli_resource.configured({ "profiles_dir": DBT_PROFILES_DIR, "project_dir": DBT_PROJECT_DIR, "target": "prod" }) def asset_metadata(_context, model_info): config = dict(SHARED_SNOWFLAKE_CONF) config["schema"] = model_info["schema"] with connect_snowflake(config=config) as con: df = pd.read_sql(f"SELECT * FROM {model_info['name']} LIMIT 5", con=con)