def test_dbt_run_with_airflow_connection_and_profile(
    profiles_file, dbt_project_file, model_files, airflow_conns
):
    """Test execution of DbtRunOperator with a connection and a profiles file.

    An Airflow connection target should still be usable even in the presence of
    a profiles file, and vice-versa.
    """
    all_targets = airflow_conns + ("test",)

    for target in all_targets:
        op = DbtRunOperator(
            task_id="dbt_task",
            project_dir=dbt_project_file.parent,
            profiles_dir=profiles_file.parent,
            select=[str(m.stem) for m in model_files],
            target=target,
        )
        execution_results = op.execute({})
        run_result = execution_results["results"][0]

        assert run_result["status"] == RunStatus.Success
        assert op.profiles_dir == profiles_file.parent
        assert op.target == target
def test_dbt_run_mocked_all_args():
    """Test mocked dbt run call with all arguments."""
    op = DbtRunOperator(
        task_id="dbt_task",
        project_dir="/path/to/project/",
        profiles_dir="/path/to/profiles/",
        profile="dbt-profile",
        target="dbt-target",
        vars={"target": "override"},
        log_cache_events=True,
        full_refresh=True,
        models=["/path/to/model.sql", "+/another/model.sql+2"],
        fail_fast=True,
        threads=3,
        exclude=["/path/to/model/to/exclude.sql"],
        selector_name=["a-selector"],
        state="/path/to/state/",
    )
    assert op.command == "run"

    config = op.get_dbt_config()

    assert isinstance(config, RunTaskConfig) is True
    assert config.project_dir == "/path/to/project/"
    assert config.profiles_dir == "/path/to/profiles/"
    assert config.profile == "dbt-profile"
    assert config.target == "dbt-target"
    assert config.parsed_vars == {"target": "override"}
    assert config.log_cache_events is True
    assert config.full_refresh is True
    assert config.fail_fast is True
    assert config.threads == 3
    assert config.select == ["/path/to/model.sql", "+/another/model.sql+2"]
    assert config.exclude == ["/path/to/model/to/exclude.sql"]
    assert config.selector_name == ["a-selector"]
    assert config.state == Path("/path/to/state/")
def test_dbt_run_models_with_project_from_s3(
    s3_bucket, s3_hook, profiles_file, dbt_project_file, model_files
):
    """Test execution of DbtRunOperator with a project from s3."""
    bucket = s3_hook.get_bucket(s3_bucket)

    with open(dbt_project_file) as pf:
        project_content = pf.read()
    bucket.put_object(Key="project/dbt_project.yml", Body=project_content.encode())

    for model_file in model_files:
        with open(model_file) as mf:
            model_content = mf.read()
        bucket.put_object(
            Key=f"project/models/{model_file.name}", Body=model_content.encode()
        )

    op = DbtRunOperator(
        task_id="dbt_task",
        project_dir=f"s3://{s3_bucket}/project/",
        profiles_dir=profiles_file.parent,
        models=[str(m.stem) for m in model_files],
        do_xcom_push=True,
    )
    execution_results = op.execute({})
    run_result = execution_results["results"][0]

    assert run_result["status"] == RunStatus.Success
def test_dbt_run_fails_with_non_existent_project(profiles_file, dbt_project_file):
    """Test dbt run operator raises an exception when failing due to a missing project."""
    op = DbtRunOperator(
        task_id="dbt_task",
        project_dir="/home/fake/project",
        profiles_dir="/home/fake/profiles/",
        full_refresh=True,
    )
    with pytest.raises(AirflowException):
        op.execute({})
def test_dbt_run_fails_with_malformed_sql(
    profiles_file, dbt_project_file, broken_file
):
    """Test dbt run operator raises an exception when failing due to a broken file."""
    op = DbtRunOperator(
        task_id="dbt_task",
        project_dir=dbt_project_file.parent,
        profiles_dir=profiles_file.parent,
        models=[str(broken_file.stem)],
        full_refresh=True,
    )
    with pytest.raises(AirflowException):
        op.execute({})
def test_dbt_run_models(profiles_file, dbt_project_file, model_files):
    """Test execution of DbtRunOperator with all models."""
    op = DbtRunOperator(
        task_id="dbt_task",
        project_dir=dbt_project_file.parent,
        profiles_dir=profiles_file.parent,
        models=[str(m.stem) for m in model_files],
        do_xcom_push=True,
    )
    execution_results = op.execute({})
    run_result = execution_results["results"][0]

    assert run_result["status"] == RunStatus.Success
def test_dbt_run_non_existent_model(profiles_file, dbt_project_file, model_files):
    """Test execution of DbtRunOperator with a non-existent model."""
    op = DbtRunOperator(
        task_id="dbt_task",
        project_dir=dbt_project_file.parent,
        profiles_dir=profiles_file.parent,
        models=["fake"],
        full_refresh=True,
        do_xcom_push=True,
    )
    execution_results = op.execute({})

    assert len(execution_results["results"]) == 0
    assert isinstance(json.dumps(execution_results), str)
def test_dbt_run_models_full_refresh(profiles_file, dbt_project_file, model_files):
    """Test dbt run operator with all model files and full-refresh."""
    op = DbtRunOperator(
        task_id="dbt_task",
        project_dir=dbt_project_file.parent,
        profiles_dir=profiles_file.parent,
        models=[str(m.stem) for m in model_files],
        full_refresh=True,
        do_xcom_push=True,
    )
    execution_results = op.execute({})
    run_result = execution_results["results"][0]

    assert run_result["status"] == RunStatus.Success
    assert isinstance(json.dumps(execution_results), str)
def test_dbt_run_models_with_airflow_connection(
    dbt_project_file, model_files, airflow_conns
):
    """Test execution of DbtRunOperator with an Airflow connection target."""
    for conn_id in airflow_conns:
        op = DbtRunOperator(
            task_id="dbt_task",
            project_dir=dbt_project_file.parent,
            models=[str(m.stem) for m in model_files],
            target=conn_id,
        )
        execution_results = op.execute({})
        run_result = execution_results["results"][0]

        assert run_result["status"] == RunStatus.Success
        assert op.profiles_dir is None
        assert op.target == conn_id
def test_dbt_run_models_from_s3(
    s3_bucket, s3_hook, profiles_file, dbt_project_file, model_files
):
    """Test execution of DbtRunOperator with all models from s3."""
    bucket = s3_hook.get_bucket(s3_bucket)

    with open(dbt_project_file) as pf:
        project_content = pf.read()
    bucket.put_object(Key="project/dbt_project.yml", Body=project_content.encode())

    with open(profiles_file) as pf:
        profiles_content = pf.read()
    bucket.put_object(Key="project/profiles.yml", Body=profiles_content.encode())

    for model_file in model_files:
        with open(model_file) as mf:
            model_content = mf.read()
        bucket.put_object(
            Key=f"project/models/{model_file.name}", Body=model_content.encode()
        )

    op = DbtRunOperator(
        task_id="dbt_task",
        project_dir=f"s3://{s3_bucket}/project/",
        profiles_dir=f"s3://{s3_bucket}/project/",
        models=[str(m.stem) for m in model_files],
        do_xcom_push=True,
        do_xcom_push_artifacts=["manifest.json", "run_results.json"],
    )
    ti = FakeTaskInstance()
    execution_results = op.execute({"ti": ti})
    run_result = execution_results["results"][0]

    assert run_result["status"] == RunStatus.Success
    assert "manifest.json" in ti.xcom
    assert "run_results.json" in ti.xcom
    assert ti.xcom["run_results.json"][0]["results"][0]["status"] == "success"
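# A minimal sketch of the FakeTaskInstance test double assumed by the S3 test
# above (not necessarily the project's actual conftest definition): it only
# needs to record xcom_push calls so pushed dbt artifacts can be inspected.
class FakeTaskInstance:
    """Stand-in for an Airflow TaskInstance that captures XCom pushes."""

    def __init__(self):
        self.xcom = {}

    def xcom_push(self, key, value, execution_date=None):
        # Store the pushed value (and timestamp) under its key, matching the
        # tuple indexing used in the assertions above.
        self.xcom[key] = (value, execution_date)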
def basic_dag(
    dbt_project_file,
    profiles_file,
    model_files,
    seed_files,
    singular_tests_files,
    generic_tests_files,
):
    with DAG(
        dag_id="dbt_dag",
        start_date=DATA_INTERVAL_START,
        catchup=False,
        schedule_interval=None,
        tags=["context-manager", "dbt"],
    ) as dag:
        dbt_seed = DbtSeedOperator(
            task_id="dbt_seed",
            project_dir=dbt_project_file.parent,
            profiles_dir=profiles_file.parent,
            do_xcom_push_artifacts=["run_results.json"],
            target="test",
        )
        dbt_run = DbtRunOperator(
            task_id="dbt_run",
            project_dir=dbt_project_file.parent,
            profiles_dir=profiles_file.parent,
            target="test",
            do_xcom_push_artifacts=["run_results.json"],
            full_refresh=True,
        )
        dbt_test = DbtTestOperator(
            task_id="dbt_test",
            project_dir=dbt_project_file.parent,
            profiles_dir=profiles_file.parent,
            do_xcom_push_artifacts=["run_results.json"],
            target="test",
        )
        dbt_seed >> dbt_run >> dbt_test

    yield dag

    session = settings.Session()
    session.query(DagRun).delete()
def test_dbt_run_uses_correct_argument_according_to_version():
    """Test if dbt run operator sets the proper attribute based on dbt version."""
    op = DbtRunOperator(
        task_id="dbt_task",
        project_dir="/path/to/project/",
        profiles_dir="/path/to/profiles/",
        profile="dbt-profile",
        target="dbt-target",
        vars={"target": "override"},
        log_cache_events=True,
        full_refresh=True,
        models=["/path/to/model.sql", "+/another/model.sql+2"],
        fail_fast=True,
        threads=3,
        exclude=["/path/to/model/to/exclude.sql"],
        selector_name=["a-selector"],
        state="/path/to/state/",
    )

    assert op.select == ["/path/to/model.sql", "+/another/model.sql+2"]
    assert getattr(op, "models", None) is None
def generate_dag():
    @task
    def prepare_profiles_dir() -> str:
        return str(profiles_file.parent)

    @task
    def prepare_dbt_project_dir() -> str:
        return str(dbt_project_file.parent)

    profiles_dir = prepare_profiles_dir()
    dbt_project_dir = prepare_dbt_project_dir()

    dbt_seed = DbtSeedOperator(
        task_id="dbt_seed_taskflow",
        project_dir=dbt_project_dir,
        profiles_dir=profiles_dir,
        target="test",
        do_xcom_push_artifacts=["run_results.json"],
    )
    dbt_run = DbtRunOperator(
        task_id="dbt_run_taskflow",
        project_dir=dbt_project_dir,
        profiles_dir=profiles_dir,
        target="test",
        full_refresh=True,
        do_xcom_push_artifacts=["run_results.json"],
    )
    dbt_test = DbtTestOperator(
        task_id="dbt_test_taskflow",
        project_dir=dbt_project_dir,
        profiles_dir=profiles_dir,
        target="test",
        do_xcom_push_artifacts=["run_results.json"],
    )
    dbt_seed >> dbt_run >> dbt_test
do_xcom_push_artifacts=["sources.json"], ) dbt_seed = DbtSeedOperator( task_id="dbt_seed", project_dir="/path/to/my/dbt/project/", profiles_dir="~/.dbt/", target="production", profile="my-project", ) dbt_run_incremental = DbtRunOperator( task_id="dbt_run_incremental_hourly", project_dir="/path/to/my/dbt/project/", profiles_dir="~/.dbt/", select=["tag:hourly,config.materialized:incremental"], exclude=["tag:deprecated"], target="production", profile="my-project", full_refresh=False, ) dbt_run = DbtRunOperator( task_id="dbt_run_hourly", project_dir="/path/to/my/dbt/project/", profiles_dir="~/.dbt/", select=["+tag:hourly"], exclude=["tag:deprecated,config.materialized:incremental"], target="production", profile="my-project", full_refresh=True, )
f"{longest_execute[1]} seconds!") print(f"{longest_compile[0]} took the longest to compile with a time of " f"{longest_compile[1]} seconds!") with DAG( dag_id="example_dbt_artifacts", schedule_interval="0 0 * * *", start_date=days_ago(1), catchup=False, dagrun_timeout=dt.timedelta(minutes=60), ) as dag: dbt_run = DbtRunOperator( task_id="dbt_run_daily", project_dir="/path/to/my/dbt/project/", profiles_dir="~/.dbt/", select=["+tag:daily"], exclude=["tag:deprecated"], target="production", profile="my-project", full_refresh=True, do_xcom_push_artifacts=["manifest.json", "run_results.json"], ) process_artifacts = PythonOperator( task_id="process_artifacts", python_callable=process_dbt_artifacts, provide_context=True, ) dbt_run >> process_artifacts
host="localhost", login="******", port=5432, schema="my_dbt_schema", password="******", # pragma: allowlist secret # Other dbt parameters can be added as extras extra=json.dumps(dict(threads=4, sslmode="require")), ) session.add(my_conn) session.commit() with DAG( dag_id="example_airflow_connection", schedule_interval="0 * * * *", start_date=days_ago(1), catchup=False, dagrun_timeout=dt.timedelta(minutes=60), ) as dag: dbt_run = DbtRunOperator( task_id="dbt_run_hourly", target="my_db_connection", # Profiles file is not needed as we are using an Airflow connection. # If a profiles file is used, the Airflow connection will be merged to the # existing targets profiles_dir=None, # Defaults to None so this may be omitted. project_dir="/path/to/my/dbt/project/", select=["+tag:hourly"], exclude=["tag:deprecated"], )
"""Sample basic DAG which dbt runs a project.""" import datetime as dt from airflow import DAG from airflow.utils.dates import days_ago from airflow_dbt_python.operators.dbt import DbtRunOperator with DAG( dag_id="example_basic_dbt", schedule_interval=None, start_date=days_ago(1), catchup=False, dagrun_timeout=dt.timedelta(minutes=60), default_args={"retries": 2}, ) as dag: dbt_run = DbtRunOperator( task_id="dbt_run_hourly", project_dir="/path/to/my/dbt/project/", profiles_dir="~/.dbt/", select=["+tag:hourly"], exclude=["tag:deprecated"], target="production", profile="my-project", full_refresh=False, )
from airflow_dbt_python.operators.dbt import DbtDocsGenerateOperator, DbtRunOperator

with DAG(
    dag_id="example_basic_dbt_run_with_s3",
    schedule_interval=None,
    start_date=days_ago(1),
    catchup=False,
    dagrun_timeout=dt.timedelta(minutes=60),
) as dag:
    # Project files will be pulled from "s3://my-bucket/dbt/project/key/prefix/"
    # and profiles from "s3://my-bucket/dbt/profiles/key/prefix/".
    dbt_run = DbtRunOperator(
        task_id="dbt_run_hourly",
        project_dir="s3://my-bucket/dbt/project/key/prefix/",
        profiles_dir="s3://my-bucket/dbt/profiles/key/prefix/",
        select=["+tag:hourly"],
        exclude=["tag:deprecated"],
        target="production",
        profile="my-project",
        full_refresh=False,
    )

    # Documentation files (target/manifest.json, target/index.html, and
    # target/catalog.json) will be pushed back to S3 after compilation is done.
    dbt_docs = DbtDocsGenerateOperator(
        task_id="dbt_docs",
        project_dir="s3://my-bucket/dbt/project/key/prefix/",
        profiles_dir="s3://my-bucket/dbt/profiles/key/prefix/",
    )

    dbt_run >> dbt_docs