def repeated_subdag(
    parent_dag_name,
    child_dag_name,
    default_args,
    schedule_interval,
    dataset_id,
    additional_params=None,
    num_partitions=5,
    date_partition_parameter="submission_date",
):
    """Return a subdag that (re)builds a table in sample_id partitions.

    Task 0 replaces the date partition of ``<child_dag_name>_v1`` for the
    first sample_id range; tasks ``1..num_partitions-1`` append the
    remaining ranges to the same partition.

    :param parent_dag_name:          parent DAG name (prefix of the subdag id)
    :param child_dag_name:           subdag name; also task-id and table prefix
    :param default_args:             default args passed through to the DAG
    :param schedule_interval:        schedule of the subdag
    :param dataset_id:               BigQuery dataset of the destination table
    :param additional_params:        extra parameters merged into every query
    :param num_partitions:           number of sample_id ranges; should evenly
                                     divide 100, otherwise trailing sample ids
                                     are silently left out
    :param date_partition_parameter: name of the date parameter for the query
    :return: the configured airflow DAG
    """
    dag = DAG(
        "%s.%s" % (parent_dag_name, child_dag_name),
        default_args=default_args,
        schedule_interval=schedule_interval,
    )

    # This task runs first and replaces the relevant partition, followed
    # by the next tasks that append to the same partition of the same table.
    NUM_SAMPLE_IDS = 100
    # Bug fix: floor division. True division ("/") yields a float in
    # Python 3, so the generated parameters become values such as
    # "max_sample_id:INT64:19.0", which is not a valid INT64 literal.
    PARTITION_SIZE = NUM_SAMPLE_IDS // num_partitions

    task_0 = bigquery_etl_query(
        task_id="{dag_name}_0".format(dag_name=child_dag_name),
        destination_table="{dag_name}_v1".format(dag_name=child_dag_name),
        dataset_id=dataset_id,
        project_id="moz-fx-data-shared-prod",
        owner="*****@*****.**",
        email=["*****@*****.**", "*****@*****.**"],
        depends_on_past=True,
        parameters=merge_params(0, PARTITION_SIZE - 1, additional_params),
        date_partition_parameter=date_partition_parameter,
        arguments=("--replace", ),
        dag=dag,
    )

    for partition in range(1, num_partitions):
        min_param = partition * PARTITION_SIZE
        max_param = min_param + PARTITION_SIZE - 1
        task = bigquery_etl_query(
            task_id="{}_{}".format(child_dag_name, partition),
            destination_table="{dag_name}_v1".format(dag_name=child_dag_name),
            dataset_id=dataset_id,
            project_id="moz-fx-data-shared-prod",
            owner="*****@*****.**",
            email=["*****@*****.**", "*****@*****.**"],
            depends_on_past=True,
            parameters=merge_params(min_param, max_param, additional_params),
            date_partition_parameter=date_partition_parameter,
            arguments=(
                "--append_table",
                "--noreplace",
            ),
            dag=dag,
        )
        # Every append task waits on the replace task so the partition is
        # reset exactly once per run before data is appended.
        task_0 >> task

    return dag
def repeated_subdag(parent_dag_name, child_dag_name, default_args,
                    schedule_interval, dataset_id):
    """Return a subdag that (re)builds a table in four sample_id partitions.

    Task 0 replaces the destination table for sample ids [0, 25); tasks
    1-3 append the remaining 25-wide sample_id ranges to the same table.

    :param parent_dag_name:   parent DAG name (prefix of the subdag id)
    :param child_dag_name:    subdag name; also task-id and table prefix
    :param default_args:      default args passed through to the DAG
    :param schedule_interval: schedule of the subdag
    :param dataset_id:        BigQuery dataset of the destination table
    :return: the configured airflow DAG
    """
    dag = DAG(
        "%s.%s" % (parent_dag_name, child_dag_name),
        default_args=default_args,
        schedule_interval=schedule_interval,
    )

    NUM_PARTITIONS = 4
    NUM_SAMPLE_IDS = 100
    # Bug fix: floor division. True division ("/") yields a float in
    # Python 3, and the parameter below would render as
    # "max_sample_id:INT64:24.0" -- not a valid INT64 literal.
    PARTITION_SIZE = NUM_SAMPLE_IDS // NUM_PARTITIONS

    # First task replaces the table contents for the initial range; the
    # remaining tasks append to it.
    task_0 = bigquery_etl_query(
        task_id="{dag_name}_0".format(dag_name=child_dag_name),
        destination_table="{dag_name}_v1".format(dag_name=child_dag_name),
        dataset_id=dataset_id,
        project_id="moz-fx-data-shared-prod",
        owner="*****@*****.**",
        email=["*****@*****.**", "*****@*****.**"],
        depends_on_past=True,
        date_partition_parameter=None,
        parameters=(
            "min_sample_id:INT64:0",
            "max_sample_id:INT64:{}".format(PARTITION_SIZE - 1),
        ),
        arguments=("--replace", ),
        dag=dag,
    )

    for partition in range(1, NUM_PARTITIONS):
        min_param = partition * PARTITION_SIZE
        max_param = min_param + PARTITION_SIZE - 1
        task = bigquery_etl_query(
            task_id="{}_{}".format(child_dag_name, partition),
            destination_table="{dag_name}_v1".format(dag_name=child_dag_name),
            dataset_id=dataset_id,
            project_id="moz-fx-data-shared-prod",
            owner="*****@*****.**",
            email=["*****@*****.**", "*****@*****.**"],
            depends_on_past=True,
            date_partition_parameter=None,
            parameters=(
                "min_sample_id:INT64:{}".format(min_param),
                "max_sample_id:INT64:{}".format(max_param),
            ),
            arguments=(
                "--append_table",
                "--noreplace",
            ),
            dag=dag,
        )
        # Append tasks wait for the replace task so the table is reset
        # exactly once per run before data is appended.
        task_0 >> task

    return dag
def extract_channel_subdag(
    parent_dag_name,
    child_dag_name,
    default_args,
    schedule_interval,
    dataset_id,
    channel,
):
    """Build the per-channel GLAM firefox extract subdag.

    Materializes the channel's extract query into a BigQuery table,
    deletes the previous CSV extracts from GCS, then exports the fresh
    table to sharded CSV files.

    :param parent_dag_name:   parent DAG name (prefix of the subdag id)
    :param child_dag_name:    subdag name
    :param default_args:      default args passed through to the DAG
    :param schedule_interval: schedule of the subdag
    :param dataset_id:        BigQuery dataset holding the extract table
    :param channel:           release channel used in table/task/file names
    :return: the configured airflow DAG
    """
    dag = DAG(
        dag_id=f"{parent_dag_name}.{child_dag_name}",
        default_args=default_args,
        schedule_interval=schedule_interval,
    )

    extract_table = f"glam_extract_firefox_{channel}_v1"

    # Step 1: run the extract query for this channel into BigQuery.
    run_extract_query = bigquery_etl_query(
        task_id=f"glam_client_probe_counts_{channel}_extract",
        destination_table=extract_table,
        dataset_id=dataset_id,
        project_id=project_id,
        owner="*****@*****.**",
        email=[
            "*****@*****.**",
            "*****@*****.**",
            "*****@*****.**",
        ],
        date_partition_parameter=None,
        arguments=("--replace", ),
        sql_file_path=(
            "sql/moz-fx-data-shared-prod/"
            f"{dataset_id}/glam_client_probe_counts_extract_v1/query.sql"
        ),
        parameters=(f"channel:STRING:{channel}", ),
        dag=dag,
    )

    # Step 2: drop stale CSVs so the new export never mixes with old runs.
    drop_stale_csvs = GoogleCloudStorageDeleteOperator(
        task_id=f"glam_gcs_delete_old_{channel}_extracts",
        bucket_name=glam_bucket,
        prefix=f"aggs-desktop-{channel}",
        google_cloud_storage_conn_id=gcp_conn.gcp_conn_id,
        dag=dag,
    )

    # Step 3: export the extract table to sharded CSV files in GCS.
    export_uri = f"gs://{glam_bucket}/aggs-desktop-{channel}-*.csv"
    export_to_csv = BigQueryToCloudStorageOperator(
        task_id=f"glam_extract_{channel}_to_csv",
        source_project_dataset_table=(
            f"{project_id}.{dataset_id}.{extract_table}"
        ),
        destination_cloud_storage_uris=export_uri,
        bigquery_conn_id=gcp_conn.gcp_conn_id,
        export_format="CSV",
        print_header=False,
        dag=dag,
    )

    run_extract_query >> drop_stale_csvs >> export_to_csv
    return dag
def extract_channel_subdag(
    parent_dag_name,
    child_dag_name,
    default_args,
    schedule_interval,
    dataset_id,
    channel,
):
    """Build the per-channel GLAM client probe counts extract subdag.

    Materializes the channel's probe-counts extract into a BigQuery
    table, deletes the previous CSV extracts from GCS, then exports the
    fresh table to sharded CSV files.

    :param parent_dag_name:   parent DAG name (prefix of the subdag id)
    :param child_dag_name:    subdag name
    :param default_args:      default args passed through to the DAG
    :param schedule_interval: schedule of the subdag
    :param dataset_id:        BigQuery dataset holding the extract table
    :param channel:           release channel used in table/task/file names
    :return: the configured airflow DAG
    """
    dag = DAG(
        dag_id=f"{parent_dag_name}.{child_dag_name}",
        default_args=default_args,
        schedule_interval=schedule_interval,
    )

    extract_table = f"glam_client_probe_counts_{channel}_extract_v1"

    # Step 1: run the extract query for this channel into BigQuery.
    run_extract_query = bigquery_etl_query(
        task_id=f"glam_client_probe_counts_{channel}_extract",
        destination_table=extract_table,
        dataset_id=dataset_id,
        project_id=project_id,
        owner="*****@*****.**",
        email=[
            "*****@*****.**",
            "*****@*****.**",
            "*****@*****.**",
        ],
        date_partition_parameter=None,
        arguments=("--replace", ),
        dag=dag,
    )

    # Step 2: drop stale CSVs so the new export never mixes with old runs.
    drop_stale_csvs = GoogleCloudStorageDeleteOperator(
        task_id=f"glam_gcs_delete_old_{channel}_extracts",
        bucket_name=glam_bucket,
        prefix=f"extract-desktop-{channel}",
        google_cloud_storage_conn_id=gcp_conn.gcp_conn_id,
        dag=dag,
    )

    # Step 3: export the extract table to sharded CSV files in GCS.
    export_uri = f"gs://{glam_bucket}/extract-desktop-{channel}-*.csv"
    export_to_csv = BigQueryToCloudStorageOperator(
        task_id=f"glam_extract_{channel}_to_csv",
        source_project_dataset_table=(
            f"{project_id}.{dataset_id}.{extract_table}"
        ),
        destination_cloud_storage_uris=export_uri,
        bigquery_conn_id=gcp_conn.gcp_conn_id,
        export_format="CSV",
        print_header=False,
        dag=dag,
    )

    run_extract_query >> drop_stale_csvs >> export_to_csv
    return dag
def extract_user_counts(parent_dag_name, child_dag_name, default_args,
                        schedule_interval, dataset_id):
    """Build the GLAM user-counts extract subdag.

    Materializes the user counts extract into a BigQuery table, deletes
    the previous counts CSV from GCS, then exports the fresh table to a
    single CSV file.

    :param parent_dag_name:   parent DAG name (prefix of the subdag id)
    :param child_dag_name:    subdag name
    :param default_args:      default args passed through to the DAG
    :param schedule_interval: schedule of the subdag
    :param dataset_id:        BigQuery dataset holding the extract table
    :return: the configured airflow DAG
    """
    dag = DAG(
        dag_id=f"{parent_dag_name}.{child_dag_name}",
        default_args=default_args,
        schedule_interval=schedule_interval,
    )

    extract_table = "glam_user_counts_extract_v1"

    # Step 1: materialize the user counts extract in BigQuery.
    run_extract_query = bigquery_etl_query(
        task_id="glam_user_counts_extract",
        destination_table=extract_table,
        dataset_id=dataset_id,
        project_id=project_id,
        owner="*****@*****.**",
        email=[
            "*****@*****.**",
            "*****@*****.**",
            "*****@*****.**",
        ],
        date_partition_parameter=None,
        arguments=("--replace", ),
        dag=dag,
    )

    # Step 2: remove the previous counts CSV before exporting anew.
    drop_stale_csvs = GoogleCloudStorageDeleteOperator(
        task_id="glam_gcs_delete_count_extracts",
        bucket_name=glam_bucket,
        prefix="glam-extract-firefox-counts",
        google_cloud_storage_conn_id=gcp_conn.gcp_conn_id,
        dag=dag,
    )

    # Step 3: export the extract table to a CSV file in GCS.
    export_uri = f"gs://{glam_bucket}/glam-extract-firefox-counts.csv"
    export_to_csv = BigQueryToCloudStorageOperator(
        task_id="glam_extract_user_counts_to_csv",
        source_project_dataset_table=(
            f"{project_id}.{dataset_id}.{extract_table}"
        ),
        destination_cloud_storage_uris=export_uri,
        bigquery_conn_id=gcp_conn.gcp_conn_id,
        export_format="CSV",
        print_header=False,
        dag=dag,
    )

    run_extract_query >> drop_stale_csvs >> export_to_csv
    return dag
def histogram_aggregates_subdag(parent_dag_name, child_dag_name, default_args,
                                schedule_interval, dataset_id):
    """Build the GLAM histogram aggregates subdag.

    Computes the day's new client histogram aggregates, then merges them
    into the final aggregates table via the sample_id-partitioned
    ``repeated_subdag``.

    :param parent_dag_name:   parent DAG name (prefix of the subdag id)
    :param child_dag_name:    subdag name
    :param default_args:      default args for the DAG; ``depends_on_past``
                              is forced to True for this subdag
    :param schedule_interval: schedule of the subdag
    :param dataset_id:        BigQuery dataset of the destination tables
    :return: the configured airflow DAG
    """
    GLAM_HISTOGRAM_AGGREGATES_SUBDAG = "%s.%s" % (parent_dag_name,
                                                  child_dag_name)
    # Bug fix: copy before overriding instead of writing through the
    # caller's reference. The original mutated the shared default_args
    # dict, silently enabling depends_on_past for every other DAG built
    # from the same dict after this function ran.
    default_args = dict(default_args, depends_on_past=True)
    dag = DAG(
        GLAM_HISTOGRAM_AGGREGATES_SUBDAG,
        default_args=default_args,
        schedule_interval=schedule_interval,
    )

    # Day's incremental aggregates, replaced in full on each run.
    clients_histogram_aggregates_new = bigquery_etl_query(
        task_id="clients_histogram_aggregates_new",
        destination_table="clients_histogram_aggregates_new_v1",
        dataset_id=dataset_id,
        project_id="moz-fx-data-shared-prod",
        owner="*****@*****.**",
        email=["*****@*****.**", "*****@*****.**"],
        date_partition_parameter=None,
        parameters=("submission_date:DATE:{{ds}}", ),
        arguments=("--replace", ),
        dag=dag,
    )

    # Merge the new aggregates into the final table, partitioned by
    # sample_id ranges via the repeated subdag.
    clients_histogram_aggregates_final = SubDagOperator(
        subdag=repeated_subdag(
            GLAM_HISTOGRAM_AGGREGATES_SUBDAG,
            GLAM_HISTOGRAM_AGGREGATES_FINAL_SUBDAG,
            default_args,
            dag.schedule_interval,
            dataset_id,
        ),
        task_id=GLAM_HISTOGRAM_AGGREGATES_FINAL_SUBDAG,
        executor=get_default_executor(),
        dag=dag,
    )

    clients_histogram_aggregates_new >> clients_histogram_aggregates_final
    return dag
# over all the tables in _live datasets into _stable datasets except those # that are specifically used in another DAG. copy_deduplicate_all = bigquery_etl_copy_deduplicate( task_id="copy_deduplicate_all", target_project_id="moz-fx-data-shared-prod", # Any table listed here under except_tables _must_ have a corresponding # copy_deduplicate job in another DAG. except_tables=["telemetry_live.main_v4"]) # Events. event_events = bigquery_etl_query( task_id="event_events", project_id="moz-fx-data-shared-prod", destination_table="event_events_v1", dataset_id="telemetry_derived", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], arguments=('--schema_update_option=ALLOW_FIELD_ADDITION', ), ) copy_deduplicate_all >> event_events # Experiment enrollment aggregates chain (depends on events) wait_for_main_events = ExternalTaskSensor( task_id="wait_for_main_events", external_dag_id="main_summary", external_task_id="bq_main_events", dag=dag)
"retry_delay": datetime.timedelta(seconds=1800), "email_on_failure": True, "email_on_retry": True, "retries": 2, } with DAG("bqetl_addons", default_args=default_args, schedule_interval="0 1 * * *") as dag: telemetry_derived__addons_daily__v1 = bigquery_etl_query( task_id="telemetry_derived__addons_daily__v1", destination_table="addons_daily_v1", dataset_id="telemetry_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False, dag=dag, ) telemetry_derived__addons__v2 = bigquery_etl_query( task_id="telemetry_derived__addons__v2", destination_table="addons_v2", dataset_id="telemetry_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False,
"email_on_failure": True, "email_on_retry": True, "retries": 1, } with DAG( "bqetl_error_aggregates", default_args=default_args, schedule_interval=datetime.timedelta(seconds=10800), doc_md=docs, ) as dag: telemetry_derived__error_aggregates__v1 = bigquery_etl_query( task_id="telemetry_derived__error_aggregates__v1", destination_table="error_aggregates_v1", dataset_id="telemetry_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=[ "*****@*****.**", "*****@*****.**", "*****@*****.**", ], date_partition_parameter="submission_date", depends_on_past=False, dag=dag, )
"retry_delay": datetime.timedelta(seconds=1800), "email_on_failure": True, "email_on_retry": True, "retries": 2, } with DAG( "bqetl_search", default_args=default_args, schedule_interval="0 1 * * *" ) as dag: search_derived__search_metric_contribution__v1 = bigquery_etl_query( task_id="search_derived__search_metric_contribution__v1", destination_table="search_metric_contribution_v1", dataset_id="search_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False, dag=dag, ) search_derived__search_aggregates__v8 = bigquery_etl_query( task_id="search_derived__search_aggregates__v8", destination_table="search_aggregates_v8", dataset_id="search_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False,
with DAG('experiments_live', default_args=default_args, concurrency=4, max_active_runs=1, schedule_interval="*/5 * * * *") as dag: docker_image = "mozilla/bigquery-etl:latest" experiment_enrollment_aggregates_recents = bigquery_etl_query( task_id="experiment_enrollment_aggregates_recents", destination_table="experiment_enrollment_aggregates_recents_v1", dataset_id="telemetry_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter=None, depends_on_past=True, parameters=["submission_timestamp:TIMESTAMP:{{ts}}"], dag=dag, is_delete_operator_pod=True, ) experiment_search_aggregates_recents = bigquery_etl_query( task_id="experiment_search_aggregates_recents", destination_table="experiment_search_aggregates_recents_v1", dataset_id="telemetry_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter=None,
"retries": 2, } with DAG( "bqetl_fenix_event_rollup", default_args=default_args, schedule_interval="0 2 * * *", doc_md=docs, ) as dag: fenix_derived__event_types__v1 = bigquery_etl_query( task_id="fenix_derived__event_types__v1", destination_table="event_types_v1", dataset_id="fenix_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter=None, depends_on_past=False, parameters=["submission_date:DATE:{{ds}}"], dag=dag, ) fenix_derived__event_types_history__v1 = bigquery_etl_query( task_id="fenix_derived__event_types_history__v1", destination_table="event_types_history_v1", dataset_id="fenix_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=True,
# If a task fails, retry it once after waiting at least 5 minutes 'retries': 1, 'retry_delay': datetime.timedelta(minutes=10), } dag_name = 'fxa_events' with models.DAG( dag_name, # Continue to run DAG once per day schedule_interval='0 10 * * *', default_args=default_args) as dag: fxa_auth_events = bigquery_etl_query( task_id='fxa_auth_events', destination_table='fxa_auth_events_v1', dataset_id='telemetry', arguments=('--schema_update_option=ALLOW_FIELD_ADDITION',), ) fxa_auth_bounce_events = bigquery_etl_query( task_id='fxa_auth_bounce_events', destination_table='fxa_auth_bounce_events_v1', dataset_id='telemetry', arguments=('--schema_update_option=ALLOW_FIELD_ADDITION',), ) fxa_content_events = bigquery_etl_query( task_id='fxa_content_events', destination_table='fxa_content_events_v1', dataset_id='telemetry', arguments=('--schema_update_option=ALLOW_FIELD_ADDITION',),
"email": ["*****@*****.**", "*****@*****.**"], "depends_on_past": False, "retry_delay": datetime.timedelta(seconds=300), "email_on_failure": True, "email_on_retry": True, "retries": 1, } with DAG("bqetl_core", default_args=default_args, schedule_interval="0 2 * * *") as dag: telemetry_derived__core_clients_daily__v1 = bigquery_etl_query( task_id="telemetry_derived__core_clients_daily__v1", destination_table="core_clients_daily_v1", dataset_id="telemetry_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False, priority_weight=75, dag=dag, ) telemetry_derived__core_clients_last_seen__v1 = bigquery_etl_query( task_id="telemetry_derived__core_clients_last_seen__v1", destination_table="core_clients_last_seen_v1", dataset_id="telemetry_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=True,
2, } with DAG("bqetl_main_summary", default_args=default_args, schedule_interval="0 2 * * *") as dag: firefox_desktop_exact_mau28_by_client_count_dimensions = bigquery_etl_query( task_id="firefox_desktop_exact_mau28_by_client_count_dimensions", destination_table= "firefox_desktop_exact_mau28_by_client_count_dimensions_v1", dataset_id="telemetry_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=[ "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**", ], date_partition_parameter="submission_date", depends_on_past=False, dag=dag, ) firefox_desktop_exact_mau28_by_dimensions = bigquery_etl_query( task_id="firefox_desktop_exact_mau28_by_dimensions", destination_table="firefox_desktop_exact_mau28_by_dimensions_v1", dataset_id="telemetry_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**",
# Default task arguments shared by every task in this DAG.
default_args = {
    "owner": "*****@*****.**",
    "start_date": datetime.datetime(2020, 6, 29, 0, 0),
    "end_date": None,
    "email": ["*****@*****.**", "*****@*****.**"],
    "depends_on_past": False,
    # Retry twice, waiting 30 minutes between attempts.
    "retry_delay": datetime.timedelta(seconds=1800),
    "email_on_failure": True,
    "email_on_retry": True,
    "retries": 2,
}

# Runs daily at 01:00 UTC.
with DAG(
    "bqetl_deletion_request_volume",
    default_args=default_args,
    schedule_interval="0 1 * * *",
) as dag:

    # Daily deletion-request volume rollup, partitioned by submission_date.
    monitoring_derived__deletion_request_volume__v1 = bigquery_etl_query(
        task_id="monitoring_derived__deletion_request_volume__v1",
        destination_table="deletion_request_volume_v1",
        dataset_id="monitoring_derived",
        project_id="moz-fx-data-shared-prod",
        owner="*****@*****.**",
        email=["*****@*****.**", "*****@*****.**"],
        date_partition_parameter="submission_date",
        depends_on_past=False,
        dag=dag,
    )
"retry_delay": datetime.timedelta(seconds=1800), "email_on_failure": True, "email_on_retry": True, "retries": 2, } with DAG( "bqetl_fenix_event_rollup", default_args=default_args, schedule_interval="0 2 * * *" ) as dag: org_mozilla_firefox_derived__event_types__v1 = bigquery_etl_query( task_id="org_mozilla_firefox_derived__event_types__v1", destination_table="event_types_v1", dataset_id="org_mozilla_firefox_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=True, dag=dag, ) org_mozilla_firefox_derived__events_daily__v1 = bigquery_etl_query( task_id="org_mozilla_firefox_derived__events_daily__v1", destination_table="events_daily_v1", dataset_id="org_mozilla_firefox_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False,
"retries": 1, } with DAG( "bqetl_fxa_events", default_args=default_args, schedule_interval="30 1 * * *", doc_md=docs, ) as dag: firefox_accounts_derived__exact_mau28__v1 = bigquery_etl_query( task_id="firefox_accounts_derived__exact_mau28__v1", destination_table="exact_mau28_v1", dataset_id="firefox_accounts_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False, dag=dag, ) firefox_accounts_derived__fxa_auth_bounce_events__v1 = bigquery_etl_query( task_id="firefox_accounts_derived__fxa_auth_bounce_events__v1", destination_table="fxa_auth_bounce_events_v1", dataset_id="firefox_accounts_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False,
"retry_delay": datetime.timedelta(seconds=1800), "email_on_failure": True, "email_on_retry": True, "retries": 2, } with DAG( "bqetl_asn_aggregates", default_args=default_args, schedule_interval="0 2 * * *" ) as dag: telemetry_derived__asn_aggregates__v1 = bigquery_etl_query( task_id="telemetry_derived__asn_aggregates__v1", destination_table="asn_aggregates_v1", dataset_id="telemetry_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False, parameters=["n_clients:INT64:500"], dag=dag, ) wait_for_bq_main_events = ExternalTaskSensor( task_id="wait_for_bq_main_events", external_dag_id="copy_deduplicate", external_task_id="bq_main_events", execution_delta=datetime.timedelta(seconds=3600), check_existence=True, mode="reschedule", pool="DATA_ENG_EXTERNALTASKSENSOR", )
"email_on_retry": True, "retries": 2, } with DAG( "bqetl_google_analytics_derived", default_args=default_args, schedule_interval="0 23 * * *", ) as dag: ga_derived__blogs_daily_summary__v1 = bigquery_etl_query( task_id="ga_derived__blogs_daily_summary__v1", destination_table="blogs_daily_summary_v1", dataset_id="ga_derived", project_id="moz-fx-data-marketing-prod", owner="*****@*****.**", email=["*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False, dag=dag, ) ga_derived__blogs_empty_check__v1 = bigquery_etl_query( task_id="ga_derived__blogs_empty_check__v1", destination_table=None, dataset_id="ga_derived", project_id="moz-fx-data-marketing-prod", owner="*****@*****.**", email=["*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False,
"retries": 1, } with DAG("bqetl_vrbrowser", default_args=default_args, schedule_interval="0 2 * * *") as dag: org_mozilla_vrbrowser_derived__baseline_daily__v1 = bigquery_etl_query( task_id="org_mozilla_vrbrowser_derived__baseline_daily__v1", destination_table="baseline_daily_v1", dataset_id="org_mozilla_vrbrowser_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=[ "*****@*****.**", "*****@*****.**", "*****@*****.**", ], date_partition_parameter="submission_date", depends_on_past=False, dag=dag, ) org_mozilla_vrbrowser_derived__metrics_daily__v1 = bigquery_etl_query( task_id="org_mozilla_vrbrowser_derived__metrics_daily__v1", destination_table="metrics_daily_v1", dataset_id="org_mozilla_vrbrowser_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=[
dag_name, schedule_interval="0 2 * * *", default_args=default_args) as dag: wait_for_copy_deduplicate_main_ping = ExternalTaskSensor( task_id="wait_for_copy_deduplicate_main_ping", external_dag_id="copy_deduplicate", external_task_id="copy_deduplicate_main_ping", execution_delta=datetime.timedelta(hours=1), dag=dag, ) fission_monitoring_main_v1 = bigquery_etl_query( task_id="fission_monitoring_main_v1", project_id="moz-fx-data-shared-prod", destination_table="fission_monitoring_main_v1", dataset_id="telemetry_derived", arguments=('--schema_update_option=ALLOW_FIELD_ADDITION',), ) wait_for_copy_deduplicate_crash_ping = ExternalTaskSensor( task_id="wait_for_copy_deduplicate_crash_ping", external_dag_id="copy_deduplicate", external_task_id="copy_deduplicate_all", execution_delta=datetime.timedelta(hours=1), dag=dag, ) fission_monitoring_crash_v1 = bigquery_etl_query( task_id="fission_monitoring_crash_v1", project_id="moz-fx-data-shared-prod",
command=[ "python", "sql/moz-fx-data-shared-prod/monitoring_derived/column_size_v1/query.py", ] + ["--date", "{{ ds }}"], docker_image="gcr.io/moz-fx-data-airflow-prod-88e0/bigquery-etl:latest", owner="*****@*****.**", email=["*****@*****.**"], ) monitoring_derived__schema_error_counts__v2 = bigquery_etl_query( task_id="monitoring_derived__schema_error_counts__v2", destination_table="schema_error_counts_v2", dataset_id="monitoring_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False, dag=dag, ) monitoring_derived__stable_table_sizes__v1 = gke_command( task_id="monitoring_derived__stable_table_sizes__v1", command=[ "python", "sql/moz-fx-data-shared-prod/monitoring_derived/stable_table_sizes_v1/query.py", ] + ["--date", "{{ ds }}"], docker_image="gcr.io/moz-fx-data-airflow-prod-88e0/bigquery-etl:latest", owner="*****@*****.**",
"retries": 2, } with DAG( "bqetl_org_mozilla_fenix_derived", default_args=default_args, schedule_interval="0 2 * * *", doc_md=docs, ) as dag: org_mozilla_fenix_derived__geckoview_version__v1 = bigquery_etl_query( task_id="org_mozilla_fenix_derived__geckoview_version__v1", destination_table="geckoview_version_v1", dataset_id="org_mozilla_fenix_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False, dag=dag, ) wait_for_copy_deduplicate_all = ExternalTaskSensor( task_id="wait_for_copy_deduplicate_all", external_dag_id="copy_deduplicate", external_task_id="copy_deduplicate_all", execution_delta=datetime.timedelta(seconds=3600), check_existence=True, mode="reschedule", pool="DATA_ENG_EXTERNALTASKSENSOR", )
"retry_delay": datetime.timedelta(seconds=300), "email_on_failure": True, "email_on_retry": True, "retries": 1, } with DAG("bqetl_messaging_system", default_args=default_args, schedule_interval="0 2 * * *") as dag: messaging_system_derived__cfr_exact_mau28_by_dimensions__v1 = bigquery_etl_query( task_id="messaging_system_derived__cfr_exact_mau28_by_dimensions__v1", destination_table="cfr_exact_mau28_by_dimensions_v1", dataset_id="messaging_system_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False, dag=dag, ) messaging_system_derived__cfr_users_daily__v1 = bigquery_etl_query( task_id="messaging_system_derived__cfr_users_daily__v1", destination_table="cfr_users_daily_v1", dataset_id="messaging_system_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False,
"retries": 2, } with DAG( "bqetl_experiments_daily", default_args=default_args, schedule_interval="0 3 * * *", doc_md=docs, ) as dag: experiment_enrollment_daily_active_population = bigquery_etl_query( task_id="experiment_enrollment_daily_active_population", destination_table="experiment_enrollment_daily_active_population_v1", dataset_id="telemetry_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter=None, depends_on_past=False, dag=dag, ) monitoring__query_cost__v1 = bigquery_etl_query( task_id="monitoring__query_cost__v1", destination_table="query_cost_v1", dataset_id="monitoring", project_id="moz-fx-data-experiments", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False,
"retries": 2, } with DAG( "bqetl_internet_outages", default_args=default_args, schedule_interval="0 3 * * *", doc_md=docs, ) as dag: internet_outages__global_outages__v1 = bigquery_etl_query( task_id="internet_outages__global_outages__v1", destination_table="global_outages_v1", dataset_id="internet_outages", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False, dag=dag, ) wait_for_copy_deduplicate_all = ExternalTaskSensor( task_id="wait_for_copy_deduplicate_all", external_dag_id="copy_deduplicate", external_task_id="copy_deduplicate_all", execution_delta=datetime.timedelta(seconds=7200), check_existence=True, mode="reschedule", pool="DATA_ENG_EXTERNALTASKSENSOR", )
"retry_delay": datetime.timedelta(seconds=1800), "email_on_failure": True, "email_on_retry": True, "retries": 2, } with DAG( "bqetl_amo_stats", default_args=default_args, schedule_interval="0 3 * * *" ) as dag: amo_dev__amo_stats_dau__v2 = bigquery_etl_query( task_id="amo_dev__amo_stats_dau__v2", destination_table="amo_stats_dau_v2", dataset_id="amo_dev", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False, dag=dag, ) amo_dev__amo_stats_installs__v3 = bigquery_etl_query( task_id="amo_dev__amo_stats_installs__v3", destination_table="amo_stats_installs_v3", dataset_id="amo_dev", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False,
with DAG( "bqetl_search", default_args=default_args, schedule_interval="0 3 * * *", doc_md=docs, ) as dag: search_derived__search_aggregates__v8 = bigquery_etl_query( task_id="search_derived__search_aggregates__v8", destination_table="search_aggregates_v8", dataset_id="search_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=[ "*****@*****.**", "*****@*****.**", "*****@*****.**", ], date_partition_parameter="submission_date", depends_on_past=False, dag=dag, ) search_derived__search_clients_daily__v8 = bigquery_etl_query( task_id="search_derived__search_clients_daily__v8", destination_table="search_clients_daily_v8", dataset_id="search_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"],
"retry_delay": datetime.timedelta(seconds=300), "email_on_failure": True, "email_on_retry": True, "retries": 1, } with DAG( "bqetl_gud", default_args=default_args, schedule_interval="0 3 * * *", doc_md=docs ) as dag: telemetry_derived__smoot_usage_desktop__v2 = bigquery_etl_query( task_id="telemetry_derived__smoot_usage_desktop__v2", destination_table="smoot_usage_desktop_v2", dataset_id="telemetry_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False, dag=dag, ) telemetry_derived__smoot_usage_desktop_compressed__v2 = bigquery_etl_query( task_id="telemetry_derived__smoot_usage_desktop_compressed__v2", destination_table="smoot_usage_desktop_compressed_v2", dataset_id="telemetry_derived", project_id="moz-fx-data-shared-prod", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], date_partition_parameter="submission_date", depends_on_past=False,