def add_search_rollup(dag, mode, instance_count, upstream=None):
    """Create a search rollup task for a particular date.

    An optional task may be passed via `upstream`; the rollup task
    inherits the default values of the referenced DAG.
    """
    rollup_env = mozetl_envvar(
        "search_rollup",
        {
            "start_date": "{{ ds_nodash }}",
            "mode": mode,
            "bucket": "net-mozaws-prod-us-west-2-pipeline-analysis",
            "prefix": "spenrose/search/to_vertica",
        },
    )
    rollup_task = EMRSparkOperator(
        task_id="search_rollup_{}".format(mode),
        job_name="{} search rollup".format(mode).title(),
        owner="*****@*****.**",
        email=[
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
        ],
        execution_timeout=timedelta(hours=4),
        instance_count=instance_count,
        disable_on_dev=True,
        env=rollup_env,
        uri="https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh",
        dag=dag,
    )
    if upstream:
        rollup_task.set_upstream(upstream)
def add_search_rollup(dag, mode, instance_count, upstream=None):
    """Create a search rollup task for a particular date."""
    task = EMRSparkOperator(
        task_id="search_rollup_{}".format(mode),
        job_name="{} search rollup".format(mode).title(),
        execution_timeout=timedelta(hours=4),
        instance_count=instance_count,
        env=mozetl_envvar(
            "search_rollup",
            {
                "start_date": "{{ ds_nodash }}",
                "mode": mode,
                "bucket": "net-mozaws-prod-us-west-2-pipeline-analysis",
                "prefix": "spenrose/search/to_vertica",
            },
        ),
        uri="https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh",
        dag=dag,
    )
    # Only wire a dependency when an upstream task was supplied.
    if upstream:
        task.set_upstream(upstream)
job_name="A placeholder for the implicit clients daily dependency", dag=dag, ) bgbb_fit = MozDatabricksSubmitRunOperator( task_id="bgbb_fit", job_name="Fit parameters for a BGBB model to determine active profiles", execution_timeout=timedelta(hours=2), instance_count=3, env=mozetl_envvar( "bgbb_fit", { "submission-date": "{{ next_ds }}", "model-win": "120", "start-params": "[0.387, 0.912, 0.102, 1.504]", "sample-ids": "[42]", "sample-fraction": "1.0", "penalizer-coef": "0.01", "bucket": "{{ task.__class__.private_output_bucket }}", "prefix": "bgbb/params/v1", }, dev_options={"model-win": "30"}, other={ "MOZETL_GIT_PATH": "https://github.com/wcbeard/bgbb_airflow.git", "MOZETL_EXTERNAL_MODULE": "bgbb_airflow", }, ), dag=dag, ) clients_daily_v6_dummy >> bgbb_fit
replace=["SAFE_CAST(sample_id AS INT64) AS sample_id"], ), task_id="main_summary_bigquery_load", dag=dag) engagement_ratio = EMRSparkOperator( task_id="engagement_ratio", job_name="Update Engagement Ratio", execution_timeout=timedelta(hours=6), instance_count=10, env=mozetl_envvar("engagement_ratio", options={ "input_bucket": "{{ task.__class__.private_output_bucket }}", "output_bucket": "net-mozaws-prod-us-west-2-pipeline-analysis" }, dev_options={ "output_bucket": "{{ task.__class__.private_output_bucket }}" }), uri= "https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh", output_visibility="public", dag=dag) addons = EMRSparkOperator( task_id="addons", job_name="Addons View", execution_timeout=timedelta(hours=4), instance_count=3,
rename={"submission_date_s3": "submission_date"}, replace=["SAFE_CAST(sample_id AS INT64) AS sample_id"], ), task_id="main_events_bigquery_load", dag=dag) addon_aggregates = EMRSparkOperator( task_id="addon_aggregates", job_name="Addon Aggregates View", execution_timeout=timedelta(hours=8), owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], instance_count=10, env=mozetl_envvar( "addon_aggregates", { "date": "{{ ds_nodash }}", "input-bucket": "{{ task.__class__.private_output_bucket }}", "output-bucket": "{{ task.__class__.private_output_bucket }}" }), uri= "https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh", dag=dag) addon_aggregates_bigquery_load = SubDagOperator( subdag=load_to_bigquery( parent_dag_name=dag.dag_id, dag_name="addon_aggregates_bigquery_load", default_args=default_args, dataset_s3_bucket="telemetry-parquet", aws_conn_id="aws_dev_iam_s3", dataset="addons/agg", dataset_version="v2",
'email_on_retry': True, 'retries': 3, 'retry_delay': timedelta(minutes=30), } dag = DAG('taar_amodump', default_args=default_args, schedule_interval='@daily') amodump = EMRSparkOperator( task_id="taar_amodump", job_name="Dump AMO JSON blobs with oldest creation date", execution_timeout=timedelta(hours=1), instance_count=1, owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], env=mozetl_envvar("taar_amodump", {"date": "{{ ds_nodash }}"}, {'MOZETL_SUBMISSION_METHOD': 'python'}), uri="https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh", output_visibility="private", dag=dag ) amowhitelist = EMRSparkOperator( task_id="taar_amowhitelist", job_name="Generate a whitelisted set of addons for TAAR", execution_timeout=timedelta(hours=1), instance_count=1, owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], env=mozetl_envvar("taar_amowhitelist", {},
# Daily "tab_spinner_severity" DAG: runs the mozetl `long_tab_spinners`
# job on Databricks.
from airflow import DAG
from airflow.operators.moz_databricks import MozDatabricksSubmitRunOperator
from datetime import datetime, timedelta
from utils.mozetl import mozetl_envvar

# Defaults applied to every task in this DAG.
default_args = {
    'owner': '*****@*****.**',
    'depends_on_past': False,  # each daily run is independent of prior runs
    'start_date': datetime(2018, 11, 26),
    'email': ['*****@*****.**', '*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 3,
    'retry_delay': timedelta(minutes=30),
}

dag = DAG('tab_spinner_severity', default_args=default_args, schedule_interval='@daily')

# Submits the mozetl "long_tab_spinners" job via the mozetl-submit.sh
# bootstrap script.
# NOTE(review): `uri` points at the EMR submit script even though this is
# the Databricks operator — confirm the operator still consumes it.
update_tab_spinner_severity = MozDatabricksSubmitRunOperator(
    task_id="update_tab_spinner_severity",
    job_name="Tab Spinner Severity Job",
    execution_timeout=timedelta(hours=12),
    instance_count=12,
    env=mozetl_envvar("long_tab_spinners", {}),  # no extra job options
    uri="https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh",
    dag=dag
)
email=[ "*****@*****.**", "*****@*****.**", "*****@*****.**" ], env={ "date": "{{ ds_nodash }}", "bucket": "{{ task.__class__.public_output_bucket }}" }, uri= "https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/hardware_report.sh", output_visibility="public", dag=dag) taar_lite_guidranking = EMRSparkOperator( task_id="taar_lite_guidranking", job_name="TAARlite Addon Ranking", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], execution_timeout=timedelta(hours=2), instance_count=4, env=mozetl_envvar("taar_lite_guidranking", {"date": "{{ ds_nodash }}"}, {'MOZETL_SUBMISSION_METHOD': 'spark'}), uri= "https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh", output_visibility="private", dag=dag) addon_recommender.set_upstream(longitudinal) game_hw_survey.set_upstream(longitudinal) taar_lite_guidranking.set_upstream(longitudinal)
"bucket": "{{ task.__class__.private_output_bucket }}" }), uri= "https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py", dag=dag) addon_aggregates = EMRSparkOperator( task_id="addon_aggregates", job_name="Addon Aggregates View", execution_timeout=timedelta(hours=8), owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], instance_count=10, env=mozetl_envvar( "addon_aggregates", { "date": "{{ ds_nodash }}", "output-bucket": "{{ task.__class__.private_output_bucket }}" }), uri= "https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh", dag=dag) txp_mau_dau = EMRSparkOperator( task_id="txp_mau_dau", job_name="Test Pilot MAU DAU", execution_timeout=timedelta(hours=4), owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], instance_count=5, env={ "date": "{{ ds_nodash }}",
dataset_version="v1", gke_cluster_name="bq-load-gke-1", bigquery_dataset="telemetry_derived", ), task_id="sync_flat_view_bigquery_load", dag=dag) sync_bookmark_validation = EMRSparkOperator( task_id="sync_bookmark_validation", job_name="Sync Bookmark Validation", execution_timeout=timedelta(hours=2), instance_count=1, email=["*****@*****.**", "*****@*****.**"], env=mozetl_envvar( "sync_bookmark_validation", { "start_date": "{{ ds_nodash }}", "bucket": "{{ task.__class__.private_output_bucket }}", }), uri= "https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh", dag=dag) sync_bookmark_validation_total_per_day_bigquery_load = SubDagOperator( subdag=load_to_bigquery( parent_dag_name=dag.dag_id, dag_name="sync_bookmark_validation_total_per_day_bigquery_load", default_args=default_args, dataset_s3_bucket="telemetry-parquet", aws_conn_id="aws_dev_iam_s3", p2b_table_alias="sync_bmk_total_per_day_v1", dataset="sync/bmk_total_per_day",
'email': ['*****@*****.**', '*****@*****.**'], 'email_on_failure': True, 'email_on_retry': True, 'retries': 2, 'retry_delay': timedelta(minutes=30), } dag = DAG('churn', default_args=default_args, schedule_interval='0 0 * * 3') churn = EMRSparkOperator( task_id="churn", job_name="churn 7-day v3", execution_timeout=timedelta(hours=4), instance_count=10, env=mozetl_envvar("churn", { "start_date": "{{ ds_nodash }}", "bucket": "{{ task.__class__.private_output_bucket }}" }), uri="https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh", output_visibility="public", dag=dag) churn_bigquery_load = SubDagOperator( subdag=load_to_bigquery( parent_dag_name=dag.dag_id, dag_name="churn_bigquery_load", default_args=default_args, dataset_s3_bucket="telemetry-parquet", aws_conn_id="aws_dev_iam_s3", dataset="churn", dataset_version="v3", date_submission_col="week_start",
# Defaults applied to every task in this DAG.
default_args = {
    'owner': '*****@*****.**',
    'depends_on_past': False,
    'start_date': datetime(2018, 9, 10),
    'email': ['*****@*****.**', '*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 2,
    'retry_delay': timedelta(minutes=30),
}

# Runs daily at 01:00 UTC.
dag = DAG('landfill', default_args=default_args, schedule_interval='0 1 * * *')

# Samples raw landfill data into a sanitized subset written under the
# "sanitized-landfill-sample" prefix of the private output bucket.
landfill_sampler = MozDatabricksSubmitRunOperator(
    task_id="landfill_sampler",
    job_name="Landfill Sampler",
    execution_timeout=timedelta(hours=2),
    instance_count=3,
    # NOTE(review): dedicated instance profile, presumably granting access
    # to the landfill bucket — confirm against the IAM role definition.
    iam_role=
    "arn:aws:iam::144996185633:instance-profile/databricks-ec2-landfill",
    env=mozetl_envvar(
        "landfill_sampler", {
            "submission-date": "{{ ds_nodash }}",
            "bucket": "{{ task.__class__.private_output_bucket }}",
            "prefix": "sanitized-landfill-sample",
        }),
    uri=
    "https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh",
    dag=dag)
instance_count=5, env={"date": DS_WEEKLY}, uri= "https://raw.githubusercontent.com/mozilla/distribution-viewer/master/notebooks/aggregate-and-import.py", dag=dag) taar_locale_job = EMRSparkOperator( task_id="taar_locale_job", job_name="TAAR Locale Model", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], execution_timeout=timedelta(hours=10), instance_count=5, env=mozetl_envvar( "taar_locale", { "date": "{{ ds_nodash }}", "bucket": "{{ task.__class__.private_output_bucket }}", "prefix": "taar/locale/" }), release_label="emr-5.8.0", uri= "https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh", output_visibility="private", dag=dag) taar_legacy_job = EMRSparkOperator( task_id="taar_legacy_job", job_name="TAAR Legacy Model", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], execution_timeout=timedelta(hours=1), instance_count=1,
"glue_secret_access_key": "{{ var.value.glue_secret_access_key }}", "glue_default_region": "{{ var.value.glue_default_region }}", }, uri= "https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/update_glue.sh", dag=dag) taar_dynamo = EMRSparkOperator( task_id="taar_dynamo", job_name="TAAR DynamoDB loader", execution_timeout=timedelta(hours=14), instance_count=6, disable_on_dev=True, owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**", "*****@*****.**"], env=mozetl_envvar("taar_dynamo", {"date": "{{ ds_nodash }}"}), uri= "https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh", output_visibility="private", dag=dag) taar_locale_job = SubDagOperator( task_id="taar_locale_job", subdag=moz_dataproc_pyspark_runner( parent_dag_name=dag.dag_id, dag_name="taar_locale_job", default_args=default_args, cluster_name=taar_locale_cluster_name, job_name="TAAR_Locale", python_driver_code= "gs://moz-fx-data-prod-airflow-dataproc-artifacts/jobs/taar_locale.py",
task_id="game_hw_survey", job_name="Firefox Hardware Report", execution_timeout=timedelta(hours=5), instance_count=15, owner="*****@*****.**", depends_on_past=True, email=["*****@*****.**", "*****@*****.**", "*****@*****.**"], env={"date": "{{ ds_nodash }}", "bucket": "{{ task.__class__.public_output_bucket }}"}, uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/hardware_report.sh", output_visibility="public", dag=dag) taar_lite_guidranking = EMRSparkOperator( task_id="taar_lite_guidranking", job_name="TAARlite Addon Ranking", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], execution_timeout=timedelta(hours=2), instance_count=4, env=mozetl_envvar("taar_lite_guidranking", {"date": "{{ ds_nodash }}"}, {'MOZETL_SUBMISSION_METHOD': 'spark'}), uri="https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh", output_visibility="private", dag=dag) game_hw_survey.set_upstream(longitudinal) taar_lite_guidranking.set_upstream(longitudinal)
schedule_interval="@daily") mobile_aggregate_view = MozDatabricksSubmitRunOperator( task_id="mobile_aggregate_view", job_name="Mobile Aggregate View", release_label="6.1.x-scala2.11", instance_count=5, execution_timeout=timedelta(hours=12), env=mozetl_envvar( "mobile", { "date": "{{ ds_nodash }}", "channels": "nightly", "output": "s3://{{ task.__class__.private_output_bucket }}/mobile_metrics_aggregates/v2", "num-partitions": 5 * 32 }, other={ "MOZETL_GIT_PATH": "https://github.com/mozilla/python_mozaggregator.git", "MOZETL_EXTERNAL_MODULE": "mozaggregator", }, ), dag=dag, ) register_status( mobile_aggregate_view, "Mobile Aggregates", "Aggregates of metrics sent through the mobile-events pings.", )
execution_delta=timedelta( days=-7, hours=-1 ), # main_summary waits one hour, execution date is beginning of the week dag=taar_weekly, ) taar_ensemble = MozDatabricksSubmitRunOperator( task_id="taar_ensemble", job_name="TAAR Ensemble Model", owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], execution_timeout=timedelta(hours=11), instance_count=5, instance_type="i3.2xlarge", spot_bid_price_percent=100, max_instance_count=60, enable_autoscale=True, pypi_libs=[ "mozilla-taar3==0.4.5", "mozilla-srgutil==0.1.10", "python-decouple==3.1", ], env=mozetl_envvar("taar_ensemble", {"date": "{{ ds_nodash }}"}), start_date=datetime(2019, 7, 14), uri= "https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-databricks.py", output_visibility="private", ) taar_ensemble.set_upstream(wait_for_clients_daily)
schedule_interval="@daily", ) prerelease_telemetry_aggregate_view = MozDatabricksSubmitRunOperator( task_id="prerelease_telemetry_aggregate_view", job_name="Prerelease Telemetry Aggregate View", instance_count=10, dev_instance_count=10, execution_timeout=timedelta(hours=12), python_version=2, env=mozetl_envvar( "aggregator", { "date": "{{ ds_nodash }}", "channels": "nightly,aurora,beta", "credentials-bucket": "telemetry-spark-emr-2", "credentials-prefix": "aggregator_database_envvars.json", "num-partitions": 10 * 32, }, dev_options={ "credentials-prefix": "aggregator_dev_database_envvars.json" }, other={ "MOZETL_GIT_PATH": "https://github.com/mozilla/python_mozaggregator.git", "MOZETL_EXTERNAL_MODULE": "mozaggregator", }, ), dag=dag, )
dataset_s3_bucket="telemetry-parquet", aws_conn_id="aws_dev_iam_s3", dataset="sync_flat_summary", dataset_version="v1", gke_cluster_name="bq-load-gke-1", ), task_id="sync_flat_view_bigquery_load", dag=dag) sync_bookmark_validation = EMRSparkOperator( task_id="sync_bookmark_validation", job_name="Sync Bookmark Validation", execution_timeout=timedelta(hours=2), instance_count=1, email=["*****@*****.**", "*****@*****.**"], env=mozetl_envvar("sync_bookmark_validation", { "start_date": "{{ ds_nodash }}", "bucket": "{{ task.__class__.private_output_bucket }}", }), uri="https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh", dag=dag) sync_bookmark_validation.set_upstream(sync_view) sync_view_bigquery_load.set_upstream(sync_view) sync_events_view_bigquery_load.set_upstream(sync_events_view) sync_flat_view_bigquery_load.set_upstream(sync_flat_view)
dag = DAG(
    "telemetry_aggregates_parquet",
    default_args=default_args,
    schedule_interval="@daily",
)

# Builds the parquet view of telemetry aggregates for the nightly channel
# using the external python_mozaggregator module.
telemetry_aggregate_parquet_view = MozDatabricksSubmitRunOperator(
    task_id="telemetry_aggregate_parquet_view",
    job_name="Telemetry Aggregate Parquet View",
    instance_count=5,
    execution_timeout=timedelta(hours=12),
    python_version=2,  # job is explicitly pinned to Python 2
    env=mozetl_envvar(
        "parquet",
        {
            "date": "{{ ds_nodash }}",
            "channels": "nightly",
            "output": "s3://{{ task.__class__.private_output_bucket }}/aggregates_poc/v1",
        },
        # Pull the job from an external repo rather than python_mozetl.
        other={
            "MOZETL_GIT_PATH": "https://github.com/mozilla/python_mozaggregator.git",
            "MOZETL_EXTERNAL_MODULE": "mozaggregator",
        },
    ),
    dag=dag,
)
main_summary_schema = EmailSchemaChangeOperator( task_id="main_summary_schema", email=["*****@*****.**", "*****@*****.**"], to=["*****@*****.**", "*****@*****.**"], key_prefix='schema/main_summary/submission_date_s3=', dag=dag) engagement_ratio = EMRSparkOperator( task_id="engagement_ratio", job_name="Update Engagement Ratio", execution_timeout=timedelta(hours=6), instance_count=10, env=mozetl_envvar("engagement_ratio", options={ "input_bucket": "{{ task.__class__.private_output_bucket }}", "output_bucket": "net-mozaws-prod-us-west-2-pipeline-analysis" }, dev_options={ "output_bucket": "{{ task.__class__.private_output_bucket }}" }), uri="https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh", output_visibility="public", dag=dag) addons = EMRSparkOperator( task_id="addons", job_name="Addons View", execution_timeout=timedelta(hours=4), instance_count=3, env=tbv_envvar("com.mozilla.telemetry.views.AddonsView", { "from": "{{ ds_nodash }}", "to": "{{ ds_nodash }}",
dataset="main_summary", dataset_version="v4", gke_cluster_name="bq-load-gke-1", ), task_id="main_summary_bigquery_load", dag=dag) engagement_ratio = EMRSparkOperator( task_id="engagement_ratio", job_name="Update Engagement Ratio", execution_timeout=timedelta(hours=6), instance_count=10, env=mozetl_envvar("engagement_ratio", options={ "input_bucket": "{{ task.__class__.private_output_bucket }}", "output_bucket": "net-mozaws-prod-us-west-2-pipeline-analysis" }, dev_options={ "output_bucket": "{{ task.__class__.private_output_bucket }}" }), uri="https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh", output_visibility="public", dag=dag) addons = EMRSparkOperator( task_id="addons", job_name="Addons View", execution_timeout=timedelta(hours=4), instance_count=3, env=tbv_envvar("com.mozilla.telemetry.views.AddonsView", { "from": "{{ ds_nodash }}", "to": "{{ ds_nodash }}",
) addons_daily = MozDatabricksSubmitRunOperator( task_id="addons_daily", job_name="Addons Daily", execution_timeout=timedelta(hours=4), instance_count=10, owner="*****@*****.**", email=[ "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**", ], env=mozetl_envvar( "addons_report", { "date": "{{ ds_nodash }}", "deploy_environment": "{{ task.__class__.deploy_environment }}", }, other={ "MOZETL_GIT_PATH": "https://github.com/mozilla/addons_daily.git", "MOZETL_EXTERNAL_MODULE": "addons_daily", }, ), dag=dag, ) addons_daily.set_upstream(wait_for_search_clients_daily)
'email_on_retry': True, 'retries': 3, 'retry_delay': timedelta(minutes=30), } dag = DAG('taar_amodump', default_args=default_args, schedule_interval='@daily') amodump = EMRSparkOperator( task_id="taar_amodump", job_name="Dump AMO JSON blobs with oldest creation date", execution_timeout=timedelta(hours=1), instance_count=1, owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], env=mozetl_envvar("taar_amodump", {"date": "{{ ds_nodash }}"}, {'MOZETL_SUBMISSION_METHOD': 'python'}), uri="https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh", output_visibility="private", dag=dag ) amowhitelist = EMRSparkOperator( task_id="taar_amowhitelist", job_name="Generate an algorithmically defined set of whitelisted addons for TAAR", execution_timeout=timedelta(hours=1), instance_count=1, owner="*****@*****.**", email=["*****@*****.**", "*****@*****.**"], env=mozetl_envvar("taar_amowhitelist", {},
'email_on_failure': True, 'email_on_retry': True, 'retries': 2, 'retry_delay': timedelta(minutes=30), } dag = DAG('churn', default_args=default_args, schedule_interval='0 0 * * 3') churn = EMRSparkOperator( task_id="churn", job_name="churn 7-day v3", execution_timeout=timedelta(hours=4), instance_count=5, env=mozetl_envvar( "churn", { "start_date": "{{ ds_nodash }}", "bucket": "{{ task.__class__.private_output_bucket }}" }), uri= "https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh", output_visibility="public", dag=dag) churn_v2 = EMRSparkOperator( task_id="churn_v2", job_name="churn 7-day v2", execution_timeout=timedelta(hours=4), instance_count=5, env=mozetl_envvar( "churn", { "start_date": "{{ ds_nodash }}",
# Daily "tab_spinner_severity" DAG: runs the mozetl `long_tab_spinners`
# job on EMR via the mozetl-submit.sh bootstrap script.
from airflow import DAG
from datetime import datetime, timedelta
from operators.emr_spark_operator import EMRSparkOperator
from utils.mozetl import mozetl_envvar

# Defaults applied to every task in this DAG.
default_args = {
    'owner': '*****@*****.**',
    'depends_on_past': False,  # each daily run is independent of prior runs
    'start_date': datetime(2018, 11, 26),
    'email': ['*****@*****.**', '*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 3,
    'retry_delay': timedelta(minutes=30),
}

dag = DAG('tab_spinner_severity', default_args=default_args, schedule_interval='@daily')

update_tab_spinner_severity = EMRSparkOperator(
    task_id="update_tab_spinner_severity",
    job_name="Tab Spinner Severity Job",
    execution_timeout=timedelta(hours=12),
    instance_count=12,
    env=mozetl_envvar("long_tab_spinners", {}),  # no extra job options
    uri="https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-submit.sh",
    dag=dag
)