def main_summary_subdag_factory(parent_dag, task_id, day):
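    # The quadruple braces survive .format() as literal {{ ... }}, leaving a
    # Jinja expression that Airflow renders at runtime to the shifted
    # execution date in YYYYMMDD form.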
    ds = "{{{{ macros.ds_format(macros.ds_add(ds, {0}), '%Y-%m-%d', '%Y%m%d') }}}}".format(day)
    subdag = DAG("{}.{}".format(parent_dag.dag_id, task_id),
                 schedule_interval=SCHEDULE_INTERVAL,
                 start_date=START_DATE,
                 default_args=default_args)

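    # Pull the EMR cluster id that the parent DAG's setup task pushed to XCom;
    # each per-day subdag submits its step to that shared backfill cluster.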
    parent_job_flow_id = ("{{{{ task_instance.xcom_pull('setup_backfill_cluster', "
                          "key='return_value', dag_id='{}') }}}}".format(parent_dag.dag_id))

    # Try to alleviate throttling issues by introducing some slight jitter on each of the days
    timedelta_task = TimeDeltaSensor(
        task_id="day_start_jitter",
        delta=timedelta(seconds=day),
        dag=subdag
    )

    add_step_task = EmrAddStepsOperator(
        task_id='submit_main_summary_day',
        job_flow_id=parent_job_flow_id,
        execution_timeout=timedelta(minutes=10),
        aws_conn_id='aws_default',
        steps=EmrAddStepsOperator.get_step_args(
            job_name="main_summary {}".format(ds),
            owner="*****@*****.**",
            action_on_failure='CONTINUE',
            uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
            env=tbv_envvar("com.mozilla.telemetry.views.MainSummaryView", {
                "from": ds,
                "to": ds,
                "bucket": "telemetry-backfill"
            }, {
                "DO_ASSEMBLY": "False"
            }),
        ),
        dag=subdag
    )

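    # Poke EMR every 5 minutes, giving the step up to 10 hours to finish.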
    step_sensor_task = EmrStepSensor(
        task_id="main_summary_step_sensor",
        timeout=timedelta(hours=10).total_seconds(),
        job_flow_id=parent_job_flow_id,
        step_id="{{ task_instance.xcom_pull('submit_main_summary_day', key='return_value') }}",
        poke_interval=timedelta(minutes=5).total_seconds(),
        dag=subdag
    )

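    # Chain: day_start_jitter -> submit_main_summary_day -> main_summary_step_sensor.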
    step_sensor_task.set_upstream(add_step_task)
    add_step_task.set_upstream(timedelta_task)

    return subdag
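
# A minimal usage sketch (hypothetical wiring, not from the source): the
# factory above is typically wrapped in a SubDagOperator inside the parent
# backfill DAG, one subdag per day of the window.
from airflow.operators.subdag_operator import SubDagOperator

for day in range(32):  # 32-day window is an assumed value
    day_task_id = "main_summary_day_{}".format(day)
    SubDagOperator(
        subdag=main_summary_subdag_factory(dag, day_task_id, day),
        task_id=day_task_id,  # must match the task_id given to the factory
        dag=dag)              # `dag` is the assumed parent backfill DAG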
Example #2
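# This truncated call configures an autoscaling cluster: 5 c4.4xlarge nodes
# growing to at most 50, spot bids at 50% of on-demand, and one 250 GB EBS
# volume per node.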
    task_id="main_summary_all_histograms",
    job_name="Main Summary View - All Histograms",
    execution_timeout=timedelta(hours=12),
    instance_count=5,
    max_instance_count=50,
    enable_autoscale=True,
    instance_type="c4.4xlarge",
    spot_bid_price_percent=50,
    ebs_volume_count=1,
    ebs_volume_size=250,
    env=tbv_envvar("com.mozilla.telemetry.views.MainSummaryView",
                   options={
                       "from": "{{ ds_nodash }}",
                       "to": "{{ ds_nodash }}",
                       "bucket": "telemetry-backfill",
                       "all_histograms": "",
                       "read-mode": "aligned",
                       "input-partition-multiplier": "400",
                   },
                   dev_options={
                       "channel": "nightly",
                   }),
    dag=dag)

main_summary = MozDatabricksSubmitRunOperator(
    task_id="main_summary",
    job_name="Main Summary View",
    execution_timeout=timedelta(hours=4),
    email=[
        "*****@*****.**", "*****@*****.**",
        "*****@*****.**"
    ],
Example #3
        "date": "{{ ds_nodash }}",
        "bucket": "{{ task.__class__.public_output_bucket }}"
    },
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/hardware_report.sh",
    output_visibility="public",
    dag=dag)

cross_sectional = EMRSparkOperator(
    task_id="cross_sectional",
    job_name="Cross Sectional View",
    execution_timeout=timedelta(hours=10),
    instance_count=30,
    env=tbv_envvar(
        "com.mozilla.telemetry.views.CrossSectionalView", {
            "outName": "v" + DS_WEEKLY,
            "outputBucket": "{{ task.__class__.private_output_bucket }}"
        }),
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
    dag=dag)

distribution_viewer = EMRSparkOperator(
    task_id="distribution_viewer",
    job_name="Distribution Viewer",
    owner="*****@*****.**",
    email=["*****@*****.**", "*****@*****.**"],
    execution_timeout=timedelta(hours=10),
    instance_count=5,
    env={"date": DS_WEEKLY},
    uri=
Example #4
}

# Make sure all the data for the given day has arrived before running.
# Running at 1am should suffice.
dag = DAG('main_summary',
          default_args=default_args,
          schedule_interval='0 1 * * *')
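# '0 1 * * *' fires daily at 01:00 (UTC, Airflow's default timezone).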

main_summary = EMRSparkOperator(
    task_id="main_summary",
    job_name="Main Summary View",
    execution_timeout=timedelta(hours=14),
    instance_count=40,
    env=tbv_envvar(
        "com.mozilla.telemetry.views.MainSummaryView", {
            "from": "{{ ds_nodash }}",
            "to": "{{ ds_nodash }}",
            "bucket": "{{ task.__class__.private_output_bucket }}"
        }),
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
    dag=dag)

experiments_error_aggregates = EMRSparkOperator(
    task_id="experiments_error_aggregates",
    job_name="Experiments Error Aggregates View",
    execution_timeout=timedelta(hours=5),
    instance_count=20,
    owner="*****@*****.**",
    email=["*****@*****.**", "*****@*****.**"],
    env={
        "date": "{{ ds_nodash }}",
Example #5
    execution_timeout=timedelta(hours=12),
    instance_count=5,
    max_instance_count=50,
    enable_autoscale=True,
    instance_type="c4.4xlarge",
    spot_bid_price_percent=50,
    ebs_volume_count=1,
    ebs_volume_size=250,
    env=tbv_envvar(
        "com.mozilla.telemetry.views.MainSummaryView",
        options={
            "from": "{{ ds_nodash }}",
            "to": "{{ ds_nodash }}",
            "schema-report-location":
            "s3://{{ task.__class__.private_output_bucket }}/schema/main_summary/submission_date_s3={{ ds_nodash }}",
            "bucket": "telemetry-backfill",
            "all_histograms": "",
            "read-mode": "aligned",
            "input-partition-multiplier": "400",
        },
        dev_options={
            "channel": "nightly",
        }),
    dag=dag)

main_summary = EMRSparkOperator(
    task_id="main_summary",
    job_name="Main Summary View",
    execution_timeout=timedelta(hours=14),
    instance_count=40,
    env=tbv_envvar(
Example #6
from operators.emr_spark_operator import EMRSparkOperator
from utils.constants import DS_WEEKLY
from utils.tbv import tbv_envvar

default_args = {
    'owner': '*****@*****.**',
    'depends_on_past': False,
    'start_date': datetime(2017, 5, 26),
    'email': ['*****@*****.**', '*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 2,
    'retry_delay': timedelta(minutes=30),
}

dag = DAG('focus_event_longitudinal', default_args=default_args, schedule_interval='@weekly')

focus_event_longitudinal = EMRSparkOperator(
    task_id="focus_event_longitudinal",
    job_name="Focus Event Longitudinal View",
    execution_timeout=timedelta(hours=12),
    instance_count=10,
    env=tbv_envvar("com.mozilla.telemetry.views.GenericLongitudinalView", {
        "to": DS_WEEKLY,
        "tablename": "telemetry_focus_event_parquet",
        "output-path": "{{ task.__class__.private_output_bucket }}/focus_event_longitudinal",
        "num-parquet-files": "30",
        "ordering-columns": "seq,created"}),
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
    dag=dag)
Example #7
    timedelta(minutes=30),
}

dag = DAG('longitudinal',
          default_args=default_args,
          schedule_interval='@weekly')

longitudinal = MozDatabricksSubmitRunOperator(
    task_id="longitudinal",
    job_name="Longitudinal View",
    execution_timeout=timedelta(hours=12),
    instance_count=16,
    instance_type="i3.8xlarge",
    env=tbv_envvar("com.mozilla.telemetry.views.LongitudinalView", {
        "bucket": "{{ task.__class__.private_output_bucket }}",
        "to": DS_WEEKLY
    },
                   metastore_location="s3://telemetry-parquet/longitudinal"),
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
    dag=dag)

register_status(longitudinal, "Longitudinal",
                "A 6-month longitudinal view of client history.")

addon_recommender = EMRSparkOperator(
    task_id="addon_recommender",
    job_name="Train the Addon Recommender",
    execution_timeout=timedelta(hours=10),
    instance_count=20,
    owner="*****@*****.**",
Example #8
devtools_release_events_to_amplitude = EMRSparkOperator(
    task_id="devtools_release_events_to_amplitude",
    job_name="DevTools Release Events to Amplitude",
    execution_timeout=timedelta(hours=8),
    instance_count=DEVTOOLS_INSTANCES,
    dev_instance_count=DEVTOOLS_INSTANCES,
    email=['*****@*****.**', '*****@*****.**'],
    owner='*****@*****.**',
    env=tbv_envvar(
        "com.mozilla.telemetry.streaming.EventsToAmplitude", {
            "from": "{{ ds_nodash }}",
            "to": "{{ ds_nodash }}",
            "max_parallel_requests": str(
                DEVTOOLS_INSTANCES * VCPUS_PER_INSTANCE),
            "config_file_path": "devtools_release_schemas.json",
            "url": "https://api.amplitude.com/httpapi",
            "sample": "0.5",
            "partition_multiplier": "5"
        },
        artifact_url=get_artifact_url(slug),
        other={
            "KEY_BUCKET": "telemetry-airflow",
            "KEY_PATH": key_path("devtools"),
            "DO_EVENTS_TO_AMPLITUDE_SETUP": "True"
        }),
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
    start_date=datetime(2018, 12, 4),
    dag=dag)
Example #9
    'email_on_retry': True,
    'retries': 2,
    'retry_delay': timedelta(minutes=30),
}

# Make sure all the data for the given day has arrived before running.
# Running at 1am should suffice.
dag = DAG('main_summary', default_args=default_args, schedule_interval='0 1 * * *')

main_summary = EMRSparkOperator(
    task_id="main_summary",
    job_name="Main Summary View",
    execution_timeout=timedelta(hours=14),
    instance_count=40,
    env=tbv_envvar("com.mozilla.telemetry.views.MainSummaryView", {
        "from": "{{ ds_nodash }}",
        "to": "{{ ds_nodash }}",
        "bucket": "{{ task.__class__.private_output_bucket }}"}),
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
    dag=dag)

experiments_error_aggregates = EMRSparkOperator(
    task_id="experiments_error_aggregates",
    job_name="Experiments Error Aggregates View",
    execution_timeout=timedelta(hours=5),
    instance_count=20,
    release_label="emr-5.13.0",
    owner="*****@*****.**",
    email=["*****@*****.**", "*****@*****.**"],
    env={"date": "{{ ds_nodash }}", "bucket": "{{ task.__class__.private_output_bucket }}"},
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/experiments_error_aggregates_view.sh",
    dag=dag)
Example #10
    'email': ['*****@*****.**', '*****@*****.**', '*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 2,
    'retry_delay': timedelta(minutes=30),
}

dag = DAG('sync_view', default_args=default_args, schedule_interval='@daily')

sync_view = EMRSparkOperator(
    task_id="sync_view",
    job_name="Sync Pings View",
    execution_timeout=timedelta(hours=10),
    instance_count=5,
    env=tbv_envvar("com.mozilla.telemetry.views.SyncView", {
        "from": "{{ ds_nodash }}",
        "to": "{{ ds_nodash }}",
        "bucket": "{{ task.__class__.private_output_bucket }}"}),
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
    dag=dag)

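# load_to_bigquery returns a subdag that copies the S3 parquet dataset into
# BigQuery, running the load job on the named GKE cluster.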
sync_view_bigquery_load = SubDagOperator(
    subdag=load_to_bigquery(
        parent_dag_name=dag.dag_id,
        dag_name="sync_view_bigquery_load",
        default_args=default_args,
        dataset_s3_bucket="telemetry-parquet",
        aws_conn_id="aws_dev_iam_s3",
        dataset="sync_summary",
        dataset_version="v2",
        gke_cluster_name="bq-load-gke-1",
        ),
Example #11
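# Only templated_job_flow_overrides are rendered through Jinja; the plain
# job_flow_overrides are handed to EMR as-is, which is why "{{ ds }}" appears
# only in the templated cluster name.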
    job_flow_overrides=EmrCreateJobFlowSelectiveTemplateOperator.get_jobflow_args(
        owner="*****@*****.**",
        instance_count=20,
        keep_alive=True,
        job_name="Main Summary Backfill"),
    templated_job_flow_overrides={
        "Name": "Main Summary Backfill {{ ds }}",
        "Steps": EmrCreateJobFlowSelectiveTemplateOperator.get_step_args(
            job_name="compile_main_summary",
            owner="*****@*****.**",
            uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
            env=tbv_envvar(None,
                           options={},
                           branch="backfill",
                           other={"DO_SUBMIT": "False"})),
    },
    dag=dag)

cluster_start_sensor_task = MozEmrClusterStartSensor(
    task_id="wait_for_cluster",
    timeout=timedelta(hours=1).total_seconds(),
    job_flow_id=job_flow_id_template,
    dag=dag)

terminate_job_flow_task = EmrTerminateJobFlowOperator(
    task_id="terminate_backfill_cluster",
    aws_conn_id='aws_default',
    execution_timeout=timedelta(minutes=10),
    job_flow_id=job_flow_id_template,
Example #12
    'retry_delay': timedelta(minutes=30),
}

# Make sure all the data for the given day has arrived before running.
# Running at 1am should suffice.
dag = DAG('first_shutdown_summary', default_args=default_args, schedule_interval='0 1 * * *')

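# First-shutdown pings reuse MainSummaryView, narrowed via doc-type, so a
# single instance and a one-hour timeout suffice.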
first_shutdown_summary = EMRSparkOperator(
    task_id="first_shutdown_summary",
    job_name="First Shutdown Summary View",
    execution_timeout=timedelta(hours=1),
    instance_count=1,
    env=tbv_envvar("com.mozilla.telemetry.views.MainSummaryView", {
        "from": "{{ ds_nodash }}",
        "to": "{{ ds_nodash }}",
        "bucket": "{{ task.__class__.private_output_bucket }}",
        "doc-type": "first_shutdown",
        "read-mode": "aligned",
        "input-partition-multiplier": "4"
    }),
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
    dag=dag)

first_shutdown_summary_bigquery_load = SubDagOperator(
    subdag=load_to_bigquery(
        parent_dag_name=dag.dag_id,
        dag_name="first_shutdown_summary_bigquery_load",
        default_args=default_args,
        dataset_s3_bucket="telemetry-parquet",
        aws_conn_id="aws_dev_iam_s3",
        dataset="first_shutdown_summary",
        dataset_version="v4",
Example #13
default_args = {
    'owner': '*****@*****.**',
    'depends_on_past': False,
    'start_date': datetime(2018, 6, 27),
    'email': ['*****@*****.**', '*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 2,
    'retry_delay': timedelta(minutes=30),
}

dag = DAG('event_ping_events',
          default_args=default_args,
          schedule_interval='0 1 * * *')

event_ping_events = EMRSparkOperator(
    task_id="event_ping_events",
    job_name="Event Ping Events Dataset",
    execution_timeout=timedelta(hours=8),
    instance_count=5,
    env=tbv_envvar(
        "com.mozilla.telemetry.streaming.EventPingEvents", {
            "from": "{{ ds_nodash }}",
            "to": "{{ ds_nodash }}",
            "outputPath": "s3://{{ task.__class__.private_output_bucket }}/"
        },
        artifact_url=url),
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
    dag=dag)
Example #14

slug = "{{ task.__class__.telemetry_streaming_slug }}"
url = get_artifact_url(slug)
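# The slug is itself a Jinja template; get_artifact_url resolves it to the
# telemetry-streaming artifact URL that tbv_envvar passes to the job below.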

default_args = {
    'owner': '*****@*****.**',
    'depends_on_past': False,
    'start_date': datetime(2018, 11, 26),
    'email': ['*****@*****.**', '*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 2,
    'retry_delay': timedelta(minutes=30),
}

dag = DAG('event_ping_events', default_args=default_args, schedule_interval='0 1 * * *')

event_ping_events = EMRSparkOperator(
    task_id="event_ping_events",
    job_name="Event Ping Events Dataset",
    execution_timeout=timedelta(hours=8),
    instance_count=5,
    env=tbv_envvar("com.mozilla.telemetry.streaming.EventPingEvents", {
        "from": "{{ ds_nodash }}",
        "to": "{{ ds_nodash }}",
        "outputPath": "s3://{{ task.__class__.private_output_bucket }}/"
    }, artifact_url=url),
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
    dag=dag)
Example #15
# Make sure all the data for the given day has arrived before running.
# Running at 1am should suffice.
dag = DAG('main_summary',
          default_args=default_args,
          schedule_interval='0 1 * * *')

main_summary = EMRSparkOperator(
    task_id="main_summary",
    job_name="Main Summary View",
    execution_timeout=timedelta(hours=14),
    instance_count=40,
    env=tbv_envvar(
        "com.mozilla.telemetry.views.MainSummaryView", {
            "from": "{{ ds_nodash }}",
            "to": "{{ ds_nodash }}",
            "schema-report-location":
            "s3://{{ task.__class__.private_output_bucket }}/schema/main_summary/submission_date_s3={{ ds_nodash }}",
            "bucket": "{{ task.__class__.private_output_bucket }}"
        }),
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
    dag=dag)

main_summary_schema = EmailSchemaChangeOperator(
    task_id="main_summary_schema",
    email=["*****@*****.**", "*****@*****.**"],
    to=["*****@*****.**"],
    key_prefix='schema/main_summary/submission_date_s3=',
    dag=dag)

experiments_error_aggregates = EMRSparkOperator(
Example #16
from utils.constants import DS_WEEKLY
from utils.tbv import tbv_envvar

default_args = {
    'owner': '*****@*****.**',
    'depends_on_past': False,
    'start_date': datetime(2018, 10, 21),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 2,
    'retry_delay': timedelta(minutes=30),
}

dag = DAG('quantum_release_criteria_view',
          default_args=default_args,
          schedule_interval='@weekly')

quantum_release_criteria_view = EMRSparkOperator(
    task_id="quantum_release_criteria_view",
    job_name="Quantum Release Criteria View",
    execution_timeout=timedelta(hours=2),
    instance_count=10,
    env=tbv_envvar("com.mozilla.telemetry.views.QuantumRCView", {
        "to": DS_WEEKLY,
        "bucket": "{{ task.__class__.private_output_bucket }}"
    }),
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
    dag=dag)
Example #17
    task_id="main_summary_all_histograms",
    job_name="Main Summary View - All Histograms",
    execution_timeout=timedelta(hours=12),
    instance_count=5,
    max_instance_count=50,
    enable_autoscale=True,
    instance_type="c4.4xlarge",
    spot_bid_price_percent=50,
    ebs_volume_count=1,
    ebs_volume_size=250,
    env=tbv_envvar("com.mozilla.telemetry.views.MainSummaryView",
        options={
            "from": "{{ ds_nodash }}",
            "to": "{{ ds_nodash }}",
            "bucket": "telemetry-backfill",
            "all_histograms": "",
            "read-mode": "aligned",
            "input-partition-multiplier": "400",
        },
        dev_options={
            "channel": "nightly",
        }),
    dag=dag)

main_summary = MozDatabricksSubmitRunOperator(
    task_id="main_summary",
    job_name="Main Summary View",
    execution_timeout=timedelta(hours=4),
    email=["*****@*****.**", "*****@*****.**", "*****@*****.**"],
    instance_count=5,
    max_instance_count=40,
    enable_autoscale=True,
Example #18
    'retries': 2,
    'retry_delay': timedelta(minutes=30),
}

dag = DAG('sync_view', default_args=default_args, schedule_interval='@daily')

sync_view = EMRSparkOperator(
    task_id="sync_view",
    job_name="Sync Pings View",
    execution_timeout=timedelta(hours=10),
    instance_count=5,
    env=tbv_envvar(
        "com.mozilla.telemetry.views.SyncView", {
            "from": "{{ ds_nodash }}",
            "to": "{{ ds_nodash }}",
            "bucket": "{{ task.__class__.private_output_bucket }}"
        }),
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
    dag=dag)

sync_view_bigquery_load = SubDagOperator(subdag=load_to_bigquery(
    parent_dag_name=dag.dag_id,
    dag_name="sync_view_bigquery_load",
    default_args=default_args,
    dataset_s3_bucket="telemetry-parquet",
    aws_conn_id="aws_dev_iam_s3",
    dataset="sync_summary",
    dataset_version="v2",
    gke_cluster_name="bq-load-gke-1",
Example #19
    'email': ['*****@*****.**', '*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 2,
    'retry_delay': timedelta(minutes=30),
}

# Make sure all the data for the given day has arrived before running.
# Running at 1am should suffice.
dag = DAG('first_shutdown_summary',
          default_args=default_args,
          schedule_interval='0 1 * * *')

first_shutdown_summary = EMRSparkOperator(
    task_id="first_shutdown_summary",
    job_name="First Shutdown Summary View",
    execution_timeout=timedelta(hours=1),
    instance_count=1,
    env=tbv_envvar(
        "com.mozilla.telemetry.views.MainSummaryView", {
            "from": "{{ ds_nodash }}",
            "to": "{{ ds_nodash }}",
            "bucket": "{{ task.__class__.private_output_bucket }}",
            "doc-type": "first_shutdown",
            "read-mode": "aligned",
            "input-partition-multiplier": "4"
        }),
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
    dag=dag)
Example #20
    'retries': 2,
    'retry_delay': timedelta(minutes=30),
}

dag = DAG('longitudinal', default_args=default_args, schedule_interval='@weekly')

longitudinal = MozDatabricksSubmitRunOperator(
    task_id="longitudinal",
    job_name="Longitudinal View",
    execution_timeout=timedelta(hours=12),
    instance_count=16,
    instance_type="i3.8xlarge",
    env=tbv_envvar(
        "com.mozilla.telemetry.views.LongitudinalView",
        {
            "bucket": "{{ task.__class__.private_output_bucket }}",
            "to": DS_WEEKLY
        },
        metastore_location="s3://telemetry-parquet/longitudinal"),
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
    dag=dag)

register_status(longitudinal, "Longitudinal", "A 6-month longitudinal view of client history.")


game_hw_survey = EMRSparkOperator(
    task_id="game_hw_survey",
    job_name="Firefox Hardware Report",
    execution_timeout=timedelta(hours=5),
    instance_count=15,
    owner="*****@*****.**",
Example #21
default_args = {
    'owner': '*****@*****.**',
    'depends_on_past': False,
    'start_date': datetime(2018, 11, 20),
    'email': ['*****@*****.**', '*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 3,
    'retry_delay': timedelta(minutes=30),
}

dag = DAG('crash_summary',
          default_args=default_args,
          schedule_interval='@daily')

crash_summary_view = EMRSparkOperator(
    task_id="crash_summary_view",
    job_name="Crash Summary View",
    instance_count=20,
    execution_timeout=timedelta(hours=4),
    env=tbv_envvar(
        "com.mozilla.telemetry.views.CrashSummaryView", {
            "from": "{{ ds_nodash }}",
            "to": "{{ ds_nodash }}",
            "outputBucket": "{{ task.__class__.private_output_bucket }}"
        }),
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
    dag=dag)
Example #22
default_args = {
    'owner': '*****@*****.**',
    'depends_on_past': False,
    'start_date': datetime(2016, 9, 20),
    'email': ['*****@*****.**', '*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 3,
    'retry_delay': timedelta(minutes=30),
}

dag = DAG('crash_aggregates_backfill',
          default_args=default_args,
          schedule_interval='@daily')

crash_aggregates_view_backfill = EMRSparkOperator(
    task_id="crash_aggregates_view_backfill",
    job_name="Crash Aggregates View Backfill",
    instance_count=20,
    execution_timeout=timedelta(hours=4),
    env=tbv_envvar(
        "com.mozilla.telemetry.views.CrashAggregateView", {
            "from": "{{ ds_nodash }}",
            "to": "{{ ds_nodash }}",
            "bucket": "telemetry-test-bucket"
        }),
    uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
    dag=dag)