Example #1
0
            "to": "{{ ds_nodash }}",
            "schema-report-location": "s3://{{ task.__class__.private_output_bucket }}/schema/main_summary/submission_date_s3={{ ds_nodash }}",
            "bucket": "{{ task.__class__.private_output_bucket }}",
            "read-mode": "aligned",
            "input-partition-multiplier": "400"
        },
        dev_options={
            "channel": "nightly",   # run on smaller nightly data rather than release
        }),
    dag=dag)

# NOTE(review): presumably publishes this task to a status/monitoring page
# under the given display name and description — confirm against
# register_status's definition elsewhere in the project.
_main_summary_status_name = "Main Summary"
_main_summary_status_description = "A summary view of main pings."
register_status(main_summary, _main_summary_status_name,
                _main_summary_status_description)

# Watches the main_summary schema reports written under the given key prefix
# and — judging by the operator's name — emails the listed recipients when the
# schema changes. NOTE(review): semantics assumed from the operator name;
# confirm against EmailSchemaChangeOperator's implementation.
main_summary_schema = EmailSchemaChangeOperator(
    dag=dag,
    task_id="main_summary_schema",
    key_prefix='schema/main_summary/submission_date_s3=',
    email=["*****@*****.**", "*****@*****.**"],
    to=["*****@*****.**", "*****@*****.**"],
)

main_summary_bigquery_load = SubDagOperator(
    subdag=load_to_bigquery(
        parent_dag_name=dag.dag_id,
        dag_name="main_summary_bigquery_load",
        default_args=default_args,
        dataset_s3_bucket="telemetry-parquet",
        aws_conn_id="aws_dev_iam_s3",
        dataset="main_summary",
        dataset_version="v4",
        gke_cluster_name="bq-load-gke-1",
        ),
    task_id="main_summary_bigquery_load",
Example #2
0
    instance_count=40,
    env=tbv_envvar(
        "com.mozilla.telemetry.views.MainSummaryView", {
            "from": "{{ ds_nodash }}",
            "to": "{{ ds_nodash }}",
            "schema-report-location":
            "s3://{{ task.__class__.private_output_bucket }}/schema/main_summary/submission_date_s3={{ ds_nodash }}",
            "bucket": "{{ task.__class__.private_output_bucket }}"
        }),
    uri=
    "https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
    dag=dag)

# Schema-change notification for main_summary: monitors objects under the
# given key prefix and notifies the "to" recipient. NOTE(review): behavior
# inferred from the operator's name — verify against its implementation.
_schema_change_kwargs = {
    "task_id": "main_summary_schema",
    "email": ["*****@*****.**", "*****@*****.**"],
    "to": ["*****@*****.**"],
    "key_prefix": 'schema/main_summary/submission_date_s3=',
}
main_summary_schema = EmailSchemaChangeOperator(dag=dag, **_schema_change_kwargs)

experiments_error_aggregates = EMRSparkOperator(
    task_id="experiments_error_aggregates",
    job_name="Experiments Error Aggregates View",
    execution_timeout=timedelta(hours=5),
    instance_count=20,
    release_label="emr-5.13.0",
    owner="*****@*****.**",
    email=["*****@*****.**", "*****@*****.**"],
    env={
        "date": "{{ ds_nodash }}",
        "bucket": "{{ task.__class__.private_output_bucket }}"
    },