Exemplo n.º 1
0
        dag_name="taar_lite",
        default_args=default_args,
        cluster_name=taarlite_cluster_name,
        job_name="TAAR_Lite_GUID_GUID",
        python_driver_code=
        "gs://moz-fx-data-prod-airflow-dataproc-artifacts/jobs/taar_lite_guidguid.py",
        # python_driver_code="gs://temp-hwoo-removemelater/taar_lite_guidguid.py",
        num_workers=8,
        py_args=[
            "--date",
            "{{ ds_nodash }}",
            "--aws_access_key_id",
            aws_access_key,
            "--aws_secret_access_key",
            aws_secret_key,
        ],
        aws_conn_id=aws_conn_id,
        gcp_conn_id=gcpdataproc_conn_id,
    ),
    dag=dag,
)
# Task ordering, written with the bitshift form of set_upstream
# (`a << b` is equivalent to `a.set_upstream(b)`).

# amodump is upstream of amowhitelist.
amowhitelist << amodump

# amodump is also upstream of the editorially reviewed whitelist of addons.
editorial_whitelist << amodump

# amowhitelist is upstream of taar_lite.
taar_lite << amowhitelist
Exemplo n.º 2
0
        ],
        env_vars={
            "AWS_ACCESS_KEY_ID": aws_access_key,
            "AWS_SECRET_ACCESS_KEY": aws_secret_key
        },
        dag=dag)

    # Run the mozilla-schema-generator image as a pod on the GKE cluster;
    # probe_scraper is registered as its upstream task below.
    schema_generator = GKEPodOperator(
        # NOTE(review): this address appears redacted in this copy of the
        # file; confirm the real recipient before deploying.
        email=['*****@*****.**'],
        task_id='mozilla_schema_generator',
        gcp_conn_id=gcp_conn_id,
        project_id=connection.project_id,
        location=gke_location,
        cluster_name=gke_cluster_name,
        name='schema-generator-1',
        namespace='default',
        # The image uses the mutable `latest` tag and is pulled on every run
        # (image_pull_policy='Always').
        image='mozilla/mozilla-schema-generator:latest',
        is_delete_operator_pod=True,
        image_pull_policy='Always',
        env_vars={
            # Jinja template that reads the base64-encoded SSH key from an
            # Airflow Variable.
            "MPS_SSH_KEY_BASE64":
            "{{ var.value.mozilla_pipeline_schemas_secret_git_sshkey_b64 }}",
            # SSH clone URL in user@host:path form. The user/host prefix must
            # be a real git remote, not an email-obfuscation placeholder.
            "MPS_REPO_URL":
            "git@github.com:mozilla-services/mozilla-pipeline-schemas.git",
            "MPS_BRANCH_SOURCE": "master",
            "MPS_BRANCH_PUBLISH": "generated-schemas",
        },
        dag=dag)

    # probe_scraper is upstream of schema_generator.
    schema_generator.set_upstream(probe_scraper)
Exemplo n.º 3
0
        dag=dag)

    # Run the mozilla-schema-generator image as a GKE pod; probe_scraper is
    # registered as its upstream task below.
    schema_generator = GKEPodOperator(
        # NOTE(review): this address appears redacted in this copy of the
        # file; confirm the real recipient before deploying.
        email=['*****@*****.**'],
        task_id='mozilla_schema_generator',
        name='schema-generator-1',
        image='mozilla/mozilla-schema-generator:latest',
        env_vars={
            # Jinja template that reads the base64-encoded SSH key from an
            # Airflow Variable.
            "MPS_SSH_KEY_BASE64": "{{ var.value.mozilla_pipeline_schemas_secret_git_sshkey_b64 }}",
            # SSH clone URL in user@host:path form. The user/host prefix must
            # be a real git remote, not an email-obfuscation placeholder.
            "MPS_REPO_URL": "git@github.com:mozilla-services/mozilla-pipeline-schemas.git",
            "MPS_BRANCH_SOURCE": "master",
            "MPS_BRANCH_PUBLISH": "generated-schemas",
        },
        dag=dag)

    # probe_scraper is upstream of schema_generator.
    schema_generator.set_upstream(probe_scraper)

    probe_expiry_alerts = GKEPodOperator(
        task_id="probe-expiry-alerts",
        name="probe-expiry-alerts",
        image=probe_scraper_image,
        arguments=[
            "python3", "-m", "probe_scraper.probe_expiry_alert",
            "--date", "{{ ds }}",
            "--bugzilla-api-key", "{{ var.value.bugzilla_probe_expiry_bot_api_key }}"
        ],
        email=["*****@*****.**"],
        env_vars={
            "AWS_ACCESS_KEY_ID": aws_access_key,
            "AWS_SECRET_ACCESS_KEY": aws_secret_key
        },
Exemplo n.º 4
0
        dag=dag,
    )

    # Two sensors on tasks of the external `copy_deduplicate` DAG. Both use a
    # three-hour execution_delta, "reschedule" mode and the
    # DATA_ENG_EXTERNALTASKSENSOR pool.
    wait_for_bq_events = ExternalTaskSensor(
        dag=dag,
        task_id="wait_for_bq_events",
        pool="DATA_ENG_EXTERNALTASKSENSOR",
        mode="reschedule",
        external_dag_id="copy_deduplicate",
        external_task_id="bq_main_events",
        execution_delta=timedelta(hours=3),
    )

    wait_for_copy_deduplicate_events = ExternalTaskSensor(
        dag=dag,
        task_id="wait_for_copy_deduplicate_events",
        pool="DATA_ENG_EXTERNALTASKSENSOR",
        mode="reschedule",
        external_dag_id="copy_deduplicate",
        external_task_id="event_events",
        execution_delta=timedelta(hours=3),
    )

    # `task << upstreams` is equivalent to task.set_upstream(upstreams):
    # every sensor in the list is upstream of jetstream.
    jetstream << [
        wait_for_clients_daily_export,
        wait_for_main_summary_export,
        wait_for_search_clients_daily,
        wait_for_bq_events,
        wait_for_copy_deduplicate_events,
    ]
Exemplo n.º 5
0
            "*****@*****.**",
            "*****@*****.**",
        ],
        arguments=["{{ds}}"] +
        ["--spreadsheet-id=" + Variable.get('anomdtct_spreadsheet_id')] +
        ["--spreadsheet-key=" + Variable.get('anomdtct_spreadsheet_api_key')],
        dag=dag,
    )

    # Sensor on the `clients_first_seen` task of the external `main_summary`
    # DAG.
    wait_for_clients_first_seen = ExternalTaskSensor(
        dag=dag,
        external_task_id="clients_first_seen",
        external_dag_id="main_summary",
        task_id="wait_for_clients_first_seen",
    )

    # The sensor is upstream of anomdtct (`<<` is equivalent to set_upstream).
    anomdtct << [wait_for_clients_first_seen]

    # Query task targeting telemetry_derived.deviations_v1 in the
    # moz-fx-data-shared-prod project, invoked with the "--replace" argument.
    deviations = bigquery_etl_query(
        dag=dag,
        task_id="deviations",
        arguments=("--replace",),
        project_id="moz-fx-data-shared-prod",
        dataset_id="telemetry_derived",
        destination_table="deviations_v1",
    )

    # anomdtct is upstream of deviations (`<<` is equivalent to set_upstream).
    deviations << anomdtct
Exemplo n.º 6
0
        image=docker_image,
        dag=dag,
    )

    # Sensor on the `telemetry_derived__ssl_ratios__v1` task of the external
    # `bqetl_ssl_ratios` DAG, with a two-hour execution_delta.
    wait_for_telemetry_derived__ssl_ratios__v1 = ExternalTaskSensor(
        task_id="wait_for_telemetry_derived__ssl_ratios__v1",
        pool="DATA_ENG_EXTERNALTASKSENSOR",
        mode="reschedule",
        check_existence=True,
        external_dag_id="bqetl_ssl_ratios",
        external_task_id="telemetry_derived__ssl_ratios__v1",
        # Same duration as timedelta(seconds=7200).
        execution_delta=datetime.timedelta(hours=2),
    )

    # The sensor is upstream of the JSON export task
    # (`<<` is equivalent to set_upstream).
    (
        export_public_data_json_telemetry_derived__ssl_ratios__v1
        << wait_for_telemetry_derived__ssl_ratios__v1
    )

    # Task that runs script/publish_public_data_gcs_metadata via gke_command.
    public_data_gcs_metadata = gke_command(
        dag=dag,
        docker_image=docker_image,
        task_id="public_data_gcs_metadata",
        command=["script/publish_public_data_gcs_metadata"],
    )

    # Every export task in the list is upstream of the metadata task
    # (`<<` is equivalent to set_upstream).
    public_data_gcs_metadata << [
        export_public_data_json_telemetry_derived__ssl_ratios__v1,
    ]
Exemplo n.º 7
0
        ["--project_id=moz-fx-data-shared-prod"] +
        ["--parameter=submission_date:DATE:{{ds}}"],
        image=docker_image,
        dag=dag,
    )

    # Sensor on the `mozregression_aggregates__v1` task of the external
    # `bqetl_internal_tooling` DAG.
    wait_for_mozregression_aggregates__v1 = ExternalTaskSensor(
        task_id="wait_for_mozregression_aggregates__v1",
        pool="DATA_ENG_EXTERNALTASKSENSOR",
        mode="reschedule",
        check_existence=True,
        external_dag_id="bqetl_internal_tooling",
        external_task_id="mozregression_aggregates__v1",
    )

    # The sensor is upstream of the JSON export task
    # (`<<` is equivalent to set_upstream).
    (
        export_public_data_json_mozregression_aggregates__v1
        << wait_for_mozregression_aggregates__v1
    )

    # Sensor on the `telemetry_derived__ssl_ratios__v1` task of the external
    # `bqetl_ssl_ratios` DAG, with a two-hour execution_delta.
    wait_for_telemetry_derived__ssl_ratios__v1 = ExternalTaskSensor(
        task_id="wait_for_telemetry_derived__ssl_ratios__v1",
        pool="DATA_ENG_EXTERNALTASKSENSOR",
        mode="reschedule",
        check_existence=True,
        external_dag_id="bqetl_ssl_ratios",
        external_task_id="telemetry_derived__ssl_ratios__v1",
        # Same duration as timedelta(seconds=7200).
        execution_delta=datetime.timedelta(hours=2),
    )

    # The sensor is upstream of the JSON export task
    # (`<<` is equivalent to set_upstream).
    (
        export_public_data_json_telemetry_derived__ssl_ratios__v1
        << wait_for_telemetry_derived__ssl_ratios__v1
    )

    public_data_gcs_metadata = gke_command(
Exemplo n.º 8
0
    # Two sensors on tasks of the external `copy_deduplicate` DAG. Both use a
    # three-hour execution_delta, "reschedule" mode, the
    # DATA_ENG_EXTERNALTASKSENSOR pool, and email_on_retry=False.
    wait_for_bq_events = ExternalTaskSensor(
        dag=dag,
        task_id="wait_for_bq_events",
        pool="DATA_ENG_EXTERNALTASKSENSOR",
        mode="reschedule",
        email_on_retry=False,
        external_dag_id="copy_deduplicate",
        external_task_id="bq_main_events",
        execution_delta=timedelta(hours=3),
    )

    wait_for_copy_deduplicate_events = ExternalTaskSensor(
        dag=dag,
        task_id="wait_for_copy_deduplicate_events",
        pool="DATA_ENG_EXTERNALTASKSENSOR",
        mode="reschedule",
        email_on_retry=False,
        external_dag_id="copy_deduplicate",
        external_task_id="event_events",
        execution_delta=timedelta(hours=3),
    )

    # `task << upstreams` is equivalent to task.set_upstream(upstreams):
    # every sensor in the list is upstream of jetstream_run.
    jetstream_run << [
        wait_for_clients_daily_export,
        wait_for_main_summary_export,
        wait_for_search_clients_daily,
        wait_for_bq_events,
        wait_for_copy_deduplicate_events,
    ]
    # jetstream_run is in turn upstream of jetstream_config_changed.
    jetstream_config_changed << jetstream_run