예제 #1
0
    "input_event_response_coalesced_ms_main_above_250",
    "input_event_response_coalesced_ms_main_above_2500",
    "input_event_response_coalesced_ms_content_above_150",
    "input_event_response_coalesced_ms_content_above_250",
    "input_event_response_coalesced_ms_content_above_2500",
    "ghost_windows_main_above_1",
    "ghost_windows_content_above_1",
]

# Task that runs the sub-DAG produced by export_to_parquet for the
# main_summary_v4 table. The entries of main_summary_bigint_columns are
# appended to the fixed argument list below.
main_summary_export = SubDagOperator(
    subdag=export_to_parquet(
        table="moz-fx-data-shared-prod:telemetry_derived.main_summary_v4${{ds_nodash}}",
        static_partitions=[
            "submission_date_s3={{ds_nodash}}",
        ],
        arguments=(
            [
                "--partition-by=sample_id",
                "--replace='{{ds_nodash}}' AS submission_date",
                "--maps-from-entries",
            ]
            + main_summary_bigint_columns
        ),
        # The sub-DAG is named after this task and attached to the parent DAG.
        parent_dag_name=dag.dag_id,
        dag_name="main_summary_export",
        default_args=default_args,
        num_workers=40,
    ),
    task_id="main_summary_export",
    executor=get_default_executor(),
    dag=dag,
)

# Calls register_status for the main_summary task (the two strings are
# presumably a display name and a description -- confirm against the helper).
register_status(
    main_summary,
    "Main Summary",
    "A summary view of main pings.",
)

addons = bigquery_etl_query(
    task_id="addons",
    destination_table="addons_v2",
    project_id="moz-fx-data-shared-prod",
예제 #2
0
    task_id='clients_last_seen',
    bql='sql/clients_last_seen_v1.sql',
    destination_dataset_table='telemetry.clients_last_seen_v1${{ds_nodash}}',
    write_disposition='WRITE_TRUNCATE',
    use_legacy_sql=False,
    bigquery_conn_id="google_cloud_derived_datasets",
    depends_on_past=True,
    start_date=datetime(2019, 4, 15),
    dag=dag,
)

# Task that runs the sub-DAG produced by export_to_parquet for the
# clients_last_seen_v1 table, passing the templated {{ds}} value as the
# submission date argument.
clients_last_seen_export = SubDagOperator(
    subdag=export_to_parquet(
        table="clients_last_seen_v1",
        arguments=[
            "--submission-date={{ds}}",
        ],
        # The sub-DAG is named after this task and attached to the parent DAG.
        parent_dag_name=dag.dag_id,
        dag_name="clients_last_seen_export",
        default_args=default_args,
        num_preemptible_workers=10,
    ),
    task_id="clients_last_seen_export",
    dag=dag,
)

# BigQuery task: runs the query stored in the referenced .sql file and writes
# the result to the templated destination table, replacing existing contents
# (WRITE_TRUNCATE). Standard SQL is used (use_legacy_sql=False).
exact_mau_by_dimensions = BigQueryOperator(
    task_id="exact_mau_by_dimensions",
    bql="sql/firefox_desktop_exact_mau28_by_dimensions_v1.sql",
    destination_dataset_table=(
        "telemetry.firefox_desktop_exact_mau28_by_dimensions_v1${{ds_nodash}}"
    ),
    write_disposition="WRITE_TRUNCATE",
    use_legacy_sql=False,
    bigquery_conn_id="google_cloud_derived_datasets",
    dag=dag,
)
예제 #3
0
    email=[
        "*****@*****.**", "*****@*****.**",
        "*****@*****.**"
    ],
    depends_on_past=True,
    start_date=datetime(2019, 4, 15),
    dag=dag)

# Task that runs the sub-DAG produced by export_to_parquet for
# clients_last_seen_v1. Everything after "--select" is a list of select
# expressions: four derived days_since_* columns followed by "*".
clients_last_seen_export = SubDagOperator(
    subdag=export_to_parquet(
        table="clients_last_seen_v1",
        arguments=[
            "--dataset=telemetry_derived",
            "--submission-date={{ds}}",
            "--destination-table=clients_last_seen_v1",
            "--select",
            # `bits & -bits` keeps only the lowest set bit; log2 of that value
            # is the bit's position.
            "cast(log2(days_seen_bits & -days_seen_bits) as long) as days_since_seen",
            "cast(log2(days_visited_5_uri_bits & -days_visited_5_uri_bits) as long) as days_since_visited_5_uri",
            "cast(log2(days_opened_dev_tools_bits & -days_opened_dev_tools_bits) as long) as days_since_opened_dev_tools",
            "cast(log2(days_created_profile_bits & -days_created_profile_bits) as long) as days_since_created_profile",
            "*",
        ],
        # The sub-DAG is named after this task and attached to the parent DAG.
        parent_dag_name=dag.dag_id,
        dag_name="clients_last_seen_export",
        default_args=default_args,
        num_preemptible_workers=10,
    ),
    task_id="clients_last_seen_export",
    dag=dag,
)

exact_mau_by_dimensions = bigquery_etl_query(
    task_id="exact_mau_by_dimensions",
    destination_table="firefox_desktop_exact_mau28_by_dimensions_v1",
    dataset_id="telemetry",
    owner="*****@*****.**",
예제 #4
0
    "input_event_response_coalesced_ms_main_above_250",
    "input_event_response_coalesced_ms_main_above_2500",
    "input_event_response_coalesced_ms_content_above_150",
    "input_event_response_coalesced_ms_content_above_250",
    "input_event_response_coalesced_ms_content_above_2500",
    "ghost_windows_main_above_1",
    "ghost_windows_content_above_1",
]

# Task that runs the sub-DAG produced by export_to_parquet for the
# main_summary_v4 table. The entries of main_summary_bigint_columns are
# appended to the fixed argument list below.
main_summary_export = SubDagOperator(
    subdag=export_to_parquet(
        table="moz-fx-data-shared-prod:telemetry_derived.main_summary_v4${{ds_nodash}}",
        static_partitions=[
            "submission_date_s3={{ds_nodash}}",
        ],
        arguments=(
            [
                "--partition-by=sample_id",
                "--replace='{{ds_nodash}}' AS submission_date",
                "--maps-from-entries",
            ]
            + main_summary_bigint_columns
        ),
        # The sub-DAG is named after this task and attached to the parent DAG.
        parent_dag_name=dag.dag_id,
        dag_name="main_summary_export",
        default_args=default_args,
        num_workers=40,
    ),
    task_id="main_summary_export",
    executor=GetDefaultExecutor(),
    dag=dag,
)

# Calls register_status for the main_summary task (the two strings are
# presumably a display name and a description -- confirm against the helper).
register_status(
    main_summary,
    "Main Summary",
    "A summary view of main pings.",
)

addons = bigquery_etl_query(task_id="addons",
                            destination_table="addons_v2",
                            dataset_id="telemetry_derived",
예제 #5
0
    project_id="moz-fx-data-shared-prod",
    dataset_id="telemetry_derived",
    sql_file_path="sql/telemetry_derived/main_summary_v4/",
    multipart=True,
    owner="*****@*****.**",
    email=["*****@*****.**", "*****@*****.**"],
    start_date=datetime(2019, 10, 25),
    dag=dag)

# Task that runs the sub-DAG produced by export_to_parquet for the
# main_summary_v4 table, writing to the sql_main_summary_v4 destination.
sql_main_summary_export = SubDagOperator(
    subdag=export_to_parquet(
        table="moz-fx-data-shared-prod:telemetry_derived.main_summary_v4${{ds_nodash}}",
        destination_table="sql_main_summary_v4",
        # static_partitions must be a list of "key=value" strings, as in every
        # other export_to_parquet call; a bare string would be treated as a
        # sequence of single characters (or fail on list concatenation).
        static_partitions=["submission_date_s3={{ds_nodash}}"],
        arguments=[
            "--partition-by=sample_id",
            "--replace='{{ds_nodash}}' AS submission_date",
            "--maps-from-entries",
        ],
        # The sub-DAG is named after this task and attached to the parent DAG.
        parent_dag_name=dag.dag_id,
        dag_name="sql_main_summary_export",
        default_args=default_args,
        num_workers=40,
    ),
    task_id="sql_main_summary_export",
    dag=dag,
)

main_summary = MozDatabricksSubmitRunOperator(
    task_id="main_summary",
    job_name="Main Summary View",
    execution_timeout=timedelta(hours=6),
    email=[
        "*****@*****.**", "*****@*****.**",
        dataset_id="telemetry_derived",
        owner="*****@*****.**",
        email=["*****@*****.**", "*****@*****.**"])

    # Task that runs the sub-DAG produced by export_to_parquet for the
    # event_events_v1 table, writing to the events_v1 destination in the
    # telemetry-parquet S3 bucket.
    event_events_export = SubDagOperator(
        subdag=export_to_parquet(
            table="moz-fx-data-shared-prod:telemetry_derived.event_events_v1${{ds_nodash}}",
            destination_table="events_v1",
            static_partitions=[
                "submission_date_s3={{ds_nodash}}",
                "doc_type=event",
            ],
            arguments=[
                "--drop=submission_date",
                "--partition-by=doc_type",
                # Expressions listed after "--replace".
                "--replace",
                "UNIX_TIMESTAMP(timestamp) AS timestamp",
                "CAST(sample_id AS STRING) AS sample_id",
                "UNIX_TIMESTAMP(session_start_time) AS session_start_time",
                "MAP_FROM_ARRAYS(experiments.key, experiments.value.branch) AS experiments",
                "MAP_FROM_ENTRIES(event_map_values) AS event_map_values",
                # Column names listed after "--bigint-columns".
                "--bigint-columns",
                "sample_id",
                "event_timestamp",
            ],
            s3_output_bucket="telemetry-parquet",
            # The sub-DAG is named after this task and attached to the parent DAG.
            parent_dag_name=dag.dag_id,
            dag_name="event_events_export",
            default_args=default_args,
            num_workers=10,
        ),
        task_id="event_events_export",
        executor=GetDefaultExecutor(),
        owner="*****@*****.**",
        email=[
            "*****@*****.**",
            "*****@*****.**",
        ],
        dag=dag,
    )