Exemplo n.º 1
0
def histogram_aggregates_subdag(parent_dag_name, child_dag_name, default_args,
                                schedule_interval, dataset_id):
    """Build the GLAM histogram-aggregates subdag.

    The subdag computes the new daily histogram aggregates and then runs the
    nested "final" subdag that merges them into the cumulative table.

    :param parent_dag_name:   name of the DAG embedding this subdag
    :param child_dag_name:    task_id this subdag is attached under
    :param default_args:      Airflow default_args dict (not mutated)
    :param schedule_interval: schedule passed through to the inner DAG
    :param dataset_id:        BigQuery dataset the queries read/write
    :return: the configured airflow DAG object
    """
    GLAM_HISTOGRAM_AGGREGATES_SUBDAG = "%s.%s" % (parent_dag_name,
                                                  child_dag_name)
    # Copy before overriding: mutating the caller's dict would silently turn
    # on depends_on_past for every other DAG sharing the same default_args.
    default_args = dict(default_args, depends_on_past=True)
    dag = DAG(
        GLAM_HISTOGRAM_AGGREGATES_SUBDAG,
        default_args=default_args,
        schedule_interval=schedule_interval,
    )

    clients_histogram_aggregates_new = bigquery_etl_query(
        task_id="clients_histogram_aggregates_new",
        destination_table="clients_histogram_aggregates_new_v1",
        dataset_id=dataset_id,
        project_id="moz-fx-data-shared-prod",
        owner="*****@*****.**",
        email=["*****@*****.**", "*****@*****.**"],
        # Table is replaced wholesale each run rather than date-partitioned.
        date_partition_parameter=None,
        parameters=("submission_date:DATE:{{ds}}", ),
        arguments=("--replace", ),
        dag=dag,
    )

    clients_histogram_aggregates_final = SubDagOperator(
        subdag=repeated_subdag(
            GLAM_HISTOGRAM_AGGREGATES_SUBDAG,
            GLAM_HISTOGRAM_AGGREGATES_FINAL_SUBDAG,
            default_args,
            dag.schedule_interval,
            dataset_id,
        ),
        task_id=GLAM_HISTOGRAM_AGGREGATES_FINAL_SUBDAG,
        # Default SubDagOperator executor is sequential; use the cluster one.
        executor=get_default_executor(),
        dag=dag,
    )

    clients_histogram_aggregates_new >> clients_histogram_aggregates_final
    return dag
Exemplo n.º 2
0
        project_id,
        "--dataset",
        dataset_id,
    ],
    docker_image="mozilla/bigquery-etl:latest",
    dag=dag,
)

# Build the repeated bucket-counts subdag first so the operator call below
# stays readable.  Note: SubDagOperator falls back to a SequentialExecutor,
# so the tasks inside this subdag run one after another.
_bucket_counts_subdag = repeated_subdag(
    GLAM_DAG,
    "clients_histogram_bucket_counts",
    default_args,
    dag.schedule_interval,
    dataset_id,
    ("submission_date:DATE:{{ds}}", ),
    10,
    None,
)
clients_histogram_bucket_counts = SubDagOperator(
    subdag=_bucket_counts_subdag,
    task_id="clients_histogram_bucket_counts",
    dag=dag,
)

clients_histogram_probe_counts = bigquery_etl_query(
    task_id="clients_histogram_probe_counts",
    destination_table="clients_histogram_probe_counts_v1",
    dataset_id=dataset_id,
    project_id=project_id,
    owner="*****@*****.**",
    date_partition_parameter=None,