) main_summary_dataproc = SubDagOperator( subdag=moz_dataproc_jar_runner( parent_dag_name="main_summary", dag_name="main_summary_dataproc", default_args=default_args, cluster_name="main-summary-{{ds}}", image_version="1.3", worker_machine_type="n1-standard-8", num_preemptible_workers=40, service_account= "*****@*****.**", optional_components=[], install_component_gateway=False, jar_urls=[ "https://s3-us-west-2.amazonaws.com/net-mozaws-data-us-west-2-ops-ci-artifacts/mozilla/telemetry-batch-view/master/telemetry-batch-view.jar", ], main_class="com.mozilla.telemetry.views.MainSummaryView", jar_args=[ "--from={{ds_nodash}}", "--to={{ds_nodash}}", "--bucket=" + main_summary_dataproc_bucket, "--export-path=" + main_ping_bigquery_export_prefix, ], job_name="main_summary_view_{{ds_nodash}}", init_actions_uris=[], gcp_conn_id="google_cloud_airflow_dataproc", ), task_id="main_summary_dataproc", dag=dag, )
subdag=moz_dataproc_jar_runner( parent_dag_name=dag.dag_id, dag_name="addon_recommender", job_name="Train_the_Collaborative_Addon_Recommender", main_class="com.mozilla.telemetry.ml.AddonRecommender", jar_urls=[ "https://s3-us-west-2.amazonaws.com/net-mozaws-data-us-west-2-ops-ci-artifacts" "/mozilla/telemetry-batch-view/master/telemetry-batch-view.jar", ], jar_args=[ "train", "--runDate={{ds_nodash}}", "--inputTable=gs://moz-fx-data-derived-datasets-parquet/clients_daily/v6", "--privateBucket=s3a://telemetry-parquet", "--publicBucket=s3a://telemetry-public-analysis-2", ], cluster_name="addon-recommender-{{ds_nodash}}", image_version="1.3", worker_machine_type="n1-standard-8", num_workers=20, optional_components=[], install_component_gateway=False, init_actions_uris=[], aws_conn_id=taar_aws_conn_id, gcp_conn_id=taar_gcpdataproc_conn_id, default_args={ key: value for key, value in chain(default_args.items(), [ ("owner", "*****@*****.**"), ("email", ["*****@*****.**", "*****@*****.**", "*****@*****.**"]), ]) }, ),
# Task that embeds the sub-DAG built by moz_dataproc_jar_runner, which is
# configured here to run the AddonRecommender class from the
# telemetry-batch-view jar in "train" mode on a Dataproc cluster.
taar_collaborative_recommender = SubDagOperator(
    task_id="addon_recommender",
    dag=dag,
    subdag=moz_dataproc_jar_runner(
        # Sub-DAG identity: dag_name is the same string as the task_id above.
        parent_dag_name=dag.dag_id,
        dag_name="addon_recommender",
        default_args=default_args,
        aws_conn_id=taar_aws_conn_id,
        gcp_conn_id=taar_gcpdataproc_conn_id,
        # Cluster settings; the cluster name is templated with {{ds_nodash}}.
        cluster_name="addon-recommender-{{ds_nodash}}",
        image_version="1.3",
        worker_machine_type="n1-standard-8",
        num_workers=20,
        optional_components=[],
        install_component_gateway=False,
        init_actions_uris=[],
        # Job settings: the jar to fetch, its entry point and its arguments.
        job_name="Train_the_Collaborative_Addon_Recommender",
        main_class="com.mozilla.telemetry.ml.AddonRecommender",
        jar_urls=[
            # GCS bucket for testing is located in the
            # `cfr-personalization-experiment` project:
            #   'gs://taar_models/tmp/telemetry-batch-view-1.2.jar'
            # We should move artifacts to GCS eventually.
            "https://s3-us-west-2.amazonaws.com/net-mozaws-data-us-west-2-ops-ci-artifacts"
            "/mozilla/telemetry-batch-view/main/telemetry-batch-view.jar",
        ],
        jar_args=[
            "train",
            "--runDate={{ds_nodash}}",
            "--inputTable=gs://moz-fx-data-derived-datasets-parquet/clients_daily/v6",
            f"--privateBucket=gs://{TAAR_ETL_MODEL_STORAGE_BUCKET}",
            f"--checkpointDir=gs://{TAAR_ETL_STORAGE_BUCKET}/spark-checkpoints",
        ],
    ),
)