# NOTE(review): the lines down to `dag=dag)` are the tail of an operator call
# whose opening is above this section. Visible here: the last schema field
# ('retention', a nullable INTEGER) and the create/write dispositions.
# Presumably this is one of the BigQuery load tasks wired below -- confirm.
}, { 'name': 'retention', 'type': 'INTEGER', 'mode': 'NULLABLE' }, ],
    create_disposition='CREATE_IF_NEEDED',
    write_disposition='WRITE_APPEND',
    dag=dag)

# =================
# == tasks flow ===
# =================
# Cluster creation dependencies.
# Cluster 1 is created after both the cluster-name push and the sensor task.
create_cluster_1.set_upstream(push_unique_cluster_name)
create_cluster_1.set_upstream(sensor_task)
# Cluster 2 is created after the cluster-name push and only once cluster 1
# has been deleted, so the two clusters are never requested at the same time.
create_cluster_2.set_upstream(push_unique_cluster_name)
create_cluster_2.set_upstream(delete_cluster_1)

# Job dependencies.
# On cluster 1: unique-user calculation, which feeds both its own BigQuery
# load and the aggregation job; the aggregation job feeds its BigQuery load.
calc_unique_users.set_upstream(create_cluster_1)
calc_unique_users.set_downstream(bq_load_user)
calc_unique_users.set_downstream(calc_agg)
calc_agg.set_downstream(bq_load_agg)
# On cluster 2: day-1 retention calculation, followed by its BigQuery load.
calc_retention_day1.set_upstream(create_cluster_2)
calc_retention_day1.set_downstream(bq_load_retention)

# BigQuery load -> cluster teardown.
# Each cluster is deleted after the last load of its chain. In the wiring
# visible here, bq_load_user does not gate delete_cluster_1; only bq_load_agg does.
bq_load_agg.set_downstream(delete_cluster_1)
bq_load_retention.set_downstream(delete_cluster_2)
# NOTE(review): the lines down to `arguments=args)` are the tail of an operator
# call whose opening is above this section. Judging by job_name and the wiring
# below, this is presumably the `bike_share_retention_d7` task -- confirm.
main_class='com.cohort.process.RetentionProcess',
    region='us-west1',
    job_name=dag_name + 'bike_share_retention_d7',
    # The template pulls the "cluster_name" XCom pushed by task
    # "push-cluster-name"; the literal '4' is appended to the template string.
    # Presumably this matches the cluster made by dataproc_create_cluster_4 -- confirm.
    cluster_name='{{ ti.xcom_pull(key="cluster_name", task_ids="push-cluster-name") }}' + '4',
    execution_timeout=timedelta(minutes=180),
    arguments=args)

# Sensor on the object 'bike/unique-user/_SUCCESS' in bucket 'jiuzhangsuanfa'.
# poke_interval/timeout are presumably in seconds (30 s between checks,
# 45 min overall) per Airflow's sensor convention -- confirm.
# NOTE(review): no `dag=` argument is passed here and the sensor has no
# upstream/downstream edges in the wiring visible below -- confirm it is
# attached to the DAG and wired elsewhere.
unique_user_sensor = GoogleCloudStorageObjectSensor(
    task_id='unique_user_sensor',
    bucket='jiuzhangsuanfa',
    object='bike/unique-user/_SUCCESS',
    poke_interval=30,
    timeout=2700)

# Cluster 1: unique-user job, then the aggregator, then cluster teardown.
unique_user.set_upstream(dataproc_create_cluster_1)
unique_user.set_downstream(bike_share_aggregator)
bike_share_aggregator.set_downstream(dataproc_destroy_cluster_1)
# Clusters 2 and 3: each retention job runs between the creation and the
# destruction of its own cluster.
bike_share_retention_d1.set_upstream(dataproc_create_cluster_2)
bike_share_retention_d1.set_downstream(dataproc_destroy_cluster_2)
bike_share_retention_d3.set_upstream(dataproc_create_cluster_3)
bike_share_retention_d3.set_downstream(dataproc_destroy_cluster_3)
# Cluster 4: only the upstream edge is visible in this section; the matching
# teardown edge is presumably set right after -- confirm.
bike_share_retention_d7.set_upstream(dataproc_create_cluster_4)