'email_on_failure': True, 'email_on_retry': False, } dag = airflow.DAG( 'ods_sqoop_base_user_push_token_df', schedule_interval="00 01 * * *", default_args=args, ) ##----------------------------------------- 依赖 ---------------------------------------## ods_binlog_base_user_push_token_hi_task = OssSensor( task_id='ods_binlog_base_user_push_token_hi_task', bucket_key='{hdfs_path_str}/dt={pt}/hour=22/_SUCCESS'.format( hdfs_path_str="opay_binlog/opay_user_db.opay_user.user_push_token", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) ods_sqoop_base_user_push_token_df_pre_task = OssSensor( task_id='ods_sqoop_base_user_push_token_df_pre_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str="opay_dw_sqoop/opay_user/user_push_token", pt='{{macros.ds_add(ds, -1)}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) ##----------------------------------------- 任务超时监控 ---------------------------------------##
pt='{{ds}}' ), bucket_name='opay-datalake', poke_interval=60, #依赖不满足时,一分钟检查一次依赖状态 dag=dag ) #路径 hdfs_path = "ufile://opay-datalake/oride/oride_dw/" + table_name else: print("成功") # 依赖前一天分区 dwm_oride_driver_base_df_prev_day_task = OssSensor( task_id='dwm_oride_driver_base_df_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str="oride/oride_dw/dwm_oride_driver_base_df/country_code=NG", pt='{{ds}}' ), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag ) dim_oride_city_task = OssSensor( task_id='dim_oride_city_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str="oride/oride_dw/dim_oride_city/country_code=NG", pt='{{ds}}' ), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag )
##----------------------------------------- Variables ---------------------------------------##
db_name = "otrade_dw"
table_name = "dwd_otrade_b2b_product_shopping_cart_hi"
hdfs_path = "oss://opay-datalake/otrade/otrade_dw/" + table_name

# NOTE(review): eval() executes whatever text is stored in the Airflow
# Variable. If the value is plain JSON, prefer
# Variable.get(..., deserialize_json=True); if it is a Python literal, prefer
# ast.literal_eval. Left unchanged because the stored format is not visible here.
config = eval(Variable.get("otrade_time_zone_config"))
time_zone = config['NG']['time_zone']

##----------------------------------------- Dependencies ---------------------------------------##

### Check the partition dependency for the current hour
### oss://opay-datalake/otrade_all_hi/ods_binlog_base_product_shopping_cart_all_hi
ods_binlog_base_product_shopping_cart_all_hi_check_task = OssSensor(
    # task_id matches the variable name and the table whose partition is
    # watched, so the Airflow UI and alerts name the right upstream table.
    task_id='ods_binlog_base_product_shopping_cart_all_hi_check_task',
    bucket_key='{hdfs_path_str}/dt={pt}/hour={hour}/_SUCCESS'.format(
        hdfs_path_str=
        "otrade_all_hi/ods_binlog_base_product_shopping_cart_all_hi",
        pt='{{ds}}',
        hour='{{ execution_date.strftime("%H") }}',
    ),
    bucket_name='opay-datalake',
    poke_interval=60,  # re-check once a minute while the dependency is unmet
    dag=dag,
)

##----------------------------------------- Task timeout monitoring ---------------------------------------##


def fun_task_timeout_monitor(ds, dag, execution_date, **op_kwargs):
    dag_ids = dag.dag_id

    # Country being monitored
    v_country_code = 'NG'

    # Time offset (hours)
    v_gap_hour = 0
'email_on_retry': False, } dag = airflow.DAG( 'dim_oride_driver_base', schedule_interval="25 00 * * *", default_args=args, ) ##----------------------------------------- 依赖 ---------------------------------------## # 依赖前天分区 dwd_oride_driver_df_prev_day_task = OssSensor( task_id='dwd_oride_driver_df_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str="oride/oride_dw/dwd_oride_driver_df/country_code=nal", pt='{{ds}}', now_day='{{macros.ds_add(ds, +1)}}', hour='{{ execution_date.strftime("%H") }}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) dwd_oride_driver_extend_df_prev_day_task = OssSensor( task_id='dwd_oride_driver_extend_df_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str= "oride/oride_dw/dwd_oride_driver_extend_df/country_code=nal", pt='{{ds}}', now_day='{{macros.ds_add(ds, +1)}}', hour='{{ execution_date.strftime("%H") }}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态
ods_sqoop_base_data_driver_whitelist_df_task = UFileSensor( task_id='ods_sqoop_base_data_driver_whitelist_df_task', filepath='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str="oride_dw_sqoop/oride_data/data_driver_whitelist", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) #路径 hdfs_path = "ufile://opay-datalake/oride/oride_dw/" + table_name else: print("成功") ods_sqoop_base_data_driver_whitelist_df_task = OssSensor( task_id='ods_sqoop_base_data_driver_whitelist_df_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str="oride_dw_sqoop/oride_data/data_driver_whitelist", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) # 路径 hdfs_path = "oss://opay-datalake/oride/oride_dw/" + table_name ##----------------------------------------- 任务超时监控 ---------------------------------------## def fun_task_timeout_monitor(ds, dag, **op_kwargs): dag_ids = dag.dag_id msg = [{ "dag": dag, "db": "oride_dw",
filepath='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str= "oride/oride_dw/dim_oride_driver_audit_base/country_code=nal", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) hdfs_path = "ufile://opay-datalake/oride/oride_dw/" + table_name else: print("成功") ods_sqoop_base_data_driver_balance_extend_df_prev_day_tesk = OssSensor( task_id='ods_sqoop_base_data_driver_balance_extend_df_prev_day_tesk', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str= "oride_dw_sqoop/oride_data/data_driver_balance_extend", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) # 依赖前一天分区 ods_sqoop_base_data_driver_records_day_df_prev_day_tesk = OssSensor( task_id="ods_sqoop_base_data_driver_records_day_df_prev_day_tesk", bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str="oride_dw_sqoop/oride_data/data_driver_records_day", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag)
'email': ['*****@*****.**'], 'email_on_failure': True, 'email_on_retry': False, } dag = airflow.DAG('app_opay_active_user_cube_d', schedule_interval="30 02 * * *", default_args=args) ##----------------------------------------- 依赖 ---------------------------------------## dwd_opay_transaction_record_di_prev_day_task = OssSensor( task_id='dwd_opay_transaction_record_di_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str= "opay/opay_dw/dwd_opay_transaction_record_di/country_code=NG", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) ods_sqoop_base_user_di_prev_day_task = OssSensor( task_id='ods_sqoop_base_user_di_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str="opay_dw_sqoop_di/opay_user/user", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) ##----------------------------------------- 任务超时监控 ---------------------------------------## def fun_task_timeout_monitor(ds, dag, **op_kwargs):
default_args=args) table_names = ['ocredit_phones_dw_ods.ods_sqoop_base_t_order_df', 'ocredit_carfinance_dw_ods.ods_sqoop_base_t_order_df', 'opay_dw_ods.ods_sqoop_base_user_payment_instrument_df' ] ''' 校验分区代码 ''' phones_ods_sqoop_base_t_order_df_task = OssSensor( task_id='phones_ods_sqoop_base_t_order_df_task', bucket_key="{hdfs_path_str}/dt={pt}/_SUCCESS".format( hdfs_path_str="ocredit_phones_dw_sqoop/oloan/t_order", pt="{{ds}}" ), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag ) carfinance_ods_sqoop_base_t_order_df_task = OssSensor( task_id='carfinance_ods_sqoop_base_t_order_df_task', bucket_key="{hdfs_path_str}/dt={pt}/_SUCCESS".format( hdfs_path_str="ocredit_carfinance_dw_sqoop/oloan_auto/t_order", pt="{{ds}}" ), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag )
'retry_delay': timedelta(minutes=2), 'email': ['*****@*****.**'], 'email_on_failure': True, 'email_on_retry': False, } dag = airflow.DAG('dwd_ocredit_phones_order_base_di', schedule_interval="30 01 * * *", default_args=args) ##----------------------------------------- 依赖 ---------------------------------------## ods_sqoop_base_t_order_df_task = OssSensor( task_id='ods_sqoop_base_t_order_df_task', bucket_key="{hdfs_path_str}/dt={pt}/_SUCCESS".format( hdfs_path_str="ocredit_phones_dw_sqoop/oloan/t_order", pt="{{ds}}"), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) ##----------------------------------------- 变量 ---------------------------------------## db_name = "ocredit_phones_dw" table_name = "dwd_ocredit_phones_order_base_di" hdfs_path = "oss://opay-datalake/ocredit_phones/ocredit_phones_dw/" + table_name ##----------------------------------------- 任务超时监控 ---------------------------------------## def fun_task_timeout_monitor(ds, dag, **op_kwargs): dag_ids = dag.dag_id
bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str= "oride/oride_dw/dwd_oride_order_base_include_test_di/country_code=NG", pt='{{ds}}'), bucket_name='opay-bi', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) #路径 hdfs_path = "ufile://opay-datalake/oride/oride_dw/" + table_name else: print("成功") dependence_dwd_oride_order_base_include_test_di_prev_day_task = OssSensor( task_id='dwd_oride_order_base_include_test_di_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str= "oride/oride_dw/dwd_oride_order_base_include_test_di/country_code=NG", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) # 路径 hdfs_path = "oss://opay-datalake/oride/oride_dw/" + table_name ##----------------------------------------- 任务超时监控 ---------------------------------------## def fun_task_timeout_monitor(ds, dag, **op_kwargs): dag_ids = dag.dag_id msg = [{ "dag":
db_name = "otrade_dw" table_name = "dim_otrade_b2b_retailer_info_crm_hf" hdfs_path = "oss://opay-datalake/otrade/otrade_dw/" + table_name config = eval(Variable.get("otrade_time_zone_config")) time_zone = config['NG']['time_zone'] ##----------------------------------------- 依赖 ---------------------------------------## ### 检查最新的用户表的依赖 ###oss://opay-datalake/otrade/otrade_dw/dim_otrade_b2b_city_info_hf dim_otrade_b2b_city_info_hf_check_task = OssSensor( task_id='dim_otrade_b2b_city_info_hf_check_task', bucket_key='{hdfs_path_str}/country_code=NG/dt={pt}/hour={hour}/_SUCCESS'. format( hdfs_path_str="otrade/otrade_dw/dim_otrade_b2b_city_info_hf", pt= '{{{{(execution_date+macros.timedelta(hours=({time_zone}+{gap_hour}))).strftime("%Y-%m-%d")}}}}' .format(time_zone=time_zone, gap_hour=0), hour= '{{{{(execution_date+macros.timedelta(hours=({time_zone}+{gap_hour}))).strftime("%H")}}}}' .format(time_zone=time_zone, gap_hour=0)), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) ###oss://opay-datalake/otrade/otrade_dw/dim_otrade_b2b_bd_info_hf dim_otrade_b2b_bd_info_hf_check_task = OssSensor( task_id='dim_otrade_b2b_bd_info_hf_check_task', bucket_key='{hdfs_path_str}/country_code=NG/dt={pt}/hour={hour}/_SUCCESS'. format( hdfs_path_str="otrade/otrade_dw/dim_otrade_b2b_bd_info_hf", pt= '{{{{(execution_date+macros.timedelta(hours=({time_zone}+{gap_hour}))).strftime("%Y-%m-%d")}}}}'
'email': ['*****@*****.**'], 'email_on_failure': True, 'email_on_retry': False, } dag = airflow.DAG('dwd_opay_account_balance_df', schedule_interval="30 02 * * *", default_args=args, ) ##----------------------------------------- 依赖 ---------------------------------------## ods_sqoop_base_account_user_df_prev_day_task = OssSensor( task_id='ods_sqoop_base_account_user_df_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str="opay_dw_sqoop/opay_account/account_user", pt='{{ds}}' ), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag ) ods_sqoop_base_account_merchant_df_prev_day_task = OssSensor( task_id='ods_sqoop_base_account_merchant_df_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str="opay_dw_sqoop/opay_account/account_merchant", pt='{{ds}}' ), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag )
'email_on_retry': False, } dag = airflow.DAG( 'dwm_opay_user_scenario_last_tran_df', schedule_interval="30 01 * * *", default_args=args, ) ##----------------------------------------- 依赖 ---------------------------------------## dwd_opay_transaction_record_di_prev_day_task = OssSensor( task_id='dwd_opay_transaction_record_di_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str= "opay/opay_dw/dwd_opay_transaction_record_di/country_code=NG", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) dwm_opay_user_scenario_last_tran_df_prev_day_task = OssSensor( task_id='dwm_opay_user_scenario_last_tran_df_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str= "opay/opay_dw/dwm_opay_user_scenario_last_tran_df/country_code=NG", pt='{{macros.ds_add(ds, -1)}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag)
'email_on_retry': False, } dag = airflow.DAG( 'ods_sqoop_base_channel_transaction_di', schedule_interval="30 00 * * *", default_args=args, ) ##----------------------------------------- 依赖 ---------------------------------------## ods_binlog_base_channel_transaction_hi_task = OssSensor( task_id='ods_binlog_base_channel_transaction_hi_task', bucket_key='{hdfs_path_str}/dt={pt}/hour=22/_SUCCESS'.format( hdfs_path_str= "opay_binlog/opay_channel_db.opay_channel.channel_transaction", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) ##----------------------------------------- 任务超时监控 ---------------------------------------## def fun_task_timeout_monitor(ds, dag, **op_kwargs): dag_ids = dag.dag_id msg = [{ "dag": dag, "db": "opay_dw_ods", "table": "{dag_name}".format(dag_name=dag_ids), "partition": "dt={pt}".format(pt=ds),
##----------------------------------------- 变量 ---------------------------------------## db_name = "otrade_dw" table_name = "dim_otrade_b2b_city_info_hf" hdfs_path = "oss://opay-datalake/otrade/otrade_dw/" + table_name config = eval(Variable.get("otrade_time_zone_config")) time_zone = config['NG']['time_zone'] ##----------------------------------------- 依赖 ---------------------------------------## ### 检查当前小时的分区依赖 ###oss://opay-datalake/otrade_h_his/ods_binlog_base_otrade_city_h_his ods_binlog_base_otrade_city_h_his_check_task = OssSensor( task_id='ods_binlog_base_otrade_city_h_his_check_task', bucket_key='{hdfs_path_str}/dt={pt}/hour={hour}/_SUCCESS'.format( hdfs_path_str="otrade_h_his/ods_binlog_base_otrade_city_h_his", pt='{{ds}}', hour='{{ execution_date.strftime("%H") }}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) ### 检查当前小时的分区依赖 ###oss://opay-datalake/otrade_h_his/ods_binlog_base_otrade_country_h_his ods_binlog_base_otrade_country_h_his_check_task = OssSensor( task_id='ods_binlog_base_otrade_country_h_his_check_task', bucket_key='{hdfs_path_str}/dt={pt}/hour={hour}/_SUCCESS'.format( hdfs_path_str="otrade_h_his/ods_binlog_base_otrade_country_h_his", pt='{{ds}}', hour='{{ execution_date.strftime("%H") }}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态
'email': ['*****@*****.**'], 'email_on_failure': True, 'email_on_retry': False, } dag = airflow.DAG('app_ocredit_phones_order_base_cube_w', schedule_interval="30 01 * * *", default_args=args) ##----------------------------------------- 依赖 ---------------------------------------## dwd_ocredit_phones_order_base_df_task = OssSensor( task_id='dwd_ocredit_phones_order_base_df_df_task', bucket_key="{hdfs_path_str}/dt={pt}/_SUCCESS".format( hdfs_path_str= "ocredit_phones/ocredit_phones_dw/dwd_ocredit_phones_order_base_df/country_code=nal", pt="{{ds}}"), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) ##----------------------------------------- 变量 ---------------------------------------## db_name = "ocredit_phones_dw" table_name = "app_ocredit_phones_order_base_cube_w" hdfs_path = "oss://opay-datalake/ocredit_phones/ocredit_phones_dw/" + table_name ##----------------------------------------- 任务超时监控 ---------------------------------------## def fun_task_timeout_monitor(ds, dag, **op_kwargs):
'retries': 1, 'retry_delay': timedelta(minutes=5), 'email': ['*****@*****.**'], 'email_on_failure': True, 'email_on_retry': False, } dag = airflow.DAG('app_opay_owealth_report_d_19', schedule_interval="35 18 * * *", default_args=args) ##----------------------------------------- 依赖 ---------------------------------------## ods_sqoop_owealth_share_order_hf_prev_day_task = OssSensor( task_id='ods_sqoop_owealth_share_order_hf_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/hour=18/_SUCCESS'.format( hdfs_path_str="opay_owealth_sqoop_hf/opay_owealth/share_order", pt='{{macros.ds_add(ds, +1)}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) ods_sqoop_base_owealth_user_subscribed_hf_prev_day_task = OssSensor( task_id='ods_sqoop_base_owealth_user_subscribed_hf_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/hour=18/_SUCCESS'.format( hdfs_path_str="opay_dw_sqoop_hf/opay_owealth/owealth_user_subscribed", pt='{{macros.ds_add(ds, +1)}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) ods_sqoop_base_owealth_share_trans_record_hf_prev_day_task = OssSensor( task_id='ods_sqoop_base_owealth_share_trans_record_hf_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/hour=18/_SUCCESS'.format(
'email': ['*****@*****.**'], 'email_on_failure': True, 'email_on_retry': False, } dag = airflow.DAG( 'dwd_oride_passenger_extend_df', schedule_interval="20 00 * * *", default_args=args, ) ##----------------------------------------- 变量 ---------------------------------------## ods_binlog_data_user_extend_hi_prev_day_task = OssSensor( task_id='ods_binlog_data_user_extend_hi_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/hour=22/_SUCCESS'.format( hdfs_path_str="oride_binlog/oride_db.oride_data.data_user_extend", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) # 依赖前天分区 dwd_oride_passenger_extend_df_prev_day_tesk = OssSensor( task_id='dwd_oride_passenger_extend_df_prev_day_tesk', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str= "oride/oride_dw/dwd_oride_passenger_extend_df/country_code=nal", pt='{{macros.ds_add(ds, -1)}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag)
dag = airflow.DAG( 'app_oride_user_funnel_beford_d', schedule_interval="40 01 * * *", default_args=args, ) ##----------------------------------------- 变量 ---------------------------------------## db_name = "oride_dw" table_name = "app_oride_user_funnel_beford_d" hdfs_path = "oss://opay-datalake/oride/oride_dw/" + table_name ##----------------------------------------- 依赖 ---------------------------------------## dwd_oride_client_event_detail_hi_task = OssSensor( task_id="dwd_oride_client_event_detail_hi_task", bucket_key='{hdfs_path_str}/country_code=nal/dt={pt}/hour=23/_SUCCESS'. format(hdfs_path_str="oride/oride_dw/dwd_oride_client_event_detail_hi", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) # ----------------------------------------- 任务超时监控 ---------------------------------------## def fun_task_timeout_monitor(ds, dag, **op_kwargs): dag_ids = dag.dag_id tb = [{ "dag": dag, "db": "oride_dw", "table": "{dag_name}".format(dag_name=dag_ids), "partition": "country_code=nal/dt={pt}".format(pt=ds),
'email_on_failure': True, 'email_on_retry': False, } dag = airflow.DAG('dwd_opay_topup_with_card_record_di', schedule_interval="20 01 * * *", default_args=args, ) ##----------------------------------------- 依赖 ---------------------------------------## ods_sqoop_base_user_di_prev_day_task = OssSensor( task_id='ods_sqoop_base_user_di_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str="opay_dw_sqoop_di/opay_user/user", pt='{{ds}}' ), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag ) ods_sqoop_base_merchant_df_prev_day_task = OssSensor( task_id='ods_sqoop_base_merchant_df_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str="opay_dw_sqoop/opay_merchant/merchant", pt='{{ds}}' ), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag )
hdfs_path_str= "oride/oride_dw/dm_oride_driver_order_base_cube/country_code=NG", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) # 路径 hdfs_path = "ufile://opay-datalake/oride/oride_dw/" + table_name else: print("成功") # 依赖前一天分区 dependence_dm_oride_order_base_d_prev_day_task = OssSensor( task_id='dm_oride_order_base_d_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str= "oride/oride_dw/dm_oride_order_base_d/country_code=NG", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) # 依赖前一天分区 dependence_dm_oride_passenger_base_cube_prev_day_task = OssSensor( task_id='dm_oride_passenger_base_cube_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str= "oride/oride_dw/dm_oride_passenger_base_cube/country_code=NG", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag)
default_args=args, ) ##----------------------------------------- 变量 ---------------------------------------## db_name = "otrade_dw" table_name = "app_otrade_b2b_order_target_retailer_di" hdfs_path = "oss://opay-datalake/otrade/otrade_dw/" + table_name config = eval(Variable.get("otrade_time_zone_config")) time_zone = config['NG']['time_zone'] ##----------------------------------------- 依赖 ---------------------------------------## dwd_otrade_b2b_order_collect_di_task = OssSensor( task_id='dwd_otrade_b2b_order_collect_di_task', bucket_key='{hdfs_path_str}/country_code=NG/dt={pt}/_SUCCESS'.format( hdfs_path_str="otrade/otrade_dw/dwd_otrade_b2b_order_collect_di", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) ##----------------------------------------- 任务超时监控 ---------------------------------------## def fun_task_timeout_monitor(ds, dag, **op_kwargs): dag_ids = dag.dag_id tb = [{ "dag": dag, "db": "otrade_dw", "table": "{dag_name}".format(dag_name=dag_ids), "partition": "country_code=NG/dt={pt}".format(pt=ds),
'email_on_retry': False, } dag = airflow.DAG( 'dwm_opay_user_balance_df', schedule_interval="30 02 * * *", default_args=args, ) ##----------------------------------------- 依赖 ---------------------------------------## dwd_opay_account_balance_df_prev_day_task = OssSensor( task_id='dwd_opay_account_balance_df_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str= "opay/opay_dw/dwd_opay_account_balance_df/country_code=NG", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) ##----------------------------------------- 任务超时监控 ---------------------------------------## def fun_task_timeout_monitor(ds, dag, **op_kwargs): dag_ids = dag.dag_id msg = [{ "dag": dag, "db": "opay_dw", "table": "{dag_name}".format(dag_name=dag_ids), "partition": "country_code=NG/dt={pt}".format(pt=ds),
'email_on_failure': True, 'email_on_retry': False, } dag = airflow.DAG('ods_sqoop_base_bd_agent_df', schedule_interval="00 01 * * *", default_args=args, ) ##----------------------------------------- 依赖 ---------------------------------------## ods_bd_agent_hi_check_task = OssSensor( task_id='ods_bd_agent_hi_check_task', bucket_key='{hdfs_path_str}/dt={pt}/hour=22/_SUCCESS'.format( hdfs_path_str="opay_binlog/opay_agent_crm.opay_agent_crm.bd_agent", pt='{{ds}}' ), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag ) ods_sqoop_bd_agent_pre_check_task = OssSensor( task_id='ods_sqoop_bd_agent_pre_check_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str="opay_dw_sqoop/opay_agent_crm/bd_agent", pt='{{macros.ds_add(ds, -1)}}' ), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag )
'email': ['*****@*****.**'], 'email_on_failure': True, 'email_on_retry': False, } dag = airflow.DAG( 'dwd_opay_pos_transaction_record_di', schedule_interval="20 01 * * *", default_args=args, ) ##----------------------------------------- 依赖 ---------------------------------------## ods_sqoop_base_user_di_prev_day_task = OssSensor( task_id='ods_sqoop_base_user_di_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str="opay_dw_sqoop_di/opay_user/user", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) ods_sqoop_base_merchant_df_prev_day_task = OssSensor( task_id='ods_sqoop_base_merchant_df_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str="opay_dw_sqoop/opay_merchant/merchant", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) ods_sqoop_base_user_pos_transaction_record_di_prev_day_task = OssSensor( task_id='ods_sqoop_base_user_pos_transaction_record_di_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format(
'email_on_failure': True, 'email_on_retry': False, } dag = airflow.DAG( 'dwd_oride_admin_business_df', schedule_interval="00 01 * * *", default_args=args, ) ##----------------------------------------- 依赖 ---------------------------------------## ods_sqoop_base_admin_business_df_task = OssSensor( task_id='ods_sqoop_base_admin_business_df_task', bucket_key="{hdfs_path_str}/dt={pt}/_SUCCESS".format( hdfs_path_str="oride_dw_sqoop/opay_assets/admin_business", pt="{{ds}}"), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) ##----------------------------------------- 变量 ---------------------------------------## db_name = "oride_dw" table_name = "dwd_oride_admin_business_df" hdfs_path = "oss://opay-datalake/oride/oride_dw/" + table_name ##----------------------------------------- 任务超时监控 ---------------------------------------## def fun_task_timeout_monitor(ds, dag, **op_kwargs): dag_ids = dag.dag_id
##----------------------------------------- 依赖 ---------------------------------------## # 依赖前一天分区 moto_locations_prev_day_task = HivePartitionSensor( task_id="moto_locations_prev_day_task", table="moto_locations", partition="dt='{{ds}}'", schema="oride_source", poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) ods_sqoop_base_oride_assets_sku_df_prev_day_task = OssSensor( task_id='ods_sqoop_base_oride_assets_sku_df_prev_day_task', bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format( hdfs_path_str="oride_dw_sqoop/opay_assets/oride_assets_sku", pt='{{ds}}'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) ##----------------------------------------- 任务超时监控 ---------------------------------------## def fun_task_timeout_monitor(ds, dag, **op_kwargs): dag_ids = dag.dag_id msg = [{ "dag": dag, "db": "oride_dw", "table": "{dag_name}".format(dag_name=dag_ids), "partition": "dt={pt}".format(pt=ds),
##----------------------------------- 变量 ----------------------------------## config = eval(Variable.get("opay_time_zone_config")) time_zone = config['NG']['time_zone'] mysql_table = 'opay_dw.app_opay_cico_sum_ng_h' ##----------------------------------依赖数据源------------------------------## ### 检查当前小时的依赖 app_opay_cico_sum_ng_h_check_task = OssSensor( task_id='app_opay_cico_sum_ng_h_check_task', bucket_key='{hdfs_path_str}/country_code=NG/dt={pt}/hour={hour}/_SUCCESS'.format( hdfs_path_str="opay/opay_dw/app_opay_cico_sum_ng_h", pt='{{{{(execution_date+macros.timedelta(hours=({time_zone}+{gap_hour}))).strftime("%Y-%m-%d")}}}}'.format( time_zone=time_zone, gap_hour=0), hour='{{{{(execution_date+macros.timedelta(hours=({time_zone}+{gap_hour}))).strftime("%H")}}}}'.format( time_zone=time_zone, gap_hour=0) ), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag ) ### 检查上一个小时的依赖 app_opay_cico_sum_ng_h_pre_check_task = OssSensor( task_id='app_opay_cico_sum_ng_h_pre_check_task', bucket_key='{hdfs_path_str}/country_code=NG/dt={pt}/hour={hour}/_SUCCESS'.format( hdfs_path_str="opay/opay_dw/app_opay_cico_sum_ng_h", pt='{{{{(execution_date+macros.timedelta(hours=({time_zone}+{gap_hour}))).strftime("%Y-%m-%d")}}}}'.format( time_zone=time_zone, gap_hour=-1), hour='{{{{(execution_date+macros.timedelta(hours=({time_zone}+{gap_hour}))).strftime("%H")}}}}'.format(
.format(hdfs_path_str= "oride/oride_dw/dwd_oride_passanger_location_event_hi", pt='{{ds}}', hour='23'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) hdfs_path = "ufile://opay-datalake/oride/oride_dw/" + table_name else: print("成功") dwd_oride_passanger_location_event_hi_prev_day_task = OssSensor( task_id='dwd_oride_passanger_location_event_hi_prev_day_task', bucket_key= '{hdfs_path_str}/country_code=nal/dt={pt}/hour={hour}/_SUCCESS'.format( hdfs_path_str= "oride/oride_dw/dwd_oride_passanger_location_event_hi", pt='{{ds}}', hour='23'), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag) hdfs_path = "oss://opay-datalake/oride/oride_dw/" + table_name ##----------------------------------------- 任务超时监控 ---------------------------------------## def fun_task_timeout_monitor(ds, dag, **op_kwargs): dag_ids = dag.dag_id tb = [{ "dag": dag,
'email_on_failure': True, 'email_on_retry': False, } dag = airflow.DAG('app_opos_shop_target_d', schedule_interval="30 02 * * *", default_args=args, ) ##----------------------------------------- 依赖 ---------------------------------------## dwd_pre_opos_payment_order_di_task = OssSensor( task_id='dwd_pre_opos_payment_order_di_task', bucket_key='{hdfs_path_str}/country_code=nal/dt={pt}/_SUCCESS'.format( hdfs_path_str="opos/opos_dw/dwd_pre_opos_payment_order_di", pt='{{ds}}' ), bucket_name='opay-datalake', poke_interval=60, # 依赖不满足时,一分钟检查一次依赖状态 dag=dag ) ##----------------------------------------- 变量 ---------------------------------------## db_name = "opos_dw" table_name = "app_opos_shop_target_d" hdfs_path = "oss://opay-datalake/opos/opos_dw/" + table_name ##----------------------------------------- 任务超时监控 ---------------------------------------## def fun_task_timeout_monitor(ds, dag, **op_kwargs):