def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    '''
    Processing logic, rough steps: read and transform data -> write to Hive -> write to MongoDB
    '''
    hive_partition = partition(spark, logger)
    hive_partition.dropPartition(
        "bi.revenue_spend_coupon_cash_daily_agg_level_2", "dt", start_date, end_date)
    hive_partition.dropPartition(
        "bi.revenue_spend_coupon_cash_daily_agg_level_1", "dt", start_date, end_date)

    '''
    Build the coupon level-2 table
    '''
    execute_sql_level_2 = '''
    select NVL(a.game_id,-1) game_id, NVL(a.game_code,'') game_code, NVL(a.date,-1) date,
           NVL(a.package_type,-1) package_type_id, NVL(b.enum_value,'') package_type_name,
           NVL(a.from_app_id,-1) from_app_id, NVL(a.from_app_code, '') from_app_code,
           NVL(a.os_type,-1) os_type,
           a.recom_game_id, a.recom_game_code, a.recom_game_relation, a.dt, a.amount
    from (
        select game_id, game_code, date, package_type, from_app_id, from_app_code, os_type,
               recom_game_id, recom_game_code, recom_game_relation,
               cast(date as string) as dt, sum(value) as amount
        from (
            select game as game_id, gamecode as game_code, date,
                   case when ostype = 3 then 400 else pkgtype end as package_type,
                   fromapp as from_app_id, fromappcode as from_app_code, ostype as os_type,
                   recomgame as recom_game_id, recomgamecode as recom_game_code,
                   case when recomgame is not null or (recomgamecode is not null and recomgamecode!='')
                        then 3 else 1 end as recom_game_relation,
                   dt, value
            from ods.gscoupondb_consumecash
            where dt='%s' and date >= '%s' and date < '%s' and optypeid=12
            union all
            select recomgame as game_id, recomgamecode as game_code, date,
                   case when ostype = 3 then 400 else pkgtype end as package_type,
                   fromapp as from_app_id, fromappcode as from_app_code, ostype as os_type,
                   game as recom_game_id, gamecode as recom_game_code,
                   2 as recom_game_relation,
                   dt, value
            from ods.gscoupondb_consumecash
            where dt='%s' and date >= '%s' and date < '%s' and optypeid=12
              and (recomgame is not null or (recomgamecode is not null and recomgamecode!=''))
        ) t1
        group by t1.game_id, t1.game_code, t1.date, t1.package_type, t1.from_app_id,
                 t1.from_app_code, t1.os_type, t1.recom_game_id, t1.recom_game_code,
                 t1.recom_game_relation
    ) a
    left join (
        select * from dwd.dim_common_enum_dict where enum_type = 'pkgtype'
    ) b on a.package_type = b.enum_key''' % (
        start_date[0:6], start_date, end_date, start_date[0:6], start_date, end_date)

    logger.warn(execute_sql_level_2, 'sql')
    df_level_2 = spark.sql(execute_sql_level_2)
    df_level_2.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.revenue_spend_coupon_cash_daily_agg_level_2")

    '''
    Build the coupon level-1 table
    '''
    execute_sql_level_1 = '''
    select game_id, game_code, date, package_type_id, package_type_name,
           from_app_id, from_app_code, os_type, recom_game_relation,
           sum(amount) as amount, dt
    from bi.revenue_spend_coupon_cash_daily_agg_level_2
    where dt >= '%s' and dt < '%s'
    group by game_id, game_code, date, package_type_id, package_type_name,
             from_app_id, from_app_code, os_type, recom_game_relation, dt''' % (
        start_date, end_date)

    logger.warn(execute_sql_level_1, 'sql')
    df_level_1 = spark.sql(execute_sql_level_1)
    df_level_1.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.revenue_spend_coupon_cash_daily_agg_level_1")

    '''
    Insert into MongoDB
    '''
    mongo = mongoExecute()
    mongo.collectionAppend(
        spark.sql('''
            select game_id as gameId, game_code as gameCode, date,
                   package_type_id as packageTypeId, from_app_id as fromAppId,
                   from_app_code as fromAppCode, os_type as osType,
                   recom_game_relation as recommendRelation, amount as value, dt
            from bi.revenue_spend_coupon_cash_daily_agg_level_1
            where dt >= '%s' and dt < '%s' ''' % (start_date, end_date)),
        "GameProfitDB", "coupon.brief", start_date, end_date)
    mongo.collectionAppend(
        spark.sql('''
            select date, game_id as gameId, game_code as gameCode,
                   recom_game_id as relateGameId, recom_game_code as relateGameCode,
                   package_type_id as packageTypeId, from_app_id as fromAppId,
                   from_app_code as fromAppCode, os_type as osType,
                   recom_game_relation as recommendRelation, amount as value, dt
            from bi.revenue_spend_coupon_cash_daily_agg_level_2
            where dt >= '%s' and dt < '%s' ''' % (start_date, end_date)),
        "GameProfitDB", "coupon.detail", start_date, end_date)
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    # Queries are expressed in HiveQL; currencytype 3 (silver) and 100 (cash)
    gspropsmalldb_mobileprops_sql = '''
    select t1.game as game_id, t1.gamecode as game_code, t1.date as date,
           t1.goodsid as goods_id, t2.goods_name as goods_name,
           t1.pkgtype as package_type, t3.enum_value as package_type_name,
           t1.fromapp as from_app_id, t1.fromappcode as from_app_code, t1.ostype as os_type,
           t1.recomgame as recom_game_id, t1.recomgamecode as recom_game_code,
           case when t1.recomgame is null then 1
                when t1.recomgame is not null then 3
           end as recom_game_relation,
           sum(cash_amount) as cash_amount,
           sum(silver_amount) as silver_amount,
           max(t1.dt) as dt
    from (
        select game, gamecode, date, goodsid,
               case when currencytype = 100 then currencynum else 0 end as cash_amount,
               case when currencytype = 3 then currencynum else 0 end as silver_amount,
               pkgtype, fromapp, fromappcode, ostype, recomgame, recomgamecode, currencynum, dt
        from ods.gspropsmalldb_mobileprops
        where dt >= '%s' and dt < '%s' and currencytype in (100, 3)
    ) t1
    inner join (
        select goods_id, goods_name, goods_class
        from dwd.dim_goods_dict where goods_class = 2
    ) t2 on t1.goodsid = t2.goods_id
    left join (
        select enum_key, enum_value, enum_type
        from dwd.dim_common_enum_dict where enum_type = 'pkgtype'
    ) t3 on t1.pkgtype = t3.enum_key
    group by t1.game, t1.gamecode, t1.date, t1.goodsid, t2.goods_name, t1.pkgtype, t3.enum_value,
             t1.fromapp, t1.fromappcode, t1.ostype, t1.recomgame, t1.recomgamecode
    ''' % (start_date, end_date)

    logger.warn(gspropsmalldb_mobileprops_sql, 'gspropsmalldb_mobileprops_sql ')
    gspropsmalldb_mobileprops_df = spark.sql(gspropsmalldb_mobileprops_sql)

    drop_partition = partition(spark, logger)
    drop_partition.dropPartition(
        "bi.revenue_income_propsmall_daily_agg_level_2_1", "dt", start_date, end_date)

    gspropsmalldb_mobileprops_df \
        .write.partitionBy("dt") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("bi.revenue_income_propsmall_daily_agg_level_2_1")
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    '''
    Processing logic, rough steps: build the staging table -> drop final-table partitions -> insert into the final table

    Final table DDL:
    create table if not exists bi.firstlogin_whole (
        uid int, group int, app int, fromapp int, game int, channel int, promcode int,
        hardid string, sys int, gamecode string, datatype int, pkgtype int, fromappcode string,
        ostype int, jointdatafrom int, datetimes string,
        area_province string, area_city string, area_district string)
    PARTITIONED BY (dt string)
    STORED AS ORC

    Staging table DDL:
    create table if not exists stag.firstlogin_whole (
        uid int, group int, app int, fromapp int, game int, channel int, promcode int,
        hardid string, sys int, gamecode string, datatype int, pkgtype int, fromappcode string,
        ostype int, jointdatafrom int, datetimes string,
        area_province string, area_city string, area_district string)
    STORED AS ORC
    '''

    '''
    Filter by the date window and take the earliest login time for each dimension combination
    '''
    execute_sql_to_stag = '''
    select uid, group, app, fromapp, game, channel, promcode, hardid, sys, gamecode,
           datatype, pkgtype, fromappcode, ostype, jointdatafrom,
           min(concat(cast(date as string),
               case when length(time)=1 then concat('00000',cast(time as string))
                    when length(time)=2 then concat('0000',cast(time as string))
                    when length(time)=3 then concat('000',cast(time as string))
                    when length(time)=4 then concat('00',cast(time as string))
                    when length(time)=5 then concat('0',cast(time as string))
                    when length(time)=6 then cast(time as string)
               end)) datetimes,
           area_province, area_city, area_district
    from ods.gslogindb_logindetail
    where uid is not null and uid <> 0 and dt >= %(sdate)s and dt < %(edate)s
    group by uid, group, app, fromapp, game, channel, promcode, hardid, sys, gamecode,
             datatype, pkgtype, fromappcode, ostype, jointdatafrom,
             area_province, area_city, area_district
    ''' % {"sdate": start_date, "edate": end_date}

    logger.warn(execute_sql_to_stag, 'sql')
    df_stag = spark.sql(execute_sql_to_stag)
    df_stag.write.mode('overwrite').format("orc").saveAsTable("stag.firstlogin_whole")

    '''
    Drop the final table's partitions for the window
    '''
    hive_partition = partition(spark, logger)
    hive_partition.dropPartition("bi.firstlogin_whole", "dt", start_date, end_date)

    '''
    Insert into the final table
    '''
    execute_sql_to_final = '''
    select uid, group, app, fromapp, game, channel, promcode, hardid, sys, gamecode,
           datatype, pkgtype, fromappcode, ostype, jointdatafrom, datetimes,
           area_province, area_city, area_district,
           substr(datetimes,0,8) as dt
    from (
        select /*+ STREAMTABLE(a) */
               b.uid, b.group, b.app, b.fromapp, b.game, b.channel, b.promcode, b.hardid, b.sys,
               b.gamecode, b.datatype, b.pkgtype, b.fromappcode, b.ostype, b.jointdatafrom,
               b.area_province, b.area_city, b.area_district, b.datetimes,
               case when a.datetimes > b.datetimes then 1
                    when a.uid is null then 1
                    else 2
               end judge
        from bi.firstlogin_whole a
        full outer join stag.firstlogin_whole b
          on  NVL(a.uid,'')=NVL(b.uid,'') and NVL(a.group,'')=NVL(b.group,'')
          and NVL(a.app,'')=NVL(b.app,'') and NVL(a.fromapp,'')=NVL(b.fromapp,'')
          and NVL(a.game,'')=NVL(b.game,'') and NVL(a.channel,'')=NVL(b.channel,'')
          and NVL(a.promcode,'')=NVL(b.promcode,'') and NVL(a.hardid,'')=NVL(b.hardid,'')
          and NVL(a.sys,'')=NVL(b.sys,'') and NVL(a.gamecode,'')=NVL(b.gamecode,'')
          and NVL(a.datatype,'')=NVL(b.datatype,'') and NVL(a.pkgtype,'')=NVL(b.pkgtype,'')
          and NVL(a.fromappcode,'')=NVL(b.fromappcode,'') and NVL(a.ostype,'')=NVL(b.ostype,'')
          and NVL(a.jointdatafrom,'')=NVL(b.jointdatafrom,'')
          and NVL(a.area_province,'')=NVL(b.area_province,'')
          and NVL(a.area_city,'')=NVL(b.area_city,'')
          and NVL(a.area_district,'')=NVL(b.area_district,'')
    ) t1
    where judge = 1
    '''

    logger.warn(execute_sql_to_final, 'sql')
    df_final = spark.sql(execute_sql_to_final)
    df_final.write.partitionBy("dt").mode("append").format("orc") \
        .saveAsTable("bi.firstlogin_whole")
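
# NOTE: The CASE chain above zero-pads the integer time column to six digits before
# concatenating it onto date. For illustration only (the job itself keeps the explicit
# CASE chain), the same padding can be written with Hive/Spark's lpad:
def _zero_pad_time_demo(spark):
    from pyspark.sql import functions as F
    demo = spark.createDataFrame([(20181205, 91530)], ["date", "time"])
    return demo.select(
        F.concat(F.col("date").cast("string"),
                 F.lpad(F.col("time").cast("string"), 6, "0")).alias("datetimes"))
    # -> 20181205091530, matching the CASE-based expression in execute_sql_to_stag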
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    # Earlier logic that copied ods.logsdklog_mobile_detail through a coalesced merge table,
    # currently disabled:
    # logsdklog_mobile_detail_sql_1 = """
    #     select uid, date, time, app_id, app_code, app_vers, app_promchann,
    #            area_address, area_province, area_city, area_district, area_ip,
    #            duration, session, sys, tcy_hardid, tcy_imei, tcy_mac, geo, geo_lat, geo_lon,
    #            remark_channelID, father_code, father_id, father_promchann, father_vers, event,
    #            eqpt_carrier, eqpt_facturer, eqpt_idfv, eqpt_mobile, eqpt_net, eqpt_os,
    #            eqpt_resolution-h, eqpt_resolution-w, eqpt_vers, dt
    #     from ods.logsdklog_mobile_detail
    #     where dt >= '%s' and dt < '%s' """ % (start_date, end_date)
    # logger.warn(logsdklog_mobile_detail_sql_1, 'logsdklog_mobile_detail_sql_1 ')
    # gspropsmalldb_mobileprops_df = spark.sql(logsdklog_mobile_detail_sql_1)
    # gspropsmalldb_mobileprops_df.coalesce(1)
    # drop_partition = partition(spark, logger)
    # drop_partition.dropPartition("ods.merge_logsdklog_mobile_detail_1", "dt", start_date, end_date)
    # gspropsmalldb_mobileprops_df \
    #     .coalesce(1) \
    #     .write \
    #     .partitionBy("dt") \
    #     .format("orc") \
    #     .mode("append") \
    #     .saveAsTable("ods.merge_logsdklog_mobile_detail_1")
    #
    # logsdklog_mobile_detail_sql = """
    #     select uid, date, time, app_id, app_code, app_vers, app_promchann,
    #            area_address, area_province, area_city, area_district, area_ip,
    #            duration, session, sys, tcy_hardid, tcy_imei, tcy_mac, geo, geo_lat, geo_lon,
    #            remark_channelID, father_code, father_id, father_promchann, father_vers, event,
    #            eqpt_carrier, eqpt_facturer, eqpt_idfv, eqpt_mobile, eqpt_net, eqpt_os,
    #            eqpt_resolution-h, eqpt_resolution-w, eqpt_vers, dt
    #     from ods.merge_logsdklog_mobile_detail_1
    #     where dt >= '%s' and dt < '%s' """ % (start_date, end_date)
    # logger.warn(logsdklog_mobile_detail_sql, 'logsdklog_mobile_detail_sql ')
    # gspropsmalldb_mobileprops_df = spark.sql(logsdklog_mobile_detail_sql)
    # gspropsmalldb_mobileprops_df.coalesce(1)
    # drop_partition = partition(spark, logger)
    # drop_partition.dropPartition("ods.logsdklog_mobile_detail", "dt", start_date, end_date)
    # gspropsmalldb_mobileprops_df \
    #     .coalesce(1) \
    #     .write \
    #     .partitionBy("dt") \
    #     .format("orc") \
    #     .mode("append") \
    #     .saveAsTable("ods.logsdklog_mobile_detail")

    test_store_in_orc_sql_1 = "select name, " \
                              "age, " \
                              "datetime, " \
                              "date as date " \
                              "from tempdb.test_store_in_orc " \
                              "where date='2018-12-05' and sex = 'female' "

    logger.warn(test_store_in_orc_sql_1, 'test_store_in_orc_sql_1 ')
    test_store_in_orc_df_1 = spark.sql(test_store_in_orc_sql_1)

    drop_partition = partition(spark, logger)
    drop_partition.dropPartition("tempdb.test_store_in_orc_1", "date", start_date, end_date)

    test_store_in_orc_df_1 \
        .coalesce(1) \
        .write \
        .partitionBy("date") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("tempdb.test_store_in_orc_1")

    test_store_in_orc_sql = "select name, " \
                            "age, " \
                            "datetime, " \
                            "date as date, 'female' as sex " \
                            "from tempdb.test_store_in_orc_1 " \
                            "where date='2018-12-05' "

    logger.warn(test_store_in_orc_sql, 'test_store_in_orc_sql ')
    test_store_in_orc_df = spark.sql(test_store_in_orc_sql)

    # drop_partition = partition(spark, logger)
    # drop_partition.dropPartition("tempdb.test_store_in_orc_df", "dt", start_date, end_date)
    test_store_in_orc_df \
        .coalesce(1) \
        .write \
        .partitionBy("date", "sex") \
        .format("orc") \
        .mode("overwrite") \
        .saveAsTable("tempdb.test_store_in_orc")
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    '''
    Processing logic, rough steps: drop partitions -> read and transform data -> write to Hive -> write to MongoDB
    '''

    '''
    Add rows with no recommendation relation and rows on the recommended side of the relation
    '''
    hive_partition = partition(spark, logger)
    hive_partition.dropPartition("bi.revenue_spend_cash_daily_agg_level_2", "dt",
                                 start_date, end_date)

    execute_sql_level_2_step_1 = '''
    select /*+ MAPJOIN(smalltable)*/
           case when game_id is null then -1 else game_id end as game_id,
           case when game_code is null then '' else game_code end as game_code,
           `date`,
           case when item_id is null then -1 else item_id end as item_id,
           case when item_name is null then '' else item_name end as item_name,
           case when package_type is null then -1 else package_type end as package_type,
           case when enum_pkg.enum_value is null then '' else enum_pkg.enum_value end as package_type_name,
           case when from_app_id is null then -1 else from_app_id end as from_app_id,
           case when from_app_code is null then '' else from_app_code end as from_app_code,
           case when os_type is null then -1 else os_type end as os_type,
           recom_game_id, recom_game_code,
           case when recom_game_id is null or recom_game_id = '' then 1 else 3 end as recom_game_relation,
           item_amount, dt
    from (
        select game_id, game_code, `date`, item_id, item_name, package_type,
               from_app_id, from_app_code, os_type, recom_game_id, recom_game_code,
               sum(item_amount) as item_amount, max(dt) as dt
        from (
            select reward.game as game_id, reward.gamecode as game_code, reward.`date`,
                   reward.item as item_id, dict.item_name,
                   case when reward.ostype = 3 then 400 else reward.pkgtype end as package_type,
                   reward.fromapp as from_app_id, reward.fromappcode as from_app_code,
                   reward.ostype as os_type,
                   reward.recomgame as recom_game_id, reward.recomgamecode as recom_game_code,
                   cast(reward.itemnum * dict.item_vale * 100 as bigint) as item_amount,
                   reward.dt
            from ods.gsrewardsystemdb_reward reward, dwd.dim_items_dict dict
            where reward.item = dict.item_id
              and dict.item_type_id_level_1 in (400000000, 500000000)
              and reward.dt >= %(sdate)s and reward.dt < %(edate)s
        ) T1
        group by game_id, game_code, `date`, item_id, item_name, package_type,
                 from_app_id, from_app_code, os_type, recom_game_id, recom_game_code
    ) T2
    left join dwd.dim_common_enum_dict enum_pkg
      on T2.package_type = enum_pkg.enum_key and enum_pkg.enum_type = 'pkgtype'
    ''' % {"sdate": start_date, "edate": end_date}

    logger.warn(execute_sql_level_2_step_1, 'sql')
    df_level_2_step_1 = spark.sql(execute_sql_level_2_step_1)
    df_level_2_step_1.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.revenue_spend_cash_daily_agg_level_2")

    '''
    Derive the primary recommendation relation from the recommended-side rows
    '''
    execute_sql_level_2_step_2 = '''
    select recom_game_id as game_id, recom_game_code as game_code, `date`,
           item_id, item_name, package_type, package_type_name,
           from_app_id, from_app_code, os_type,
           game_id as recom_game_id, game_code as recom_game_code,
           2 as recom_game_relation, item_amount, dt
    from bi.revenue_spend_cash_daily_agg_level_2
    where recom_game_relation = 3
      and dt >= ''' + start_date + ''' and dt < ''' + end_date

    logger.warn(execute_sql_level_2_step_2, 'sql')
    df_level_2_step_2 = spark.sql(execute_sql_level_2_step_2)
    df_level_2_step_2.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.revenue_spend_cash_daily_agg_level_2")

    '''
    Build the level-1 table from the level-2 table by dropping the recommended-game columns
    '''
    hive_partition.dropPartition("bi.revenue_spend_cash_daily_agg_level_1", "dt",
                                 start_date, end_date)

    execute_sql_level_1_step_1 = '''
    select game_id, game_code, `date`, item_id, item_name, package_type, package_type_name,
           from_app_id, from_app_code, os_type, recom_game_relation,
           sum(item_amount) as item_amount, dt
    from bi.revenue_spend_cash_daily_agg_level_2
    where dt >= ''' + start_date + ''' and dt < ''' + end_date + '''
    group by game_id, game_code, `date`, item_id, item_name, package_type, package_type_name,
             from_app_id, from_app_code, os_type, recom_game_relation, dt
    '''

    logger.warn(execute_sql_level_1_step_1, 'sql')
    df_level_1_step_1 = spark.sql(execute_sql_level_1_step_1)
    df_level_1_step_1.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.revenue_spend_cash_daily_agg_level_1")

    '''
    Incrementally insert the generated data into MongoDB
    '''
    mongo = mongoExecute()

    out_put_level_2_sql = '''
    select game_id as gameId, game_code as gameCode, `date`,
           item_id as itemId, item_name as itemName,
           package_type as packageTypeId, package_type_name as packageTypeName,
           from_app_id as fromAppId,
           case when from_app_code is null then '' else from_app_code end as fromAppCode,
           os_type as osType,
           recom_game_id as relateGameId, recom_game_code as relateGameCode,
           recom_game_relation as recommendRelation,
           item_amount as value, dt
    from bi.revenue_spend_cash_daily_agg_level_2
    where dt >= ''' + start_date + ''' and dt < ''' + end_date
    level_2 = spark.sql(out_put_level_2_sql)
    mongo.collectionAppend(level_2, "GameProfitDB", "rmb.detail", start_date, end_date)

    out_put_level_1_sql = '''
    select game_id as gameId, game_code as gameCode, `date`,
           item_id as itemId, item_name as itemName,
           package_type as packageTypeId, package_type_name as packageTypeName,
           from_app_id as fromAppId,
           case when from_app_code is null then '' else from_app_code end as fromAppCode,
           os_type as osType,
           recom_game_relation as recommendRelation,
           item_amount as value, dt
    from bi.revenue_spend_cash_daily_agg_level_1
    where dt >= ''' + start_date + ''' and dt < ''' + end_date
    level_1 = spark.sql(out_put_level_1_sql)
    mongo.collectionAppend(level_1, "GameProfitDB", "rmb.brief", start_date, end_date)
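
# NOTE: This job splices the date window into its SQL in two different ways: dict-style
# %(sdate)s placeholders in step 1, and plain string concatenation (+ start_date +) in the
# later steps. For illustration only, the step-2 filter could use the same placeholder
# style throughout, which keeps quoting of the window in one place:
def _level_2_step_2_sql(start_date, end_date):
    return '''
    select recom_game_id as game_id, recom_game_code as game_code, `date`,
           item_id, item_name, package_type, package_type_name,
           from_app_id, from_app_code, os_type,
           game_id as recom_game_id, game_code as recom_game_code,
           2 as recom_game_relation, item_amount, dt
    from bi.revenue_spend_cash_daily_agg_level_2
    where recom_game_relation = 3 and dt >= '%(sdate)s' and dt < '%(edate)s'
    ''' % {"sdate": start_date, "edate": end_date}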
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    '''
    Processing logic, rough steps: drop partitions -> staging table -> insert into the final table
    '''
    # Drop partitions
    hive_partition = partition(spark, logger)
    hive_partition.dropPartition("bi.firstlogin_playtogether_room_account1st", "dt",
                                 start_date, end_date)

    execute_sql_increase_table = '''
    select t2.uid, t2.app, t2.date, t2.time, t2.group, t2.province, t2.city, t2.district,
           t2.hardid, t2.roomno, t2.roomtype
    from (
        select uid, app, date, time, group, province, city, district, hardid, roomno, roomtype
        from bi.firstlogin_playtogether_room_account1st
    ) t1
    right join (
        select t.uid uid, t.app app, cast(t.date as int) date, cast(t.time as int) time,
               t.group group, t.province province, t.city city, t.district district,
               t.hardid hardid, t.roomno roomno, t.roomtype roomtype
        from (
            select uid, app, date, time, group, province, city, district, hardid, roomno, roomtype,
                   cast(concat(cast(date as string),
                        case when length(time)=1 then concat('00000',cast(time as string))
                             when length(time)=2 then concat('0000',cast(time as string))
                             when length(time)=3 then concat('000',cast(time as string))
                             when length(time)=4 then concat('00',cast(time as string))
                             when length(time)=5 then concat('0',cast(time as string))
                             else cast(time as string)
                        end) as bigint) as date_time,
                   row_number() over(partition by uid, app order by
                        cast(concat(cast(date as string),
                             case when length(time)=1 then concat('00000',cast(time as string))
                                  when length(time)=2 then concat('0000',cast(time as string))
                                  when length(time)=3 then concat('000',cast(time as string))
                                  when length(time)=4 then concat('00',cast(time as string))
                                  when length(time)=5 then concat('0',cast(time as string))
                                  else cast(time as string)
                             end) as bigint)) as rn
            from ods.gsplaytogetherdb_gameactive
            where app is not null and group is not null and group in (6, 66, 8, 88)
        ) t
        where rn = 1
    ) t2 on t1.uid = t2.uid and t1.app = t2.app
    where t1.uid is null
    '''

    logger.warn(execute_sql_increase_table, 'sql')
    increase_table = spark.sql(execute_sql_increase_table)
    increase_table.write.mode('overwrite').format("orc").saveAsTable(
        "stag.firstlogin_playtogether_room_account1st")

    # Insert the staging data into the final table
    execute_sql_into_the_table = '''
    select t2.uid, t2.app, t2.date, t2.time, t2.group, t2.province, t2.city, t2.district,
           t2.hardid, t2.roomno, t2.roomtype,
           t2.date as dt
    from stag.firstlogin_playtogether_room_account1st t2
    '''

    logger.warn(execute_sql_into_the_table, 'sql')
    the_end_table = spark.sql(execute_sql_into_the_table)
    the_end_table.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.firstlogin_playtogether_room_account1st")

    '''
    Incrementally insert the generated data into MongoDB
    '''
    mongo = mongoExecute()
    mongo.collectionAppend(the_end_table, "bi", "firstlogin_playtogether_room_account1st",
                           start_date, end_date)

    logger.warn("Job over", "banner")
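
# NOTE: The incremental load above finds new (uid, app) pairs with the classic
# "right join ... where t1.uid is null" anti-join idiom. Spark SQL also accepts an
# explicit LEFT ANTI JOIN, which states the same intent directly. Illustration only;
# new_rows_sql stands in for the deduplicated first-activation subquery built above.
def _anti_join_sql(new_rows_sql):
    return '''
    select t2.*
    from (%s) t2
    left anti join bi.firstlogin_playtogether_room_account1st t1
      on t1.uid = t2.uid and t1.app = t2.app
    ''' % new_rows_sql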
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    # Queries are expressed in HiveQL; currencytype 3 (silver) and 100 (cash)
    gspropsmalldb_mobileprops_sql = '''
    select t1.game as game_id, t1.gamecode as game_code, t1.date as date,
           t1.goodsid as goods_id, t2.goods_name as goods_name,
           t1.pkgtype as package_type, t3.enum_value as package_type_name,
           t1.fromapp as from_app_id, t1.fromappcode as from_app_code, t1.ostype as os_type,
           t1.recomgame as recom_game_id, t1.recomgamecode as recom_game_code,
           case when t1.recomgame is null then 1
                when t1.recomgame is not null then 3
           end as recom_game_relation,
           sum(cash_amount) as cash_amount,
           sum(silver_amount) as silver_amount,
           max(t1.dt) as dt
    from (
        select game, gamecode, date, goodsid,
               case when currencytype = 100 then currencynum else 0 end as cash_amount,
               case when currencytype = 3 then currencynum else 0 end as silver_amount,
               pkgtype, fromapp, fromappcode, ostype, recomgame, recomgamecode, currencynum,
               date as dt
        from ods.gspropsmalldb_mobileprops
        where dt >= '%s' and dt < '%s' and currencytype in (100, 3)
    ) t1
    inner join (
        select goods_id, goods_name, goods_class
        from dwd.dim_goods_dict where goods_class = 2
    ) t2 on t1.goodsid = t2.goods_id
    left join (
        select enum_key, enum_value, enum_type
        from dwd.dim_common_enum_dict where enum_type = 'pkgtype'
    ) t3 on t1.pkgtype = t3.enum_key
    group by t1.game, t1.gamecode, t1.date, t1.goodsid, t2.goods_name, t1.pkgtype, t3.enum_value,
             t1.fromapp, t1.fromappcode, t1.ostype, t1.recomgame, t1.recomgamecode
    ''' % (start_date, end_date)

    logger.warn(gspropsmalldb_mobileprops_sql, 'gspropsmalldb_mobileprops_sql ')
    gspropsmalldb_mobileprops_df = spark.sql(gspropsmalldb_mobileprops_sql)

    drop_partition = partition(spark, logger)
    drop_partition.dropPartition(
        "bi.revenue_income_propsmall_daily_agg_level_2_1", "dt", start_date, end_date)

    gspropsmalldb_mobileprops_df \
        .write.partitionBy("dt") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("bi.revenue_income_propsmall_daily_agg_level_2_1")

    gspaydb_basic_sql = '''
    select t1.game as game_id, t1.gamecode as game_code, t1.paydate as date,
           t1.gamegoodsid as goods_id, t2.goods_name as goods_name,
           t1.pkgtype as package_type, t3.enum_value as package_type_name,
           t1.fromapp as from_app_id, t1.fromappcode as from_app_code, t1.ostype as os_type,
           t1.recomgame as recom_game_id, t1.recomgamecode as recom_game_code,
           case when t1.recomgame is null then 1
                when t1.recomgame is not null then 3
           end as recom_game_relation,
           sum(t1.price) as cash_amount,
           0 as silver_amount,
           t1.dt
    from (
        select game, gamecode, paydate, gamegoodsid, pkgtype, fromapp, fromappcode, ostype,
               recomgame, recomgamecode, prodver, price,
               paydate as dt
        from ods.gspaydb_basic
        where dt='%s' and paydate >= '%s' and paydate < '%s'
          and (prodver is null or prodver = '') and product = 6001
    ) t1
    inner join (
        select goods_id, goods_name, goods_class
        from dwd.dim_goods_dict where goods_class = 2
    ) t2 on t1.gamegoodsid = t2.goods_id
    left join (
        select enum_key, enum_value, enum_type
        from dwd.dim_common_enum_dict where enum_type = 'pkgtype'
    ) t3 on t1.pkgtype = t3.enum_key
    group by t1.game, t1.gamecode, t1.paydate, t1.gamegoodsid, t2.goods_name, t1.pkgtype,
             t3.enum_value, t1.fromapp, t1.fromappcode, t1.ostype, t1.recomgame, t1.recomgamecode,
             t1.dt
    ''' % (start_date[0:6], start_date, end_date)

    logger.warn(gspaydb_basic_sql, 'gspaydb_basic_sql ')
    gspaydb_basic_df = spark.sql(gspaydb_basic_sql)

    gspaydb_basic_df \
        .write.partitionBy("dt") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("bi.revenue_income_propsmall_daily_agg_level_2_1")

    # insert to agg_level_2
    agg_level_2_sql = '''
    select game_id, game_code, date, goods_id, goods_name, package_type, package_type_name,
           from_app_id, from_app_code, os_type, recom_game_id, recom_game_code, recom_game_relation,
           sum(cash_amount) as cash_amount, sum(silver_amount) as silver_amount, dt
    from bi.revenue_income_propsmall_daily_agg_level_2_1
    where dt >= '%s' and dt < '%s'
    group by game_id, game_code, date, goods_id, goods_name, package_type, package_type_name,
             from_app_id, from_app_code, os_type, recom_game_id, recom_game_code,
             recom_game_relation, dt
    ''' % (start_date, end_date)

    logger.warn(agg_level_2_sql, 'agg_level_2_sql ')
    agg_level_2_df = spark.sql(agg_level_2_sql)

    drop_partition.dropPartition(
        "bi.revenue_income_propsmall_daily_agg_level_2", "dt", start_date, end_date)

    agg_level_2_df \
        .write \
        .partitionBy("dt") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("bi.revenue_income_propsmall_daily_agg_level_2")

    main_recommendation_sql = '''
    select recom_game_id as game_id, recom_game_code as game_code, date, goods_id, goods_name,
           package_type, package_type_name, from_app_id, from_app_code, os_type,
           game_id as recom_game_id, game_code as recom_game_code,
           2 as recom_game_relation,
           cash_amount, silver_amount, dt
    from bi.revenue_income_propsmall_daily_agg_level_2
    where dt >= '%s' and dt < '%s' and recom_game_relation = 3
    ''' % (start_date, end_date)

    logger.warn(main_recommendation_sql, 'main_recommendation_sql ')
    main_recommendation_df = spark.sql(main_recommendation_sql)

    main_recommendation_df \
        .write \
        .partitionBy("dt") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("bi.revenue_income_propsmall_daily_agg_level_2")

    # insert to agg_level_1
    agg_level_1_sql = '''
    select game_id, game_code, date, goods_id, goods_name, package_type, package_type_name,
           from_app_id, from_app_code, os_type, recom_game_relation,
           sum(cash_amount) as cash_amount, sum(silver_amount) as silver_amount, dt
    from bi.revenue_income_propsmall_daily_agg_level_2
    where dt >= '%s' and dt < '%s'
    group by game_id, game_code, date, goods_id, goods_name, package_type, package_type_name,
             from_app_id, from_app_code, os_type, recom_game_relation, dt
    ''' % (start_date, end_date)

    drop_partition.dropPartition(
        "bi.revenue_income_propsmall_daily_agg_level_1", "dt", start_date, end_date)
    # drop_partition.dropPartition("bi.revenue_spend_exchange_daily_agg_level_1", "dt", '20181107', '20181108')

    logger.warn(agg_level_1_sql, 'agg_level_1_sql ')
    agg_level_1_df = spark.sql(agg_level_1_sql)

    agg_level_1_df \
        .write \
        .partitionBy("dt") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("bi.revenue_income_propsmall_daily_agg_level_1")

    # insert agg_level_1 into mongoDB
    insert_mongo_agg_level_1_sql = '''
    select game_id as gameId,
           if(game_code is null or game_code = '', '', game_code) as gameCode,
           date as date,
           if(goods_id is null or goods_id = '', -1, goods_id) as propId,
           if(goods_name is null or goods_name = '', '', goods_name) as propName,
           if(package_type is null or package_type = '', -1, package_type) as packageTypeId,
           if(package_type_name is null or package_type_name = '', '', package_type_name) as packageTypeName,
           if(from_app_id is null or from_app_id = '', -1, from_app_id) as fromAppId,
           case when from_app_code is null then ' '
                when from_app_code = '' then ' '
                else from_app_code
           end as fromAppCode,
           if(os_type is null or os_type = '', -1, os_type) as osType,
           if(recom_game_relation is null or recom_game_relation = '', -1, recom_game_relation) as recommendRelation,
           if(cash_amount is null or cash_amount = '', 0, cash_amount) as money,
           if(silver_amount is null or silver_amount = '', 0, silver_amount) as silvers,
           dt
    from bi.revenue_income_propsmall_daily_agg_level_1
    where dt >= '%s' and dt < '%s'
    ''' % (start_date, end_date)

    logger.warn(insert_mongo_agg_level_1_sql, 'insert_mongo_agg_level_1_sql ')
    insert_mongo_agg_level_1_df = spark.sql(insert_mongo_agg_level_1_sql)

    mongo = mongoExecute()
    mongo.collectionAppend(insert_mongo_agg_level_1_df, "GameProfitDB",
                           "unique_prop_income.brief", start_date, end_date)

    # insert agg_level_2 into mongoDB
    insert_mongo_agg_level_2_sql = '''
    select game_id as gameId,
           if(game_code is null or game_code = '', '', game_code) as gameCode,
           date as date,
           if(goods_id is null or goods_id = '', -1, goods_id) as propId,
           if(goods_name is null or goods_name = '', '', goods_name) as propName,
           if(package_type is null or package_type = '', -1, package_type) as packageTypeId,
           if(package_type_name is null or package_type_name = '', '', package_type_name) as packageTypeName,
           if(from_app_id is null or from_app_id = '', -1, from_app_id) as fromAppId,
           case when from_app_code is null then ' '
                when from_app_code = '' then ' '
                else from_app_code
           end as fromAppCode,
           if(os_type is null or os_type = '', -1, os_type) as osType,
           recom_game_id as relateGameId,
           recom_game_code as relateGameCode,
           recom_game_relation as recommendRelation,
           if(cash_amount is null or cash_amount = '', 0, cash_amount) as money,
           if(silver_amount is null or silver_amount = '', 0, silver_amount) as silvers,
           dt
    from bi.revenue_income_propsmall_daily_agg_level_2
    where dt >= '%s' and dt < '%s'
    ''' % (start_date, end_date)
    # % (20181108, 20181109)

    logger.warn(insert_mongo_agg_level_2_sql, 'insert_mongo_agg_level_2_sql ')
    insert_mongo_agg_level_2_df = spark.sql(insert_mongo_agg_level_2_sql)

    mongo.collectionAppend(insert_mongo_agg_level_2_df, "GameProfitDB",
                           "unique_prop_income.detail", start_date, end_date)
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    '''
    Processing logic, rough steps: read and transform data -> write to Hive -> write to MongoDB
    '''
    hive_partition = partition(spark, logger)
    hive_partition.dropPartition("bi.revenue_silver_daily_agg_level_2", "dt",
                                 start_date, end_date)
    hive_partition.dropPartition("bi.revenue_silver_daily_agg_level_1", "dt",
                                 start_date, end_date)

    '''
    Build the silver level-2 table
    '''
    execute_sql_level_2 = '''
    select NVL(t2.game_id, -1) game_id, NVL(t2.game_code, '') game_code, NVL(t2.date,-1) date,
           NVL(t2.op_id,-1) op_id, NVL(t2.op_name,'') op_name,
           NVL(t2.package_type,-1) package_type, NVL(t3.enum_value,'') as package_type_name,
           NVL(t2.from_app_id,-1) from_app_id, NVL(t2.from_app_code, '') from_app_code,
           NVL(t2.os_type,-1) os_type,
           NVL(t2.op_type_classified_name,'') op_type_classified_name,
           t2.recom_game_id, t2.recom_game_code, t2.recom_game_relation,
           t2.silver_amount, t2.dt
    from (
        select t1.game_id, t1.game_code, t1.date, t1.op_id, t1.op_name,
               case when t1.os_type_id = 3 then 400 else t1.package_type_id end as package_type,
               t1.from_app_id, t1.from_app_code, t1.os_type_id as os_type,
               t1.op_type_classified_name,
               t1.recom_game_id, t1.recom_game_code, t1.recom_game_relation,
               sum(abs(t1.silver_deposit)) as silver_amount,
               max(t1.dt) as dt
        from (
            select recom_game_id as game_id, recom_game_code as game_code, date, op_id, op_name,
                   package_type_id, from_app_id, from_app_code, os_type_id,
                   game_id as recom_game_id, game_code as recom_game_code,
                   2 as recom_game_relation,
                   silver_deposit,
                   case when op_type_id in(4,5) then 0 when op_type_id in(1,3) then 1 end as op_type_classified_name,
                   dt
            from dwd.fact_silver_detail
            where dt >= '%s' and dt < '%s' and log_source='gamelog' and op_type_id in(1,3,4,5)
              and (recom_game_id is not null or (recom_game_code is not null and recom_game_code!=''))
            union all
            select game_id, game_code, date, op_id, op_name,
                   package_type_id, from_app_id, from_app_code, os_type_id,
                   recom_game_id, recom_game_code,
                   case when recom_game_id is not null or (recom_game_code is not null and recom_game_code!='')
                        then 3 else 1 end as recom_game_relation,
                   silver_deposit,
                   case when op_type_id in(4,5) then 0 when op_type_id in(1,3) then 1 end as op_type_classified_name,
                   dt
            from dwd.fact_silver_detail
            where dt >= '%s' and dt < '%s' and log_source='gamelog' and op_type_id in(1,3,4,5)
        ) t1
        group by t1.game_id, t1.game_code, t1.date, t1.op_id, t1.op_name, t1.package_type_id,
                 t1.from_app_id, t1.from_app_code, t1.os_type_id, t1.op_type_classified_name,
                 t1.recom_game_relation, t1.recom_game_id, t1.recom_game_code
    ) t2
    left join (
        select * from dwd.dim_common_enum_dict where enum_type = 'pkgtype'
    ) t3 on t2.package_type = t3.enum_key''' % (start_date, end_date, start_date, end_date)

    logger.warn(execute_sql_level_2, 'sql')
    df_level_2 = spark.sql(execute_sql_level_2)
    df_level_2.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.revenue_silver_daily_agg_level_2")

    '''
    Build the silver level-1 table
    '''
    execute_sql_level_1 = '''
    select game_id, game_code, date, op_id, op_name, package_type, package_type_name,
           from_app_id, from_app_code, os_type, op_type_classified_name, recom_game_relation,
           sum(silver_amount) as silver_amount, dt
    from bi.revenue_silver_daily_agg_level_2
    where dt >= '%s' and dt < '%s'
    group by game_id, game_code, date, op_id, op_name, package_type, package_type_name,
             from_app_id, from_app_code, os_type, op_type_classified_name,
             recom_game_relation, dt''' % (start_date, end_date)

    logger.warn(execute_sql_level_1, 'sql')
    df_level_1 = spark.sql(execute_sql_level_1)
    df_level_1.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.revenue_silver_daily_agg_level_1")

    '''
    Insert into MongoDB
    '''
    mongo = mongoExecute()
    mongo.collectionAppend(
        spark.sql('''
            select game_id as gameId, game_code as gameCode, date,
                   op_id as operateId, op_name as operateName,
                   package_type as packageTypeId, from_app_id as fromAppId,
                   from_app_code as fromAppCode, os_type as osType,
                   recom_game_relation as recommendRelation,
                   silver_amount as silvers, op_type_classified_name as type, dt
            from bi.revenue_silver_daily_agg_level_1
            where dt >= '%s' and dt < '%s' ''' % (start_date, end_date)),
        "GameProfitDB", "silver.brief", start_date, end_date)
    mongo.collectionAppend(
        spark.sql('''
            select game_id as gameId, game_code as gameCode, date,
                   op_id as operateId,
                   package_type as packageTypeId, from_app_id as fromAppId,
                   from_app_code as fromAppCode, os_type as osType,
                   recom_game_id as relateGameId, recom_game_code as relateGameCode,
                   recom_game_relation as recommendRelation,
                   silver_amount as silvers, op_type_classified_name as type, dt
            from bi.revenue_silver_daily_agg_level_2
            where dt >= '%s' and dt < '%s' ''' % (start_date, end_date)),
        "GameProfitDB", "silver.detail", start_date, end_date)
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    gsgiftcoupon_acquiregc_sql = '''
    select t1.game as game_id, t1.gamecode as game_code, t1.date as date,
           t1.pkgtype as package_type, t2.enum_value as package_type_name,
           t1.fromapp as from_app_id, t1.fromappcode as from_app_code, t1.ostype as os_type,
           t1.recomgame as recom_game_id, t1.recomgamecode as recom_game_code,
           case when t1.recomgame is null then 1
                when t1.recomgame is not null then 3
           end as recom_game_relation,
           sum(t1.num) as giftcoupon_amount,
           max(t1.dt) as dt
    from (
        select game, gamecode, date, pkgtype, fromapp, fromappcode, ostype,
               recomgame, recomgamecode, num, dt
        from ods.gsgiftcoupondb_acquiregc
        where dt >= '%s' and dt < '%s'
    ) t1
    left join (
        select enum_key, enum_value, enum_type
        from dwd.dim_common_enum_dict where enum_type = 'pkgtype'
    ) t2 on t1.pkgtype = t2.enum_key
    group by t1.game, t1.gamecode, t1.date, t1.pkgtype, t2.enum_value,
             t1.fromapp, t1.fromappcode, t1.ostype, t1.recomgame, t1.recomgamecode
    ''' % (start_date, end_date)
    # % (20181107, 20181108)

    logger.warn(gsgiftcoupon_acquiregc_sql, 'gsgiftcoupon_acquiregc_sql')
    gsgiftcoupon_acquiregc_df = spark.sql(gsgiftcoupon_acquiregc_sql)

    hive_partition = partition(spark, logger)
    hive_partition.dropPartition("bi.revenue_spend_exchange_daily_agg_level_2", "dt",
                                 start_date, end_date)

    gsgiftcoupon_acquiregc_df \
        .write.partitionBy("dt") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("bi.revenue_spend_exchange_daily_agg_level_2")
    # gsgiftcoupon_acquiregc_df \
    #     .write \
    #     .format("orc") \
    #     .insertInto("bi.revenue_spend_exchange_daily_agg_level_2")

    main_recommendation_sql = '''
    select recom_game_id as game_id, recom_game_code as game_code, date,
           package_type, package_type_name, from_app_id, from_app_code, os_type,
           game_id as recom_game_id, game_code as recom_game_code,
           2 as recom_game_relation,
           giftcoupon_amount, dt
    from bi.revenue_spend_exchange_daily_agg_level_2
    where dt >= '%s' and dt < '%s' and recom_game_relation = 3
    ''' % (start_date, end_date)
    # % (20181107, 20181108)

    logger.warn(main_recommendation_sql, 'main_recommendation_sql')
    main_recommendation_df = spark.sql(main_recommendation_sql)

    main_recommendation_df \
        .write \
        .partitionBy("dt") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("bi.revenue_spend_exchange_daily_agg_level_2")

    # insert to agg_level_1
    agg_level_1_sql = '''
    select game_id, game_code, date, package_type, package_type_name,
           from_app_id, from_app_code, os_type, recom_game_relation,
           sum(giftcoupon_amount) as giftcoupon_amount, dt
    from bi.revenue_spend_exchange_daily_agg_level_2
    where dt >= '%s' and dt < '%s'
    group by game_id, game_code, date, package_type, package_type_name,
             from_app_id, from_app_code, os_type, recom_game_relation, dt
    ''' % (start_date, end_date)
    # % (20181107, 20181108)

    agg_level_1_partition = partition(spark, logger)
    agg_level_1_partition.dropPartition(
        "bi.revenue_spend_exchange_daily_agg_level_1", "dt", start_date, end_date)

    logger.warn(agg_level_1_sql, 'agg_level_1_sql ')
    agg_level_1_df = spark.sql(agg_level_1_sql)

    agg_level_1_df \
        .write \
        .partitionBy("dt") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("bi.revenue_spend_exchange_daily_agg_level_1")

    # agg_level_1 into mongoDB
    insert_mongo_agg_level_1_sql = '''
    SELECT game_id as gameId,
           if(game_code is null or game_code = '', '', game_code) as gameCode,
           date as date,
           if(package_type is null or package_type = '', -1, package_type) as packageTypeId,
           if(package_type_name is null or package_type_name = '', '', package_type_name) as packageTypeName,
           if(from_app_id is null or from_app_id = '', -1, from_app_id) as fromAppId,
           case when from_app_code is null then ' '
                when from_app_code = '' then ' '
                else from_app_code
           end as fromAppCode,
           if(os_type is null or os_type = '', -1, os_type) as osType,
           recom_game_relation as recommendRelation,
           if(giftcoupon_amount is null or giftcoupon_amount = '', 0, giftcoupon_amount) as count,
           dt
    FROM bi.revenue_spend_exchange_daily_agg_level_1
    where dt >= '%s' and dt < '%s'
    ''' % (start_date, end_date)
    # % (20181107, 20181108)

    logger.warn(insert_mongo_agg_level_1_sql, 'insert_mongo_agg_level_1_sql ')
    insert_mongo_agg_level_1_df = spark.sql(insert_mongo_agg_level_1_sql)

    mongo = mongoExecute()
    mongo.collectionAppend(insert_mongo_agg_level_1_df, "GameProfitDB", "voucher.brief",
                           start_date, end_date)
    # insert_mongo_agg_level_1_df \
    #     .write \
    #     .format("com.mongodb.spark.sql.DefaultSource") \
    #     .mode('overwrite') \
    #     .option("database", "bi") \
    #     .option("collection", "revenue_spend_exchange_daily_agg_level_1") \
    #     .save()

    # agg_level_2 into mongoDB
    insert_mongo_agg_level_2_sql = '''
    SELECT game_id as gameId,
           if(game_code is null or game_code = '', '', game_code) as gameCode,
           date as date,
           if(package_type is null or package_type = '', -1, package_type) as packageTypeId,
           if(package_type_name is null or package_type_name = '', '', package_type_name) as packageTypeName,
           if(from_app_id is null or from_app_id = '', -1, from_app_id) as fromAppId,
           case when from_app_code is null then ' '
                when from_app_code = '' then ' '
                else from_app_code
           end as fromAppCode,
           if(os_type is null or os_type = '', -1, os_type) as osType,
           recom_game_id as relateGameId,
           recom_game_code as relateGameCode,
           recom_game_relation as recommendRelation,
           if(giftcoupon_amount is null or giftcoupon_amount = '', 0, giftcoupon_amount) as count,
           dt
    FROM bi.revenue_spend_exchange_daily_agg_level_2
    where dt >= '%s' and dt < '%s'
    ''' % (start_date, end_date)
    # % (20181107, 20181108)

    logger.warn(insert_mongo_agg_level_2_sql, 'insert_mongo_agg_level_2_sql ')
    insert_mongo_agg_level_2_df = spark.sql(insert_mongo_agg_level_2_sql)

    mongo.collectionAppend(insert_mongo_agg_level_2_df, "GameProfitDB", "voucher.detail",
                           start_date, end_date)
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    '''
    Processing logic, rough steps: drop partitions -> staging table -> insert into the final table
    '''
    # Drop partitions
    hive_partition = partition(spark, logger)
    hive_partition.dropPartition("bi.firstlogin_mobile_lianyun_user", "dt",
                                 start_date, end_date)

    execute_sql_increase_table = '''
    select t6.uid, t6.date, t6.time, t6.channel, t6.group, t6.ostype, t6.jointdatafrom
    from (
        select uid from bi.firstlogin_mobile_lianyun_user
    ) t5
    right join (
        select t3.uid,
               cast(substr(t3.datetimes,1,8) as int) date,
               cast(substr(t3.datetimes,9) as int) time,
               t4.channel, t4.group, t4.ostype, t4.jointdatafrom
        from (
            select t2.uid uid, min(t2.datetimes) datetimes
            from (
                select app_id from dwd.dim_game_dict where run_type = '联运_手游联运'
            ) t1
            left join (
                select app, uid, datetimes
                from bi.firstlogin_whole
                where dt >= %(sdate)s and dt < %(edate)s and uid is not null
                group by app, uid, datetimes
            ) t2 on t1.app_id = t2.app
            where t2.uid is not null
            group by t2.uid
        ) t3
        left join (
            select t.uid uid, t.datetimes datetimes, t.channel channel, t.group group,
                   t.ostype ostype, t.jointdatafrom jointdatafrom
            from (
                select uid, datetimes, channel, group, ostype, jointdatafrom,
                       row_number() over(partition by uid order by datetimes) as rn
                from bi.firstlogin_whole
                where dt >= %(sdate)s and dt < %(edate)s and uid is not null
                group by uid, channel, group, ostype, jointdatafrom, datetimes
            ) t
            where rn = 1
        ) t4 on t3.datetimes = t4.datetimes and t3.uid = t4.uid
    ) t6 on t5.uid = t6.uid
    where t5.uid is null
    ''' % {"sdate": start_date, "edate": end_date}

    logger.warn(execute_sql_increase_table, 'sql')
    increase_table = spark.sql(execute_sql_increase_table)
    increase_table.write.mode('overwrite').format("orc").saveAsTable(
        "stag.firstlogin_mobile_lianyun_user")

    # Insert the staging data into the final table
    execute_sql_into_the_table = '''
    select t6.uid, t6.date, t6.time, t6.channel, t6.group, t6.ostype, t6.jointdatafrom,
           t6.date as dt
    from stag.firstlogin_mobile_lianyun_user t6
    '''

    logger.warn(execute_sql_into_the_table, 'sql')
    the_end_table = spark.sql(execute_sql_into_the_table)
    the_end_table.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.firstlogin_mobile_lianyun_user")

    '''
    Incrementally insert the generated data into MongoDB
    '''
    mongo = mongoExecute()
    mongo.collectionAppend(the_end_table, "bi", "firstlogin_mobile_lianyun_user",
                           start_date, end_date)

    logger.warn("Job over", "banner")
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    '''
    Processing logic, rough steps: drop partitions -> staging table -> insert into the final table
    '''
    # Drop partitions
    hive_partition = partition(spark, logger)
    hive_partition.dropPartition("bi.firstlogin_yaodou_user_game", "dt", start_date, end_date)

    execute_sql_increase_table = '''
    select t6.uid, t6.date, t6.group, t6.game
    from (
        select uid, game from bi.firstlogin_yaodou_user_game
    ) t5
    right join (
        select t3.uid uid,
               cast(substr(t3.datetimes,1,8) as int) date,
               t4.group group,
               t3.game game
        from (
            select t1.game_id game, t2.uid uid, t2.datetimes datetimes
            from (
                select game_id from dwd.dim_game_dict where run_type = '联运_妖豆'
            ) t1
            left join (
                select game, uid, min(datetimes) datetimes
                from bi.firstlogin_whole
                where dt >= %(sdate)s and dt < %(edate)s and uid is not null and game is not null
                  and group > 0 and group not in (6,66,8,88,68,69,55)
                group by game, uid
            ) t2 on t1.game_id = t2.game
            where t2.uid is not null
            group by t1.game_id, t2.uid, t2.datetimes
        ) t3
        left join (
            select t.uid uid, t.game game, t.datetimes datetimes, t.group group
            from (
                select uid, game, min(datetimes) as datetimes, group,
                       row_number() over(partition by uid, game order by datetimes) as rn
                from bi.firstlogin_whole
                where dt >= %(sdate)s and dt < %(edate)s and uid is not null
                group by uid, game, group, datetimes
            ) t
            where rn = 1
        ) t4 on t3.datetimes = t4.datetimes and t3.uid = t4.uid and t3.game = t4.game
    ) t6 on t5.uid = t6.uid and t5.game = t6.game
    where t5.uid is null
    ''' % {"sdate": start_date, "edate": end_date}

    logger.warn(execute_sql_increase_table, 'sql')
    increase_table = spark.sql(execute_sql_increase_table)
    increase_table.write.mode('overwrite').format("orc").saveAsTable(
        "stag.firstlogin_yaodou_user_game")

    # Insert the staging data into the final table
    execute_sql_into_the_table = '''
    select t6.uid, t6.date, t6.group, t6.game,
           t6.date as dt
    from stag.firstlogin_yaodou_user_game t6
    '''

    logger.warn(execute_sql_into_the_table, 'sql')
    the_end_table = spark.sql(execute_sql_into_the_table)
    the_end_table.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.firstlogin_yaodou_user_game")

    '''
    Incrementally insert the generated data into MongoDB
    '''
    mongo = mongoExecute()
    mongo.collectionAppend(the_end_table, "bi", "firstlogin_yaodou_user_game",
                           start_date, end_date)

    logger.warn("Job over", "banner")