def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    '''
    Processing logic, rough steps: read and transform data -> write to Hive -> write to MongoDB
    '''
    hive_partition = partition(spark, logger)
    hive_partition.dropPartition(
        "bi.revenue_spend_coupon_cash_daily_agg_level_2", "dt", start_date, end_date)
    hive_partition.dropPartition(
        "bi.revenue_spend_coupon_cash_daily_agg_level_1", "dt", start_date, end_date)

    '''
    Build the coupon level-2 table
    '''
    execute_sql_level_2 = '''
    select NVL(a.game_id,-1) game_id, NVL(a.game_code,'') game_code, NVL(a.date,-1) date,
           NVL(a.package_type,-1) package_type_id, NVL(b.enum_value,'') package_type_name,
           NVL(a.from_app_id,-1) from_app_id, NVL(a.from_app_code, '') from_app_code,
           NVL(a.os_type,-1) os_type,
           a.recom_game_id, a.recom_game_code, a.recom_game_relation, a.dt, a.amount
    from (
        select game_id, game_code, date, package_type, from_app_id, from_app_code, os_type,
               recom_game_id, recom_game_code, recom_game_relation,
               cast(date as string) as dt, sum(value) as amount
        from (
            select game as game_id, gamecode as game_code, date,
                   case when ostype = 3 then 400 else pkgtype end as package_type,
                   fromapp as from_app_id, fromappcode as from_app_code, ostype as os_type,
                   recomgame as recom_game_id, recomgamecode as recom_game_code,
                   case when recomgame is not null or (recomgamecode is not null and recomgamecode!='')
                        then 3 else 1 end as recom_game_relation,
                   dt, value
            from ods.gscoupondb_consumecash
            where dt='%s' and date >= '%s' and date < '%s' and optypeid=12
            union all
            select recomgame as game_id, recomgamecode as game_code, date,
                   case when ostype = 3 then 400 else pkgtype end as package_type,
                   fromapp as from_app_id, fromappcode as from_app_code, ostype as os_type,
                   game as recom_game_id, gamecode as recom_game_code,
                   2 as recom_game_relation,
                   dt, value
            from ods.gscoupondb_consumecash
            where dt='%s' and date >= '%s' and date < '%s' and optypeid=12
              and (recomgame is not null or (recomgamecode is not null and recomgamecode!=''))
        ) t1
        group by t1.game_id, t1.game_code, t1.date, t1.package_type, t1.from_app_id,
                 t1.from_app_code, t1.os_type, t1.recom_game_id, t1.recom_game_code,
                 t1.recom_game_relation
    ) a
    left join (
        select * from dwd.dim_common_enum_dict where enum_type = 'pkgtype'
    ) b on a.package_type = b.enum_key''' % (
        start_date[0:6], start_date, end_date, start_date[0:6], start_date, end_date)

    logger.warn(execute_sql_level_2, 'sql')
    df_level_2 = spark.sql(execute_sql_level_2)
    df_level_2.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.revenue_spend_coupon_cash_daily_agg_level_2")

    '''
    Build the coupon level-1 table
    '''
    execute_sql_level_1 = '''
    select game_id, game_code, date, package_type_id, package_type_name,
           from_app_id, from_app_code, os_type, recom_game_relation,
           sum(amount) as amount, dt
    from bi.revenue_spend_coupon_cash_daily_agg_level_2
    where dt >= '%s' and dt < '%s'
    group by game_id, game_code, date, package_type_id, package_type_name,
             from_app_id, from_app_code, os_type, recom_game_relation, dt''' % (
        start_date, end_date)

    logger.warn(execute_sql_level_1, 'sql')
    df_level_1 = spark.sql(execute_sql_level_1)
    df_level_1.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.revenue_spend_coupon_cash_daily_agg_level_1")

    '''
    Insert into MongoDB
    '''
    mongo = mongoExecute()
    mongo.collectionAppend(
        spark.sql('''
            select game_id as gameId, game_code as gameCode, date,
                   package_type_id as packageTypeId, from_app_id as fromAppId,
                   from_app_code as fromAppCode, os_type as osType,
                   recom_game_relation as recommendRelation, amount as value, dt
            from bi.revenue_spend_coupon_cash_daily_agg_level_1
            where dt >= '%s' and dt < '%s' ''' % (start_date, end_date)),
        "GameProfitDB", "coupon.brief", start_date, end_date)
    mongo.collectionAppend(
        spark.sql('''
            select date, game_id as gameId, game_code as gameCode,
                   recom_game_id as relateGameId, recom_game_code as relateGameCode,
                   package_type_id as packageTypeId, from_app_id as fromAppId,
                   from_app_code as fromAppCode, os_type as osType,
                   recom_game_relation as recommendRelation, amount as value, dt
            from bi.revenue_spend_coupon_cash_daily_agg_level_2
            where dt >= '%s' and dt < '%s' ''' % (start_date, end_date)),
        "GameProfitDB", "coupon.detail", start_date, end_date)
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    # Queries are expressed in HiveQL; currencytype 3 (silver) and 100 (cash)
    gspropsmalldb_mobileprops_sql = '''
    select t1.game as game_id, t1.gamecode as game_code, t1.date as date,
           t1.goodsid as goods_id, t2.goods_name as goods_name,
           t1.pkgtype as package_type, t3.enum_value as package_type_name,
           t1.fromapp as from_app_id, t1.fromappcode as from_app_code, t1.ostype as os_type,
           t1.recomgame as recom_game_id, t1.recomgamecode as recom_game_code,
           case when t1.recomgame is null then 1
                when t1.recomgame is not null then 3
           end as recom_game_relation,
           sum(cash_amount) as cash_amount,
           sum(silver_amount) as silver_amount,
           max(t1.dt) as dt
    from (
        select game, gamecode, date, goodsid,
               case when currencytype = 100 then currencynum else 0 end as cash_amount,
               case when currencytype = 3 then currencynum else 0 end as silver_amount,
               pkgtype, fromapp, fromappcode, ostype, recomgame, recomgamecode, currencynum, dt
        from ods.gspropsmalldb_mobileprops
        where dt >= '%s' and dt < '%s' and currencytype in (100, 3)
    ) t1
    inner join (
        select goods_id, goods_name, goods_class
        from dwd.dim_goods_dict where goods_class = 2
    ) t2 on t1.goodsid = t2.goods_id
    left join (
        select enum_key, enum_value, enum_type
        from dwd.dim_common_enum_dict where enum_type = 'pkgtype'
    ) t3 on t1.pkgtype = t3.enum_key
    group by t1.game, t1.gamecode, t1.date, t1.goodsid, t2.goods_name, t1.pkgtype, t3.enum_value,
             t1.fromapp, t1.fromappcode, t1.ostype, t1.recomgame, t1.recomgamecode
    ''' % (start_date, end_date)

    logger.warn(gspropsmalldb_mobileprops_sql, 'gspropsmalldb_mobileprops_sql ')
    gspropsmalldb_mobileprops_df = spark.sql(gspropsmalldb_mobileprops_sql)

    drop_partition = partition(spark, logger)
    drop_partition.dropPartition(
        "bi.revenue_income_propsmall_daily_agg_level_2_1", "dt", start_date, end_date)

    gspropsmalldb_mobileprops_df \
        .write.partitionBy("dt") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("bi.revenue_income_propsmall_daily_agg_level_2_1")
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    '''
    Processing logic, rough steps: build the staging table -> drop final-table partitions -> insert into the final table

    Final table DDL:
    create table if not exists bi.firstlogin_whole (
        uid int, group int, app int, fromapp int, game int, channel int, promcode int,
        hardid string, sys int, gamecode string, datatype int, pkgtype int, fromappcode string,
        ostype int, jointdatafrom int, datetimes string,
        area_province string, area_city string, area_district string)
    PARTITIONED BY (dt string)
    STORED AS ORC

    Staging table DDL:
    create table if not exists stag.firstlogin_whole (
        uid int, group int, app int, fromapp int, game int, channel int, promcode int,
        hardid string, sys int, gamecode string, datatype int, pkgtype int, fromappcode string,
        ostype int, jointdatafrom int, datetimes string,
        area_province string, area_city string, area_district string)
    STORED AS ORC
    '''

    '''
    Filter by the date window and take the earliest login time for each dimension combination
    '''
    execute_sql_to_stag = '''
    select uid, group, app, fromapp, game, channel, promcode, hardid, sys, gamecode,
           datatype, pkgtype, fromappcode, ostype, jointdatafrom,
           min(concat(cast(date as string),
               case when length(time)=1 then concat('00000',cast(time as string))
                    when length(time)=2 then concat('0000',cast(time as string))
                    when length(time)=3 then concat('000',cast(time as string))
                    when length(time)=4 then concat('00',cast(time as string))
                    when length(time)=5 then concat('0',cast(time as string))
                    when length(time)=6 then cast(time as string)
               end)) datetimes,
           area_province, area_city, area_district
    from ods.gslogindb_logindetail
    where uid is not null and uid <> 0 and dt >= %(sdate)s and dt < %(edate)s
    group by uid, group, app, fromapp, game, channel, promcode, hardid, sys, gamecode,
             datatype, pkgtype, fromappcode, ostype, jointdatafrom,
             area_province, area_city, area_district
    ''' % {"sdate": start_date, "edate": end_date}

    logger.warn(execute_sql_to_stag, 'sql')
    df_stag = spark.sql(execute_sql_to_stag)
    df_stag.write.mode('overwrite').format("orc").saveAsTable("stag.firstlogin_whole")

    '''
    Drop the final table's partitions for the window
    '''
    hive_partition = partition(spark, logger)
    hive_partition.dropPartition("bi.firstlogin_whole", "dt", start_date, end_date)

    '''
    Insert into the final table
    '''
    execute_sql_to_final = '''
    select uid, group, app, fromapp, game, channel, promcode, hardid, sys, gamecode,
           datatype, pkgtype, fromappcode, ostype, jointdatafrom, datetimes,
           area_province, area_city, area_district,
           substr(datetimes,0,8) as dt
    from (
        select /*+ STREAMTABLE(a) */
               b.uid, b.group, b.app, b.fromapp, b.game, b.channel, b.promcode, b.hardid, b.sys,
               b.gamecode, b.datatype, b.pkgtype, b.fromappcode, b.ostype, b.jointdatafrom,
               b.area_province, b.area_city, b.area_district, b.datetimes,
               case when a.datetimes > b.datetimes then 1
                    when a.uid is null then 1
                    else 2
               end judge
        from bi.firstlogin_whole a
        full outer join stag.firstlogin_whole b
          on  NVL(a.uid,'')=NVL(b.uid,'') and NVL(a.group,'')=NVL(b.group,'')
          and NVL(a.app,'')=NVL(b.app,'') and NVL(a.fromapp,'')=NVL(b.fromapp,'')
          and NVL(a.game,'')=NVL(b.game,'') and NVL(a.channel,'')=NVL(b.channel,'')
          and NVL(a.promcode,'')=NVL(b.promcode,'') and NVL(a.hardid,'')=NVL(b.hardid,'')
          and NVL(a.sys,'')=NVL(b.sys,'') and NVL(a.gamecode,'')=NVL(b.gamecode,'')
          and NVL(a.datatype,'')=NVL(b.datatype,'') and NVL(a.pkgtype,'')=NVL(b.pkgtype,'')
          and NVL(a.fromappcode,'')=NVL(b.fromappcode,'') and NVL(a.ostype,'')=NVL(b.ostype,'')
          and NVL(a.jointdatafrom,'')=NVL(b.jointdatafrom,'')
          and NVL(a.area_province,'')=NVL(b.area_province,'')
          and NVL(a.area_city,'')=NVL(b.area_city,'')
          and NVL(a.area_district,'')=NVL(b.area_district,'')
    ) t1
    where judge = 1
    '''

    logger.warn(execute_sql_to_final, 'sql')
    df_final = spark.sql(execute_sql_to_final)
    df_final.write.partitionBy("dt").mode("append").format("orc") \
        .saveAsTable("bi.firstlogin_whole")
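
# NOTE: The CASE chain above zero-pads the integer time column to six digits before
# concatenating it onto date. For illustration only (the job itself keeps the explicit
# CASE chain), the same padding can be written with Hive/Spark's lpad:
def _zero_pad_time_demo(spark):
    from pyspark.sql import functions as F
    demo = spark.createDataFrame([(20181205, 91530)], ["date", "time"])
    return demo.select(
        F.concat(F.col("date").cast("string"),
                 F.lpad(F.col("time").cast("string"), 6, "0")).alias("datetimes"))
    # -> 20181205091530, matching the CASE-based expression in execute_sql_to_stag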
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    # Earlier logic that copied ods.logsdklog_mobile_detail through a coalesced merge table,
    # currently disabled:
    # logsdklog_mobile_detail_sql_1 = """
    #     select uid, date, time, app_id, app_code, app_vers, app_promchann,
    #            area_address, area_province, area_city, area_district, area_ip,
    #            duration, session, sys, tcy_hardid, tcy_imei, tcy_mac, geo, geo_lat, geo_lon,
    #            remark_channelID, father_code, father_id, father_promchann, father_vers, event,
    #            eqpt_carrier, eqpt_facturer, eqpt_idfv, eqpt_mobile, eqpt_net, eqpt_os,
    #            eqpt_resolution-h, eqpt_resolution-w, eqpt_vers, dt
    #     from ods.logsdklog_mobile_detail
    #     where dt >= '%s' and dt < '%s' """ % (start_date, end_date)
    # logger.warn(logsdklog_mobile_detail_sql_1, 'logsdklog_mobile_detail_sql_1 ')
    # gspropsmalldb_mobileprops_df = spark.sql(logsdklog_mobile_detail_sql_1)
    # gspropsmalldb_mobileprops_df.coalesce(1)
    # drop_partition = partition(spark, logger)
    # drop_partition.dropPartition("ods.merge_logsdklog_mobile_detail_1", "dt", start_date, end_date)
    # gspropsmalldb_mobileprops_df \
    #     .coalesce(1) \
    #     .write \
    #     .partitionBy("dt") \
    #     .format("orc") \
    #     .mode("append") \
    #     .saveAsTable("ods.merge_logsdklog_mobile_detail_1")
    #
    # logsdklog_mobile_detail_sql = """
    #     select uid, date, time, app_id, app_code, app_vers, app_promchann,
    #            area_address, area_province, area_city, area_district, area_ip,
    #            duration, session, sys, tcy_hardid, tcy_imei, tcy_mac, geo, geo_lat, geo_lon,
    #            remark_channelID, father_code, father_id, father_promchann, father_vers, event,
    #            eqpt_carrier, eqpt_facturer, eqpt_idfv, eqpt_mobile, eqpt_net, eqpt_os,
    #            eqpt_resolution-h, eqpt_resolution-w, eqpt_vers, dt
    #     from ods.merge_logsdklog_mobile_detail_1
    #     where dt >= '%s' and dt < '%s' """ % (start_date, end_date)
    # logger.warn(logsdklog_mobile_detail_sql, 'logsdklog_mobile_detail_sql ')
    # gspropsmalldb_mobileprops_df = spark.sql(logsdklog_mobile_detail_sql)
    # gspropsmalldb_mobileprops_df.coalesce(1)
    # drop_partition = partition(spark, logger)
    # drop_partition.dropPartition("ods.logsdklog_mobile_detail", "dt", start_date, end_date)
    # gspropsmalldb_mobileprops_df \
    #     .coalesce(1) \
    #     .write \
    #     .partitionBy("dt") \
    #     .format("orc") \
    #     .mode("append") \
    #     .saveAsTable("ods.logsdklog_mobile_detail")

    test_store_in_orc_sql_1 = "select name, " \
                              "age, " \
                              "datetime, " \
                              "date as date " \
                              "from tempdb.test_store_in_orc " \
                              "where date='2018-12-05' and sex = 'female' "

    logger.warn(test_store_in_orc_sql_1, 'test_store_in_orc_sql_1 ')
    test_store_in_orc_df_1 = spark.sql(test_store_in_orc_sql_1)

    drop_partition = partition(spark, logger)
    drop_partition.dropPartition("tempdb.test_store_in_orc_1", "date", start_date, end_date)

    test_store_in_orc_df_1 \
        .coalesce(1) \
        .write \
        .partitionBy("date") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("tempdb.test_store_in_orc_1")

    test_store_in_orc_sql = "select name, " \
                            "age, " \
                            "datetime, " \
                            "date as date, 'female' as sex " \
                            "from tempdb.test_store_in_orc_1 " \
                            "where date='2018-12-05' "

    logger.warn(test_store_in_orc_sql, 'test_store_in_orc_sql ')
    test_store_in_orc_df = spark.sql(test_store_in_orc_sql)

    # drop_partition = partition(spark, logger)
    # drop_partition.dropPartition("tempdb.test_store_in_orc_df", "dt", start_date, end_date)
    test_store_in_orc_df \
        .coalesce(1) \
        .write \
        .partitionBy("date", "sex") \
        .format("orc") \
        .mode("overwrite") \
        .saveAsTable("tempdb.test_store_in_orc")
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    '''
    Processing logic, rough steps: drop partitions -> read and transform data -> write to Hive -> write to MongoDB
    '''

    '''
    Add rows with no recommendation relation and rows on the recommended side of the relation
    '''
    hive_partition = partition(spark, logger)
    hive_partition.dropPartition("bi.revenue_spend_cash_daily_agg_level_2", "dt",
                                 start_date, end_date)

    execute_sql_level_2_step_1 = '''
    select /*+ MAPJOIN(smalltable)*/
           case when game_id is null then -1 else game_id end as game_id,
           case when game_code is null then '' else game_code end as game_code,
           `date`,
           case when item_id is null then -1 else item_id end as item_id,
           case when item_name is null then '' else item_name end as item_name,
           case when package_type is null then -1 else package_type end as package_type,
           case when enum_pkg.enum_value is null then '' else enum_pkg.enum_value end as package_type_name,
           case when from_app_id is null then -1 else from_app_id end as from_app_id,
           case when from_app_code is null then '' else from_app_code end as from_app_code,
           case when os_type is null then -1 else os_type end as os_type,
           recom_game_id, recom_game_code,
           case when recom_game_id is null or recom_game_id = '' then 1 else 3 end as recom_game_relation,
           item_amount, dt
    from (
        select game_id, game_code, `date`, item_id, item_name, package_type,
               from_app_id, from_app_code, os_type, recom_game_id, recom_game_code,
               sum(item_amount) as item_amount, max(dt) as dt
        from (
            select reward.game as game_id, reward.gamecode as game_code, reward.`date`,
                   reward.item as item_id, dict.item_name,
                   case when reward.ostype = 3 then 400 else reward.pkgtype end as package_type,
                   reward.fromapp as from_app_id, reward.fromappcode as from_app_code,
                   reward.ostype as os_type,
                   reward.recomgame as recom_game_id, reward.recomgamecode as recom_game_code,
                   cast(reward.itemnum * dict.item_vale * 100 as bigint) as item_amount,
                   reward.dt
            from ods.gsrewardsystemdb_reward reward, dwd.dim_items_dict dict
            where reward.item = dict.item_id
              and dict.item_type_id_level_1 in (400000000, 500000000)
              and reward.dt >= %(sdate)s and reward.dt < %(edate)s
        ) T1
        group by game_id, game_code, `date`, item_id, item_name, package_type,
                 from_app_id, from_app_code, os_type, recom_game_id, recom_game_code
    ) T2
    left join dwd.dim_common_enum_dict enum_pkg
      on T2.package_type = enum_pkg.enum_key and enum_pkg.enum_type = 'pkgtype'
    ''' % {"sdate": start_date, "edate": end_date}

    logger.warn(execute_sql_level_2_step_1, 'sql')
    df_level_2_step_1 = spark.sql(execute_sql_level_2_step_1)
    df_level_2_step_1.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.revenue_spend_cash_daily_agg_level_2")

    '''
    Derive the primary recommendation relation from the recommended-side rows
    '''
    execute_sql_level_2_step_2 = '''
    select recom_game_id as game_id, recom_game_code as game_code, `date`,
           item_id, item_name, package_type, package_type_name,
           from_app_id, from_app_code, os_type,
           game_id as recom_game_id, game_code as recom_game_code,
           2 as recom_game_relation, item_amount, dt
    from bi.revenue_spend_cash_daily_agg_level_2
    where recom_game_relation = 3
      and dt >= ''' + start_date + ''' and dt < ''' + end_date

    logger.warn(execute_sql_level_2_step_2, 'sql')
    df_level_2_step_2 = spark.sql(execute_sql_level_2_step_2)
    df_level_2_step_2.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.revenue_spend_cash_daily_agg_level_2")

    '''
    Build the level-1 table from the level-2 table by dropping the recommended-game columns
    '''
    hive_partition.dropPartition("bi.revenue_spend_cash_daily_agg_level_1", "dt",
                                 start_date, end_date)

    execute_sql_level_1_step_1 = '''
    select game_id, game_code, `date`, item_id, item_name, package_type, package_type_name,
           from_app_id, from_app_code, os_type, recom_game_relation,
           sum(item_amount) as item_amount, dt
    from bi.revenue_spend_cash_daily_agg_level_2
    where dt >= ''' + start_date + ''' and dt < ''' + end_date + '''
    group by game_id, game_code, `date`, item_id, item_name, package_type, package_type_name,
             from_app_id, from_app_code, os_type, recom_game_relation, dt
    '''

    logger.warn(execute_sql_level_1_step_1, 'sql')
    df_level_1_step_1 = spark.sql(execute_sql_level_1_step_1)
    df_level_1_step_1.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.revenue_spend_cash_daily_agg_level_1")

    '''
    Incrementally insert the generated data into MongoDB
    '''
    mongo = mongoExecute()

    out_put_level_2_sql = '''
    select game_id as gameId, game_code as gameCode, `date`,
           item_id as itemId, item_name as itemName,
           package_type as packageTypeId, package_type_name as packageTypeName,
           from_app_id as fromAppId,
           case when from_app_code is null then '' else from_app_code end as fromAppCode,
           os_type as osType,
           recom_game_id as relateGameId, recom_game_code as relateGameCode,
           recom_game_relation as recommendRelation,
           item_amount as value, dt
    from bi.revenue_spend_cash_daily_agg_level_2
    where dt >= ''' + start_date + ''' and dt < ''' + end_date
    level_2 = spark.sql(out_put_level_2_sql)
    mongo.collectionAppend(level_2, "GameProfitDB", "rmb.detail", start_date, end_date)

    out_put_level_1_sql = '''
    select game_id as gameId, game_code as gameCode, `date`,
           item_id as itemId, item_name as itemName,
           package_type as packageTypeId, package_type_name as packageTypeName,
           from_app_id as fromAppId,
           case when from_app_code is null then '' else from_app_code end as fromAppCode,
           os_type as osType,
           recom_game_relation as recommendRelation,
           item_amount as value, dt
    from bi.revenue_spend_cash_daily_agg_level_1
    where dt >= ''' + start_date + ''' and dt < ''' + end_date
    level_1 = spark.sql(out_put_level_1_sql)
    mongo.collectionAppend(level_1, "GameProfitDB", "rmb.brief", start_date, end_date)
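
# NOTE: This job splices the date window into its SQL in two different ways: dict-style
# %(sdate)s placeholders in step 1, and plain string concatenation (+ start_date +) in the
# later steps. For illustration only, the step-2 filter could use the same placeholder
# style throughout, which keeps quoting of the window in one place:
def _level_2_step_2_sql(start_date, end_date):
    return '''
    select recom_game_id as game_id, recom_game_code as game_code, `date`,
           item_id, item_name, package_type, package_type_name,
           from_app_id, from_app_code, os_type,
           game_id as recom_game_id, game_code as recom_game_code,
           2 as recom_game_relation, item_amount, dt
    from bi.revenue_spend_cash_daily_agg_level_2
    where recom_game_relation = 3 and dt >= '%(sdate)s' and dt < '%(edate)s'
    ''' % {"sdate": start_date, "edate": end_date}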
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    '''
    Processing logic, rough steps: drop partitions -> staging table -> insert into the final table
    '''
    # Drop partitions
    hive_partition = partition(spark, logger)
    hive_partition.dropPartition("bi.firstlogin_playtogether_room_account1st", "dt",
                                 start_date, end_date)

    execute_sql_increase_table = '''
    select t2.uid, t2.app, t2.date, t2.time, t2.group, t2.province, t2.city, t2.district,
           t2.hardid, t2.roomno, t2.roomtype
    from (
        select uid, app, date, time, group, province, city, district, hardid, roomno, roomtype
        from bi.firstlogin_playtogether_room_account1st
    ) t1
    right join (
        select t.uid uid, t.app app, cast(t.date as int) date, cast(t.time as int) time,
               t.group group, t.province province, t.city city, t.district district,
               t.hardid hardid, t.roomno roomno, t.roomtype roomtype
        from (
            select uid, app, date, time, group, province, city, district, hardid, roomno, roomtype,
                   cast(concat(cast(date as string),
                        case when length(time)=1 then concat('00000',cast(time as string))
                             when length(time)=2 then concat('0000',cast(time as string))
                             when length(time)=3 then concat('000',cast(time as string))
                             when length(time)=4 then concat('00',cast(time as string))
                             when length(time)=5 then concat('0',cast(time as string))
                             else cast(time as string)
                        end) as bigint) as date_time,
                   row_number() over(partition by uid, app order by
                        cast(concat(cast(date as string),
                             case when length(time)=1 then concat('00000',cast(time as string))
                                  when length(time)=2 then concat('0000',cast(time as string))
                                  when length(time)=3 then concat('000',cast(time as string))
                                  when length(time)=4 then concat('00',cast(time as string))
                                  when length(time)=5 then concat('0',cast(time as string))
                                  else cast(time as string)
                             end) as bigint)) as rn
            from ods.gsplaytogetherdb_gameactive
            where app is not null and group is not null and group in (6, 66, 8, 88)
        ) t
        where rn = 1
    ) t2 on t1.uid = t2.uid and t1.app = t2.app
    where t1.uid is null
    '''

    logger.warn(execute_sql_increase_table, 'sql')
    increase_table = spark.sql(execute_sql_increase_table)
    increase_table.write.mode('overwrite').format("orc").saveAsTable(
        "stag.firstlogin_playtogether_room_account1st")

    # Insert the staging data into the final table
    execute_sql_into_the_table = '''
    select t2.uid, t2.app, t2.date, t2.time, t2.group, t2.province, t2.city, t2.district,
           t2.hardid, t2.roomno, t2.roomtype,
           t2.date as dt
    from stag.firstlogin_playtogether_room_account1st t2
    '''

    logger.warn(execute_sql_into_the_table, 'sql')
    the_end_table = spark.sql(execute_sql_into_the_table)
    the_end_table.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.firstlogin_playtogether_room_account1st")

    '''
    Incrementally insert the generated data into MongoDB
    '''
    mongo = mongoExecute()
    mongo.collectionAppend(the_end_table, "bi", "firstlogin_playtogether_room_account1st",
                           start_date, end_date)

    logger.warn("Job over", "banner")
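
# NOTE: The incremental load above finds new (uid, app) pairs with the classic
# "right join ... where t1.uid is null" anti-join idiom. Spark SQL also accepts an
# explicit LEFT ANTI JOIN, which states the same intent directly. Illustration only;
# new_rows_sql stands in for the deduplicated first-activation subquery built above.
def _anti_join_sql(new_rows_sql):
    return '''
    select t2.*
    from (%s) t2
    left anti join bi.firstlogin_playtogether_room_account1st t1
      on t1.uid = t2.uid and t1.app = t2.app
    ''' % new_rows_sql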
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    # Queries are expressed in HiveQL; currencytype 3 (silver) and 100 (cash)
    gspropsmalldb_mobileprops_sql = '''
    select t1.game as game_id, t1.gamecode as game_code, t1.date as date,
           t1.goodsid as goods_id, t2.goods_name as goods_name,
           t1.pkgtype as package_type, t3.enum_value as package_type_name,
           t1.fromapp as from_app_id, t1.fromappcode as from_app_code, t1.ostype as os_type,
           t1.recomgame as recom_game_id, t1.recomgamecode as recom_game_code,
           case when t1.recomgame is null then 1
                when t1.recomgame is not null then 3
           end as recom_game_relation,
           sum(cash_amount) as cash_amount,
           sum(silver_amount) as silver_amount,
           max(t1.dt) as dt
    from (
        select game, gamecode, date, goodsid,
               case when currencytype = 100 then currencynum else 0 end as cash_amount,
               case when currencytype = 3 then currencynum else 0 end as silver_amount,
               pkgtype, fromapp, fromappcode, ostype, recomgame, recomgamecode, currencynum,
               date as dt
        from ods.gspropsmalldb_mobileprops
        where dt >= '%s' and dt < '%s' and currencytype in (100, 3)
    ) t1
    inner join (
        select goods_id, goods_name, goods_class
        from dwd.dim_goods_dict where goods_class = 2
    ) t2 on t1.goodsid = t2.goods_id
    left join (
        select enum_key, enum_value, enum_type
        from dwd.dim_common_enum_dict where enum_type = 'pkgtype'
    ) t3 on t1.pkgtype = t3.enum_key
    group by t1.game, t1.gamecode, t1.date, t1.goodsid, t2.goods_name, t1.pkgtype, t3.enum_value,
             t1.fromapp, t1.fromappcode, t1.ostype, t1.recomgame, t1.recomgamecode
    ''' % (start_date, end_date)

    logger.warn(gspropsmalldb_mobileprops_sql, 'gspropsmalldb_mobileprops_sql ')
    gspropsmalldb_mobileprops_df = spark.sql(gspropsmalldb_mobileprops_sql)

    drop_partition = partition(spark, logger)
    drop_partition.dropPartition(
        "bi.revenue_income_propsmall_daily_agg_level_2_1", "dt", start_date, end_date)

    gspropsmalldb_mobileprops_df \
        .write.partitionBy("dt") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("bi.revenue_income_propsmall_daily_agg_level_2_1")

    gspaydb_basic_sql = '''
    select t1.game as game_id, t1.gamecode as game_code, t1.paydate as date,
           t1.gamegoodsid as goods_id, t2.goods_name as goods_name,
           t1.pkgtype as package_type, t3.enum_value as package_type_name,
           t1.fromapp as from_app_id, t1.fromappcode as from_app_code, t1.ostype as os_type,
           t1.recomgame as recom_game_id, t1.recomgamecode as recom_game_code,
           case when t1.recomgame is null then 1
                when t1.recomgame is not null then 3
           end as recom_game_relation,
           sum(t1.price) as cash_amount,
           0 as silver_amount,
           t1.dt
    from (
        select game, gamecode, paydate, gamegoodsid, pkgtype, fromapp, fromappcode, ostype,
               recomgame, recomgamecode, prodver, price,
               paydate as dt
        from ods.gspaydb_basic
        where dt='%s' and paydate >= '%s' and paydate < '%s'
          and (prodver is null or prodver = '') and product = 6001
    ) t1
    inner join (
        select goods_id, goods_name, goods_class
        from dwd.dim_goods_dict where goods_class = 2
    ) t2 on t1.gamegoodsid = t2.goods_id
    left join (
        select enum_key, enum_value, enum_type
        from dwd.dim_common_enum_dict where enum_type = 'pkgtype'
    ) t3 on t1.pkgtype = t3.enum_key
    group by t1.game, t1.gamecode, t1.paydate, t1.gamegoodsid, t2.goods_name, t1.pkgtype,
             t3.enum_value, t1.fromapp, t1.fromappcode, t1.ostype, t1.recomgame, t1.recomgamecode,
             t1.dt
    ''' % (start_date[0:6], start_date, end_date)

    logger.warn(gspaydb_basic_sql, 'gspaydb_basic_sql ')
    gspaydb_basic_df = spark.sql(gspaydb_basic_sql)

    gspaydb_basic_df \
        .write.partitionBy("dt") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("bi.revenue_income_propsmall_daily_agg_level_2_1")

    # insert to agg_level_2
    agg_level_2_sql = '''
    select game_id, game_code, date, goods_id, goods_name, package_type, package_type_name,
           from_app_id, from_app_code, os_type, recom_game_id, recom_game_code, recom_game_relation,
           sum(cash_amount) as cash_amount, sum(silver_amount) as silver_amount, dt
    from bi.revenue_income_propsmall_daily_agg_level_2_1
    where dt >= '%s' and dt < '%s'
    group by game_id, game_code, date, goods_id, goods_name, package_type, package_type_name,
             from_app_id, from_app_code, os_type, recom_game_id, recom_game_code,
             recom_game_relation, dt
    ''' % (start_date, end_date)

    logger.warn(agg_level_2_sql, 'agg_level_2_sql ')
    agg_level_2_df = spark.sql(agg_level_2_sql)

    drop_partition.dropPartition(
        "bi.revenue_income_propsmall_daily_agg_level_2", "dt", start_date, end_date)

    agg_level_2_df \
        .write \
        .partitionBy("dt") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("bi.revenue_income_propsmall_daily_agg_level_2")

    main_recommendation_sql = '''
    select recom_game_id as game_id, recom_game_code as game_code, date, goods_id, goods_name,
           package_type, package_type_name, from_app_id, from_app_code, os_type,
           game_id as recom_game_id, game_code as recom_game_code,
           2 as recom_game_relation,
           cash_amount, silver_amount, dt
    from bi.revenue_income_propsmall_daily_agg_level_2
    where dt >= '%s' and dt < '%s' and recom_game_relation = 3
    ''' % (start_date, end_date)

    logger.warn(main_recommendation_sql, 'main_recommendation_sql ')
    main_recommendation_df = spark.sql(main_recommendation_sql)

    main_recommendation_df \
        .write \
        .partitionBy("dt") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("bi.revenue_income_propsmall_daily_agg_level_2")

    # insert to agg_level_1
    agg_level_1_sql = '''
    select game_id, game_code, date, goods_id, goods_name, package_type, package_type_name,
           from_app_id, from_app_code, os_type, recom_game_relation,
           sum(cash_amount) as cash_amount, sum(silver_amount) as silver_amount, dt
    from bi.revenue_income_propsmall_daily_agg_level_2
    where dt >= '%s' and dt < '%s'
    group by game_id, game_code, date, goods_id, goods_name, package_type, package_type_name,
             from_app_id, from_app_code, os_type, recom_game_relation, dt
    ''' % (start_date, end_date)

    drop_partition.dropPartition(
        "bi.revenue_income_propsmall_daily_agg_level_1", "dt", start_date, end_date)
    # drop_partition.dropPartition("bi.revenue_spend_exchange_daily_agg_level_1", "dt", '20181107', '20181108')

    logger.warn(agg_level_1_sql, 'agg_level_1_sql ')
    agg_level_1_df = spark.sql(agg_level_1_sql)

    agg_level_1_df \
        .write \
        .partitionBy("dt") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("bi.revenue_income_propsmall_daily_agg_level_1")

    # insert agg_level_1 into mongoDB
    insert_mongo_agg_level_1_sql = '''
    select game_id as gameId,
           if(game_code is null or game_code = '', '', game_code) as gameCode,
           date as date,
           if(goods_id is null or goods_id = '', -1, goods_id) as propId,
           if(goods_name is null or goods_name = '', '', goods_name) as propName,
           if(package_type is null or package_type = '', -1, package_type) as packageTypeId,
           if(package_type_name is null or package_type_name = '', '', package_type_name) as packageTypeName,
           if(from_app_id is null or from_app_id = '', -1, from_app_id) as fromAppId,
           case when from_app_code is null then ' '
                when from_app_code = '' then ' '
                else from_app_code
           end as fromAppCode,
           if(os_type is null or os_type = '', -1, os_type) as osType,
           if(recom_game_relation is null or recom_game_relation = '', -1, recom_game_relation) as recommendRelation,
           if(cash_amount is null or cash_amount = '', 0, cash_amount) as money,
           if(silver_amount is null or silver_amount = '', 0, silver_amount) as silvers,
           dt
    from bi.revenue_income_propsmall_daily_agg_level_1
    where dt >= '%s' and dt < '%s'
    ''' % (start_date, end_date)

    logger.warn(insert_mongo_agg_level_1_sql, 'insert_mongo_agg_level_1_sql ')
    insert_mongo_agg_level_1_df = spark.sql(insert_mongo_agg_level_1_sql)

    mongo = mongoExecute()
    mongo.collectionAppend(insert_mongo_agg_level_1_df, "GameProfitDB",
                           "unique_prop_income.brief", start_date, end_date)

    # insert agg_level_2 into mongoDB
    insert_mongo_agg_level_2_sql = '''
    select game_id as gameId,
           if(game_code is null or game_code = '', '', game_code) as gameCode,
           date as date,
           if(goods_id is null or goods_id = '', -1, goods_id) as propId,
           if(goods_name is null or goods_name = '', '', goods_name) as propName,
           if(package_type is null or package_type = '', -1, package_type) as packageTypeId,
           if(package_type_name is null or package_type_name = '', '', package_type_name) as packageTypeName,
           if(from_app_id is null or from_app_id = '', -1, from_app_id) as fromAppId,
           case when from_app_code is null then ' '
                when from_app_code = '' then ' '
                else from_app_code
           end as fromAppCode,
           if(os_type is null or os_type = '', -1, os_type) as osType,
           recom_game_id as relateGameId,
           recom_game_code as relateGameCode,
           recom_game_relation as recommendRelation,
           if(cash_amount is null or cash_amount = '', 0, cash_amount) as money,
           if(silver_amount is null or silver_amount = '', 0, silver_amount) as silvers,
           dt
    from bi.revenue_income_propsmall_daily_agg_level_2
    where dt >= '%s' and dt < '%s'
    ''' % (start_date, end_date)
    # % (20181108, 20181109)

    logger.warn(insert_mongo_agg_level_2_sql, 'insert_mongo_agg_level_2_sql ')
    insert_mongo_agg_level_2_df = spark.sql(insert_mongo_agg_level_2_sql)

    mongo.collectionAppend(insert_mongo_agg_level_2_df, "GameProfitDB",
                           "unique_prop_income.detail", start_date, end_date)
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    '''
    Processing logic, rough steps: read and transform data -> write to Hive -> write to MongoDB
    '''
    hive_partition = partition(spark, logger)
    hive_partition.dropPartition("bi.revenue_silver_daily_agg_level_2", "dt",
                                 start_date, end_date)
    hive_partition.dropPartition("bi.revenue_silver_daily_agg_level_1", "dt",
                                 start_date, end_date)

    '''
    Build the silver level-2 table
    '''
    execute_sql_level_2 = '''
    select NVL(t2.game_id, -1) game_id, NVL(t2.game_code, '') game_code, NVL(t2.date,-1) date,
           NVL(t2.op_id,-1) op_id, NVL(t2.op_name,'') op_name,
           NVL(t2.package_type,-1) package_type, NVL(t3.enum_value,'') as package_type_name,
           NVL(t2.from_app_id,-1) from_app_id, NVL(t2.from_app_code, '') from_app_code,
           NVL(t2.os_type,-1) os_type,
           NVL(t2.op_type_classified_name,'') op_type_classified_name,
           t2.recom_game_id, t2.recom_game_code, t2.recom_game_relation,
           t2.silver_amount, t2.dt
    from (
        select t1.game_id, t1.game_code, t1.date, t1.op_id, t1.op_name,
               case when t1.os_type_id = 3 then 400 else t1.package_type_id end as package_type,
               t1.from_app_id, t1.from_app_code, t1.os_type_id as os_type,
               t1.op_type_classified_name,
               t1.recom_game_id, t1.recom_game_code, t1.recom_game_relation,
               sum(abs(t1.silver_deposit)) as silver_amount,
               max(t1.dt) as dt
        from (
            select recom_game_id as game_id, recom_game_code as game_code, date, op_id, op_name,
                   package_type_id, from_app_id, from_app_code, os_type_id,
                   game_id as recom_game_id, game_code as recom_game_code,
                   2 as recom_game_relation,
                   silver_deposit,
                   case when op_type_id in(4,5) then 0 when op_type_id in(1,3) then 1 end as op_type_classified_name,
                   dt
            from dwd.fact_silver_detail
            where dt >= '%s' and dt < '%s' and log_source='gamelog' and op_type_id in(1,3,4,5)
              and (recom_game_id is not null or (recom_game_code is not null and recom_game_code!=''))
            union all
            select game_id, game_code, date, op_id, op_name,
                   package_type_id, from_app_id, from_app_code, os_type_id,
                   recom_game_id, recom_game_code,
                   case when recom_game_id is not null or (recom_game_code is not null and recom_game_code!='')
                        then 3 else 1 end as recom_game_relation,
                   silver_deposit,
                   case when op_type_id in(4,5) then 0 when op_type_id in(1,3) then 1 end as op_type_classified_name,
                   dt
            from dwd.fact_silver_detail
            where dt >= '%s' and dt < '%s' and log_source='gamelog' and op_type_id in(1,3,4,5)
        ) t1
        group by t1.game_id, t1.game_code, t1.date, t1.op_id, t1.op_name, t1.package_type_id,
                 t1.from_app_id, t1.from_app_code, t1.os_type_id, t1.op_type_classified_name,
                 t1.recom_game_relation, t1.recom_game_id, t1.recom_game_code
    ) t2
    left join (
        select * from dwd.dim_common_enum_dict where enum_type = 'pkgtype'
    ) t3 on t2.package_type = t3.enum_key''' % (start_date, end_date, start_date, end_date)

    logger.warn(execute_sql_level_2, 'sql')
    df_level_2 = spark.sql(execute_sql_level_2)
    df_level_2.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.revenue_silver_daily_agg_level_2")

    '''
    Build the silver level-1 table
    '''
    execute_sql_level_1 = '''
    select game_id, game_code, date, op_id, op_name, package_type, package_type_name,
           from_app_id, from_app_code, os_type, op_type_classified_name, recom_game_relation,
           sum(silver_amount) as silver_amount, dt
    from bi.revenue_silver_daily_agg_level_2
    where dt >= '%s' and dt < '%s'
    group by game_id, game_code, date, op_id, op_name, package_type, package_type_name,
             from_app_id, from_app_code, os_type, op_type_classified_name,
             recom_game_relation, dt''' % (start_date, end_date)

    logger.warn(execute_sql_level_1, 'sql')
    df_level_1 = spark.sql(execute_sql_level_1)
    df_level_1.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.revenue_silver_daily_agg_level_1")

    '''
    Insert into MongoDB
    '''
    mongo = mongoExecute()
    mongo.collectionAppend(
        spark.sql('''
            select game_id as gameId, game_code as gameCode, date,
                   op_id as operateId, op_name as operateName,
                   package_type as packageTypeId, from_app_id as fromAppId,
                   from_app_code as fromAppCode, os_type as osType,
                   recom_game_relation as recommendRelation,
                   silver_amount as silvers, op_type_classified_name as type, dt
            from bi.revenue_silver_daily_agg_level_1
            where dt >= '%s' and dt < '%s' ''' % (start_date, end_date)),
        "GameProfitDB", "silver.brief", start_date, end_date)
    mongo.collectionAppend(
        spark.sql('''
            select game_id as gameId, game_code as gameCode, date,
                   op_id as operateId,
                   package_type as packageTypeId, from_app_id as fromAppId,
                   from_app_code as fromAppCode, os_type as osType,
                   recom_game_id as relateGameId, recom_game_code as relateGameCode,
                   recom_game_relation as recommendRelation,
                   silver_amount as silvers, op_type_classified_name as type, dt
            from bi.revenue_silver_daily_agg_level_2
            where dt >= '%s' and dt < '%s' ''' % (start_date, end_date)),
        "GameProfitDB", "silver.detail", start_date, end_date)
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    gsgiftcoupon_acquiregc_sql = '''
    select t1.game as game_id, t1.gamecode as game_code, t1.date as date,
           t1.pkgtype as package_type, t2.enum_value as package_type_name,
           t1.fromapp as from_app_id, t1.fromappcode as from_app_code, t1.ostype as os_type,
           t1.recomgame as recom_game_id, t1.recomgamecode as recom_game_code,
           case when t1.recomgame is null then 1
                when t1.recomgame is not null then 3
           end as recom_game_relation,
           sum(t1.num) as giftcoupon_amount,
           max(t1.dt) as dt
    from (
        select game, gamecode, date, pkgtype, fromapp, fromappcode, ostype,
               recomgame, recomgamecode, num, dt
        from ods.gsgiftcoupondb_acquiregc
        where dt >= '%s' and dt < '%s'
    ) t1
    left join (
        select enum_key, enum_value, enum_type
        from dwd.dim_common_enum_dict where enum_type = 'pkgtype'
    ) t2 on t1.pkgtype = t2.enum_key
    group by t1.game, t1.gamecode, t1.date, t1.pkgtype, t2.enum_value,
             t1.fromapp, t1.fromappcode, t1.ostype, t1.recomgame, t1.recomgamecode
    ''' % (start_date, end_date)
    # % (20181107, 20181108)

    logger.warn(gsgiftcoupon_acquiregc_sql, 'gsgiftcoupon_acquiregc_sql')
    gsgiftcoupon_acquiregc_df = spark.sql(gsgiftcoupon_acquiregc_sql)

    hive_partition = partition(spark, logger)
    hive_partition.dropPartition("bi.revenue_spend_exchange_daily_agg_level_2", "dt",
                                 start_date, end_date)

    gsgiftcoupon_acquiregc_df \
        .write.partitionBy("dt") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("bi.revenue_spend_exchange_daily_agg_level_2")
    # gsgiftcoupon_acquiregc_df \
    #     .write \
    #     .format("orc") \
    #     .insertInto("bi.revenue_spend_exchange_daily_agg_level_2")

    main_recommendation_sql = '''
    select recom_game_id as game_id, recom_game_code as game_code, date,
           package_type, package_type_name, from_app_id, from_app_code, os_type,
           game_id as recom_game_id, game_code as recom_game_code,
           2 as recom_game_relation,
           giftcoupon_amount, dt
    from bi.revenue_spend_exchange_daily_agg_level_2
    where dt >= '%s' and dt < '%s' and recom_game_relation = 3
    ''' % (start_date, end_date)
    # % (20181107, 20181108)

    logger.warn(main_recommendation_sql, 'main_recommendation_sql')
    main_recommendation_df = spark.sql(main_recommendation_sql)

    main_recommendation_df \
        .write \
        .partitionBy("dt") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("bi.revenue_spend_exchange_daily_agg_level_2")

    # insert to agg_level_1
    agg_level_1_sql = '''
    select game_id, game_code, date, package_type, package_type_name,
           from_app_id, from_app_code, os_type, recom_game_relation,
           sum(giftcoupon_amount) as giftcoupon_amount, dt
    from bi.revenue_spend_exchange_daily_agg_level_2
    where dt >= '%s' and dt < '%s'
    group by game_id, game_code, date, package_type, package_type_name,
             from_app_id, from_app_code, os_type, recom_game_relation, dt
    ''' % (start_date, end_date)
    # % (20181107, 20181108)

    agg_level_1_partition = partition(spark, logger)
    agg_level_1_partition.dropPartition(
        "bi.revenue_spend_exchange_daily_agg_level_1", "dt", start_date, end_date)

    logger.warn(agg_level_1_sql, 'agg_level_1_sql ')
    agg_level_1_df = spark.sql(agg_level_1_sql)

    agg_level_1_df \
        .write \
        .partitionBy("dt") \
        .format("orc") \
        .mode("append") \
        .saveAsTable("bi.revenue_spend_exchange_daily_agg_level_1")

    # agg_level_1 into mongoDB
    insert_mongo_agg_level_1_sql = '''
    SELECT game_id as gameId,
           if(game_code is null or game_code = '', '', game_code) as gameCode,
           date as date,
           if(package_type is null or package_type = '', -1, package_type) as packageTypeId,
           if(package_type_name is null or package_type_name = '', '', package_type_name) as packageTypeName,
           if(from_app_id is null or from_app_id = '', -1, from_app_id) as fromAppId,
           case when from_app_code is null then ' '
                when from_app_code = '' then ' '
                else from_app_code
           end as fromAppCode,
           if(os_type is null or os_type = '', -1, os_type) as osType,
           recom_game_relation as recommendRelation,
           if(giftcoupon_amount is null or giftcoupon_amount = '', 0, giftcoupon_amount) as count,
           dt
    FROM bi.revenue_spend_exchange_daily_agg_level_1
    where dt >= '%s' and dt < '%s'
    ''' % (start_date, end_date)
    # % (20181107, 20181108)

    logger.warn(insert_mongo_agg_level_1_sql, 'insert_mongo_agg_level_1_sql ')
    insert_mongo_agg_level_1_df = spark.sql(insert_mongo_agg_level_1_sql)

    mongo = mongoExecute()
    mongo.collectionAppend(insert_mongo_agg_level_1_df, "GameProfitDB", "voucher.brief",
                           start_date, end_date)
    # insert_mongo_agg_level_1_df \
    #     .write \
    #     .format("com.mongodb.spark.sql.DefaultSource") \
    #     .mode('overwrite') \
    #     .option("database", "bi") \
    #     .option("collection", "revenue_spend_exchange_daily_agg_level_1") \
    #     .save()

    # agg_level_2 into mongoDB
    insert_mongo_agg_level_2_sql = '''
    SELECT game_id as gameId,
           if(game_code is null or game_code = '', '', game_code) as gameCode,
           date as date,
           if(package_type is null or package_type = '', -1, package_type) as packageTypeId,
           if(package_type_name is null or package_type_name = '', '', package_type_name) as packageTypeName,
           if(from_app_id is null or from_app_id = '', -1, from_app_id) as fromAppId,
           case when from_app_code is null then ' '
                when from_app_code = '' then ' '
                else from_app_code
           end as fromAppCode,
           if(os_type is null or os_type = '', -1, os_type) as osType,
           recom_game_id as relateGameId,
           recom_game_code as relateGameCode,
           recom_game_relation as recommendRelation,
           if(giftcoupon_amount is null or giftcoupon_amount = '', 0, giftcoupon_amount) as count,
           dt
    FROM bi.revenue_spend_exchange_daily_agg_level_2
    where dt >= '%s' and dt < '%s'
    ''' % (start_date, end_date)
    # % (20181107, 20181108)

    logger.warn(insert_mongo_agg_level_2_sql, 'insert_mongo_agg_level_2_sql ')
    insert_mongo_agg_level_2_df = spark.sql(insert_mongo_agg_level_2_sql)

    mongo.collectionAppend(insert_mongo_agg_level_2_df, "GameProfitDB", "voucher.detail",
                           start_date, end_date)
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    '''
    Processing logic, rough steps: drop partitions -> staging table -> insert into the final table
    '''
    # Drop partitions
    hive_partition = partition(spark, logger)
    hive_partition.dropPartition("bi.firstlogin_mobile_lianyun_user", "dt",
                                 start_date, end_date)

    execute_sql_increase_table = '''
    select t6.uid, t6.date, t6.time, t6.channel, t6.group, t6.ostype, t6.jointdatafrom
    from (
        select uid from bi.firstlogin_mobile_lianyun_user
    ) t5
    right join (
        select t3.uid,
               cast(substr(t3.datetimes,1,8) as int) date,
               cast(substr(t3.datetimes,9) as int) time,
               t4.channel, t4.group, t4.ostype, t4.jointdatafrom
        from (
            select t2.uid uid, min(t2.datetimes) datetimes
            from (
                select app_id from dwd.dim_game_dict where run_type = '联运_手游联运'
            ) t1
            left join (
                select app, uid, datetimes
                from bi.firstlogin_whole
                where dt >= %(sdate)s and dt < %(edate)s and uid is not null
                group by app, uid, datetimes
            ) t2 on t1.app_id = t2.app
            where t2.uid is not null
            group by t2.uid
        ) t3
        left join (
            select t.uid uid, t.datetimes datetimes, t.channel channel, t.group group,
                   t.ostype ostype, t.jointdatafrom jointdatafrom
            from (
                select uid, datetimes, channel, group, ostype, jointdatafrom,
                       row_number() over(partition by uid order by datetimes) as rn
                from bi.firstlogin_whole
                where dt >= %(sdate)s and dt < %(edate)s and uid is not null
                group by uid, channel, group, ostype, jointdatafrom, datetimes
            ) t
            where rn = 1
        ) t4 on t3.datetimes = t4.datetimes and t3.uid = t4.uid
    ) t6 on t5.uid = t6.uid
    where t5.uid is null
    ''' % {"sdate": start_date, "edate": end_date}

    logger.warn(execute_sql_increase_table, 'sql')
    increase_table = spark.sql(execute_sql_increase_table)
    increase_table.write.mode('overwrite').format("orc").saveAsTable(
        "stag.firstlogin_mobile_lianyun_user")

    # Insert the staging data into the final table
    execute_sql_into_the_table = '''
    select t6.uid, t6.date, t6.time, t6.channel, t6.group, t6.ostype, t6.jointdatafrom,
           t6.date as dt
    from stag.firstlogin_mobile_lianyun_user t6
    '''

    logger.warn(execute_sql_into_the_table, 'sql')
    the_end_table = spark.sql(execute_sql_into_the_table)
    the_end_table.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.firstlogin_mobile_lianyun_user")

    '''
    Incrementally insert the generated data into MongoDB
    '''
    mongo = mongoExecute()
    mongo.collectionAppend(the_end_table, "bi", "firstlogin_mobile_lianyun_user",
                           start_date, end_date)

    logger.warn("Job over", "banner")
def logic(start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE):
    '''
    Processing logic, rough steps: drop partitions -> staging table -> insert into the final table
    '''
    # Drop partitions
    hive_partition = partition(spark, logger)
    hive_partition.dropPartition("bi.firstlogin_yaodou_user_game", "dt", start_date, end_date)

    execute_sql_increase_table = '''
    select t6.uid, t6.date, t6.group, t6.game
    from (
        select uid, game from bi.firstlogin_yaodou_user_game
    ) t5
    right join (
        select t3.uid uid,
               cast(substr(t3.datetimes,1,8) as int) date,
               t4.group group,
               t3.game game
        from (
            select t1.game_id game, t2.uid uid, t2.datetimes datetimes
            from (
                select game_id from dwd.dim_game_dict where run_type = '联运_妖豆'
            ) t1
            left join (
                select game, uid, min(datetimes) datetimes
                from bi.firstlogin_whole
                where dt >= %(sdate)s and dt < %(edate)s and uid is not null and game is not null
                  and group > 0 and group not in (6,66,8,88,68,69,55)
                group by game, uid
            ) t2 on t1.game_id = t2.game
            where t2.uid is not null
            group by t1.game_id, t2.uid, t2.datetimes
        ) t3
        left join (
            select t.uid uid, t.game game, t.datetimes datetimes, t.group group
            from (
                select uid, game, min(datetimes) as datetimes, group,
                       row_number() over(partition by uid, game order by datetimes) as rn
                from bi.firstlogin_whole
                where dt >= %(sdate)s and dt < %(edate)s and uid is not null
                group by uid, game, group, datetimes
            ) t
            where rn = 1
        ) t4 on t3.datetimes = t4.datetimes and t3.uid = t4.uid and t3.game = t4.game
    ) t6 on t5.uid = t6.uid and t5.game = t6.game
    where t5.uid is null
    ''' % {"sdate": start_date, "edate": end_date}

    logger.warn(execute_sql_increase_table, 'sql')
    increase_table = spark.sql(execute_sql_increase_table)
    increase_table.write.mode('overwrite').format("orc").saveAsTable(
        "stag.firstlogin_yaodou_user_game")

    # Insert the staging data into the final table
    execute_sql_into_the_table = '''
    select t6.uid, t6.date, t6.group, t6.game,
           t6.date as dt
    from stag.firstlogin_yaodou_user_game t6
    '''

    logger.warn(execute_sql_into_the_table, 'sql')
    the_end_table = spark.sql(execute_sql_into_the_table)
    the_end_table.write.partitionBy("dt").mode('append').format("orc") \
        .saveAsTable("bi.firstlogin_yaodou_user_game")

    '''
    Incrementally insert the generated data into MongoDB
    '''
    mongo = mongoExecute()
    mongo.collectionAppend(the_end_table, "bi", "firstlogin_yaodou_user_game",
                           start_date, end_date)

    logger.warn("Job over", "banner")