def fetch_segwords(tablename):
    mysql = msc.MyPymysqlPool("dbMysql")
    sql = "SELECT t1.keyword_segmented,t2.seqno \
            FROM pzbase.ai_keywords_classification_train2 t1 \
            inner join pzbase.ai_keywords_classification_classdef t2 \
                    on t1.class_level1=t2.class_level1 and t1.class_level2=t2.class_level2 and t1.class_level3=t2.class_level3 and t2.valid_flag=1"

    rst1 = mysql.getAll(sql)

    sql = "SELECT keyword_segmented,seqno FROM pzbase.ai_keywords_classification_test2_gnb where length(keyword)!=char_length(keyword) and prediction_method is null"
    rst2 = mysql.getAll(sql)
    mysql.dispose()
    return rst1, rst2
def insert_result(tab_rst, tab_image_q, tab_image_lib, imgid_q, mtid, mpkpnum,
                  kpnum_l, kpnum_q):
    mysql = ms.MyPymysqlPool("dbMysql")
    sql = "insert into %s(img_id_query, img_id_lib, img_path_query, img_path_lib, \
                ratio_kpnum_match, kpnum_match, kpnum_img_query, kpnum_img_lib, op_dt) \
            select t1.img_id, t2.img_id, t1.img_path, t2.img_path, %f, %d, %d, %d, SYSDATE() \
            from %s t1 \
            inner join %s t2 on t2.img_id=%d \
            where t1.img_id=%d" % (tab_rst, mpkpnum / min(kpnum_l, kpnum_q),
                                   mpkpnum, kpnum_q, kpnum_l, tab_image_q,
                                   tab_image_lib, mtid, imgid_q)
    rnum = mysql.insert(sql)
    mysql.dispose()  # 释放资源
    return rnum
def fetch_data(num_s, num_e):
	mysql = ms.MyPymysqlPool("dbMysql")
	sql = "SELECT * FROM imgbase.img_sift_features1 \
			where img_id between %s and %s" % (num_s, num_e)
	rst = mysql.getAll(sql)

	if not rst:
		mysql.dispose()		# 释放资源
		print('		No data!')
		return -1
	else:
		df = pd.DataFrame(list(list(x.values()) for x in rst)).fillna('0')
		print('		%d rows data have been fetched.' % len(df))
		mysql.dispose()
		return df
def update_calibration(dt):
    log_day = dt[0:4] + '-' + dt[4:6] + '-' + dt[6:8]
    # 计算下采样之后'点击/曝光'的比例,并插入dsp_ctr_calibration表中
    sql = "update dspbase.dsp_ctr_calibration set train_positive=(select \
           sum(case when log_clk_flag=1 then 1 else 0 end) \
           from dspbase.dsp_ctr_train where log_day=\'%s\'),train_negative=(select \
           sum(case when log_clk_flag=0 then 1 else 0 end) \
           from dspbase.dsp_ctr_train where log_day=\'%s\'),train_ratio=(select \
           sum(case when log_clk_flag=1 then 1 else 0 end)/sum(case when log_clk_flag=0 then 1 else 0 end) \
           from dspbase.dsp_ctr_train where log_day=\'%s\') where log_day=\'%s\'" % (log_day, log_day, log_day, log_day)
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.update(sql)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()
def fetch_data(table, dt_info, lmt=''):
    mysql = ms.MyPymysqlPool("dbMysql")
    sql = "select \
          camp_settlement_price\
          ,ctv_size\
          ,ifnull(adsp_last7days_ctr,0)\
          ,ifnull(adsp_yesterday_ctr,0)\
          ,ifnull(adsp_last7days_clk_cnt,0)\
          ,ifnull(adsp_yesterday_clk_cnt,0)\
          ,ifnull(ader_last7days_ctr,0)\
          ,ifnull(ader_yesterday_ctr,0)\
          ,ifnull(ader_last7days_clk_cnt,0)\
          ,ifnull(ader_yesterday_clk_cnt,0)\
          ,ifnull(ctv_last7days_ctr,0)\
          ,ifnull(ctv_yesterday_ctr,0)\
          ,ifnull(ctv_last7days_clk_cnt,0)\
          ,ifnull(ctv_yesterday_clk_cnt,0)\
          ,ifnull(media_last7days_ctr,0)\
          ,ifnull(media_yesterday_ctr,0)\
          ,ifnull(media_last7days_clk_cnt,0)\
          ,ifnull(media_yesterday_clk_cnt,0)\
          ,ifnull(city_last7days_ctr,0)\
          ,ifnull(city_yesterday_ctr,0)\
          ,ifnull(city_last7days_clk_cnt,0)\
          ,ifnull(city_yesterday_clk_cnt,0)\
          ,ifnull(ifa_last7days_ctr,0)\
          ,ifnull(ifa_yesterday_ctr,0)\
          ,ifnull(ifa_last7days_clk_cnt,0)\
          ,ifnull(ifa_yesterday_clk_cnt,0)\
          ,ifnull(imei_last7days_ctr,0)\
          ,ifnull(imei_yesterday_ctr,0)\
          ,ifnull(imei_last7days_clk_cnt,0)\
          ,ifnull(imei_yesterday_clk_cnt,0)\
          from dspbase.%s where log_day between DATE_SUB(\'%s\', INTERVAL 30 DAY) and \'%s\' %s" \
          % (table, dt_info, dt_info, lmt)
    rst = mysql.getAll(sql)
    if not rst:
        mysql.dispose()
        print('				No data!')
        exit()
    else:
      df = pd.DataFrame(list(list(x.values()) for x in rst)).fillna(-1)
      print('		%d rows data have been fetched.' % len(df))
      mysql.dispose()
      return df
def fetch_data(tabimg, tabfets, num_s, num_e):
    mysql = ms.MyPymysqlPool("dbMysql")
    sql = "SELECT t2.* \
            FROM %s t1 \
            inner join %s t2 on t1.img_id=t2.img_id \
            where t1.img_id between %s and %s" % (tabimg, tabfets, num_s,
                                                  num_e)
    rst = mysql.getAll(sql)

    if not rst:
        mysql.dispose()  # 释放资源
        print('     No data!')
        return -1
    else:
        df = pd.DataFrame(list(list(x.values()) for x in rst)).fillna('0')
        print('     %d rows data have been fetched.' % len(df))
        mysql.dispose()
        return df
Пример #7
0
def insert_question_similarity(model, vocab, dest_records, base_records,
                               threshold, tablename):
    mysql = msc.MyPymysqlPool("dbMysql")

    # 计算rst中两两记录之间的相似度,将大于阈值的存入question_similarity_table
    i = 0
    j = 0
    for dest_record in dest_records:
        # mysql.begin()  # 开启事务
        seqno1 = dest_record[0]
        stem1 = dest_record[1]
        segwords1 = dest_record[2]
        segwords1_list = segwords1.split()
        max_similarity = -1
        max_seqno = -1
        for base_record in base_records:
            seqno2 = int(base_record[0])
            stem2 = base_record[1]
            segwords2 = base_record[2]
            segwords2_list = segwords2.split()

            if len(segwords1_list) > 0 and len(segwords2_list) > 0:
                similarity = calculate_similarity(model, segwords1_list,
                                                  segwords2_list)
                if similarity > max_similarity:
                    max_similarity = similarity
                    max_seqno = seqno2
                if (similarity >= threshold):
                    sql = ''.join(['insert into ', tablename,
                                   "(question_seqno1,question_seqno2,question_stem1,question_stem2," \
                                   "question_stem_segment1,question_stem_segment2,similarity,load_time)" \
                                   " values(", str(seqno1), ",", str(seqno2), ",'", str(stem1), "','", str(stem2),
                                   "','", str(segwords1), "','", str(segwords2), "',", str(similarity), \
                                   ", CURRENT_TIMESTAMP());"])
                    mysql.insert(sql)
                    i += 1
        j += 1
        print('seqno1, max_seqno, max_similarity: ', seqno1, max_seqno,
              max_similarity)
        mysql.end()  # 结束提交
    mysql.dispose()
    loginfo = '%d similar rows have been inserted into %s!' % (i, tablename)
    gl.write_log(logpath, 'info', loginfo)
def fetch_data(tab_rst,
               lmt_kpsup=0.05,
               lmt_kpup=1,
               op_tt='2000-01-01 00:00:00',
               mtksup=10):
    mysql = ms.MyPymysqlPool("dbMysql")
    sql = "SELECT img_path_query,img_path_lib,ratio_kpnum_match,kpnum_match,kpnum_img_query,kpnum_img_lib \
			FROM %s \
			where ratio_kpnum_match between %f and %f and op_dt>='%s' and kpnum_match>=%d"    \
        % (tab_rst, lmt_kpsup, lmt_kpup, op_tt, mtksup)
    rst = mysql.getAll(sql)

    if not rst:
        mysql.dispose()  # 释放资源
        print('	 No data!')
        return -1
    else:
        df = pd.DataFrame(list(list(x.values()) for x in rst)).fillna('0')
        print('	 %d rows data have been fetched.' % len(df))
        mysql.dispose()
        return df
Пример #9
0
def fetch_data(sqlstr, dataitems):
    """执行数据读取SQL。

    Args:
        sqlstr:特征值读取SQL
        dataitems:特征数据项列表

    Returns:
        df:DataFrame格式SQL返回数据集
    """
    mysql = ms.MyPymysqlPool("dbMysql")
    rst = mysql.getAll(sqlstr)
    if not rst:
        mysql.dispose()   # 释放资源
        print('No data: ', dataitems)
        return None
    else:
        df = pd.DataFrame(list(list(x.values()) for x in rst)).fillna('')
        print(' %d rows have been fetched.' % len(df))
        mysql.dispose()
        return df
Пример #10
0
def load_question_textbook(subject_type, textbook_name, path_textbook_image,
                           tablename):
    """
    加载教材版本信息
    """
    mysql = msc.MyPymysqlPool("dbMysql")
    init_sql = ''.join(['insert into ', tablename, "(path_image_loaded,textbook_name," \
                            "textbook_version_name,grade_name,subject_type,load_time) " \
                            " values('str_path_textbook_image', 'str_textbook_name'," \
                            "        'str_textbook_version_name', 'str_grade_name'," \
                            "        'str_subject_type', CURRENT_TIMESTAMP());", ])

    str_textbook_version_name = textbook_name.split('-')[0].strip(' ')
    str_grade_name = textbook_name.split('-')[1].strip(' ')

    sql = init_sql.replace('str_path_textbook_image', path_textbook_image)
    sql = sql.replace('str_textbook_name', textbook_name)
    sql = sql.replace('str_textbook_version_name', str_textbook_version_name)
    sql = sql.replace('str_grade_name', str_grade_name)
    sql = sql.replace('str_subject_type', subject_type)
    mysql.insert(sql)
    mysql.dispose()
Пример #11
0
def load_question_unit_klpoint(subject_type, textbook_name, file_name_list,
                               tablename_question_textbook,
                               tablename_question_ukt):
    """
    加载文件名对应的章节、知识点信息
    文件名示例: 第1章 两、三位数乘一位数-整数的乘法及应用-单选.csv
               1  时、分、秒-时、分、秒及其关系、单位换算与计算-判断题.csv
    注:由于文件名格式有错误,加载后需要手工编辑数据
    """
    mysql = msc.MyPymysqlPool("dbMysql")

    # 读取数据表中的目录名
    sql0 = ''.join(['select textbook_id from ', tablename_question_textbook, \
                   " where textbook_name='", textbook_name, "' and subject_type='", subject_type, "';", ])
    rstData = mysql.getAll(sql0)
    textbook_id = pd.DataFrame(list(list(x.values())
                                    for x in rstData)).iloc[0][0]

    init_sql_quk = ''.join(['insert into ', tablename_question_ukt, \
                            "(file_name,textbook_id,unit_name," \
                            "knowledge_name,question_type_name,load_time) " \
                            " values('str_file_name', ", str(textbook_id), ", 'str_unit_name'," \
                            "        'str_knowledge_name', 'str_question_type_name', CURRENT_TIMESTAMP());", ])

    # 文件名:"1 观察物体(三)-正方体的特征-判断题.csv"
    for str_file_name in file_name_list:
        str_file_name_prefix = str_file_name.split('.')[0].strip(' ')
        str_unit_name = str_file_name_prefix.split('-')[0]
        str_knowledge_name = str_file_name_prefix.split('-')[1]
        str_question_type_name = str_file_name_prefix.split('-')[2]

        sql = init_sql_quk.replace('str_file_name', str_file_name)
        sql = sql.replace('str_unit_name', str_unit_name)
        sql = sql.replace('str_knowledge_name', str_knowledge_name)
        sql = sql.replace('str_question_type_name', str_question_type_name)
        # print(sql)
        mysql.insert(sql)
    mysql.dispose()
def write_segmented_clearwords(tablename, model_filepath):
    mysql = msc.MyPymysqlPool("dbMysql")

    sql = ''.join(['SELECT question_seqno, question_stem_segment FROM ', \
                   tablename, " where is_segmented=1 and grade_name like '三年级%'", ])
    rst = mysql.getAll(sql)

    model = word2vec.Word2Vec.load(model_filepath)
    vocab = list(model.wv.vocab.keys())
    for record in rst:
        seqno = record["question_seqno"]
        segwords = record["question_stem_segment"]
        segwords_list = segwords.strip(' ').split(' ')
        segwords_clear_list = tml.clear_word_from_vocab(segwords_list, vocab)  # 清除不在model中的单词
        segwords_clear = ' '.join(segwords_clear_list)  # 转换为字符串
        sql = ''.join(['update ', tablename, " set question_stem_segment_clear_rebuild='", \
                       str(segwords_clear), "' where question_seqno=", str(seqno), ";"])
        print(sql)
        try:
            mysql.update(sql)
            mysql.end()
        except Exception:
            print('update error: ', sql)
    mysql.dispose()
def gen_quanitle(logpath, source_table, target_table, dt):
    mysql = msc.MyPymysqlPool("dbMysql")
    sql = "select hour,ind_first_industry_id,site_set,promoted_object_type,group_concat(ctr) as ctrs from {0} \
           where ind_first_industry_id is not null and site_set is not null and promoted_object_type is not null \
           and dt>='{1}' group by hour,ind_first_industry_name,site_set,promoted_object_type;".format(
        source_table, dt)
    try:
        rstData = mysql.getAll(sql)
    except BaseException as e:
        logerror = 'Error occurred when get data from {0} for ctr_level:{1}'.format(
            table, e)
        print(logerror)
        # gl.write_log(logpath, 'error', logerror)
        return False
    finally:
        mysql.dispose()

    if not rstData:
        logerror = 'Get nothing from {0} for ctr_level'.format(table)
        print(logerror)
        # gl.write_log(logpath, 'error', logerror)
        return False

    dataSet = pd.DataFrame(list(rstData))

    quanitle_lists = []
    for i in range(dataSet.shape[0]):
        hour = str(dataSet.iloc[i]['hour'])
        indst_id = str(dataSet.iloc[i]['ind_first_industry_id'])
        site = str(dataSet.iloc[i]['site_set'])
        prd = str(dataSet.iloc[i]['promoted_object_type'])
        ctr_q1, ctr_q2 = calculate_quanitle(str(dataSet.iloc[i]['ctrs']))
        quanitle_list = [hour, site, prd, indst_id, ctr_q1, ctr_q2]
        quanitle_lists.append(quanitle_list)

    # 插入前清空
    mysql = msc.MyPymysqlPool("dbMysql")
    sql = "truncate table {0};".format(target_table)
    try:
        mysql.delete(sql)
    except BaseException as e:
        logerror = 'Error occurred when truncate table {0}:{1}'.format(
            target_table, e)
        print(logerror)
        # gl.write_log(logpath, 'error', logerror)
        return False
    finally:
        mysql.dispose()

    # 将最新的分位数据插入维表
    mysql = msc.MyPymysqlPool("dbMysql")
    sql = "insert into {0}(hour,site_set,promoted_object_type,ind_first_industry_id,ctr_q1,ctr_q2) \
           VALUES (%s,%s,%s,%s,%s,%s);".format(target_table)
    try:
        mysql.insertMany(sql, quanitle_lists)
    except BaseException as e:
        logerror = 'Error occurred when insert data into {0}:{1}'.format(
            target_table, e)
        print(logerror)
        # gl.write_log(logpath, 'error', logerror)
        return False
    finally:
        mysql.dispose()

    return True
def create_merge(dt):
    # 若对应表已经存在,则先删除
    sql = "drop table if exists dspbase.dsp_ctr_daily_merge%s" % dt
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.delete(sql)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()
    
    # 创建空表
    sql = "CREATE TABLE dspbase.dsp_ctr_daily_merge%s ( \
    `log_dt` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL, \
    `log_day` date DEFAULT NULL, \
    `log_hour` tinyint(4) DEFAULT NULL, \
    `log_imp_flag` tinyint(4) DEFAULT NULL, \
    `log_clk_flag` tinyint(4) DEFAULT NULL, \
    `log_kdgresponse_id` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '对应kdgreqeust中的id', \
    `log_downrspcontent` text COLLATE utf8mb4_unicode_ci COMMENT '响应信息', \
    `log_isrespsucc` varchar(10) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '是否返回(0,没有返回,1返回)', \
    `log_accountid` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '账户id', \
    `log_advertiserid` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '广告主id', \
    `log_campaignid` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '计划id', \
    `log_creativeid` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '素材id', \
    `log_mediashowid` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '媒体外部id', \
    `log_channel` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '渠道id', \
    `log_devicetype` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '设备类型,os、android、other', \
    `log_adsize` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '广告尺寸', \
    `log_ctr` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '当前投放素材的ctr', \
    `log_media` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '媒体', \
    `log_adspace` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '广告位', \
    `log_adunit` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '媒体传的原始的广告位的id', \
    `log_adtype` int(11) DEFAULT NULL COMMENT '展示类型', \
    `log_istest` int(11) DEFAULT NULL COMMENT '是否是测试流量   0.正式   1.测试', \
    `log_ip` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '客户端ip', \
    `log_mcf` varchar(10) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '媒体一级分类', \
    `log_mcs` varchar(10) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '媒体二级分类', \
    `log_region` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '地域', \
    `log_time` bigint(20) DEFAULT NULL COMMENT '时间戳', \
    `log_logtype` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '日志类型', \
    `log_token` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL, \
    `log_adnormaltype` int(11) DEFAULT NULL COMMENT '反作弊类型,0表示正常', \
    `log_mediaid` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '媒体id', \
    `log_platid` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '平台id', \
    `log_incost` double(20,2) DEFAULT NULL COMMENT '成本', \
    `log_outcost` double(20,2) DEFAULT NULL COMMENT '消耗', \
    `log_adcost` double(20,2) DEFAULT NULL COMMENT '广告主价格', \
    `log_status` int(11) DEFAULT NULL COMMENT '状态:  0.时间太长失效; 1.正常', \
    `log_kdgrequest_imp_num` int(11) DEFAULT NULL, \
    `log_kdgrequest_device_devicetype` int(11) DEFAULT NULL COMMENT '0.未知;1.pc;2.phone;3.pad;4.tv', \
    `log_kdgrequest_device_make` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '设备制造商', \
    `log_kdgrequest_device_model` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '设备型号', \
    `log_kdgrequest_device_os` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '如:Android、IOS、WindowsPhone', \
    `log_kdgrequest_device_osv` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '操作系统版本;', \
    `log_kdgrequest_device_h` int(11) DEFAULT NULL COMMENT '设备屏幕纵向分辨率', \
    `log_kdgrequest_device_w` int(11) DEFAULT NULL COMMENT '设备屏幕横向分辨率', \
    `log_kdgrequest_device_carrier` int(11) DEFAULT NULL COMMENT '运营商。0.未知;1.中国移动;2.中国联通;3.中国电信', \
    `log_kdgrequest_device_connectiontype` int(11) DEFAULT NULL COMMENT '网络连接类型。0.未知;1.WIFI;2.2G;3.3G;4.4G;5.5G', \
    `log_kdgrequest_device_ifa` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT 'iOS终端设备的明文。', \
    `log_kdgrequest_device_imeimd5` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '设备IMEI(MEID)号,SHA1加密串。', \
    `ader_brand` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '品牌名称', \
    `ader_com_addr` varchar(225) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '公司注册地址', \
    `ader_category` varchar(10) COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT '0' COMMENT '广告主行业分类', \
    `ader_settlement_type` tinyint(4) DEFAULT '1' COMMENT '结算方式 1 CPM 2 CPC', \
    `ader_firstindustry` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '一级行业分类', \
    `ader_secondindustry` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '二级行业分类', \
    `ader_servicerate` tinyint(3) DEFAULT NULL COMMENT '服务费率', \
    `ader_priority` tinyint(4) DEFAULT NULL COMMENT '优先级', \
    `ader_credit` int(11) DEFAULT NULL COMMENT '信用金', \
    `ader_status` tinyint(3) NOT NULL DEFAULT '0' COMMENT '状态(0正常,1删除)', \
    `ader_flag` tinyint(3) NOT NULL DEFAULT '1' COMMENT '1开启,2暂停', \
    `ader_auth_status` tinyint(3) DEFAULT '0' COMMENT '内审状态0待审核,1审核通过,2审核拒绝', \
    `ader_audit_status` tinyint(4) NOT NULL DEFAULT '0' COMMENT '外审状态(0待审核,1通过,2拒绝,3部分通过)', \
    `camp_name` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '计划名称', \
    `camp_status` tinyint(3) NOT NULL DEFAULT '0' COMMENT '计划状态(0正常,1删除)', \
    `camp_flag` tinyint(3) NOT NULL DEFAULT '1' COMMENT '1开启,2暂停', \
    `camp_is_status` tinyint(3) DEFAULT '1' COMMENT '操作来源:1web,2曝光到量,3点击到量,4到期,5程序开启,6日预算到量,7周预算到量,8广告主余额不足', \
    `camp_start_date` varchar(20) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '开始时间', \
    `camp_end_date` varchar(20) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '结束日期', \
    `camp_launch_type` tinyint(4) NOT NULL DEFAULT '1' COMMENT '投放类型  1 尽快投放  2 匀速投放', \
    `camp_settlement_type` tinyint(4) DEFAULT '1' COMMENT '结算类型:1CPM,2CPC', \
    `camp_settlement_price` decimal(10,2) DEFAULT '0.00' COMMENT '结算价格(元)', \
    `camp_budget_type` tinyint(4) DEFAULT '0' COMMENT '预算控制(0不限,1周预算,2日预算)', \
    `camp_budget_num` decimal(20,2) DEFAULT '0.00' COMMENT '预算金额', \
    `camp_frequency_type` tinyint(4) DEFAULT '0' COMMENT '频次控制:(0不限,1周期内单人曝光不超过,2每天单人曝光不超过,3每小时单人曝光不超过', \
    `camp_frequency_num` int(11) DEFAULT '0' COMMENT '频次', \
    `camp_imp_limit` int(11) DEFAULT '0' COMMENT '曝光上限0表示不限', \
    `camp_click_limit` int(11) DEFAULT '0' COMMENT '点击上限0表示不限', \
    `camp_orient_time` text COLLATE utf8mb4_unicode_ci COMMENT '时段定向', \
    `camp_orient_aear` text COLLATE utf8mb4_unicode_ci COMMENT '地域定向', \
    `camp_orient_network` text COLLATE utf8mb4_unicode_ci COMMENT '网络定向(wifi,5G,4G,3G,2G,gprs)', \
    `camp_orient_system` text COLLATE utf8mb4_unicode_ci COMMENT '系统定向(android,ios,windows,mac OS,other)', \
    `camp_orient_plat` text COLLATE utf8mb4_unicode_ci COMMENT '渠道定向', \
    `camp_orient_media` text COLLATE utf8mb4_unicode_ci COMMENT '媒体定向', \
    `camp_orient_adslot` text COLLATE utf8mb4_unicode_ci COMMENT '广告位定向', \
    `camp_orient_media_crowd` text COLLATE utf8mb4_unicode_ci COMMENT '媒体人群定向,json字符串', \
    `camp_orient_interest` text COLLATE utf8mb4_unicode_ci COMMENT '兴趣定向', \
    `ctv_width` int(11) NOT NULL COMMENT '宽', \
    `ctv_height` int(11) NOT NULL COMMENT '高', \
    `ctv_type` varchar(50) COLLATE utf8mb4_unicode_ci NOT NULL COMMENT '创意类型(1banner,2video,3native)', \
    `ctv_style_id` int(11) DEFAULT '1' COMMENT '广告素材类型', \
    `ctv_link_type` tinyint(4) NOT NULL DEFAULT '1' COMMENT '1:H5,2:App下载,3:应用直达', \
    `ctv_size` int(11) NOT NULL COMMENT '素材大小', \
    `ctv_deep_link` text COLLATE utf8mb4_unicode_ci COMMENT 'link_type为1:空。为2:iOS下载地址。为3:json对象包含落地页和唤醒地址', \
    `ctv_first_category` int(8) DEFAULT NULL COMMENT '一级分类', \
    `ctv_second_category` int(8) DEFAULT NULL COMMENT '二级分类', \
    `ctv_label` text COLLATE utf8mb4_unicode_ci COMMENT '标签标识', \
    `ctv_remark` text COLLATE utf8mb4_unicode_ci COMMENT '备注', \
    KEY `idx1` (`log_day`,`log_creativeid`,`log_kdgrequest_device_ifa`(191),`log_kdgrequest_device_imeimd5`(191)), \
    KEY `idx2` (`log_day`,`log_kdgrequest_device_ifa`(191)), \
    KEY `idx3` (`log_day`,`log_kdgrequest_device_imeimd5`(191)), \
    KEY `idx4` (`log_day`,`log_creativeid`) \
  ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; " % dt
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.insert(sql)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()
def insert_dsp_train_base(dt):
    log_day = dt[0:4] + '-' + dt[4:6] + '-' + dt[6:8]
    
    # 若存在该天数据,则首先删除
    sql = "delete from dspbase.dsp_ctr_train where log_day=\'%s\'" % (log_day)
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.delete(sql)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()
    
    # 向训练表中插入有点击的数据
    sql = "insert into dspbase.dsp_ctr_train                                                    \
(log_token                                                                   \
,log_dt                                                                      \
,log_day                                                                     \
,log_hour                                                                    \
,log_clk_flag                                                                \
,log_advertiserid                                                            \
,log_campaignid                                                              \
,log_creativeid                                                              \
,log_mediashowid                                                             \
,log_media                                                                   \
,log_channel                                                                 \
,log_mcf                                                                     \
,log_mcs                                                                     \
,log_kdgrequest_device_devicetype                                            \
,log_kdgrequest_device_make                                                  \
,log_kdgrequest_device_model                                                 \
,log_kdgrequest_device_os                                                    \
,log_kdgrequest_device_carrier                                               \
,log_kdgrequest_device_connectiontype                                        \
,log_kdgrequest_device_ifa                                                   \
,log_kdgrequest_device_imeimd5                                               \
,log_adspace                                                                 \
,log_adunit                                                                  \
,log_adtype                                                                  \
,log_adsize                                                                  \
,log_region_country                                                          \
,log_region_province                                                         \
,log_region_city                                                             \
,ader_brand                                                                  \
,ader_secondindustry                                                         \
,camp_settlement_price                                                       \
,ctv_size                                                                    \
,ctv_label                                                                   \
,ctv_type                                                                    \
,ctv_style_id                                                                \
,ctv_link_type                                                               \
,ctv_second_category                                                         \
,ctv_remark                                                                  \
,truncated_ifa                                                               \
,ifa_cross_ctv                                                               \
,ifa_cross_ad                                                                \
,ifa_cross_campaign                                                          \
,ifa_cross_ctv_style                                                         \
,ifa_cross_ader_brand                                                        \
,ifa_cross_ader_secondindustry                                               \
,ifa_cross_ctv_second_category                                               \
,truncated_imei                                                              \
,imei_cross_ctv                                                              \
,imei_cross_ad                                                               \
,imei_cross_campaign                                                         \
,imei_cross_ctv_style                                                        \
,imei_cross_ader_brand                                                       \
,imei_cross_ader_secondindustry                                              \
,imei_cross_ctv_second_category                                              \
,adsp_last7days_ctr                                                          \
,adsp_yesterday_ctr                                                          \
,adsp_last7days_clk_cnt                                                      \
,adsp_yesterday_clk_cnt                                                      \
,ader_last7days_ctr                                                          \
,ader_yesterday_ctr                                                          \
,ader_last7days_clk_cnt                                                      \
,ader_yesterday_clk_cnt                                                      \
,ctv_last7days_ctr                                                           \
,ctv_yesterday_ctr                                                           \
,ctv_last7days_clk_cnt                                                       \
,ctv_yesterday_clk_cnt                                                       \
,media_last7days_ctr                                                         \
,media_yesterday_ctr                                                         \
,media_last7days_clk_cnt                                                     \
,media_yesterday_clk_cnt                                                     \
,city_last7days_ctr                                                          \
,city_yesterday_ctr                                                          \
,city_last7days_clk_cnt                                                      \
,city_yesterday_clk_cnt                                                      \
,ifa_last7days_ctr                                                           \
,ifa_yesterday_ctr                                                           \
,ifa_last7days_clk_cnt                                                       \
,ifa_yesterday_clk_cnt                                                       \
,imei_last7days_ctr                                                          \
,imei_yesterday_ctr                                                          \
,imei_last7days_clk_cnt                                                      \
,imei_yesterday_clk_cnt                                                      \
)                                                                            \
select                                                                       \
    t1.log_token                                                               \
    ,t1.log_dt                                                                 \
    ,t1.log_day                                                                \
    ,t1.log_hour                                                               \
    ,t1.log_clk_flag                                                           \
    ,t1.log_advertiserid                                                       \
    ,t1.log_campaignid                                                         \
    ,t1.log_creativeid                                                         \
    ,t1.log_mediashowid                                                        \
    ,t1.log_media                                                              \
    ,t1.log_channel                                                            \
    ,t1.log_mcf                                                                \
    ,t1.log_mcs                                                                \
    ,cast(t1.log_kdgrequest_device_devicetype as char(50))                     \
    ,t1.log_kdgrequest_device_make                                             \
    ,t1.log_kdgrequest_device_model                                            \
    ,t1.log_kdgrequest_device_os                                               \
    ,cast(t1.log_kdgrequest_device_carrier as char(50))                        \
    ,cast(t1.log_kdgrequest_device_connectiontype as char(50))                 \
    ,t1.log_kdgrequest_device_ifa                                              \
    ,t1.log_kdgrequest_device_imeimd5                                          \
    ,t1.log_adspace                                                            \
    ,t1.log_adunit                                                             \
    ,t1.log_adtype                                                             \
    ,cast(t1.log_adtype as char(50))                                           \
    ,t8.country                                                                \
    ,t8.province                                                               \
    ,t8.city                                                                   \
    ,t1.ader_brand                                                             \
    ,t1.ader_secondindustry                                                    \
    ,t1.camp_settlement_price                                                  \
    ,t1.ctv_size                                                               \
    ,t1.ctv_label                                                              \
    ,t1.ctv_type                                                               \
    ,cast(t1.ctv_style_id as char(50))                                         \
    ,cast(t1.ctv_link_type as char(50))                                        \
    ,cast(t1.ctv_second_category as char(50))                                  \
    ,t1.ctv_remark                                                             \
    ,right(t1.log_kdgrequest_device_ifa,2)                                     \
    ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.log_creativeid)          \
    ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.log_adunit)              \
    ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.log_campaignid)          \
    ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.ctv_style_id)            \
    ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.ader_brand)              \
    ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.ader_secondindustry)     \
    ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.ctv_second_category)     \
    ,right(t1.log_kdgrequest_device_imeimd5,2)                                 \
    ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.log_creativeid)      \
    ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.log_adunit)          \
    ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.log_campaignid)      \
    ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.ctv_style_id)        \
    ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.ader_brand)          \
    ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.ader_secondindustry) \
    ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.ctv_second_category) \
    ,t22.click_cnt_week/t22.impression_cnt_week                                 \
    ,t22.click_cnt/t22.impression_cnt                                           \
    ,t22.click_cnt_week                                                        \
    ,t22.click_cnt                                                             \
    ,t2.click_cnt_week/t2.impression_cnt_week                                  \
    ,t2.click_cnt/t2.impression_cnt                                            \
    ,t2.click_cnt_week                                                         \
    ,t2.click_cnt                                                            	\
    ,t3.click_cnt_week/t3.impression_cnt_week                                  \
    ,t3.click_cnt/t3.impression_cnt                                            \
    ,t3.click_cnt_week                                                         \
    ,t3.click_cnt                                                              \
    ,t4.click_cnt_week/t4.impression_cnt_week                                  \
    ,t4.click_cnt/t4.impression_cnt                                            \
    ,t4.click_cnt_week                                                         \
    ,t4.click_cnt                                                              \
    ,t5.click_cnt_week/t5.impression_cnt_week                                  \
    ,t5.click_cnt/t5.impression_cnt                                            \
    ,t5.click_cnt_week                                                         \
    ,t5.click_cnt                                                              \
    ,t6.click_cnt_week/t6.impression_cnt_week                                  \
    ,t6.click_cnt/t6.impression_cnt                                            \
    ,t6.click_cnt_week                                                         \
    ,t6.click_cnt                                                              \
    ,t7.click_cnt_week/t7.impression_cnt_week                                  \
    ,t7.click_cnt/t7.impression_cnt                                            \
    ,t7.click_cnt_week                                                         \
    ,t7.click_cnt                                                              \
from dspbase.dsp_ctr_daily_merge%s t1                                                \
left join dspbase.dsp_ctr_stat_adspace t22                                           \
on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t22.log_day                        \
and t1.log_adspace = t22.log_adspace                                         \
left join dspbase.dsp_ctr_stat_advertiser t2                                         \
on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t2.log_day                         \
and t1.log_advertiserid = t2.log_advertiserid                                \
left join dspbase.dsp_ctr_stat_creative t3                                           \
on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t3.log_day                         \
and t1.log_creativeid = t3.log_creativeid                                    \
left join dspbase.dsp_ctr_stat_media t4                                              \
on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t4.log_day                         \
and t1.log_media = t4.log_media                                              \
left join dspbase.dsp_ctr_stat_city t5                                               \
on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t5.log_day                         \
and t1.log_region = t5.log_region_city                                       \
left join dspbase.dsp_ctr_stat_ifa t6                                                \
on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t6.log_day                         \
and t1.log_kdgrequest_device_ifa = t6.log_kdgrequest_device_ifa              \
left join dspbase.dsp_ctr_stat_imei t7                                               \
on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t7.log_day                         \
and t1.log_kdgrequest_device_imeimd5 = t7.log_kdgrequest_device_imeimd5      \
left join dspbase.dsp_config_city t8                                                 \
on t1.log_region = t8.city                                                   \
where t1.log_istest=0                                                        \
and locate('测试',t1.ader_brand)=0                                             \
and t1.log_adNormalType=0                                                    \
and t1.log_clk_flag=1;" % (dt)
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.insert(sql)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()
    # 有曝光无点击的数据,下采样,倍率为0.025
    sql1 = "select cast((select count(*)*0.025 from dspbase.dsp_ctr_daily_merge%s where log_clk_flag=0) as signed);" % (dt)
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        num = mysql.getOne(sql1)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()
    num = list(num.values())[0]
    # 向训练表中插入下采样数据
    sql2 = "insert into dspbase.dsp_ctr_train                                                    \
(log_token                                                                   \
,log_dt                                                                      \
,log_day                                                                     \
,log_hour                                                                    \
,log_clk_flag                                                                \
,log_advertiserid                                                            \
,log_campaignid                                                              \
,log_creativeid                                                              \
,log_mediashowid                                                             \
,log_media                                                                   \
,log_channel                                                                 \
,log_mcf                                                                     \
,log_mcs                                                                     \
,log_kdgrequest_device_devicetype                                            \
,log_kdgrequest_device_make                                                  \
,log_kdgrequest_device_model                                                 \
,log_kdgrequest_device_os                                                    \
,log_kdgrequest_device_carrier                                               \
,log_kdgrequest_device_connectiontype                                        \
,log_kdgrequest_device_ifa                                                   \
,log_kdgrequest_device_imeimd5                                               \
,log_adspace                                                                 \
,log_adunit                                                                  \
,log_adtype                                                                  \
,log_adsize                                                                  \
,log_region_country                                                          \
,log_region_province                                                         \
,log_region_city                                                             \
,ader_brand                                                                  \
,ader_secondindustry                                                         \
,camp_settlement_price                                                       \
,ctv_size                                                                    \
,ctv_label                                                                   \
,ctv_type                                                                    \
,ctv_style_id                                                                \
,ctv_link_type                                                               \
,ctv_second_category                                                         \
,ctv_remark                                                                  \
,truncated_ifa                                                               \
,ifa_cross_ctv                                                               \
,ifa_cross_ad                                                                \
,ifa_cross_campaign                                                          \
,ifa_cross_ctv_style                                                         \
,ifa_cross_ader_brand                                                        \
,ifa_cross_ader_secondindustry                                               \
,ifa_cross_ctv_second_category                                               \
,truncated_imei                                                              \
,imei_cross_ctv                                                              \
,imei_cross_ad                                                               \
,imei_cross_campaign                                                         \
,imei_cross_ctv_style                                                        \
,imei_cross_ader_brand                                                       \
,imei_cross_ader_secondindustry                                              \
,imei_cross_ctv_second_category                                              \
,adsp_last7days_ctr                                                          \
,adsp_yesterday_ctr                                                          \
,adsp_last7days_clk_cnt                                                      \
,adsp_yesterday_clk_cnt                                                      \
,ader_last7days_ctr                                                          \
,ader_yesterday_ctr                                                          \
,ader_last7days_clk_cnt                                                      \
,ader_yesterday_clk_cnt                                                      \
,ctv_last7days_ctr                                                           \
,ctv_yesterday_ctr                                                           \
,ctv_last7days_clk_cnt                                                       \
,ctv_yesterday_clk_cnt                                                       \
,media_last7days_ctr                                                         \
,media_yesterday_ctr                                                         \
,media_last7days_clk_cnt                                                     \
,media_yesterday_clk_cnt                                                     \
,city_last7days_ctr                                                          \
,city_yesterday_ctr                                                          \
,city_last7days_clk_cnt                                                      \
,city_yesterday_clk_cnt                                                      \
,ifa_last7days_ctr                                                           \
,ifa_yesterday_ctr                                                           \
,ifa_last7days_clk_cnt                                                       \
,ifa_yesterday_clk_cnt                                                       \
,imei_last7days_ctr                                                          \
,imei_yesterday_ctr                                                          \
,imei_last7days_clk_cnt                                                      \
,imei_yesterday_clk_cnt                                                      \
)                                                                            \
select                                                                       \
    t1.log_token                                                               \
    ,t1.log_dt                                                                 \
    ,t1.log_day                                                                \
    ,t1.log_hour                                                               \
    ,t1.log_clk_flag                                                           \
    ,t1.log_advertiserid                                                       \
    ,t1.log_campaignid                                                         \
    ,t1.log_creativeid                                                         \
    ,t1.log_mediashowid                                                        \
    ,t1.log_media                                                              \
    ,t1.log_channel                                                            \
    ,t1.log_mcf                                                                \
    ,t1.log_mcs                                                                \
    ,cast(t1.log_kdgrequest_device_devicetype as char(50))                     \
    ,t1.log_kdgrequest_device_make                                             \
    ,t1.log_kdgrequest_device_model                                            \
    ,t1.log_kdgrequest_device_os                                               \
    ,cast(t1.log_kdgrequest_device_carrier as char(50))                        \
    ,cast(t1.log_kdgrequest_device_connectiontype as char(50))                 \
    ,t1.log_kdgrequest_device_ifa                                              \
    ,t1.log_kdgrequest_device_imeimd5                                          \
    ,t1.log_adspace                                                            \
    ,t1.log_adunit                                                             \
    ,t1.log_adtype                                                             \
    ,cast(t1.log_adtype as char(50))                                           \
    ,t8.country                                                                \
    ,t8.province                                                               \
    ,t8.city                                                                   \
    ,t1.ader_brand                                                             \
    ,t1.ader_secondindustry                                                    \
    ,t1.camp_settlement_price                                                  \
    ,t1.ctv_size                                                               \
    ,t1.ctv_label                                                              \
    ,t1.ctv_type                                                               \
    ,cast(t1.ctv_style_id as char(50))                                         \
    ,cast(t1.ctv_link_type as char(50))                                        \
    ,cast(t1.ctv_second_category as char(50))                                  \
    ,t1.ctv_remark                                                             \
    ,right(t1.log_kdgrequest_device_ifa,2)                                     \
    ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.log_creativeid)          \
    ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.log_adunit)              \
    ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.log_campaignid)          \
    ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.ctv_style_id)            \
    ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.ader_brand)              \
    ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.ader_secondindustry)     \
    ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.ctv_second_category)     \
    ,right(t1.log_kdgrequest_device_imeimd5,2)                                 \
    ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.log_creativeid)      \
    ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.log_adunit)          \
    ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.log_campaignid)      \
    ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.ctv_style_id)        \
    ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.ader_brand)          \
    ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.ader_secondindustry) \
    ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.ctv_second_category) \
    ,t22.click_cnt_week/t22.impression_cnt_week                                 \
    ,t22.click_cnt/t22.impression_cnt                                           \
    ,t22.click_cnt_week                                                        \
    ,t22.click_cnt                                                             \
    ,t2.click_cnt_week/t2.impression_cnt_week                                  \
    ,t2.click_cnt/t2.impression_cnt                                            \
    ,t2.click_cnt_week                                                         \
    ,t2.click_cnt	                                                           \
    ,t3.click_cnt_week/t3.impression_cnt_week                                  \
    ,t3.click_cnt/t3.impression_cnt                                            \
    ,t3.click_cnt_week                                                         \
    ,t3.click_cnt                                                              \
    ,t4.click_cnt_week/t4.impression_cnt_week                                  \
    ,t4.click_cnt/t4.impression_cnt                                            \
    ,t4.click_cnt_week                                                         \
    ,t4.click_cnt                                                              \
    ,t5.click_cnt_week/t5.impression_cnt_week                                  \
    ,t5.click_cnt/t5.impression_cnt                                            \
    ,t5.click_cnt_week                                                         \
    ,t5.click_cnt                                                              \
    ,t6.click_cnt_week/t6.impression_cnt_week                                  \
    ,t6.click_cnt/t6.impression_cnt                                            \
    ,t6.click_cnt_week                                                         \
    ,t6.click_cnt                                                              \
    ,t7.click_cnt_week/t7.impression_cnt_week                                  \
    ,t7.click_cnt/t7.impression_cnt                                            \
    ,t7.click_cnt_week                                                         \
    ,t7.click_cnt                                                              \
from dspbase.dsp_ctr_daily_merge%s t1                                                \
left join dspbase.dsp_ctr_stat_adspace t22                                           \
on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t22.log_day                        \
and t1.log_adspace = t22.log_adspace                                         \
left join dspbase.dsp_ctr_stat_advertiser t2                                         \
on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t2.log_day                         \
and t1.log_advertiserid = t2.log_advertiserid                                \
left join dspbase.dsp_ctr_stat_creative t3                                           \
on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t3.log_day                         \
and t1.log_creativeid = t3.log_creativeid                                    \
left join dspbase.dsp_ctr_stat_media t4                                              \
on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t4.log_day                         \
and t1.log_media = t4.log_media                                              \
left join dspbase.dsp_ctr_stat_city t5                                               \
on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t5.log_day                         \
and t1.log_region = t5.log_region_city                                       \
left join dspbase.dsp_ctr_stat_ifa t6                                                \
on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t6.log_day                         \
and t1.log_kdgrequest_device_ifa = t6.log_kdgrequest_device_ifa              \
left join dspbase.dsp_ctr_stat_imei t7                                               \
on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t7.log_day                         \
and t1.log_kdgrequest_device_imeimd5 = t7.log_kdgrequest_device_imeimd5      \
left join dspbase.dsp_config_city t8                                                 \
on t1.log_region = t8.city                                                   \
where t1.log_istest=0                                                        \
and locate('测试',t1.ader_brand)=0                                             \
and t1.log_adNormalType=0                                                    \
and t1.log_clk_flag=0 order by rand() limit %d;" % (dt, num)
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.insert(sql2)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()
Пример #16
0
def load_file_data(tablename_question_stem,
                   tablename_question_textbook,
                   tablename_question_ukt,
                   path_textbook,
                   subject_type,
                   textbook_name,
                   skipfirstrow=True):
    """
    读取数据表中的文件路径,加载文件数据
    """
    mysql = msc.MyPymysqlPool("dbMysql")

    # 读取数据表中的目录名
    sql0 = ''.join(['select file_id,file_name from ', tablename_question_ukt, \
                   " t1 inner join ", tablename_question_textbook, " t2 on t1.textbook_id=t2.textbook_id"
                   " where t2.textbook_name='", textbook_name , "' and t2.subject_type='", subject_type, \
                   "' order by file_id;", ])
    rstData = mysql.getAll(sql0)
    if rstData:
        dataSet = pd.DataFrame(list(list(x.values()) for x in rstData))

        init_sql = ''.join(['insert into ', tablename_question_stem, \
                            "(file_id,row_no,question_stem,question_options,difficulty,image_filename,url,load_time)" \
                           " values(str_file_id, str_row_no, 'str_question_stem', " \
                                   "'str_question_options', str_difficulty, 'str_image_filename', " \
                                   "'str_url', CURRENT_TIMESTAMP());", ])
        for i in range(dataSet.shape[0]):
            str_file_id = str(dataSet.iloc[i][0])
            path_file_name = path_textbook + '/' + str(dataSet.iloc[i][1])

            # 读取文件内容,形如:[',5千克铁的质量大于5000克棉花的质量.( \xa0 ),', '', '难度: 0.47', '*.png']
            # mysql.begin()   # 开启事务
            with open(path_file_name, 'r') as f:
                reader = csv.reader(f)
                row_no = 1
                for line in reader:
                    if skipfirstrow and row_no == 1:  # 跳过首行
                        row_no += 1
                        continue
                    str_question_stem = line[0].strip(',').replace(
                        '。,', '。').replace('\'', '"')
                    str_question_options = line[1].replace('[', '').replace(
                        ']', '').replace('\'', '').replace('、,',
                                                           '、').strip(',')
                    str_difficulty = line[2].strip('难度: ')
                    str_image_filename = line[3].strip(' ')
                    str_url = line[4].strip(' ')
                    # print(line, str_question_stem, str_option, str_difficulty, str_image_filename, str_url)

                    # 插入文件名关联数据
                    sql = init_sql.replace('str_file_id', str_file_id)
                    sql = sql.replace('str_row_no', str(row_no))
                    sql = sql.replace('str_question_stem', str_question_stem)
                    sql = sql.replace('str_question_options',
                                      str_question_options)
                    sql = sql.replace('str_difficulty', str_difficulty)
                    sql = sql.replace('str_image_filename', str_image_filename)
                    sql = sql.replace('str_url', str_url)
                    try:
                        # if str_file_id == '2113' and row_no == 2:
                        #     print('path_file_name1: ', path_file_name, str_file_id, '-', row_no)
                        #     print(sql)
                        mysql.insert(sql)
                    except Exception:
                        print('path_file_name: ', path_file_name, str_file_id,
                              '-', row_no)
                        print(sql)

                    row_no += 1
                f.close()
            mysql.end()  # 结束提交
        mysql.dispose()
def update_deviceid_stat(dt):
    log_day = dt[0:4] + '-' + dt[4:6] + '-' + dt[6:8]
    # 若存在该天数据,则首先删除
    sql = "delete from dspbase.dsp_ctr_stat_deviceid where log_day=\'%s\'" % (
        log_day)
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.delete(sql)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()
    # 插入曝光、点击统计值
    sql1 = "insert into dspbase.dsp_ctr_stat_deviceid                                    \
    (log_day,log_kdgrequest_deviceid,impression_cnt,click_cnt)        \
    select log_day,log_kdgrequest_deviceid,count(*),sum(log_clk_flag) \
    from dspbase.dsp_ctr_daily_merge%s                                              \
    where log_day=\'%s\' and log_kdgrequest_deviceid is not null         \
    group by log_day,log_kdgrequest_deviceid;" % (dt, log_day)
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.insert(sql1)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()
    # 删除临时表dsp_ctr_stat_ifa_tmp中数据
    sql2 = "delete from dspbase.dsp_ctr_stat_deviceid_tmp where log_day=\'%s\'" % (
        log_day)
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.delete(sql2)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()
    # 向临时表中插入统计数据
    sql3 = "insert into dspbase.dsp_ctr_stat_deviceid_tmp                                   \
    select \'%s\',log_kdgrequest_deviceid,sum(impression_cnt),sum(click_cnt) \
    from dspbase.dsp_ctr_stat_deviceid                                                      \
    where log_day between DATE_SUB(\'%s\', INTERVAL 6 DAY) and \'%s\'              \
    and log_kdgrequest_deviceid in                                       \
    (select distinct log_kdgrequest_deviceid                             \
    from dspbase.dsp_ctr_daily_merge%s)                                                \
    group by log_kdgrequest_deviceid;" % (log_day, log_day, log_day, dt)
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.insert(sql3)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()
    # 更新impression_cnt_week、click_cnt_week字段
    sql4 = "update dspbase.dsp_ctr_stat_deviceid t1                                \
    left join dspbase.dsp_ctr_stat_deviceid_tmp t2                                 \
    on t1.log_day=t2.log_day                                      \
    and t1.log_kdgrequest_deviceid=t2.log_kdgrequest_deviceid \
    set t1.impression_cnt_week=t2.impression_cnt_week             \
    ,t1.click_cnt_week=t2.click_cnt_week                          \
    where t1.log_day=\'%s\';" % (log_day)
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.insert(sql4)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()
    # 清空临时表dsp_ctr_stat_ifa_tmp
    sql5 = "truncate table dspbase.dsp_ctr_stat_deviceid_tmp"
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.insert(sql5)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()
def update_table_minmax():
    # 若存在该天数据,则首先删除
    sql = "delete from dspbase.dsp_ctr_train_minmax where log_day=\'%s\'" % (log_day)
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.delete(sql)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()
    
    sql = "insert into dspbase.dsp_ctr_train_minmax select \'{0}\',{1},{2},{3},{4},{5},{6},{7},{8},{9},{10}\
               ,{11},{12},{13},{14},{15},{16},{17},{18},{19},{20},{21},{22},{23},{24},{25},{26},{27},{28},{29}\
               ,{30},{31},{32},{33},{34},{35},{36},{37},{38},{39},{40},{41},{42},{43},{44},{45},{46},{47},{48},{49},{50}\
               ,{51},{52},{53},{54},{55},{56},{57},{58},{59},{60}".format(log_day
                                                                          , df['camp_settlement_price'].min()
                                                                          , df['ctv_size'].min()
                                                                          , df['adsp_last7days_ctr'].min()
                                                                          , df['adsp_yesterday_ctr'].min()
                                                                          , df['adsp_last7days_clk_cnt'].min()
                                                                          , df['adsp_yesterday_clk_cnt'].min()
                                                                          , df['ader_last7days_ctr'].min()
                                                                          , df['ader_yesterday_ctr'].min()
                                                                          , df['ader_last7days_clk_cnt'].min()
                                                                          , df['ader_yesterday_clk_cnt'].min()
                                                                          , df['ctv_last7days_ctr'].min()
                                                                          , df['ctv_yesterday_ctr'].min()
                                                                          , df['ctv_last7days_clk_cnt'].min()
                                                                          , df['ctv_yesterday_clk_cnt'].min()
                                                                          , df['media_last7days_ctr'].min()
                                                                          , df['media_yesterday_ctr'].min()
                                                                          , df['media_last7days_clk_cnt'].min()
                                                                          , df['media_yesterday_clk_cnt'].min()
                                                                          , df['city_last7days_ctr'].min()
                                                                          , df['city_yesterday_ctr'].min()
                                                                          , df['city_last7days_clk_cnt'].min()
                                                                          , df['city_yesterday_clk_cnt'].min()
                                                                          , df['ifa_last7days_ctr'].min()
                                                                          , df['ifa_yesterday_ctr'].min()
                                                                          , df['ifa_last7days_clk_cnt'].min()
                                                                          , df['ifa_yesterday_clk_cnt'].min()
                                                                          , df['imei_last7days_ctr'].min()
                                                                          , df['imei_yesterday_ctr'].min()
                                                                          , df['imei_last7days_clk_cnt'].min()
                                                                          , df['imei_yesterday_clk_cnt'].min()
                                                                          , df['camp_settlement_price'].max()
                                                                          , df['ctv_size'].max()
                                                                          , df['adsp_last7days_ctr'].max()
                                                                          , df['adsp_yesterday_ctr'].max()
                                                                          , df['adsp_last7days_clk_cnt'].max()
                                                                          , df['adsp_yesterday_clk_cnt'].max()
                                                                          , df['ader_last7days_ctr'].max()
                                                                          , df['ader_yesterday_ctr'].max()
                                                                          , df['ader_last7days_clk_cnt'].max()
                                                                          , df['ader_yesterday_clk_cnt'].max()
                                                                          , df['ctv_last7days_ctr'].max()
                                                                          , df['ctv_yesterday_ctr'].max()
                                                                          , df['ctv_last7days_clk_cnt'].max()
                                                                          , df['ctv_yesterday_clk_cnt'].max()
                                                                          , df['media_last7days_ctr'].max()
                                                                          , df['media_yesterday_ctr'].max()
                                                                          , df['media_last7days_clk_cnt'].max()
                                                                          , df['media_yesterday_clk_cnt'].max()
                                                                          , df['city_last7days_ctr'].max()
                                                                          , df['city_yesterday_ctr'].max()
                                                                          , df['city_last7days_clk_cnt'].max()
                                                                          , df['city_yesterday_clk_cnt'].max()
                                                                          , df['ifa_last7days_ctr'].max()
                                                                          , df['ifa_yesterday_ctr'].max()
                                                                          , df['ifa_last7days_clk_cnt'].max()
                                                                          , df['ifa_yesterday_clk_cnt'].max()
                                                                          , df['imei_last7days_ctr'].max()
                                                                          , df['imei_yesterday_ctr'].max()
                                                                          , df['imei_last7days_clk_cnt'].max()
                                                                          , df['imei_yesterday_clk_cnt'].max())
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.insert(sql)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()
def update_advertiser_stat(dt):
    log_day = dt[0:4] + '-' + dt[4:6] + '-' + dt[6:8]
    # 若存在该天数据,则首先删除
    sql = "delete from dspbase.dsp_ctr_stat_advertiser where log_day=\'%s\'" % (
        log_day)
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.delete(sql)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()
    # 插入曝光、点击统计值
    sql1 = "insert into dspbase.dsp_ctr_stat_advertiser                  \
    (log_day,log_advertiserid,impression_cnt,click_cnt)        \
    select log_day,log_advertiserid,count(*),sum(log_clk_flag) \
    from dspbase.dsp_ctr_daily_merge%s                                   \
    where log_day=\'%s\'                                         \
    group by log_day,log_advertiserid;" % (dt, log_day)
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.insert(sql1)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()

    sql12 = "insert into dspbase.dsp_ctr_stat_advertiser(log_day,log_advertiserid,impression_cnt,click_cnt) \
                 select \'%s\',t1.log_advertiserid,0,0 from dspbase.dsp_ctr_stat_advertiser t1 \
                 left join dspbase.dsp_ctr_stat_advertiser t2 \
                 on t2.log_day=\'%s\' \
                 and t1.log_advertiserid=t2.log_advertiserid \
                 where t1.log_day between DATE_SUB(\'%s\', INTERVAL 7 DAY) and DATE_SUB(\'%s\', INTERVAL 1 DAY) \
                 and t2.log_advertiserid is null \
                 and t1.impression_cnt_week is not null \
                 group by t1.log_advertiserid;" % (log_day, log_day, log_day,
                                                   log_day)
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.insert(sql12)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()

    # 更新impression_cnt_week、click_cnt_week字段
    sql2 = "update dspbase.dsp_ctr_stat_advertiser t0                            \
    INNER JOIN                                                       \
    (select t1.log_day, t1.log_advertiserid                            \
    ,sum(t2.impression_cnt) as impression_cnt_week                   \
    ,sum(t2.click_cnt) as click_cnt_week                             \
    from dspbase.dsp_ctr_stat_advertiser t1                                      \
    left join dspbase.dsp_ctr_stat_advertiser t2                                 \
    on t2.log_day between DATE_SUB(t1.log_day, INTERVAL 6 DAY)       \
    and t1.log_day and t1.log_advertiserid=t2.log_advertiserid           \
    where t1.log_day=\'%s\'                                              \
    group by t1.log_day, t1.log_advertiserid) t3                       \
    on t0.log_day=t3.log_day and t0.log_advertiserid=t3.log_advertiserid \
    set t0.impression_cnt_week=t3.impression_cnt_week                \
    ,t0.click_cnt_week=t3.click_cnt_week                             \
    where t0.log_day=\'%s\';" % (log_day, log_day)
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.insert(sql2)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()
def insert_merge(dt):
    # 转换日期格式:20180906 -> 2018-09-06
    log_day = dt[0:4] + '-' + dt[4:6] + '-' + dt[6:8]
    # 插入数据
    sql = "insert into dspbase.dsp_ctr_daily_merge%s                  \
    (log_dt                                                       \
    ,log_day                                                      \
    ,log_hour\
    ,log_imp_flag                                                 \
    ,log_clk_flag                                                 \
    ,log_accountid                                                \
    ,log_advertiserid                                             \
    ,log_campaignid                                               \
    ,log_creativeid                                               \
    ,log_mediashowid                                              \
    ,log_channel                                                  \
    ,log_devicetype                                               \
    ,log_adsize                                                   \
    ,log_ctr                                                      \
    ,log_media                                                    \
    ,log_adspace                                                  \
    ,log_adunit                                                   \
    ,log_adtype                                                   \
    ,log_istest                                                   \
    ,log_ip                                                       \
    ,log_mcf                                                      \
    ,log_mcs                                                      \
    ,log_region                                                   \
    ,log_token                                                    \
    ,log_adnormaltype                                             \
    ,log_mediaid                                                  \
    ,log_platid                                                   \
    ,log_incost                                                   \
    ,log_outcost                                                  \
    ,log_adcost                                                   \
    ,log_status                                                   \
    ,log_kdgrequest_imp_num                                       \
    ,log_kdgrequest_device_devicetype                             \
    ,log_kdgrequest_device_make                                   \
    ,log_kdgrequest_device_model                                  \
    ,log_kdgrequest_device_os                                     \
    ,log_kdgrequest_device_h                                      \
    ,log_kdgrequest_device_w                                      \
    ,log_kdgrequest_device_carrier                                \
    ,log_kdgrequest_device_connectiontype                         \
    ,log_kdgrequest_device_ifa                                    \
    ,log_kdgrequest_device_imeimd5                                \
    ,ader_brand                                                   \
    ,ader_firstindustry                                           \
    ,ader_secondindustry                                          \
    ,ader_servicerate                                             \
    ,camp_settlement_type                                         \
    ,camp_settlement_price                                        \
    ,camp_imp_limit                                               \
    ,camp_click_limit                                             \
    ,ctv_width                                                    \
    ,ctv_height                                                   \
    ,ctv_type                                                     \
    ,ctv_style_id                                                 \
    ,ctv_link_type                                                \
    ,ctv_size                                                     \
    ,ctv_deep_link                                                \
    ,ctv_first_category                                           \
    ,ctv_second_category                                          \
    ,ctv_label                                                    \
    ,ctv_remark                                                   \
    )                                                             \
    select                                                        \
    t0.dt                                                         \
    ,left(t0.dt,10)                                               \
    ,right(left(t0.dt,13),2)                                      \
    ,t0.imp_flag                                                  \
    ,t0.clk_flag                                                  \
    ,t0.accountid                                                 \
    ,t0.advertiserid                                              \
    ,t0.campaignid                                                \
    ,t0.creativeid                                                \
    ,t0.mediashowid                                               \
    ,t0.channel                                                   \
    ,t0.devicetype                                                \
    ,t0.adsize                                                    \
    ,t0.ctr                                                       \
    ,t0.media                                                     \
    ,t0.adspace                                                   \
    ,t0.adunit                                                    \
    ,t0.adtype                                                    \
    ,t0.istest                                                    \
    ,t0.ip                                                        \
    ,t0.mcf                                                       \
    ,t0.mcs                                                       \
    ,t0.region                                                    \
    ,t0.token                                                     \
    ,t0.adnormaltype                                              \
    ,t0.mediaid                                                   \
    ,t0.platid                                                    \
    ,t0.incost                                                    \
    ,t0.outcost                                                   \
    ,t0.adcost                                                    \
    ,t0.status                                                    \
    ,t0.kdgrequest_imp_num                                        \
    ,t0.kdgrequest_device_devicetype                              \
    ,t0.kdgrequest_device_make                                    \
    ,t0.kdgrequest_device_model                                   \
    ,t0.kdgrequest_device_os                                      \
    ,t0.kdgrequest_device_h                                       \
    ,t0.kdgrequest_device_w                                       \
    ,t0.kdgrequest_device_carrier                                 \
    ,t0.kdgrequest_device_connectiontype                          \
    ,t0.kdgrequest_device_ifa                                     \
    ,t0.kdgrequest_device_imeimd5                                 \
    ,t1.brand                                                     \
    ,t1.firstindustry                                             \
    ,t1.secondindustry                                            \
    ,t1.servicerate                                               \
    ,t2.settlement_type                                           \
    ,t2.settlement_price                                          \
    ,t2.imp_limit                                                 \
    ,t2.click_limit                                               \
    ,t3.width                                                     \
    ,t3.height                                                    \
    ,t3.type                                                      \
    ,t3.style_id                                                  \
    ,t3.link_type                                                 \
    ,t3.size                                                      \
    ,t3.deep_link                                                 \
    ,t3.first_category                                            \
    ,t3.second_category                                           \
    ,t3.label                                                     \
    ,t3.remark                                                    \
    from dspbase.src_log_info_0907_0929 t0                                        \
    left join dspbase.src_advertiser_info t1 on t0.advertiserid = t1.id   \
    left join dspbase.src_campaign t2 on t0.campaignid = t2.id            \
    left join dspbase.src_creative t3 on t0.creativeid = t3.id where imp_flag <> 0 \
    and t0.dt like \'%s%%\';" % (dt, log_day)
    mysql = ms.MyPymysqlPool("dbMysql")
    try:
        mysql.insert(sql)
        # print(sql)
    except Exception as e:
        print('Error:', e)
    finally:
        mysql.dispose()
def insert_img_infos_db(values):
    sql = "insert into img_info values(%s,%s,%s,%s)"
    mysql = mysql_conn.MyPymysqlPool("dbMysql")
    mysql.insertMany(sql,values)
    mysql.dispose()
def fetch_data(table, dt_info, lmt=''):
    mysql = ms.MyPymysqlPool("dbMysql")
    sql = "select \
          log_token\
          ,log_clk_flag\
          ,log_hour\
          ,log_advertiserid\
          ,log_campaignid\
          ,log_creativeid\
          ,log_mediashowid\
          ,log_media\
          ,log_channel\
          ,log_mcf\
          ,log_mcs\
          ,log_kdgrequest_device_devicetype\
          ,log_kdgrequest_device_make\
          ,log_kdgrequest_device_model\
          ,log_kdgrequest_device_os\
          ,log_kdgrequest_device_carrier\
          ,log_kdgrequest_device_connectiontype\
          ,log_adspace\
          ,log_adunit\
          ,log_adtype\
          ,log_adsize\
          ,log_region_country\
          ,log_region_province\
          ,log_region_city\
          ,ader_brand\
          ,ader_secondindustry\
          ,ctv_label\
          ,ctv_type\
          ,ctv_style_id\
          ,ctv_second_category\
          ,ctv_remark\
          ,truncated_ifa\
          ,truncated_imei\
          ,ifa_cross_ctv\
          ,ifa_cross_ad\
          ,ifa_cross_campaign\
          ,ifa_cross_ctv_style\
          ,ifa_cross_ader_brand\
          ,ifa_cross_ader_secondindustry\
          ,ifa_cross_ctv_second_category\
          ,imei_cross_ctv\
          ,imei_cross_ad\
          ,imei_cross_campaign\
          ,imei_cross_ctv_style\
          ,imei_cross_ader_brand\
          ,imei_cross_ader_secondindustry\
          ,imei_cross_ctv_second_category\
          ,camp_settlement_price\
          ,ctv_size\
          ,ifnull(adsp_last7days_ctr,0)\
          ,ifnull(adsp_yesterday_ctr,0)\
          ,ifnull(adsp_last7days_clk_cnt,0)\
          ,ifnull(adsp_yesterday_clk_cnt,0)\
          ,ifnull(ader_last7days_ctr,0)\
          ,ifnull(ader_yesterday_ctr,0)\
          ,ifnull(ader_last7days_clk_cnt,0)\
          ,ifnull(ader_yesterday_clk_cnt,0)\
          ,ifnull(ctv_last7days_ctr,0)\
          ,ifnull(ctv_yesterday_ctr,0)\
          ,ifnull(ctv_last7days_clk_cnt,0)\
          ,ifnull(ctv_yesterday_clk_cnt,0)\
          ,ifnull(media_last7days_ctr,0)\
          ,ifnull(media_yesterday_ctr,0)\
          ,ifnull(media_last7days_clk_cnt,0)\
          ,ifnull(media_yesterday_clk_cnt,0)\
          ,ifnull(city_last7days_ctr,0)\
          ,ifnull(city_yesterday_ctr,0)\
          ,ifnull(city_last7days_clk_cnt,0)\
          ,ifnull(city_yesterday_clk_cnt,0)\
          ,ifnull(ifa_last7days_ctr,0)\
          ,ifnull(ifa_yesterday_ctr,0)\
          ,ifnull(ifa_last7days_clk_cnt,0)\
          ,ifnull(ifa_yesterday_clk_cnt,0)\
          ,ifnull(imei_last7days_ctr,0)\
          ,ifnull(imei_yesterday_ctr,0)\
          ,ifnull(imei_last7days_clk_cnt,0)\
          ,ifnull(imei_yesterday_clk_cnt,0)\
          from dspbase.%s where log_day between DATE_SUB(\'%s\', INTERVAL 30 DAY) and \'%s\' %s"\
          % (table, dt_info, dt_info, lmt)
    rst = mysql.getAll(sql)
    if not rst:
        mysql.dispose()
        print('				No data!')
        exit()
    else:
        df = pd.DataFrame(list(list(x.values()) for x in rst)).fillna('')
        print('		%d rows data have been fetched.' % len(df))
        mysql.dispose()
        return shuffle(df)
Пример #23
0
def fetchDataGDT(trans_tab, dt, indst_id, bill_evt, limit=''):
    mysql = ms.MyPymysqlPool("dbMysql")
    # sql = "SELECT \
    # 		atv_flag \
    # 		,id \
    # 		,adg_ts_workday1 \
    # 		,adg_ts_workday2 \
    # 		,adg_ts_workday3 \
    # 		,adg_ts_workday4 \
    # 		,adg_ts_workday5 \
    # 		,adg_ts_workday6 \
    # 		,adg_ts_workday7 \
    # 		,adg_ts_weekend1 \
    # 		,adg_ts_weekend2 \
    # 		,adg_ts_weekend3 \
    # 		,adg_ts_weekend4 \
    # 		,adg_ts_weekend5 \
    # 		,adg_site_set \
    # 		,adg_product_type \
    # 		,adg_configured_status \
    # 		,ad_adcreative_name \
    # 		,ad_adcreative_name_game_name \
    # 		,ad_adcreative_name_material_type \
    # 		,ad_adcreative_name_material_effect \
    # 		,ad_adcreative_name_device_type \
    # 		,ad_adcreative_name_interest \
    # 		,ad_adcreative_name_behavior \
    # 		,ad_adcreative_name_paid \
    # 		,ade_image_hash \
    # 		,ade_title \
    # 		,ade_image2_hash \
    # 		,ade_image3_hash \
    # 		,ad_space \
    # 		,adc_form_size \
    # 		,adc_form_detail \
    # 		,adc_description \
    # 		,adg_bid_amount \
    # 		,log(adg_crt_days) \
    # 		,atv_ratio_bin11_hisratio_h \
    # 		,adc_drp1_1 \
    # 		,adc_drp1_2 \
    # 		,adc_drp1_3 \
    # 		,adc_drp1_4 \
    # 		,adc_drp1_5 \
    # 		,adc_drp1_6 \
    # 		,adc_drp1_7 \
    # 		,adc_drp1_8 \
    # 		,adc_drp1_9 \
    # 		,adc_drp1_10 \
    # 		,adc_drp1_11 \
    # 		,adc_drp1_12 \
    # 		,adc_drp1_13 \
    # 		,adc_drp1_14 \
    # 		,adc_drp1_15 \
    # 		,adc_drp1_16 \
    # 		,adc_drp1_17 \
    # 		,adc_drp1_18 \
    # 		,adc_drp1_19 \
    # 		,adc_drp1_20 \
    # 		,adc_drp1_21 \
    # 		,adc_drp1_22 \
    # 		,adc_drp1_23 \
    # 		,adc_drp1_24 \
    # 		,adc_drp1_25 \
    # 		,adc_drp1_26 \
    # 		,adc_drp1_27 \
    # 		,adc_drp1_28 \
    # 		,adc_drp1_29 \
    # 		,adc_drp1_30 \
    # 		,adc_drp1_31 \
    # 		,adc_drp1_32 \
    # 		,adc_drp1_33 \
    # 		,adc_drp1_34 \
    # 		,adc_drp1_35 \
    # 		,adc_drp1_36 \
    # 		,adc_drp1_37 \
    # 		,adc_drp1_38 \
    # 		,adc_drp2_1 \
    # 		,adc_drp2_2 \
    # 		,adc_drp2_3 \
    # 		,adc_drp2_4 \
    # 		,adc_drp2_5 \
    # 		,adc_drp2_6 \
    # 		,adc_drp2_7 \
    # 		,adc_drp2_8 \
    # 		,adc_drp2_9 \
    # 		,adc_drp3_1 \
    # 		,adc_drp3_2 \
    # 		,adc_drp3_3 \
    # 		,adc_drp3_4 \
    # 		,adc_drp3_5 \
    # 		,adc_drp3_6 \
    # 		,adc_drp3_7 \
    # 		,adc_drp3_8 \
    # 		,adc_drp3_9 \
    # 		,adc_drp3_10 \
    # 		,adc_drp3_11 \
    # 		,adc_drp3_12 \
    # 		,adc_drp3_13 \
    # 		,adc_drp3_14 \
    # 		,adc_drp3_15 \
    # 		,adc_drp3_16 \
    # 		,adc_drp3_17 \
    # 		,adc_drp3_18 \
    # 		,adc_drp3_19 \
    # 		,adc_drp3_20 \
    # 		,adc_drp3_21 \
    # 		,adc_drp3_22 \
    # 		,adc_drp3_23 \
    # 		,adc_drp3_24 \
    # 		,adc_drp3_25 \
    # 		,adc_drp3_26 \
    # 		,tag_age \
    # 		,tag_gender \
    # 		,tag_education \
    # 		,tag_relationship_status \
    # 		,tag_living_status \
    # 		,tag_business_interest \
    # 		,tag_location \
    # 		,tag_region \
    # 		,tag_gloc_location_types \
    # 		,tag_gloc_regions \
    # 		,tag_gloc_business_districts \
    # 		,tag_user_os \
    # 		,tag_new_device \
    # 		,tag_device_price \
    # 		,tag_network_type \
    # 		,tag_network_operator \
    # 		,tag_dressing_index \
    # 		,tag_uv_index \
    # 		,tag_makeup_index \
    # 		,tag_climate \
    # 		,tag_temperature \
    # 		,tag_app_install_status \
    # 		,tag_abh_object_type \
    # 		,tag_abh_object_id_list \
    # 		,tag_abh_time_window \
    # 		,tag_abh_act_id_list \
    # 		,tag_customized_audience \
    # 		,tag_shopping_capability \
    # 		,tag_player_consupt \
    # 		,tag_paying_user_type \
    # 		,tag_residential_community_price \
    # 		,tag_media_category_wechat \
    # 		,tag_ad_placement_id \
    # 		,tag_media_category_union \
    # 		,tag_qzone_fans \
    # 		,tag_online_scenario \
    # 		,tag_custom_audience \
    # 		,tag_boi_i_targeting_tags \
    # 		,concat(adg_bid_amount,ad_adcreative_name) \
    # 		,concat(adg_bid_amount,ad_adcreative_name,atv_ratio_bin11_hisratio_h) \
    # 		,concat(adg_bid_amount,atv_ratio_bin11_hisratio_h) \
    # 		,concat(adg_bid_amount,tag_gender) \
    # 		,concat(adg_bid_amount,tag_gloc_regions) \
    # 		,concat(adg_bid_amount,tag_gloc_regions,ad_space) \
    # 		,concat(adg_bid_amount,tag_gloc_regions,ad_space,adc_drp1_24) \
    # 		,concat(adg_bid_amount,tag_gloc_regions,adc_drp1_24) \
    # 		,concat(adg_bid_amount,adg_ts_workday1) \
    # 		,concat(adg_bid_amount,adg_ts_workday1,ad_adcreative_name) \
    # 		,concat(adg_bid_amount,adg_ts_workday1,tag_gender) \
    # 		,concat(adg_bid_amount,adg_ts_workday1,tag_gloc_regions) \
    # 		,concat(adg_bid_amount,adg_ts_workday1,tag_gloc_regions,ad_space) \
    # 		,concat(adg_bid_amount,adg_ts_workday1,tag_gloc_regions,ad_space,adc_drp1_24) \
    # 		,concat(adg_bid_amount,adg_ts_workday1,tag_gloc_regions,adc_drp1_24) \
    # 		,concat(ad_adcreative_name,atv_ratio_bin11_hisratio_h) \
    # 		,concat(ad_adcreative_name,tag_gender) \
    # 		,concat(ad_adcreative_name,tag_gender,atv_ratio_bin11_hisratio_h) \
    # 		,concat(tag_gloc_regions,ad_space) \
    # 		,concat(tag_gloc_regions,ad_space,adc_drp1_24) \
    # 		,concat(tag_gloc_regions,adc_drp1_24) \
    # 		,concat(adg_ts_workday1,ad_adcreative_name) \
    # 		,concat(adg_ts_workday1,ad_adcreative_name,atv_ratio_bin11_hisratio_h) \
    # 		,concat(adg_ts_workday1,ad_adcreative_name,tag_gender) \
    # 		,concat(adg_ts_workday1,atv_ratio_bin11_hisratio_h) \
    # 		,concat(adg_ts_workday1,tag_gender) \
    # 		,concat(adg_ts_workday1,tag_gloc_regions) \
    # 		,concat(adg_ts_workday1,tag_gloc_regions,ad_space) \
    # 		,concat(adg_ts_workday1,tag_gloc_regions,ad_space,adc_drp1_24) \
    # 		,concat(adg_ts_workday1,tag_gloc_regions,adc_drp1_24) \
    # 	    FROM %s \
    # 		where dt in %s and ind_first_industry_id=%d and adg_billing_event='%s' \
    # 		and id <= 100000000 %s " \
    # 		% (trans_tab, dt, indst_id, bill_evt, limit)

    sql = "SELECT \
            atv_flag \
			,id \
			,hour \
            ,day_week \
            ,day_type \
			,adg_ts_workday1 \
			,adg_ts_workday2 \
			,adg_ts_workday3 \
			,adg_ts_workday4 \
			,adg_ts_workday5 \
			,adg_ts_workday6 \
			,adg_ts_workday7 \
			,adg_ts_weekend1 \
			,adg_ts_weekend2 \
			,adg_ts_weekend3 \
			,adg_ts_weekend4 \
			,adg_ts_weekend5 \
			,adg_site_set \
			,adg_product_type \
			,ad_adcreative_name \
			,ade_image_hash \
			,ade_title \
			,ade_title_len \
			,ad_space \
			,adc_form_size \
			,adc_form_detail \
			,adc_description \
			,adg_bid_amount \
			,adg_atv_days \
            ,ad_atv_days \
            ,adgcrt_imp_hours \
            ,adgcrt_last_atv_interval \
			,atv_ratio_bin11_hisratio_h \
            ,atv_ratio_hisavg \
			,adc_drp1_4 \
			,adc_drp1_17 \
			,adc_drp1_23 \
			,adc_drp1_24 \
			,adc_drp1_31 \
			,adc_drp1_32 \
			,adc_drp1_33 \
			,adc_drp3_5 \
			,adc_drp3_16 \
			,adc_drp3_26 \
			,tag_age_0_15 \
			,tag_age_15_20 \
			,tag_age_20_25 \
			,tag_age_25_30 \
			,tag_age_30_35 \
			,tag_age_35_40 \
			,tag_age_40_45 \
			,tag_age_45_50 \
			,tag_age_50 \
			,tag_gender \
			,tag_business_interest \
			,tag_gloc_location_types \
			,tag_gloc_regions \
			,tag_network_type \
			,tag_app_install_status \
			,tag_abh_object_type \
			,tag_abh_object_id_list \
			,tag_abh_time_window \
			,tag_abh_act_id_list \
			,tag_customized_audience \
			,tag_player_consupt \
			,tag_paying_user_type \
            ,concat(day_week,'\|',hour) \
            ,concat(day_type,'\|',hour) \
			,concat(ifnull(adg_bid_amount,''),'\|',ifnull(ad_adcreative_name,'')) \
			,concat(ifnull(adg_bid_amount,''),'\|',ifnull(ad_adcreative_name,''),'\|',ifnull(atv_ratio_bin11_hisratio_h,'')) \
			,concat(ifnull(adg_bid_amount,''),'\|',ifnull(atv_ratio_bin11_hisratio_h,'')) \
			,concat(ifnull(adg_bid_amount,''),'\|',ifnull(tag_gender,'')) \
			,concat(ifnull(adg_bid_amount,''),'\|',ifnull(tag_gloc_regions,'')) \
			,concat(ifnull(adg_bid_amount,''),'\|',ifnull(tag_gloc_regions,''),'\|',ifnull(ad_space,'')) \
			,concat(ifnull(adg_bid_amount,''),'\|',ifnull(tag_gloc_regions,''),'\|',ifnull(ad_space,''),'\|',ifnull(adc_drp1_24,'')) \
			,concat(ifnull(adg_bid_amount,''),'\|',ifnull(tag_gloc_regions,''),'\|',ifnull(adc_drp1_24,'')) \
			,concat(ifnull(adg_bid_amount,''),'\|',ifnull(adg_ts_workday1,'')) \
			,concat(ifnull(adg_bid_amount,''),'\|',ifnull(adg_ts_workday1,''),'\|',ifnull(ad_adcreative_name,'')) \
			,concat(ifnull(adg_bid_amount,''),'\|',ifnull(adg_ts_workday1,''),'\|',ifnull(tag_gender,'')) \
			,concat(ifnull(adg_bid_amount,''),'\|',ifnull(adg_ts_workday1,''),'\|',ifnull(tag_gloc_regions,'')) \
			,concat(ifnull(adg_bid_amount,''),'\|',ifnull(adg_ts_workday1,''),'\|',ifnull(ifnull(tag_gloc_regions,''),''),'\|',ifnull(ad_space,'')) \
			,concat(ifnull(adg_bid_amount,''),'\|',ifnull(adg_ts_workday1,''),'\|',ifnull(tag_gloc_regions,''),'\|',ifnull(ad_space,''),'\|',ifnull(adc_drp1_24,'')) \
			,concat(ifnull(adg_bid_amount,''),'\|',ifnull(adg_ts_workday1,''),'\|',ifnull(tag_gloc_regions,''),'\|',ifnull(adc_drp1_24,'')) \
			,concat(ifnull(ad_adcreative_name,''),'\|',ifnull(atv_ratio_bin11_hisratio_h,'')) \
			,concat(ifnull(ad_adcreative_name,''),'\|',ifnull(tag_gender,'')) \
			,concat(ifnull(ad_adcreative_name,''),'\|',ifnull(tag_gender,''),'\|',ifnull(atv_ratio_bin11_hisratio_h,'')) \
			,concat(ifnull(tag_gloc_regions,''),'\|',ifnull(ad_space,'')) \
			,concat(ifnull(tag_gloc_regions,''),'\|',ifnull(ad_space,''),ifnull(adc_drp1_24,'')) \
			,concat(ifnull(tag_gloc_regions,''),'\|',ifnull(adc_drp1_24,'')) \
			,concat(ifnull(adg_ts_workday1,''),'\|',ifnull(ad_adcreative_name,'')) \
			,concat(ifnull(adg_ts_workday1,''),'\|',ifnull(ad_adcreative_name,''),'\|',ifnull(atv_ratio_bin11_hisratio_h,'')) \
			,concat(ifnull(adg_ts_workday1,''),'\|',ifnull(ad_adcreative_name,''),'\|',ifnull(tag_gender,'')) \
			,concat(ifnull(adg_ts_workday1,''),'\|',ifnull(atv_ratio_bin11_hisratio_h,'')) \
			,concat(ifnull(adg_ts_workday1,''),'\|',ifnull(tag_gender,'')) \
			,concat(ifnull(adg_ts_workday1,''),'\|',ifnull(tag_gloc_regions,'')) \
			,concat(ifnull(adg_ts_workday1,''),'\|',ifnull(tag_gloc_regions,''),'\|',ifnull(ad_space,'')) \
			,concat(ifnull(adg_ts_workday1,''),'\|',ifnull(tag_gloc_regions,''),'\|',ifnull(ad_space,''),'\|',ifnull(adc_drp1_24,'')) \
			,concat(ifnull(adg_ts_workday1,''),'\|',ifnull(tag_gloc_regions,''),'\|',ifnull(adc_drp1_24,'')) \
			FROM %s \
			where %s and ind_first_industry_id=%d and adg_billing_event='%s' \
			and adg_site_set='tss_SITE_SET_MOBILE_INNER' and id <= 100000000  %s " \
          % (trans_tab, dt, indst_id, bill_evt, limit)

    # print(sql)
    rst = mysql.getAll(sql)

    if not rst:
        mysql.dispose()  # 释放资源
        print('		No data!')
        return -1
    else:
        # 训练模型不能有缺失值,补0
        df = pd.DataFrame(list(list(x.values()) for x in rst)).fillna('0')
        print('		%d rows data have been fetched.' % len(df))
        mysql.dispose()
        return df