def fetch_segwords(tablename): mysql = msc.MyPymysqlPool("dbMysql") sql = "SELECT t1.keyword_segmented,t2.seqno \ FROM pzbase.ai_keywords_classification_train2 t1 \ inner join pzbase.ai_keywords_classification_classdef t2 \ on t1.class_level1=t2.class_level1 and t1.class_level2=t2.class_level2 and t1.class_level3=t2.class_level3 and t2.valid_flag=1" rst1 = mysql.getAll(sql) sql = "SELECT keyword_segmented,seqno FROM pzbase.ai_keywords_classification_test2_gnb where length(keyword)!=char_length(keyword) and prediction_method is null" rst2 = mysql.getAll(sql) mysql.dispose() return rst1, rst2
def insert_result(tab_rst, tab_image_q, tab_image_lib, imgid_q, mtid, mpkpnum, kpnum_l, kpnum_q): mysql = ms.MyPymysqlPool("dbMysql") sql = "insert into %s(img_id_query, img_id_lib, img_path_query, img_path_lib, \ ratio_kpnum_match, kpnum_match, kpnum_img_query, kpnum_img_lib, op_dt) \ select t1.img_id, t2.img_id, t1.img_path, t2.img_path, %f, %d, %d, %d, SYSDATE() \ from %s t1 \ inner join %s t2 on t2.img_id=%d \ where t1.img_id=%d" % (tab_rst, mpkpnum / min(kpnum_l, kpnum_q), mpkpnum, kpnum_q, kpnum_l, tab_image_q, tab_image_lib, mtid, imgid_q) rnum = mysql.insert(sql) mysql.dispose() # 释放资源 return rnum
def fetch_data(num_s, num_e): mysql = ms.MyPymysqlPool("dbMysql") sql = "SELECT * FROM imgbase.img_sift_features1 \ where img_id between %s and %s" % (num_s, num_e) rst = mysql.getAll(sql) if not rst: mysql.dispose() # 释放资源 print(' No data!') return -1 else: df = pd.DataFrame(list(list(x.values()) for x in rst)).fillna('0') print(' %d rows data have been fetched.' % len(df)) mysql.dispose() return df
def update_calibration(dt): log_day = dt[0:4] + '-' + dt[4:6] + '-' + dt[6:8] # 计算下采样之后'点击/曝光'的比例,并插入dsp_ctr_calibration表中 sql = "update dspbase.dsp_ctr_calibration set train_positive=(select \ sum(case when log_clk_flag=1 then 1 else 0 end) \ from dspbase.dsp_ctr_train where log_day=\'%s\'),train_negative=(select \ sum(case when log_clk_flag=0 then 1 else 0 end) \ from dspbase.dsp_ctr_train where log_day=\'%s\'),train_ratio=(select \ sum(case when log_clk_flag=1 then 1 else 0 end)/sum(case when log_clk_flag=0 then 1 else 0 end) \ from dspbase.dsp_ctr_train where log_day=\'%s\') where log_day=\'%s\'" % (log_day, log_day, log_day, log_day) mysql = ms.MyPymysqlPool("dbMysql") try: mysql.update(sql) except Exception as e: print('Error:', e) finally: mysql.dispose()
def fetch_data(table, dt_info, lmt=''): mysql = ms.MyPymysqlPool("dbMysql") sql = "select \ camp_settlement_price\ ,ctv_size\ ,ifnull(adsp_last7days_ctr,0)\ ,ifnull(adsp_yesterday_ctr,0)\ ,ifnull(adsp_last7days_clk_cnt,0)\ ,ifnull(adsp_yesterday_clk_cnt,0)\ ,ifnull(ader_last7days_ctr,0)\ ,ifnull(ader_yesterday_ctr,0)\ ,ifnull(ader_last7days_clk_cnt,0)\ ,ifnull(ader_yesterday_clk_cnt,0)\ ,ifnull(ctv_last7days_ctr,0)\ ,ifnull(ctv_yesterday_ctr,0)\ ,ifnull(ctv_last7days_clk_cnt,0)\ ,ifnull(ctv_yesterday_clk_cnt,0)\ ,ifnull(media_last7days_ctr,0)\ ,ifnull(media_yesterday_ctr,0)\ ,ifnull(media_last7days_clk_cnt,0)\ ,ifnull(media_yesterday_clk_cnt,0)\ ,ifnull(city_last7days_ctr,0)\ ,ifnull(city_yesterday_ctr,0)\ ,ifnull(city_last7days_clk_cnt,0)\ ,ifnull(city_yesterday_clk_cnt,0)\ ,ifnull(ifa_last7days_ctr,0)\ ,ifnull(ifa_yesterday_ctr,0)\ ,ifnull(ifa_last7days_clk_cnt,0)\ ,ifnull(ifa_yesterday_clk_cnt,0)\ ,ifnull(imei_last7days_ctr,0)\ ,ifnull(imei_yesterday_ctr,0)\ ,ifnull(imei_last7days_clk_cnt,0)\ ,ifnull(imei_yesterday_clk_cnt,0)\ from dspbase.%s where log_day between DATE_SUB(\'%s\', INTERVAL 30 DAY) and \'%s\' %s" \ % (table, dt_info, dt_info, lmt) rst = mysql.getAll(sql) if not rst: mysql.dispose() print(' No data!') exit() else: df = pd.DataFrame(list(list(x.values()) for x in rst)).fillna(-1) print(' %d rows data have been fetched.' % len(df)) mysql.dispose() return df
def fetch_data(tabimg, tabfets, num_s, num_e): mysql = ms.MyPymysqlPool("dbMysql") sql = "SELECT t2.* \ FROM %s t1 \ inner join %s t2 on t1.img_id=t2.img_id \ where t1.img_id between %s and %s" % (tabimg, tabfets, num_s, num_e) rst = mysql.getAll(sql) if not rst: mysql.dispose() # 释放资源 print(' No data!') return -1 else: df = pd.DataFrame(list(list(x.values()) for x in rst)).fillna('0') print(' %d rows data have been fetched.' % len(df)) mysql.dispose() return df
def insert_question_similarity(model, vocab, dest_records, base_records, threshold, tablename): mysql = msc.MyPymysqlPool("dbMysql") # 计算rst中两两记录之间的相似度,将大于阈值的存入question_similarity_table i = 0 j = 0 for dest_record in dest_records: # mysql.begin() # 开启事务 seqno1 = dest_record[0] stem1 = dest_record[1] segwords1 = dest_record[2] segwords1_list = segwords1.split() max_similarity = -1 max_seqno = -1 for base_record in base_records: seqno2 = int(base_record[0]) stem2 = base_record[1] segwords2 = base_record[2] segwords2_list = segwords2.split() if len(segwords1_list) > 0 and len(segwords2_list) > 0: similarity = calculate_similarity(model, segwords1_list, segwords2_list) if similarity > max_similarity: max_similarity = similarity max_seqno = seqno2 if (similarity >= threshold): sql = ''.join(['insert into ', tablename, "(question_seqno1,question_seqno2,question_stem1,question_stem2," \ "question_stem_segment1,question_stem_segment2,similarity,load_time)" \ " values(", str(seqno1), ",", str(seqno2), ",'", str(stem1), "','", str(stem2), "','", str(segwords1), "','", str(segwords2), "',", str(similarity), \ ", CURRENT_TIMESTAMP());"]) mysql.insert(sql) i += 1 j += 1 print('seqno1, max_seqno, max_similarity: ', seqno1, max_seqno, max_similarity) mysql.end() # 结束提交 mysql.dispose() loginfo = '%d similar rows have been inserted into %s!' % (i, tablename) gl.write_log(logpath, 'info', loginfo)
def fetch_data(tab_rst, lmt_kpsup=0.05, lmt_kpup=1, op_tt='2000-01-01 00:00:00', mtksup=10): mysql = ms.MyPymysqlPool("dbMysql") sql = "SELECT img_path_query,img_path_lib,ratio_kpnum_match,kpnum_match,kpnum_img_query,kpnum_img_lib \ FROM %s \ where ratio_kpnum_match between %f and %f and op_dt>='%s' and kpnum_match>=%d" \ % (tab_rst, lmt_kpsup, lmt_kpup, op_tt, mtksup) rst = mysql.getAll(sql) if not rst: mysql.dispose() # 释放资源 print(' No data!') return -1 else: df = pd.DataFrame(list(list(x.values()) for x in rst)).fillna('0') print(' %d rows data have been fetched.' % len(df)) mysql.dispose() return df
def fetch_data(sqlstr, dataitems): """执行数据读取SQL。 Args: sqlstr:特征值读取SQL dataitems:特征数据项列表 Returns: df:DataFrame格式SQL返回数据集 """ mysql = ms.MyPymysqlPool("dbMysql") rst = mysql.getAll(sqlstr) if not rst: mysql.dispose() # 释放资源 print('No data: ', dataitems) return None else: df = pd.DataFrame(list(list(x.values()) for x in rst)).fillna('') print(' %d rows have been fetched.' % len(df)) mysql.dispose() return df
def load_question_textbook(subject_type, textbook_name, path_textbook_image, tablename): """ 加载教材版本信息 """ mysql = msc.MyPymysqlPool("dbMysql") init_sql = ''.join(['insert into ', tablename, "(path_image_loaded,textbook_name," \ "textbook_version_name,grade_name,subject_type,load_time) " \ " values('str_path_textbook_image', 'str_textbook_name'," \ " 'str_textbook_version_name', 'str_grade_name'," \ " 'str_subject_type', CURRENT_TIMESTAMP());", ]) str_textbook_version_name = textbook_name.split('-')[0].strip(' ') str_grade_name = textbook_name.split('-')[1].strip(' ') sql = init_sql.replace('str_path_textbook_image', path_textbook_image) sql = sql.replace('str_textbook_name', textbook_name) sql = sql.replace('str_textbook_version_name', str_textbook_version_name) sql = sql.replace('str_grade_name', str_grade_name) sql = sql.replace('str_subject_type', subject_type) mysql.insert(sql) mysql.dispose()
def load_question_unit_klpoint(subject_type, textbook_name, file_name_list, tablename_question_textbook, tablename_question_ukt): """ 加载文件名对应的章节、知识点信息 文件名示例: 第1章 两、三位数乘一位数-整数的乘法及应用-单选.csv 1 时、分、秒-时、分、秒及其关系、单位换算与计算-判断题.csv 注:由于文件名格式有错误,加载后需要手工编辑数据 """ mysql = msc.MyPymysqlPool("dbMysql") # 读取数据表中的目录名 sql0 = ''.join(['select textbook_id from ', tablename_question_textbook, \ " where textbook_name='", textbook_name, "' and subject_type='", subject_type, "';", ]) rstData = mysql.getAll(sql0) textbook_id = pd.DataFrame(list(list(x.values()) for x in rstData)).iloc[0][0] init_sql_quk = ''.join(['insert into ', tablename_question_ukt, \ "(file_name,textbook_id,unit_name," \ "knowledge_name,question_type_name,load_time) " \ " values('str_file_name', ", str(textbook_id), ", 'str_unit_name'," \ " 'str_knowledge_name', 'str_question_type_name', CURRENT_TIMESTAMP());", ]) # 文件名:"1 观察物体(三)-正方体的特征-判断题.csv" for str_file_name in file_name_list: str_file_name_prefix = str_file_name.split('.')[0].strip(' ') str_unit_name = str_file_name_prefix.split('-')[0] str_knowledge_name = str_file_name_prefix.split('-')[1] str_question_type_name = str_file_name_prefix.split('-')[2] sql = init_sql_quk.replace('str_file_name', str_file_name) sql = sql.replace('str_unit_name', str_unit_name) sql = sql.replace('str_knowledge_name', str_knowledge_name) sql = sql.replace('str_question_type_name', str_question_type_name) # print(sql) mysql.insert(sql) mysql.dispose()
def write_segmented_clearwords(tablename, model_filepath): mysql = msc.MyPymysqlPool("dbMysql") sql = ''.join(['SELECT question_seqno, question_stem_segment FROM ', \ tablename, " where is_segmented=1 and grade_name like '三年级%'", ]) rst = mysql.getAll(sql) model = word2vec.Word2Vec.load(model_filepath) vocab = list(model.wv.vocab.keys()) for record in rst: seqno = record["question_seqno"] segwords = record["question_stem_segment"] segwords_list = segwords.strip(' ').split(' ') segwords_clear_list = tml.clear_word_from_vocab(segwords_list, vocab) # 清除不在model中的单词 segwords_clear = ' '.join(segwords_clear_list) # 转换为字符串 sql = ''.join(['update ', tablename, " set question_stem_segment_clear_rebuild='", \ str(segwords_clear), "' where question_seqno=", str(seqno), ";"]) print(sql) try: mysql.update(sql) mysql.end() except Exception: print('update error: ', sql) mysql.dispose()
def gen_quanitle(logpath, source_table, target_table, dt): mysql = msc.MyPymysqlPool("dbMysql") sql = "select hour,ind_first_industry_id,site_set,promoted_object_type,group_concat(ctr) as ctrs from {0} \ where ind_first_industry_id is not null and site_set is not null and promoted_object_type is not null \ and dt>='{1}' group by hour,ind_first_industry_name,site_set,promoted_object_type;".format( source_table, dt) try: rstData = mysql.getAll(sql) except BaseException as e: logerror = 'Error occurred when get data from {0} for ctr_level:{1}'.format( table, e) print(logerror) # gl.write_log(logpath, 'error', logerror) return False finally: mysql.dispose() if not rstData: logerror = 'Get nothing from {0} for ctr_level'.format(table) print(logerror) # gl.write_log(logpath, 'error', logerror) return False dataSet = pd.DataFrame(list(rstData)) quanitle_lists = [] for i in range(dataSet.shape[0]): hour = str(dataSet.iloc[i]['hour']) indst_id = str(dataSet.iloc[i]['ind_first_industry_id']) site = str(dataSet.iloc[i]['site_set']) prd = str(dataSet.iloc[i]['promoted_object_type']) ctr_q1, ctr_q2 = calculate_quanitle(str(dataSet.iloc[i]['ctrs'])) quanitle_list = [hour, site, prd, indst_id, ctr_q1, ctr_q2] quanitle_lists.append(quanitle_list) # 插入前清空 mysql = msc.MyPymysqlPool("dbMysql") sql = "truncate table {0};".format(target_table) try: mysql.delete(sql) except BaseException as e: logerror = 'Error occurred when truncate table {0}:{1}'.format( target_table, e) print(logerror) # gl.write_log(logpath, 'error', logerror) return False finally: mysql.dispose() # 将最新的分位数据插入维表 mysql = msc.MyPymysqlPool("dbMysql") sql = "insert into {0}(hour,site_set,promoted_object_type,ind_first_industry_id,ctr_q1,ctr_q2) \ VALUES (%s,%s,%s,%s,%s,%s);".format(target_table) try: mysql.insertMany(sql, quanitle_lists) except BaseException as e: logerror = 'Error occurred when insert data into {0}:{1}'.format( target_table, e) print(logerror) # gl.write_log(logpath, 'error', logerror) return False finally: mysql.dispose() return True
def create_merge(dt): # 若对应表已经存在,则先删除 sql = "drop table if exists dspbase.dsp_ctr_daily_merge%s" % dt mysql = ms.MyPymysqlPool("dbMysql") try: mysql.delete(sql) except Exception as e: print('Error:', e) finally: mysql.dispose() # 创建空表 sql = "CREATE TABLE dspbase.dsp_ctr_daily_merge%s ( \ `log_dt` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL, \ `log_day` date DEFAULT NULL, \ `log_hour` tinyint(4) DEFAULT NULL, \ `log_imp_flag` tinyint(4) DEFAULT NULL, \ `log_clk_flag` tinyint(4) DEFAULT NULL, \ `log_kdgresponse_id` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '对应kdgreqeust中的id', \ `log_downrspcontent` text COLLATE utf8mb4_unicode_ci COMMENT '响应信息', \ `log_isrespsucc` varchar(10) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '是否返回(0,没有返回,1返回)', \ `log_accountid` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '账户id', \ `log_advertiserid` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '广告主id', \ `log_campaignid` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '计划id', \ `log_creativeid` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '素材id', \ `log_mediashowid` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '媒体外部id', \ `log_channel` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '渠道id', \ `log_devicetype` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '设备类型,os、android、other', \ `log_adsize` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '广告尺寸', \ `log_ctr` varchar(50) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '当前投放素材的ctr', \ `log_media` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '媒体', \ `log_adspace` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '广告位', \ `log_adunit` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '媒体传的原始的广告位的id', \ `log_adtype` int(11) DEFAULT NULL COMMENT '展示类型', \ `log_istest` int(11) DEFAULT NULL COMMENT '是否是测试流量 0.正式 1.测试', \ `log_ip` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '客户端ip', \ `log_mcf` varchar(10) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '媒体一级分类', \ `log_mcs` varchar(10) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '媒体二级分类', \ `log_region` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '地域', \ `log_time` bigint(20) DEFAULT NULL COMMENT '时间戳', \ `log_logtype` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '日志类型', \ `log_token` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL, \ `log_adnormaltype` int(11) DEFAULT NULL COMMENT '反作弊类型,0表示正常', \ `log_mediaid` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '媒体id', \ `log_platid` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '平台id', \ `log_incost` double(20,2) DEFAULT NULL COMMENT '成本', \ `log_outcost` double(20,2) DEFAULT NULL COMMENT '消耗', \ `log_adcost` double(20,2) DEFAULT NULL COMMENT '广告主价格', \ `log_status` int(11) DEFAULT NULL COMMENT '状态: 0.时间太长失效; 1.正常', \ `log_kdgrequest_imp_num` int(11) DEFAULT NULL, \ `log_kdgrequest_device_devicetype` int(11) DEFAULT NULL COMMENT '0.未知;1.pc;2.phone;3.pad;4.tv', \ `log_kdgrequest_device_make` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '设备制造商', \ `log_kdgrequest_device_model` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '设备型号', \ `log_kdgrequest_device_os` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '如:Android、IOS、WindowsPhone', \ `log_kdgrequest_device_osv` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '操作系统版本;', \ `log_kdgrequest_device_h` int(11) DEFAULT NULL COMMENT '设备屏幕纵向分辨率', \ `log_kdgrequest_device_w` int(11) DEFAULT NULL COMMENT '设备屏幕横向分辨率', \ `log_kdgrequest_device_carrier` int(11) DEFAULT NULL COMMENT '运营商。0.未知;1.中国移动;2.中国联通;3.中国电信', \ `log_kdgrequest_device_connectiontype` int(11) DEFAULT NULL COMMENT '网络连接类型。0.未知;1.WIFI;2.2G;3.3G;4.4G;5.5G', \ `log_kdgrequest_device_ifa` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT 'iOS终端设备的明文。', \ `log_kdgrequest_device_imeimd5` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '设备IMEI(MEID)号,SHA1加密串。', \ `ader_brand` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '品牌名称', \ `ader_com_addr` varchar(225) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '公司注册地址', \ `ader_category` varchar(10) COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT '0' COMMENT '广告主行业分类', \ `ader_settlement_type` tinyint(4) DEFAULT '1' COMMENT '结算方式 1 CPM 2 CPC', \ `ader_firstindustry` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '一级行业分类', \ `ader_secondindustry` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '二级行业分类', \ `ader_servicerate` tinyint(3) DEFAULT NULL COMMENT '服务费率', \ `ader_priority` tinyint(4) DEFAULT NULL COMMENT '优先级', \ `ader_credit` int(11) DEFAULT NULL COMMENT '信用金', \ `ader_status` tinyint(3) NOT NULL DEFAULT '0' COMMENT '状态(0正常,1删除)', \ `ader_flag` tinyint(3) NOT NULL DEFAULT '1' COMMENT '1开启,2暂停', \ `ader_auth_status` tinyint(3) DEFAULT '0' COMMENT '内审状态0待审核,1审核通过,2审核拒绝', \ `ader_audit_status` tinyint(4) NOT NULL DEFAULT '0' COMMENT '外审状态(0待审核,1通过,2拒绝,3部分通过)', \ `camp_name` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '计划名称', \ `camp_status` tinyint(3) NOT NULL DEFAULT '0' COMMENT '计划状态(0正常,1删除)', \ `camp_flag` tinyint(3) NOT NULL DEFAULT '1' COMMENT '1开启,2暂停', \ `camp_is_status` tinyint(3) DEFAULT '1' COMMENT '操作来源:1web,2曝光到量,3点击到量,4到期,5程序开启,6日预算到量,7周预算到量,8广告主余额不足', \ `camp_start_date` varchar(20) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '开始时间', \ `camp_end_date` varchar(20) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '结束日期', \ `camp_launch_type` tinyint(4) NOT NULL DEFAULT '1' COMMENT '投放类型 1 尽快投放 2 匀速投放', \ `camp_settlement_type` tinyint(4) DEFAULT '1' COMMENT '结算类型:1CPM,2CPC', \ `camp_settlement_price` decimal(10,2) DEFAULT '0.00' COMMENT '结算价格(元)', \ `camp_budget_type` tinyint(4) DEFAULT '0' COMMENT '预算控制(0不限,1周预算,2日预算)', \ `camp_budget_num` decimal(20,2) DEFAULT '0.00' COMMENT '预算金额', \ `camp_frequency_type` tinyint(4) DEFAULT '0' COMMENT '频次控制:(0不限,1周期内单人曝光不超过,2每天单人曝光不超过,3每小时单人曝光不超过', \ `camp_frequency_num` int(11) DEFAULT '0' COMMENT '频次', \ `camp_imp_limit` int(11) DEFAULT '0' COMMENT '曝光上限0表示不限', \ `camp_click_limit` int(11) DEFAULT '0' COMMENT '点击上限0表示不限', \ `camp_orient_time` text COLLATE utf8mb4_unicode_ci COMMENT '时段定向', \ `camp_orient_aear` text COLLATE utf8mb4_unicode_ci COMMENT '地域定向', \ `camp_orient_network` text COLLATE utf8mb4_unicode_ci COMMENT '网络定向(wifi,5G,4G,3G,2G,gprs)', \ `camp_orient_system` text COLLATE utf8mb4_unicode_ci COMMENT '系统定向(android,ios,windows,mac OS,other)', \ `camp_orient_plat` text COLLATE utf8mb4_unicode_ci COMMENT '渠道定向', \ `camp_orient_media` text COLLATE utf8mb4_unicode_ci COMMENT '媒体定向', \ `camp_orient_adslot` text COLLATE utf8mb4_unicode_ci COMMENT '广告位定向', \ `camp_orient_media_crowd` text COLLATE utf8mb4_unicode_ci COMMENT '媒体人群定向,json字符串', \ `camp_orient_interest` text COLLATE utf8mb4_unicode_ci COMMENT '兴趣定向', \ `ctv_width` int(11) NOT NULL COMMENT '宽', \ `ctv_height` int(11) NOT NULL COMMENT '高', \ `ctv_type` varchar(50) COLLATE utf8mb4_unicode_ci NOT NULL COMMENT '创意类型(1banner,2video,3native)', \ `ctv_style_id` int(11) DEFAULT '1' COMMENT '广告素材类型', \ `ctv_link_type` tinyint(4) NOT NULL DEFAULT '1' COMMENT '1:H5,2:App下载,3:应用直达', \ `ctv_size` int(11) NOT NULL COMMENT '素材大小', \ `ctv_deep_link` text COLLATE utf8mb4_unicode_ci COMMENT 'link_type为1:空。为2:iOS下载地址。为3:json对象包含落地页和唤醒地址', \ `ctv_first_category` int(8) DEFAULT NULL COMMENT '一级分类', \ `ctv_second_category` int(8) DEFAULT NULL COMMENT '二级分类', \ `ctv_label` text COLLATE utf8mb4_unicode_ci COMMENT '标签标识', \ `ctv_remark` text COLLATE utf8mb4_unicode_ci COMMENT '备注', \ KEY `idx1` (`log_day`,`log_creativeid`,`log_kdgrequest_device_ifa`(191),`log_kdgrequest_device_imeimd5`(191)), \ KEY `idx2` (`log_day`,`log_kdgrequest_device_ifa`(191)), \ KEY `idx3` (`log_day`,`log_kdgrequest_device_imeimd5`(191)), \ KEY `idx4` (`log_day`,`log_creativeid`) \ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; " % dt mysql = ms.MyPymysqlPool("dbMysql") try: mysql.insert(sql) except Exception as e: print('Error:', e) finally: mysql.dispose()
def insert_dsp_train_base(dt): log_day = dt[0:4] + '-' + dt[4:6] + '-' + dt[6:8] # 若存在该天数据,则首先删除 sql = "delete from dspbase.dsp_ctr_train where log_day=\'%s\'" % (log_day) mysql = ms.MyPymysqlPool("dbMysql") try: mysql.delete(sql) except Exception as e: print('Error:', e) finally: mysql.dispose() # 向训练表中插入有点击的数据 sql = "insert into dspbase.dsp_ctr_train \ (log_token \ ,log_dt \ ,log_day \ ,log_hour \ ,log_clk_flag \ ,log_advertiserid \ ,log_campaignid \ ,log_creativeid \ ,log_mediashowid \ ,log_media \ ,log_channel \ ,log_mcf \ ,log_mcs \ ,log_kdgrequest_device_devicetype \ ,log_kdgrequest_device_make \ ,log_kdgrequest_device_model \ ,log_kdgrequest_device_os \ ,log_kdgrequest_device_carrier \ ,log_kdgrequest_device_connectiontype \ ,log_kdgrequest_device_ifa \ ,log_kdgrequest_device_imeimd5 \ ,log_adspace \ ,log_adunit \ ,log_adtype \ ,log_adsize \ ,log_region_country \ ,log_region_province \ ,log_region_city \ ,ader_brand \ ,ader_secondindustry \ ,camp_settlement_price \ ,ctv_size \ ,ctv_label \ ,ctv_type \ ,ctv_style_id \ ,ctv_link_type \ ,ctv_second_category \ ,ctv_remark \ ,truncated_ifa \ ,ifa_cross_ctv \ ,ifa_cross_ad \ ,ifa_cross_campaign \ ,ifa_cross_ctv_style \ ,ifa_cross_ader_brand \ ,ifa_cross_ader_secondindustry \ ,ifa_cross_ctv_second_category \ ,truncated_imei \ ,imei_cross_ctv \ ,imei_cross_ad \ ,imei_cross_campaign \ ,imei_cross_ctv_style \ ,imei_cross_ader_brand \ ,imei_cross_ader_secondindustry \ ,imei_cross_ctv_second_category \ ,adsp_last7days_ctr \ ,adsp_yesterday_ctr \ ,adsp_last7days_clk_cnt \ ,adsp_yesterday_clk_cnt \ ,ader_last7days_ctr \ ,ader_yesterday_ctr \ ,ader_last7days_clk_cnt \ ,ader_yesterday_clk_cnt \ ,ctv_last7days_ctr \ ,ctv_yesterday_ctr \ ,ctv_last7days_clk_cnt \ ,ctv_yesterday_clk_cnt \ ,media_last7days_ctr \ ,media_yesterday_ctr \ ,media_last7days_clk_cnt \ ,media_yesterday_clk_cnt \ ,city_last7days_ctr \ ,city_yesterday_ctr \ ,city_last7days_clk_cnt \ ,city_yesterday_clk_cnt \ ,ifa_last7days_ctr \ ,ifa_yesterday_ctr \ ,ifa_last7days_clk_cnt \ ,ifa_yesterday_clk_cnt \ ,imei_last7days_ctr \ ,imei_yesterday_ctr \ ,imei_last7days_clk_cnt \ ,imei_yesterday_clk_cnt \ ) \ select \ t1.log_token \ ,t1.log_dt \ ,t1.log_day \ ,t1.log_hour \ ,t1.log_clk_flag \ ,t1.log_advertiserid \ ,t1.log_campaignid \ ,t1.log_creativeid \ ,t1.log_mediashowid \ ,t1.log_media \ ,t1.log_channel \ ,t1.log_mcf \ ,t1.log_mcs \ ,cast(t1.log_kdgrequest_device_devicetype as char(50)) \ ,t1.log_kdgrequest_device_make \ ,t1.log_kdgrequest_device_model \ ,t1.log_kdgrequest_device_os \ ,cast(t1.log_kdgrequest_device_carrier as char(50)) \ ,cast(t1.log_kdgrequest_device_connectiontype as char(50)) \ ,t1.log_kdgrequest_device_ifa \ ,t1.log_kdgrequest_device_imeimd5 \ ,t1.log_adspace \ ,t1.log_adunit \ ,t1.log_adtype \ ,cast(t1.log_adtype as char(50)) \ ,t8.country \ ,t8.province \ ,t8.city \ ,t1.ader_brand \ ,t1.ader_secondindustry \ ,t1.camp_settlement_price \ ,t1.ctv_size \ ,t1.ctv_label \ ,t1.ctv_type \ ,cast(t1.ctv_style_id as char(50)) \ ,cast(t1.ctv_link_type as char(50)) \ ,cast(t1.ctv_second_category as char(50)) \ ,t1.ctv_remark \ ,right(t1.log_kdgrequest_device_ifa,2) \ ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.log_creativeid) \ ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.log_adunit) \ ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.log_campaignid) \ ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.ctv_style_id) \ ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.ader_brand) \ ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.ader_secondindustry) \ ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.ctv_second_category) \ ,right(t1.log_kdgrequest_device_imeimd5,2) \ ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.log_creativeid) \ ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.log_adunit) \ ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.log_campaignid) \ ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.ctv_style_id) \ ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.ader_brand) \ ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.ader_secondindustry) \ ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.ctv_second_category) \ ,t22.click_cnt_week/t22.impression_cnt_week \ ,t22.click_cnt/t22.impression_cnt \ ,t22.click_cnt_week \ ,t22.click_cnt \ ,t2.click_cnt_week/t2.impression_cnt_week \ ,t2.click_cnt/t2.impression_cnt \ ,t2.click_cnt_week \ ,t2.click_cnt \ ,t3.click_cnt_week/t3.impression_cnt_week \ ,t3.click_cnt/t3.impression_cnt \ ,t3.click_cnt_week \ ,t3.click_cnt \ ,t4.click_cnt_week/t4.impression_cnt_week \ ,t4.click_cnt/t4.impression_cnt \ ,t4.click_cnt_week \ ,t4.click_cnt \ ,t5.click_cnt_week/t5.impression_cnt_week \ ,t5.click_cnt/t5.impression_cnt \ ,t5.click_cnt_week \ ,t5.click_cnt \ ,t6.click_cnt_week/t6.impression_cnt_week \ ,t6.click_cnt/t6.impression_cnt \ ,t6.click_cnt_week \ ,t6.click_cnt \ ,t7.click_cnt_week/t7.impression_cnt_week \ ,t7.click_cnt/t7.impression_cnt \ ,t7.click_cnt_week \ ,t7.click_cnt \ from dspbase.dsp_ctr_daily_merge%s t1 \ left join dspbase.dsp_ctr_stat_adspace t22 \ on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t22.log_day \ and t1.log_adspace = t22.log_adspace \ left join dspbase.dsp_ctr_stat_advertiser t2 \ on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t2.log_day \ and t1.log_advertiserid = t2.log_advertiserid \ left join dspbase.dsp_ctr_stat_creative t3 \ on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t3.log_day \ and t1.log_creativeid = t3.log_creativeid \ left join dspbase.dsp_ctr_stat_media t4 \ on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t4.log_day \ and t1.log_media = t4.log_media \ left join dspbase.dsp_ctr_stat_city t5 \ on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t5.log_day \ and t1.log_region = t5.log_region_city \ left join dspbase.dsp_ctr_stat_ifa t6 \ on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t6.log_day \ and t1.log_kdgrequest_device_ifa = t6.log_kdgrequest_device_ifa \ left join dspbase.dsp_ctr_stat_imei t7 \ on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t7.log_day \ and t1.log_kdgrequest_device_imeimd5 = t7.log_kdgrequest_device_imeimd5 \ left join dspbase.dsp_config_city t8 \ on t1.log_region = t8.city \ where t1.log_istest=0 \ and locate('测试',t1.ader_brand)=0 \ and t1.log_adNormalType=0 \ and t1.log_clk_flag=1;" % (dt) mysql = ms.MyPymysqlPool("dbMysql") try: mysql.insert(sql) except Exception as e: print('Error:', e) finally: mysql.dispose() # 有曝光无点击的数据,下采样,倍率为0.025 sql1 = "select cast((select count(*)*0.025 from dspbase.dsp_ctr_daily_merge%s where log_clk_flag=0) as signed);" % (dt) mysql = ms.MyPymysqlPool("dbMysql") try: num = mysql.getOne(sql1) except Exception as e: print('Error:', e) finally: mysql.dispose() num = list(num.values())[0] # 向训练表中插入下采样数据 sql2 = "insert into dspbase.dsp_ctr_train \ (log_token \ ,log_dt \ ,log_day \ ,log_hour \ ,log_clk_flag \ ,log_advertiserid \ ,log_campaignid \ ,log_creativeid \ ,log_mediashowid \ ,log_media \ ,log_channel \ ,log_mcf \ ,log_mcs \ ,log_kdgrequest_device_devicetype \ ,log_kdgrequest_device_make \ ,log_kdgrequest_device_model \ ,log_kdgrequest_device_os \ ,log_kdgrequest_device_carrier \ ,log_kdgrequest_device_connectiontype \ ,log_kdgrequest_device_ifa \ ,log_kdgrequest_device_imeimd5 \ ,log_adspace \ ,log_adunit \ ,log_adtype \ ,log_adsize \ ,log_region_country \ ,log_region_province \ ,log_region_city \ ,ader_brand \ ,ader_secondindustry \ ,camp_settlement_price \ ,ctv_size \ ,ctv_label \ ,ctv_type \ ,ctv_style_id \ ,ctv_link_type \ ,ctv_second_category \ ,ctv_remark \ ,truncated_ifa \ ,ifa_cross_ctv \ ,ifa_cross_ad \ ,ifa_cross_campaign \ ,ifa_cross_ctv_style \ ,ifa_cross_ader_brand \ ,ifa_cross_ader_secondindustry \ ,ifa_cross_ctv_second_category \ ,truncated_imei \ ,imei_cross_ctv \ ,imei_cross_ad \ ,imei_cross_campaign \ ,imei_cross_ctv_style \ ,imei_cross_ader_brand \ ,imei_cross_ader_secondindustry \ ,imei_cross_ctv_second_category \ ,adsp_last7days_ctr \ ,adsp_yesterday_ctr \ ,adsp_last7days_clk_cnt \ ,adsp_yesterday_clk_cnt \ ,ader_last7days_ctr \ ,ader_yesterday_ctr \ ,ader_last7days_clk_cnt \ ,ader_yesterday_clk_cnt \ ,ctv_last7days_ctr \ ,ctv_yesterday_ctr \ ,ctv_last7days_clk_cnt \ ,ctv_yesterday_clk_cnt \ ,media_last7days_ctr \ ,media_yesterday_ctr \ ,media_last7days_clk_cnt \ ,media_yesterday_clk_cnt \ ,city_last7days_ctr \ ,city_yesterday_ctr \ ,city_last7days_clk_cnt \ ,city_yesterday_clk_cnt \ ,ifa_last7days_ctr \ ,ifa_yesterday_ctr \ ,ifa_last7days_clk_cnt \ ,ifa_yesterday_clk_cnt \ ,imei_last7days_ctr \ ,imei_yesterday_ctr \ ,imei_last7days_clk_cnt \ ,imei_yesterday_clk_cnt \ ) \ select \ t1.log_token \ ,t1.log_dt \ ,t1.log_day \ ,t1.log_hour \ ,t1.log_clk_flag \ ,t1.log_advertiserid \ ,t1.log_campaignid \ ,t1.log_creativeid \ ,t1.log_mediashowid \ ,t1.log_media \ ,t1.log_channel \ ,t1.log_mcf \ ,t1.log_mcs \ ,cast(t1.log_kdgrequest_device_devicetype as char(50)) \ ,t1.log_kdgrequest_device_make \ ,t1.log_kdgrequest_device_model \ ,t1.log_kdgrequest_device_os \ ,cast(t1.log_kdgrequest_device_carrier as char(50)) \ ,cast(t1.log_kdgrequest_device_connectiontype as char(50)) \ ,t1.log_kdgrequest_device_ifa \ ,t1.log_kdgrequest_device_imeimd5 \ ,t1.log_adspace \ ,t1.log_adunit \ ,t1.log_adtype \ ,cast(t1.log_adtype as char(50)) \ ,t8.country \ ,t8.province \ ,t8.city \ ,t1.ader_brand \ ,t1.ader_secondindustry \ ,t1.camp_settlement_price \ ,t1.ctv_size \ ,t1.ctv_label \ ,t1.ctv_type \ ,cast(t1.ctv_style_id as char(50)) \ ,cast(t1.ctv_link_type as char(50)) \ ,cast(t1.ctv_second_category as char(50)) \ ,t1.ctv_remark \ ,right(t1.log_kdgrequest_device_ifa,2) \ ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.log_creativeid) \ ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.log_adunit) \ ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.log_campaignid) \ ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.ctv_style_id) \ ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.ader_brand) \ ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.ader_secondindustry) \ ,concat(right(t1.log_kdgrequest_device_ifa,2), t1.ctv_second_category) \ ,right(t1.log_kdgrequest_device_imeimd5,2) \ ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.log_creativeid) \ ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.log_adunit) \ ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.log_campaignid) \ ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.ctv_style_id) \ ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.ader_brand) \ ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.ader_secondindustry) \ ,concat(right(t1.log_kdgrequest_device_imeimd5,2), t1.ctv_second_category) \ ,t22.click_cnt_week/t22.impression_cnt_week \ ,t22.click_cnt/t22.impression_cnt \ ,t22.click_cnt_week \ ,t22.click_cnt \ ,t2.click_cnt_week/t2.impression_cnt_week \ ,t2.click_cnt/t2.impression_cnt \ ,t2.click_cnt_week \ ,t2.click_cnt \ ,t3.click_cnt_week/t3.impression_cnt_week \ ,t3.click_cnt/t3.impression_cnt \ ,t3.click_cnt_week \ ,t3.click_cnt \ ,t4.click_cnt_week/t4.impression_cnt_week \ ,t4.click_cnt/t4.impression_cnt \ ,t4.click_cnt_week \ ,t4.click_cnt \ ,t5.click_cnt_week/t5.impression_cnt_week \ ,t5.click_cnt/t5.impression_cnt \ ,t5.click_cnt_week \ ,t5.click_cnt \ ,t6.click_cnt_week/t6.impression_cnt_week \ ,t6.click_cnt/t6.impression_cnt \ ,t6.click_cnt_week \ ,t6.click_cnt \ ,t7.click_cnt_week/t7.impression_cnt_week \ ,t7.click_cnt/t7.impression_cnt \ ,t7.click_cnt_week \ ,t7.click_cnt \ from dspbase.dsp_ctr_daily_merge%s t1 \ left join dspbase.dsp_ctr_stat_adspace t22 \ on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t22.log_day \ and t1.log_adspace = t22.log_adspace \ left join dspbase.dsp_ctr_stat_advertiser t2 \ on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t2.log_day \ and t1.log_advertiserid = t2.log_advertiserid \ left join dspbase.dsp_ctr_stat_creative t3 \ on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t3.log_day \ and t1.log_creativeid = t3.log_creativeid \ left join dspbase.dsp_ctr_stat_media t4 \ on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t4.log_day \ and t1.log_media = t4.log_media \ left join dspbase.dsp_ctr_stat_city t5 \ on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t5.log_day \ and t1.log_region = t5.log_region_city \ left join dspbase.dsp_ctr_stat_ifa t6 \ on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t6.log_day \ and t1.log_kdgrequest_device_ifa = t6.log_kdgrequest_device_ifa \ left join dspbase.dsp_ctr_stat_imei t7 \ on DATE_SUB(t1.log_day, INTERVAL 1 DAY) = t7.log_day \ and t1.log_kdgrequest_device_imeimd5 = t7.log_kdgrequest_device_imeimd5 \ left join dspbase.dsp_config_city t8 \ on t1.log_region = t8.city \ where t1.log_istest=0 \ and locate('测试',t1.ader_brand)=0 \ and t1.log_adNormalType=0 \ and t1.log_clk_flag=0 order by rand() limit %d;" % (dt, num) mysql = ms.MyPymysqlPool("dbMysql") try: mysql.insert(sql2) except Exception as e: print('Error:', e) finally: mysql.dispose()
def load_file_data(tablename_question_stem, tablename_question_textbook, tablename_question_ukt, path_textbook, subject_type, textbook_name, skipfirstrow=True): """ 读取数据表中的文件路径,加载文件数据 """ mysql = msc.MyPymysqlPool("dbMysql") # 读取数据表中的目录名 sql0 = ''.join(['select file_id,file_name from ', tablename_question_ukt, \ " t1 inner join ", tablename_question_textbook, " t2 on t1.textbook_id=t2.textbook_id" " where t2.textbook_name='", textbook_name , "' and t2.subject_type='", subject_type, \ "' order by file_id;", ]) rstData = mysql.getAll(sql0) if rstData: dataSet = pd.DataFrame(list(list(x.values()) for x in rstData)) init_sql = ''.join(['insert into ', tablename_question_stem, \ "(file_id,row_no,question_stem,question_options,difficulty,image_filename,url,load_time)" \ " values(str_file_id, str_row_no, 'str_question_stem', " \ "'str_question_options', str_difficulty, 'str_image_filename', " \ "'str_url', CURRENT_TIMESTAMP());", ]) for i in range(dataSet.shape[0]): str_file_id = str(dataSet.iloc[i][0]) path_file_name = path_textbook + '/' + str(dataSet.iloc[i][1]) # 读取文件内容,形如:[',5千克铁的质量大于5000克棉花的质量.( \xa0 ),', '', '难度: 0.47', '*.png'] # mysql.begin() # 开启事务 with open(path_file_name, 'r') as f: reader = csv.reader(f) row_no = 1 for line in reader: if skipfirstrow and row_no == 1: # 跳过首行 row_no += 1 continue str_question_stem = line[0].strip(',').replace( '。,', '。').replace('\'', '"') str_question_options = line[1].replace('[', '').replace( ']', '').replace('\'', '').replace('、,', '、').strip(',') str_difficulty = line[2].strip('难度: ') str_image_filename = line[3].strip(' ') str_url = line[4].strip(' ') # print(line, str_question_stem, str_option, str_difficulty, str_image_filename, str_url) # 插入文件名关联数据 sql = init_sql.replace('str_file_id', str_file_id) sql = sql.replace('str_row_no', str(row_no)) sql = sql.replace('str_question_stem', str_question_stem) sql = sql.replace('str_question_options', str_question_options) sql = sql.replace('str_difficulty', str_difficulty) sql = sql.replace('str_image_filename', str_image_filename) sql = sql.replace('str_url', str_url) try: # if str_file_id == '2113' and row_no == 2: # print('path_file_name1: ', path_file_name, str_file_id, '-', row_no) # print(sql) mysql.insert(sql) except Exception: print('path_file_name: ', path_file_name, str_file_id, '-', row_no) print(sql) row_no += 1 f.close() mysql.end() # 结束提交 mysql.dispose()
def update_deviceid_stat(dt): log_day = dt[0:4] + '-' + dt[4:6] + '-' + dt[6:8] # 若存在该天数据,则首先删除 sql = "delete from dspbase.dsp_ctr_stat_deviceid where log_day=\'%s\'" % ( log_day) mysql = ms.MyPymysqlPool("dbMysql") try: mysql.delete(sql) except Exception as e: print('Error:', e) finally: mysql.dispose() # 插入曝光、点击统计值 sql1 = "insert into dspbase.dsp_ctr_stat_deviceid \ (log_day,log_kdgrequest_deviceid,impression_cnt,click_cnt) \ select log_day,log_kdgrequest_deviceid,count(*),sum(log_clk_flag) \ from dspbase.dsp_ctr_daily_merge%s \ where log_day=\'%s\' and log_kdgrequest_deviceid is not null \ group by log_day,log_kdgrequest_deviceid;" % (dt, log_day) mysql = ms.MyPymysqlPool("dbMysql") try: mysql.insert(sql1) except Exception as e: print('Error:', e) finally: mysql.dispose() # 删除临时表dsp_ctr_stat_ifa_tmp中数据 sql2 = "delete from dspbase.dsp_ctr_stat_deviceid_tmp where log_day=\'%s\'" % ( log_day) mysql = ms.MyPymysqlPool("dbMysql") try: mysql.delete(sql2) except Exception as e: print('Error:', e) finally: mysql.dispose() # 向临时表中插入统计数据 sql3 = "insert into dspbase.dsp_ctr_stat_deviceid_tmp \ select \'%s\',log_kdgrequest_deviceid,sum(impression_cnt),sum(click_cnt) \ from dspbase.dsp_ctr_stat_deviceid \ where log_day between DATE_SUB(\'%s\', INTERVAL 6 DAY) and \'%s\' \ and log_kdgrequest_deviceid in \ (select distinct log_kdgrequest_deviceid \ from dspbase.dsp_ctr_daily_merge%s) \ group by log_kdgrequest_deviceid;" % (log_day, log_day, log_day, dt) mysql = ms.MyPymysqlPool("dbMysql") try: mysql.insert(sql3) except Exception as e: print('Error:', e) finally: mysql.dispose() # 更新impression_cnt_week、click_cnt_week字段 sql4 = "update dspbase.dsp_ctr_stat_deviceid t1 \ left join dspbase.dsp_ctr_stat_deviceid_tmp t2 \ on t1.log_day=t2.log_day \ and t1.log_kdgrequest_deviceid=t2.log_kdgrequest_deviceid \ set t1.impression_cnt_week=t2.impression_cnt_week \ ,t1.click_cnt_week=t2.click_cnt_week \ where t1.log_day=\'%s\';" % (log_day) mysql = ms.MyPymysqlPool("dbMysql") try: mysql.insert(sql4) except Exception as e: print('Error:', e) finally: mysql.dispose() # 清空临时表dsp_ctr_stat_ifa_tmp sql5 = "truncate table dspbase.dsp_ctr_stat_deviceid_tmp" mysql = ms.MyPymysqlPool("dbMysql") try: mysql.insert(sql5) except Exception as e: print('Error:', e) finally: mysql.dispose()
def update_table_minmax(): # 若存在该天数据,则首先删除 sql = "delete from dspbase.dsp_ctr_train_minmax where log_day=\'%s\'" % (log_day) mysql = ms.MyPymysqlPool("dbMysql") try: mysql.delete(sql) except Exception as e: print('Error:', e) finally: mysql.dispose() sql = "insert into dspbase.dsp_ctr_train_minmax select \'{0}\',{1},{2},{3},{4},{5},{6},{7},{8},{9},{10}\ ,{11},{12},{13},{14},{15},{16},{17},{18},{19},{20},{21},{22},{23},{24},{25},{26},{27},{28},{29}\ ,{30},{31},{32},{33},{34},{35},{36},{37},{38},{39},{40},{41},{42},{43},{44},{45},{46},{47},{48},{49},{50}\ ,{51},{52},{53},{54},{55},{56},{57},{58},{59},{60}".format(log_day , df['camp_settlement_price'].min() , df['ctv_size'].min() , df['adsp_last7days_ctr'].min() , df['adsp_yesterday_ctr'].min() , df['adsp_last7days_clk_cnt'].min() , df['adsp_yesterday_clk_cnt'].min() , df['ader_last7days_ctr'].min() , df['ader_yesterday_ctr'].min() , df['ader_last7days_clk_cnt'].min() , df['ader_yesterday_clk_cnt'].min() , df['ctv_last7days_ctr'].min() , df['ctv_yesterday_ctr'].min() , df['ctv_last7days_clk_cnt'].min() , df['ctv_yesterday_clk_cnt'].min() , df['media_last7days_ctr'].min() , df['media_yesterday_ctr'].min() , df['media_last7days_clk_cnt'].min() , df['media_yesterday_clk_cnt'].min() , df['city_last7days_ctr'].min() , df['city_yesterday_ctr'].min() , df['city_last7days_clk_cnt'].min() , df['city_yesterday_clk_cnt'].min() , df['ifa_last7days_ctr'].min() , df['ifa_yesterday_ctr'].min() , df['ifa_last7days_clk_cnt'].min() , df['ifa_yesterday_clk_cnt'].min() , df['imei_last7days_ctr'].min() , df['imei_yesterday_ctr'].min() , df['imei_last7days_clk_cnt'].min() , df['imei_yesterday_clk_cnt'].min() , df['camp_settlement_price'].max() , df['ctv_size'].max() , df['adsp_last7days_ctr'].max() , df['adsp_yesterday_ctr'].max() , df['adsp_last7days_clk_cnt'].max() , df['adsp_yesterday_clk_cnt'].max() , df['ader_last7days_ctr'].max() , df['ader_yesterday_ctr'].max() , df['ader_last7days_clk_cnt'].max() , df['ader_yesterday_clk_cnt'].max() , df['ctv_last7days_ctr'].max() , df['ctv_yesterday_ctr'].max() , df['ctv_last7days_clk_cnt'].max() , df['ctv_yesterday_clk_cnt'].max() , df['media_last7days_ctr'].max() , df['media_yesterday_ctr'].max() , df['media_last7days_clk_cnt'].max() , df['media_yesterday_clk_cnt'].max() , df['city_last7days_ctr'].max() , df['city_yesterday_ctr'].max() , df['city_last7days_clk_cnt'].max() , df['city_yesterday_clk_cnt'].max() , df['ifa_last7days_ctr'].max() , df['ifa_yesterday_ctr'].max() , df['ifa_last7days_clk_cnt'].max() , df['ifa_yesterday_clk_cnt'].max() , df['imei_last7days_ctr'].max() , df['imei_yesterday_ctr'].max() , df['imei_last7days_clk_cnt'].max() , df['imei_yesterday_clk_cnt'].max()) mysql = ms.MyPymysqlPool("dbMysql") try: mysql.insert(sql) except Exception as e: print('Error:', e) finally: mysql.dispose()
def update_advertiser_stat(dt): log_day = dt[0:4] + '-' + dt[4:6] + '-' + dt[6:8] # 若存在该天数据,则首先删除 sql = "delete from dspbase.dsp_ctr_stat_advertiser where log_day=\'%s\'" % ( log_day) mysql = ms.MyPymysqlPool("dbMysql") try: mysql.delete(sql) except Exception as e: print('Error:', e) finally: mysql.dispose() # 插入曝光、点击统计值 sql1 = "insert into dspbase.dsp_ctr_stat_advertiser \ (log_day,log_advertiserid,impression_cnt,click_cnt) \ select log_day,log_advertiserid,count(*),sum(log_clk_flag) \ from dspbase.dsp_ctr_daily_merge%s \ where log_day=\'%s\' \ group by log_day,log_advertiserid;" % (dt, log_day) mysql = ms.MyPymysqlPool("dbMysql") try: mysql.insert(sql1) except Exception as e: print('Error:', e) finally: mysql.dispose() sql12 = "insert into dspbase.dsp_ctr_stat_advertiser(log_day,log_advertiserid,impression_cnt,click_cnt) \ select \'%s\',t1.log_advertiserid,0,0 from dspbase.dsp_ctr_stat_advertiser t1 \ left join dspbase.dsp_ctr_stat_advertiser t2 \ on t2.log_day=\'%s\' \ and t1.log_advertiserid=t2.log_advertiserid \ where t1.log_day between DATE_SUB(\'%s\', INTERVAL 7 DAY) and DATE_SUB(\'%s\', INTERVAL 1 DAY) \ and t2.log_advertiserid is null \ and t1.impression_cnt_week is not null \ group by t1.log_advertiserid;" % (log_day, log_day, log_day, log_day) mysql = ms.MyPymysqlPool("dbMysql") try: mysql.insert(sql12) except Exception as e: print('Error:', e) finally: mysql.dispose() # 更新impression_cnt_week、click_cnt_week字段 sql2 = "update dspbase.dsp_ctr_stat_advertiser t0 \ INNER JOIN \ (select t1.log_day, t1.log_advertiserid \ ,sum(t2.impression_cnt) as impression_cnt_week \ ,sum(t2.click_cnt) as click_cnt_week \ from dspbase.dsp_ctr_stat_advertiser t1 \ left join dspbase.dsp_ctr_stat_advertiser t2 \ on t2.log_day between DATE_SUB(t1.log_day, INTERVAL 6 DAY) \ and t1.log_day and t1.log_advertiserid=t2.log_advertiserid \ where t1.log_day=\'%s\' \ group by t1.log_day, t1.log_advertiserid) t3 \ on t0.log_day=t3.log_day and t0.log_advertiserid=t3.log_advertiserid \ set t0.impression_cnt_week=t3.impression_cnt_week \ ,t0.click_cnt_week=t3.click_cnt_week \ where t0.log_day=\'%s\';" % (log_day, log_day) mysql = ms.MyPymysqlPool("dbMysql") try: mysql.insert(sql2) except Exception as e: print('Error:', e) finally: mysql.dispose()
def insert_merge(dt): # 转换日期格式:20180906 -> 2018-09-06 log_day = dt[0:4] + '-' + dt[4:6] + '-' + dt[6:8] # 插入数据 sql = "insert into dspbase.dsp_ctr_daily_merge%s \ (log_dt \ ,log_day \ ,log_hour\ ,log_imp_flag \ ,log_clk_flag \ ,log_accountid \ ,log_advertiserid \ ,log_campaignid \ ,log_creativeid \ ,log_mediashowid \ ,log_channel \ ,log_devicetype \ ,log_adsize \ ,log_ctr \ ,log_media \ ,log_adspace \ ,log_adunit \ ,log_adtype \ ,log_istest \ ,log_ip \ ,log_mcf \ ,log_mcs \ ,log_region \ ,log_token \ ,log_adnormaltype \ ,log_mediaid \ ,log_platid \ ,log_incost \ ,log_outcost \ ,log_adcost \ ,log_status \ ,log_kdgrequest_imp_num \ ,log_kdgrequest_device_devicetype \ ,log_kdgrequest_device_make \ ,log_kdgrequest_device_model \ ,log_kdgrequest_device_os \ ,log_kdgrequest_device_h \ ,log_kdgrequest_device_w \ ,log_kdgrequest_device_carrier \ ,log_kdgrequest_device_connectiontype \ ,log_kdgrequest_device_ifa \ ,log_kdgrequest_device_imeimd5 \ ,ader_brand \ ,ader_firstindustry \ ,ader_secondindustry \ ,ader_servicerate \ ,camp_settlement_type \ ,camp_settlement_price \ ,camp_imp_limit \ ,camp_click_limit \ ,ctv_width \ ,ctv_height \ ,ctv_type \ ,ctv_style_id \ ,ctv_link_type \ ,ctv_size \ ,ctv_deep_link \ ,ctv_first_category \ ,ctv_second_category \ ,ctv_label \ ,ctv_remark \ ) \ select \ t0.dt \ ,left(t0.dt,10) \ ,right(left(t0.dt,13),2) \ ,t0.imp_flag \ ,t0.clk_flag \ ,t0.accountid \ ,t0.advertiserid \ ,t0.campaignid \ ,t0.creativeid \ ,t0.mediashowid \ ,t0.channel \ ,t0.devicetype \ ,t0.adsize \ ,t0.ctr \ ,t0.media \ ,t0.adspace \ ,t0.adunit \ ,t0.adtype \ ,t0.istest \ ,t0.ip \ ,t0.mcf \ ,t0.mcs \ ,t0.region \ ,t0.token \ ,t0.adnormaltype \ ,t0.mediaid \ ,t0.platid \ ,t0.incost \ ,t0.outcost \ ,t0.adcost \ ,t0.status \ ,t0.kdgrequest_imp_num \ ,t0.kdgrequest_device_devicetype \ ,t0.kdgrequest_device_make \ ,t0.kdgrequest_device_model \ ,t0.kdgrequest_device_os \ ,t0.kdgrequest_device_h \ ,t0.kdgrequest_device_w \ ,t0.kdgrequest_device_carrier \ ,t0.kdgrequest_device_connectiontype \ ,t0.kdgrequest_device_ifa \ ,t0.kdgrequest_device_imeimd5 \ ,t1.brand \ ,t1.firstindustry \ ,t1.secondindustry \ ,t1.servicerate \ ,t2.settlement_type \ ,t2.settlement_price \ ,t2.imp_limit \ ,t2.click_limit \ ,t3.width \ ,t3.height \ ,t3.type \ ,t3.style_id \ ,t3.link_type \ ,t3.size \ ,t3.deep_link \ ,t3.first_category \ ,t3.second_category \ ,t3.label \ ,t3.remark \ from dspbase.src_log_info_0907_0929 t0 \ left join dspbase.src_advertiser_info t1 on t0.advertiserid = t1.id \ left join dspbase.src_campaign t2 on t0.campaignid = t2.id \ left join dspbase.src_creative t3 on t0.creativeid = t3.id where imp_flag <> 0 \ and t0.dt like \'%s%%\';" % (dt, log_day) mysql = ms.MyPymysqlPool("dbMysql") try: mysql.insert(sql) # print(sql) except Exception as e: print('Error:', e) finally: mysql.dispose()
def insert_img_infos_db(values): sql = "insert into img_info values(%s,%s,%s,%s)" mysql = mysql_conn.MyPymysqlPool("dbMysql") mysql.insertMany(sql,values) mysql.dispose()
def fetch_data(table, dt_info, lmt=''): mysql = ms.MyPymysqlPool("dbMysql") sql = "select \ log_token\ ,log_clk_flag\ ,log_hour\ ,log_advertiserid\ ,log_campaignid\ ,log_creativeid\ ,log_mediashowid\ ,log_media\ ,log_channel\ ,log_mcf\ ,log_mcs\ ,log_kdgrequest_device_devicetype\ ,log_kdgrequest_device_make\ ,log_kdgrequest_device_model\ ,log_kdgrequest_device_os\ ,log_kdgrequest_device_carrier\ ,log_kdgrequest_device_connectiontype\ ,log_adspace\ ,log_adunit\ ,log_adtype\ ,log_adsize\ ,log_region_country\ ,log_region_province\ ,log_region_city\ ,ader_brand\ ,ader_secondindustry\ ,ctv_label\ ,ctv_type\ ,ctv_style_id\ ,ctv_second_category\ ,ctv_remark\ ,truncated_ifa\ ,truncated_imei\ ,ifa_cross_ctv\ ,ifa_cross_ad\ ,ifa_cross_campaign\ ,ifa_cross_ctv_style\ ,ifa_cross_ader_brand\ ,ifa_cross_ader_secondindustry\ ,ifa_cross_ctv_second_category\ ,imei_cross_ctv\ ,imei_cross_ad\ ,imei_cross_campaign\ ,imei_cross_ctv_style\ ,imei_cross_ader_brand\ ,imei_cross_ader_secondindustry\ ,imei_cross_ctv_second_category\ ,camp_settlement_price\ ,ctv_size\ ,ifnull(adsp_last7days_ctr,0)\ ,ifnull(adsp_yesterday_ctr,0)\ ,ifnull(adsp_last7days_clk_cnt,0)\ ,ifnull(adsp_yesterday_clk_cnt,0)\ ,ifnull(ader_last7days_ctr,0)\ ,ifnull(ader_yesterday_ctr,0)\ ,ifnull(ader_last7days_clk_cnt,0)\ ,ifnull(ader_yesterday_clk_cnt,0)\ ,ifnull(ctv_last7days_ctr,0)\ ,ifnull(ctv_yesterday_ctr,0)\ ,ifnull(ctv_last7days_clk_cnt,0)\ ,ifnull(ctv_yesterday_clk_cnt,0)\ ,ifnull(media_last7days_ctr,0)\ ,ifnull(media_yesterday_ctr,0)\ ,ifnull(media_last7days_clk_cnt,0)\ ,ifnull(media_yesterday_clk_cnt,0)\ ,ifnull(city_last7days_ctr,0)\ ,ifnull(city_yesterday_ctr,0)\ ,ifnull(city_last7days_clk_cnt,0)\ ,ifnull(city_yesterday_clk_cnt,0)\ ,ifnull(ifa_last7days_ctr,0)\ ,ifnull(ifa_yesterday_ctr,0)\ ,ifnull(ifa_last7days_clk_cnt,0)\ ,ifnull(ifa_yesterday_clk_cnt,0)\ ,ifnull(imei_last7days_ctr,0)\ ,ifnull(imei_yesterday_ctr,0)\ ,ifnull(imei_last7days_clk_cnt,0)\ ,ifnull(imei_yesterday_clk_cnt,0)\ from dspbase.%s where log_day between DATE_SUB(\'%s\', INTERVAL 30 DAY) and \'%s\' %s"\ % (table, dt_info, dt_info, lmt) rst = mysql.getAll(sql) if not rst: mysql.dispose() print(' No data!') exit() else: df = pd.DataFrame(list(list(x.values()) for x in rst)).fillna('') print(' %d rows data have been fetched.' % len(df)) mysql.dispose() return shuffle(df)
def fetchDataGDT(trans_tab, dt, indst_id, bill_evt, limit=''): mysql = ms.MyPymysqlPool("dbMysql") # sql = "SELECT \ # atv_flag \ # ,id \ # ,adg_ts_workday1 \ # ,adg_ts_workday2 \ # ,adg_ts_workday3 \ # ,adg_ts_workday4 \ # ,adg_ts_workday5 \ # ,adg_ts_workday6 \ # ,adg_ts_workday7 \ # ,adg_ts_weekend1 \ # ,adg_ts_weekend2 \ # ,adg_ts_weekend3 \ # ,adg_ts_weekend4 \ # ,adg_ts_weekend5 \ # ,adg_site_set \ # ,adg_product_type \ # ,adg_configured_status \ # ,ad_adcreative_name \ # ,ad_adcreative_name_game_name \ # ,ad_adcreative_name_material_type \ # ,ad_adcreative_name_material_effect \ # ,ad_adcreative_name_device_type \ # ,ad_adcreative_name_interest \ # ,ad_adcreative_name_behavior \ # ,ad_adcreative_name_paid \ # ,ade_image_hash \ # ,ade_title \ # ,ade_image2_hash \ # ,ade_image3_hash \ # ,ad_space \ # ,adc_form_size \ # ,adc_form_detail \ # ,adc_description \ # ,adg_bid_amount \ # ,log(adg_crt_days) \ # ,atv_ratio_bin11_hisratio_h \ # ,adc_drp1_1 \ # ,adc_drp1_2 \ # ,adc_drp1_3 \ # ,adc_drp1_4 \ # ,adc_drp1_5 \ # ,adc_drp1_6 \ # ,adc_drp1_7 \ # ,adc_drp1_8 \ # ,adc_drp1_9 \ # ,adc_drp1_10 \ # ,adc_drp1_11 \ # ,adc_drp1_12 \ # ,adc_drp1_13 \ # ,adc_drp1_14 \ # ,adc_drp1_15 \ # ,adc_drp1_16 \ # ,adc_drp1_17 \ # ,adc_drp1_18 \ # ,adc_drp1_19 \ # ,adc_drp1_20 \ # ,adc_drp1_21 \ # ,adc_drp1_22 \ # ,adc_drp1_23 \ # ,adc_drp1_24 \ # ,adc_drp1_25 \ # ,adc_drp1_26 \ # ,adc_drp1_27 \ # ,adc_drp1_28 \ # ,adc_drp1_29 \ # ,adc_drp1_30 \ # ,adc_drp1_31 \ # ,adc_drp1_32 \ # ,adc_drp1_33 \ # ,adc_drp1_34 \ # ,adc_drp1_35 \ # ,adc_drp1_36 \ # ,adc_drp1_37 \ # ,adc_drp1_38 \ # ,adc_drp2_1 \ # ,adc_drp2_2 \ # ,adc_drp2_3 \ # ,adc_drp2_4 \ # ,adc_drp2_5 \ # ,adc_drp2_6 \ # ,adc_drp2_7 \ # ,adc_drp2_8 \ # ,adc_drp2_9 \ # ,adc_drp3_1 \ # ,adc_drp3_2 \ # ,adc_drp3_3 \ # ,adc_drp3_4 \ # ,adc_drp3_5 \ # ,adc_drp3_6 \ # ,adc_drp3_7 \ # ,adc_drp3_8 \ # ,adc_drp3_9 \ # ,adc_drp3_10 \ # ,adc_drp3_11 \ # ,adc_drp3_12 \ # ,adc_drp3_13 \ # ,adc_drp3_14 \ # ,adc_drp3_15 \ # ,adc_drp3_16 \ # ,adc_drp3_17 \ # ,adc_drp3_18 \ # ,adc_drp3_19 \ # ,adc_drp3_20 \ # ,adc_drp3_21 \ # ,adc_drp3_22 \ # ,adc_drp3_23 \ # ,adc_drp3_24 \ # ,adc_drp3_25 \ # ,adc_drp3_26 \ # ,tag_age \ # ,tag_gender \ # ,tag_education \ # ,tag_relationship_status \ # ,tag_living_status \ # ,tag_business_interest \ # ,tag_location \ # ,tag_region \ # ,tag_gloc_location_types \ # ,tag_gloc_regions \ # ,tag_gloc_business_districts \ # ,tag_user_os \ # ,tag_new_device \ # ,tag_device_price \ # ,tag_network_type \ # ,tag_network_operator \ # ,tag_dressing_index \ # ,tag_uv_index \ # ,tag_makeup_index \ # ,tag_climate \ # ,tag_temperature \ # ,tag_app_install_status \ # ,tag_abh_object_type \ # ,tag_abh_object_id_list \ # ,tag_abh_time_window \ # ,tag_abh_act_id_list \ # ,tag_customized_audience \ # ,tag_shopping_capability \ # ,tag_player_consupt \ # ,tag_paying_user_type \ # ,tag_residential_community_price \ # ,tag_media_category_wechat \ # ,tag_ad_placement_id \ # ,tag_media_category_union \ # ,tag_qzone_fans \ # ,tag_online_scenario \ # ,tag_custom_audience \ # ,tag_boi_i_targeting_tags \ # ,concat(adg_bid_amount,ad_adcreative_name) \ # ,concat(adg_bid_amount,ad_adcreative_name,atv_ratio_bin11_hisratio_h) \ # ,concat(adg_bid_amount,atv_ratio_bin11_hisratio_h) \ # ,concat(adg_bid_amount,tag_gender) \ # ,concat(adg_bid_amount,tag_gloc_regions) \ # ,concat(adg_bid_amount,tag_gloc_regions,ad_space) \ # ,concat(adg_bid_amount,tag_gloc_regions,ad_space,adc_drp1_24) \ # ,concat(adg_bid_amount,tag_gloc_regions,adc_drp1_24) \ # ,concat(adg_bid_amount,adg_ts_workday1) \ # ,concat(adg_bid_amount,adg_ts_workday1,ad_adcreative_name) \ # ,concat(adg_bid_amount,adg_ts_workday1,tag_gender) \ # ,concat(adg_bid_amount,adg_ts_workday1,tag_gloc_regions) \ # ,concat(adg_bid_amount,adg_ts_workday1,tag_gloc_regions,ad_space) \ # ,concat(adg_bid_amount,adg_ts_workday1,tag_gloc_regions,ad_space,adc_drp1_24) \ # ,concat(adg_bid_amount,adg_ts_workday1,tag_gloc_regions,adc_drp1_24) \ # ,concat(ad_adcreative_name,atv_ratio_bin11_hisratio_h) \ # ,concat(ad_adcreative_name,tag_gender) \ # ,concat(ad_adcreative_name,tag_gender,atv_ratio_bin11_hisratio_h) \ # ,concat(tag_gloc_regions,ad_space) \ # ,concat(tag_gloc_regions,ad_space,adc_drp1_24) \ # ,concat(tag_gloc_regions,adc_drp1_24) \ # ,concat(adg_ts_workday1,ad_adcreative_name) \ # ,concat(adg_ts_workday1,ad_adcreative_name,atv_ratio_bin11_hisratio_h) \ # ,concat(adg_ts_workday1,ad_adcreative_name,tag_gender) \ # ,concat(adg_ts_workday1,atv_ratio_bin11_hisratio_h) \ # ,concat(adg_ts_workday1,tag_gender) \ # ,concat(adg_ts_workday1,tag_gloc_regions) \ # ,concat(adg_ts_workday1,tag_gloc_regions,ad_space) \ # ,concat(adg_ts_workday1,tag_gloc_regions,ad_space,adc_drp1_24) \ # ,concat(adg_ts_workday1,tag_gloc_regions,adc_drp1_24) \ # FROM %s \ # where dt in %s and ind_first_industry_id=%d and adg_billing_event='%s' \ # and id <= 100000000 %s " \ # % (trans_tab, dt, indst_id, bill_evt, limit) sql = "SELECT \ atv_flag \ ,id \ ,hour \ ,day_week \ ,day_type \ ,adg_ts_workday1 \ ,adg_ts_workday2 \ ,adg_ts_workday3 \ ,adg_ts_workday4 \ ,adg_ts_workday5 \ ,adg_ts_workday6 \ ,adg_ts_workday7 \ ,adg_ts_weekend1 \ ,adg_ts_weekend2 \ ,adg_ts_weekend3 \ ,adg_ts_weekend4 \ ,adg_ts_weekend5 \ ,adg_site_set \ ,adg_product_type \ ,ad_adcreative_name \ ,ade_image_hash \ ,ade_title \ ,ade_title_len \ ,ad_space \ ,adc_form_size \ ,adc_form_detail \ ,adc_description \ ,adg_bid_amount \ ,adg_atv_days \ ,ad_atv_days \ ,adgcrt_imp_hours \ ,adgcrt_last_atv_interval \ ,atv_ratio_bin11_hisratio_h \ ,atv_ratio_hisavg \ ,adc_drp1_4 \ ,adc_drp1_17 \ ,adc_drp1_23 \ ,adc_drp1_24 \ ,adc_drp1_31 \ ,adc_drp1_32 \ ,adc_drp1_33 \ ,adc_drp3_5 \ ,adc_drp3_16 \ ,adc_drp3_26 \ ,tag_age_0_15 \ ,tag_age_15_20 \ ,tag_age_20_25 \ ,tag_age_25_30 \ ,tag_age_30_35 \ ,tag_age_35_40 \ ,tag_age_40_45 \ ,tag_age_45_50 \ ,tag_age_50 \ ,tag_gender \ ,tag_business_interest \ ,tag_gloc_location_types \ ,tag_gloc_regions \ ,tag_network_type \ ,tag_app_install_status \ ,tag_abh_object_type \ ,tag_abh_object_id_list \ ,tag_abh_time_window \ ,tag_abh_act_id_list \ ,tag_customized_audience \ ,tag_player_consupt \ ,tag_paying_user_type \ ,concat(day_week,'\|',hour) \ ,concat(day_type,'\|',hour) \ ,concat(ifnull(adg_bid_amount,''),'\|',ifnull(ad_adcreative_name,'')) \ ,concat(ifnull(adg_bid_amount,''),'\|',ifnull(ad_adcreative_name,''),'\|',ifnull(atv_ratio_bin11_hisratio_h,'')) \ ,concat(ifnull(adg_bid_amount,''),'\|',ifnull(atv_ratio_bin11_hisratio_h,'')) \ ,concat(ifnull(adg_bid_amount,''),'\|',ifnull(tag_gender,'')) \ ,concat(ifnull(adg_bid_amount,''),'\|',ifnull(tag_gloc_regions,'')) \ ,concat(ifnull(adg_bid_amount,''),'\|',ifnull(tag_gloc_regions,''),'\|',ifnull(ad_space,'')) \ ,concat(ifnull(adg_bid_amount,''),'\|',ifnull(tag_gloc_regions,''),'\|',ifnull(ad_space,''),'\|',ifnull(adc_drp1_24,'')) \ ,concat(ifnull(adg_bid_amount,''),'\|',ifnull(tag_gloc_regions,''),'\|',ifnull(adc_drp1_24,'')) \ ,concat(ifnull(adg_bid_amount,''),'\|',ifnull(adg_ts_workday1,'')) \ ,concat(ifnull(adg_bid_amount,''),'\|',ifnull(adg_ts_workday1,''),'\|',ifnull(ad_adcreative_name,'')) \ ,concat(ifnull(adg_bid_amount,''),'\|',ifnull(adg_ts_workday1,''),'\|',ifnull(tag_gender,'')) \ ,concat(ifnull(adg_bid_amount,''),'\|',ifnull(adg_ts_workday1,''),'\|',ifnull(tag_gloc_regions,'')) \ ,concat(ifnull(adg_bid_amount,''),'\|',ifnull(adg_ts_workday1,''),'\|',ifnull(ifnull(tag_gloc_regions,''),''),'\|',ifnull(ad_space,'')) \ ,concat(ifnull(adg_bid_amount,''),'\|',ifnull(adg_ts_workday1,''),'\|',ifnull(tag_gloc_regions,''),'\|',ifnull(ad_space,''),'\|',ifnull(adc_drp1_24,'')) \ ,concat(ifnull(adg_bid_amount,''),'\|',ifnull(adg_ts_workday1,''),'\|',ifnull(tag_gloc_regions,''),'\|',ifnull(adc_drp1_24,'')) \ ,concat(ifnull(ad_adcreative_name,''),'\|',ifnull(atv_ratio_bin11_hisratio_h,'')) \ ,concat(ifnull(ad_adcreative_name,''),'\|',ifnull(tag_gender,'')) \ ,concat(ifnull(ad_adcreative_name,''),'\|',ifnull(tag_gender,''),'\|',ifnull(atv_ratio_bin11_hisratio_h,'')) \ ,concat(ifnull(tag_gloc_regions,''),'\|',ifnull(ad_space,'')) \ ,concat(ifnull(tag_gloc_regions,''),'\|',ifnull(ad_space,''),ifnull(adc_drp1_24,'')) \ ,concat(ifnull(tag_gloc_regions,''),'\|',ifnull(adc_drp1_24,'')) \ ,concat(ifnull(adg_ts_workday1,''),'\|',ifnull(ad_adcreative_name,'')) \ ,concat(ifnull(adg_ts_workday1,''),'\|',ifnull(ad_adcreative_name,''),'\|',ifnull(atv_ratio_bin11_hisratio_h,'')) \ ,concat(ifnull(adg_ts_workday1,''),'\|',ifnull(ad_adcreative_name,''),'\|',ifnull(tag_gender,'')) \ ,concat(ifnull(adg_ts_workday1,''),'\|',ifnull(atv_ratio_bin11_hisratio_h,'')) \ ,concat(ifnull(adg_ts_workday1,''),'\|',ifnull(tag_gender,'')) \ ,concat(ifnull(adg_ts_workday1,''),'\|',ifnull(tag_gloc_regions,'')) \ ,concat(ifnull(adg_ts_workday1,''),'\|',ifnull(tag_gloc_regions,''),'\|',ifnull(ad_space,'')) \ ,concat(ifnull(adg_ts_workday1,''),'\|',ifnull(tag_gloc_regions,''),'\|',ifnull(ad_space,''),'\|',ifnull(adc_drp1_24,'')) \ ,concat(ifnull(adg_ts_workday1,''),'\|',ifnull(tag_gloc_regions,''),'\|',ifnull(adc_drp1_24,'')) \ FROM %s \ where %s and ind_first_industry_id=%d and adg_billing_event='%s' \ and adg_site_set='tss_SITE_SET_MOBILE_INNER' and id <= 100000000 %s " \ % (trans_tab, dt, indst_id, bill_evt, limit) # print(sql) rst = mysql.getAll(sql) if not rst: mysql.dispose() # 释放资源 print(' No data!') return -1 else: # 训练模型不能有缺失值,补0 df = pd.DataFrame(list(list(x.values()) for x in rst)).fillna('0') print(' %d rows data have been fetched.' % len(df)) mysql.dispose() return df