def history_repeat_filter(bid_history_files, the_ctr_model, bid_model, candidate_paras, outfilename):
    '''
    Replay bid history for ad groups that have exactly one candidate parameter
    and copy the records the simulated bid would have won to ``outfilename``.

    A record is usable when it parses, its win price is positive and its
    (campaign_id, adgroup_id) is present in both ``candidate_paras`` and
    ``bid_model``.  Every usable record is added to the "old" totals; it is
    also added to the simulated totals, and its raw line written to the output
    file, when the single simulated bid is >= the recorded win price.  Both
    sets of totals are printed to stdout.

    paras:
        bid_history_files: list of join file paths
        the_ctr_model: CTR model, used through predict_ctr(feature_values)
        bid_model: dict keyed by (campaign_id, adgroup_id); values are used
            through get_bids_auc(pctr, paras)
        candidate_paras: dict keyed by (campaign_id, adgroup_id) holding the
            candidate parameter list; only entries of length 1 are replayed
        outfilename: path of the file that receives the winning records
    return:
        None.  Nothing is written (and the output file is not created) when no
        ad group has exactly one candidate parameter.
    '''
    # NOTE(review): this file contains a second definition of
    # history_repeat_filter further down; the later one stays bound to the name.

    # Only ad groups with exactly one candidate parameter take part.
    candidate_paras = {
        camp_grp: paras
        for camp_grp, paras in candidate_paras.items()
        if len(paras) == 1
    }
    if not candidate_paras:
        # Single-argument print(...) prints the same text under Python 2's
        # print statement and Python 3's print function.
        print('there is no candidate parameter')
        return

    def ratio(numerator, denominator):
        # Report 0 instead of raising ZeroDivisionError when nothing was
        # counted (e.g. no clicks at all, which is common).
        return numerator / denominator if denominator != 0 else 0

    click_num = 0
    imp_num = 0
    cost = 0
    click_num_old = 0
    imp_num_old = 0
    cost_old = 0

    # Context managers guarantee the files are closed even if parsing or the
    # models raise part-way through.
    with open(outfilename, 'w') as outfile:
        for bid_history_file in bid_history_files:
            with open(bid_history_file) as history:
                for line in history:
                    field_dict = join.get_field_dict(line)
                    if field_dict is None:
                        continue
                    win_price = float(field_dict['win_price'])
                    camp_grp = (field_dict['campaign_id'], field_dict['adgroup_id'])
                    if (camp_grp not in candidate_paras
                            or camp_grp not in bid_model
                            or win_price <= 0):
                        continue

                    pctr = the_ctr_model.predict_ctr(field_dict['feature_values'])
                    candidate_bid_prices = bid_model[camp_grp].get_bids_auc(
                        pctr, candidate_paras[camp_grp])
                    clicked = 1 if field_dict['click_flag'] == True else 0

                    # Simulated delivery: the only candidate bid must reach
                    # the recorded win price.
                    if candidate_bid_prices[0] >= win_price:
                        outfile.write(line)
                        imp_num += 1
                        click_num += clicked
                        cost += win_price

                    # Historical ("old") delivery: every usable record.
                    imp_num_old += 1
                    click_num_old += clicked
                    cost_old += win_price

    # Summed win prices are divided by 1000 before reporting; cpm multiplies
    # back by 1000 (presumably win_price is quoted per thousand impressions
    # -- TODO confirm).
    cost = float(cost) / 1000
    cost_old = float(cost_old) / 1000

    print('file%s' % bid_history_files)
    print('old imp_num %d,click_num %d,cost %.4f' % (imp_num_old, click_num_old, cost_old))
    print('old cpm %.4f,ctr %.8f,cpc %.4f' % (
        ratio(1000 * cost_old, imp_num_old),
        ratio(float(click_num_old), imp_num_old),
        ratio(cost_old, click_num_old)))
    print('imp_num %d,click_num %d,cost %.4f' % (imp_num, click_num, cost))
    print('cpm %.4f,ctr %.8f,cpc %.4f' % (
        ratio(1000 * cost, imp_num),
        ratio(float(click_num), imp_num),
        ratio(cost, click_num)))
def history_repeat_conditional(bid_history_files, the_ctr_model, bid_model,
                               candidate_paras, condition_type, stat_result):
    '''
    Replay bid history for every candidate parameter of every ad group while
    enforcing a per-ad-group resource cap (half of the historical cost or half
    of the historical clicks).

    For each usable record the candidate bids are computed and
    ``binary_search`` locates ``first_ge_idx`` in them.  Every candidate from
    that index up to (but excluding) the ad group's current stop index is
    credited with the impression.  The stop index is then recomputed with
    ``binary_search`` over the accumulated cost (or click) values against half
    of ``stat_result``.

    NOTE(review): binary_search is defined outside this function; it is
    assumed to return the index of the first element >= the target (and the
    list length when there is none) -- confirm.

    paras:
        bid_history_files: list of join file paths
        the_ctr_model: CTR model, used through predict_ctr(feature_values)
        bid_model: dict keyed by (campaign_id, adgroup_id); values provide
            get_bids_auc(ctr=, variable_paras=), bid_strategy_type and
            fixed_parameter
        candidate_paras: dict keyed by (campaign_id, adgroup_id) holding the
            candidate parameter list; empty lists are ignored
        condition_type: dict keyed by (campaign_id, adgroup_id) with value
            'cost' or 'click'; any other value means no cap is applied.  Ad
            groups missing from this dict are not replayed.
        stat_result: dict keyed by (campaign_id, adgroup_id) with 'cost' and
            'click' entries; half of the selected entry is the cap
    return:
        paras_perf[camp_grp][para] = {'impression':,'avg_pctr':,'click':,'cost':,'ctr':,'ecpc':,'cpm':}
    '''
    # NOTE(review): this file contains a second definition of
    # history_repeat_conditional further down; the later one stays bound to
    # the name.
    candidate_paras = {
        camp_grp: paras
        for camp_grp, paras in candidate_paras.items()
        if len(paras) != 0
    }
    bpo_logger.info('**************history_repeat function start**************')
    bpo_logger.info('files:%s' % ','.join(bid_history_files))
    bpo_logger.info('************** bid_model: %s**************' % (bid_model.keys()))
    bpo_logger.info('************** candidate_paras: %s**************' % (candidate_paras))
    bpo_logger.info('************** condition_type: %s**************' % (condition_type))

    # Pre-create one counter dict per candidate for every ad group that has a
    # bid model, at least one candidate and a condition type.
    stop_flag_idx = {}
    result = {}
    for camp_grp in bid_model.keys():
        if camp_grp not in candidate_paras or camp_grp not in condition_type:
            continue
        tmp_num = len(candidate_paras[camp_grp])
        stop_flag_idx[camp_grp] = tmp_num
        result[camp_grp] = [
            {'impression': 0, 'avg_pctr': 0, 'click': 0, 'cost': 0}
            for _ in range(tmp_num)
        ]

    req_parser = join.Parser()
    for bid_history_file in bid_history_files:
        # The context manager closes each history file deterministically.
        with open(bid_history_file) as history:
            for line in history:
                req_parser.feed(line)
                field_dict = req_parser.get_all()
                if field_dict is None:
                    continue
                win_price = float(field_dict['win_price'])
                camp_grp = (field_dict['campaign_id'], field_dict['adgroup_id'])
                if win_price <= 0 or camp_grp not in result:
                    continue

                pctr = the_ctr_model.predict_ctr(field_dict['feature_values'])
                candidate_bid_prices = bid_model[camp_grp].get_bids_auc(
                    ctr=pctr, variable_paras=candidate_paras[camp_grp])
                first_ge_idx = binary_search(candidate_bid_prices, win_price)

                # Non-cumulative tally: credit every candidate in
                # [first_ge_idx, stop) directly so the running totals can be
                # compared with the cap after each record.
                stop_idx = stop_flag_idx[camp_grp]
                if first_ge_idx < stop_idx:
                    clicked = 1 if field_dict['click_flag'] == True else 0
                    for tmp_idx in range(first_ge_idx, stop_idx):
                        counters = result[camp_grp][tmp_idx]
                        counters['impression'] += 1
                        counters['avg_pctr'] += pctr
                        counters['click'] += clicked
                        counters['cost'] += win_price

                    # Resource cap: 50% of historical cost or 50% of
                    # historical clicks, depending on the ad group.
                    limit_metric = condition_type[camp_grp]
                    if limit_metric in ('cost', 'click'):
                        consumed = [result[camp_grp][idx][limit_metric]
                                    for idx in range(0, stop_idx)]
                        stop_flag_idx[camp_grp] = binary_search(
                            consumed, stat_result[camp_grp][limit_metric] / 2)

    # Derived metrics: ctr / avg_pctr / ecpc / cpm.
    for camp_grp in result:
        for counters in result[camp_grp]:
            counters['cost'] /= 1000  # unit change; cpm is the price per thousand impressions
            impression = counters['impression']
            click = counters['click']
            counters['ctr'] = (click + 0.0) / impression if impression != 0 else 0
            counters['avg_pctr'] = (counters['avg_pctr'] + 0.0) / impression if impression != 0 else 0
            counters['ecpc'] = counters['cost'] / click if click != 0 else 0
            counters['cpm'] = 1000 * counters['cost'] / impression if impression != 0 else 0

    # Key the results by the actual parameter value instead of its index.
    paras_perf = {
        camp_grp: dict(zip(candidate_paras[camp_grp], result[camp_grp]))
        for camp_grp in result.keys()
    }

    # Log the results, one report per ad group.
    bpo_logger.info('**************history_repeat function end**************')
    formatter = 'paras_performance\ncampaign id:{camp}\nadgroup id:{grp}\nbid strategy:{strategy}\nbid strategy fixed parameter:{para}\n'
    for camp_grp in paras_perf.keys():
        log_parts = [
            'bid_history_files:%s\n' % ','.join(bid_history_files),
            formatter.format(
                camp=camp_grp[0],
                grp=camp_grp[1],
                strategy=bid_model[camp_grp].bid_strategy_type,
                para=bid_model[camp_grp].fixed_parameter),
            'condition type:%s\n' % condition_type[camp_grp],
            'para click imp cost cpc ctr avg_pctr cpm\n',
            'unit: US dollar\n',
        ]
        for para in sorted(paras_perf[camp_grp].keys()):
            tmp_dict = paras_perf[camp_grp][para]
            log_parts.append(
                str(para) +
                ' %(click)s %(impression)s %(cost)s %(ecpc)s %(ctr)s %(avg_pctr)s %(cpm)s\n' % tmp_dict)
        bpo_logger.info('************parameter performance start************')
        bpo_logger.info(''.join(log_parts))
        bpo_logger.info('************parameter performance end************')
    return paras_perf
def history_repeat(bid_history_files, the_ctr_model, bid_model, candidate_paras):
    '''
    Replay bid history and report, for every candidate parameter of every ad
    group, the delivery the simulated bids would have obtained.  No resource
    cap is applied.

    Each usable record is tallied once, at the index ``binary_search`` returns
    for its win price among the candidate bids; a prefix sum over the
    candidate indices afterwards turns these per-index tallies into
    cumulative totals.

    NOTE(review): binary_search is defined outside this function; it is
    assumed to return the index of the first bid >= win_price (and the list
    length when there is none), with candidate bids in ascending order --
    confirm.

    paras:
        bid_history_files: list of join file paths
        the_ctr_model: CTR model, used through predict_ctr(feature_values)
        bid_model: dict keyed by (campaign_id, adgroup_id); values provide
            get_bids_auc(ctr=, variable_paras=), bid_strategy_type and
            fixed_parameter
        candidate_paras: dict keyed by (campaign_id, adgroup_id) holding the
            candidate parameter list; empty lists are ignored
    return:
        paras_perf[camp_grp][para] = {'impression':,'avg_pctr':,'click':,'cost':,'ctr':,'ecpc':,'cpm':}
    '''
    # NOTE(review): this file contains a second definition of history_repeat
    # further down; the later one stays bound to the name.
    candidate_paras = {
        camp_grp: paras
        for camp_grp, paras in candidate_paras.items()
        if len(paras) != 0
    }
    bpo_logger.info('**************history_repeat function start**************')
    bpo_logger.info('files:%s' % ','.join(bid_history_files))
    bpo_logger.info('************** bid_model: %s**************' % (bid_model.keys()))
    bpo_logger.info('************** candidate_paras: %s**************' % (candidate_paras))

    metric_names = ['impression', 'avg_pctr', 'click', 'cost']

    # One counter dict per candidate parameter.  Ad groups without any
    # candidate are skipped: they were filtered out of candidate_paras above,
    # so indexing it unconditionally would raise KeyError.
    result = {}
    for camp_grp in bid_model.keys():
        if camp_grp not in candidate_paras:
            continue
        result[camp_grp] = [
            {'impression': 0, 'avg_pctr': 0, 'click': 0, 'cost': 0}
            for _ in range(len(candidate_paras[camp_grp]))
        ]

    req_parser = join.Parser()
    for bid_history_file in bid_history_files:
        # The context manager closes each history file deterministically.
        with open(bid_history_file) as history:
            for line in history:
                req_parser.feed(line)
                field_dict = req_parser.get_all()
                if field_dict is None:
                    continue
                win_price = float(field_dict['win_price'])
                camp_grp = (field_dict['campaign_id'], field_dict['adgroup_id'])
                # result only holds ad groups that have both a bid model and
                # candidates; records of any other ad group are skipped.
                if camp_grp not in result or win_price <= 0:
                    continue

                pctr = the_ctr_model.predict_ctr(field_dict['feature_values'])
                candidate_bid_prices = bid_model[camp_grp].get_bids_auc(
                    ctr=pctr, variable_paras=candidate_paras[camp_grp])
                first_ge_idx = binary_search(candidate_bid_prices, win_price)

                # Cumulative approach: tally the record once, at the returned
                # index.  Faster than crediting every later candidate, but
                # incompatible with resource caps.
                if first_ge_idx < len(result[camp_grp]):
                    counters = result[camp_grp][first_ge_idx]
                    counters['impression'] += 1
                    counters['avg_pctr'] += pctr
                    counters['click'] += (1 if field_dict['click_flag'] == True else 0)
                    counters['cost'] += win_price

    # Prefix sum: each candidate also receives everything tallied at the
    # lower indices.
    for camp_grp in result:
        for idx in range(1, len(result[camp_grp])):
            for tmp_metric in metric_names:
                result[camp_grp][idx][tmp_metric] += result[camp_grp][idx - 1][tmp_metric]

    # Derived metrics: ctr / avg_pctr / ecpc / cpm.
    for camp_grp in result:
        for counters in result[camp_grp]:
            counters['cost'] /= 1000  # unit change
            impression = counters['impression']
            click = counters['click']
            counters['ctr'] = (click + 0.0) / impression if impression != 0 else 0
            counters['avg_pctr'] = (counters['avg_pctr'] + 0.0) / impression if impression != 0 else 0
            counters['ecpc'] = counters['cost'] / click if click != 0 else 0
            counters['cpm'] = 1000 * counters['cost'] / impression if impression != 0 else 0

    # Key the results by the actual parameter value instead of its index.
    paras_perf = {
        camp_grp: dict(zip(candidate_paras[camp_grp], result[camp_grp]))
        for camp_grp in result.keys()
    }

    # Log the results, one report per ad group.
    bpo_logger.info('**************history_repeat function end**************')
    formatter = 'paras_performance\ncampaign id:{camp}\nadgroup id:{grp}\nbid strategy:{strategy}\nbid strategy fixed parameter:{para}\n'
    for camp_grp in paras_perf.keys():
        log_parts = [
            'bid_history_files:%s\n' % ','.join(bid_history_files),
            formatter.format(
                camp=camp_grp[0],
                grp=camp_grp[1],
                strategy=bid_model[camp_grp].bid_strategy_type,
                para=bid_model[camp_grp].fixed_parameter),
            'para click imp cost cpc ctr avg_pctr cpm\n',
            'unit: US dollar\n',
        ]
        for para in sorted(paras_perf[camp_grp].keys()):
            tmp_dict = paras_perf[camp_grp][para]
            log_parts.append(
                str(para) +
                ' %(click)s %(impression)s %(cost)s %(ecpc)s %(ctr)s %(avg_pctr)s %(cpm)s\n' % tmp_dict)
        bpo_logger.info('************parameter performance start************')
        bpo_logger.info(''.join(log_parts))
        bpo_logger.info('************parameter performance end************')
    return paras_perf
def history_repeat_filter(bid_history_files, the_ctr_model, bid_model,
                          candidate_paras, outfilename):
    '''
    Replay bid history for ad groups that have exactly one candidate parameter
    and copy the records the simulated bid would have won to ``outfilename``.

    A record is usable when it parses, its win price is positive and its
    (campaign_id, adgroup_id) is present in both ``candidate_paras`` and
    ``bid_model``.  Every usable record is added to the "old" totals; it is
    also added to the simulated totals, and its raw line written to the output
    file, when the single simulated bid is >= the recorded win price.  Both
    sets of totals are printed to stdout.

    paras:
        bid_history_files: list of join file paths
        the_ctr_model: CTR model, used through predict_ctr(feature_values)
        bid_model: dict keyed by (campaign_id, adgroup_id); values are used
            through get_bids_auc(pctr, paras)
        candidate_paras: dict keyed by (campaign_id, adgroup_id) holding the
            candidate parameter list; only entries of length 1 are replayed
        outfilename: path of the file that receives the winning records
    return:
        None.  Nothing is written (and the output file is not created) when no
        ad group has exactly one candidate parameter.
    '''
    # NOTE(review): an earlier definition of history_repeat_filter exists
    # above in this file; this later one replaces it when the module loads.

    # Only ad groups with exactly one candidate parameter take part.
    candidate_paras = {
        camp_grp: paras
        for camp_grp, paras in candidate_paras.items()
        if len(paras) == 1
    }
    if not candidate_paras:
        # Single-argument print(...) prints the same text under Python 2's
        # print statement and Python 3's print function.
        print('there is no candidate parameter')
        return

    def ratio(numerator, denominator):
        # Report 0 instead of raising ZeroDivisionError when nothing was
        # counted (e.g. no clicks at all, which is common).
        return numerator / denominator if denominator != 0 else 0

    # Simulated totals and historical ("old") totals.
    totals = {'imp': 0, 'click': 0, 'cost': 0}
    totals_old = {'imp': 0, 'click': 0, 'cost': 0}

    # Context managers guarantee the files are closed even if parsing or the
    # models raise part-way through.
    with open(outfilename, 'w') as outfile:
        for bid_history_file in bid_history_files:
            with open(bid_history_file) as history:
                for line in history:
                    field_dict = join.get_field_dict(line)
                    if field_dict is None:
                        continue
                    win_price = float(field_dict['win_price'])
                    camp_grp = (field_dict['campaign_id'], field_dict['adgroup_id'])
                    if (camp_grp not in candidate_paras
                            or camp_grp not in bid_model
                            or win_price <= 0):
                        continue

                    pctr = the_ctr_model.predict_ctr(field_dict['feature_values'])
                    candidate_bid_prices = bid_model[camp_grp].get_bids_auc(
                        pctr, candidate_paras[camp_grp])
                    clicked = 1 if field_dict['click_flag'] == True else 0

                    # Simulated delivery: the only candidate bid must reach
                    # the recorded win price.
                    if candidate_bid_prices[0] >= win_price:
                        outfile.write(line)
                        totals['imp'] += 1
                        totals['click'] += clicked
                        totals['cost'] += win_price

                    # Historical delivery: every usable record.
                    totals_old['imp'] += 1
                    totals_old['click'] += clicked
                    totals_old['cost'] += win_price

    # Summed win prices are divided by 1000 before reporting; cpm multiplies
    # back by 1000 (presumably win_price is quoted per thousand impressions
    # -- TODO confirm).
    cost = float(totals['cost']) / 1000
    cost_old = float(totals_old['cost']) / 1000
    imp_num, click_num = totals['imp'], totals['click']
    imp_num_old, click_num_old = totals_old['imp'], totals_old['click']

    print('file%s' % bid_history_files)
    print('old imp_num %d,click_num %d,cost %.4f' % (imp_num_old, click_num_old, cost_old))
    print('old cpm %.4f,ctr %.8f,cpc %.4f' % (
        ratio(1000 * cost_old, imp_num_old),
        ratio(float(click_num_old), imp_num_old),
        ratio(cost_old, click_num_old)))
    print('imp_num %d,click_num %d,cost %.4f' % (imp_num, click_num, cost))
    print('cpm %.4f,ctr %.8f,cpc %.4f' % (
        ratio(1000 * cost, imp_num),
        ratio(float(click_num), imp_num),
        ratio(cost, click_num)))
def history_repeat(bid_history_files, the_ctr_model, bid_model, candidate_paras):
    '''
    Replay bid history and report, for every candidate parameter of every ad
    group, the delivery the simulated bids would have obtained.  No resource
    cap is applied.

    Each usable record is tallied once, at the index ``binary_search`` returns
    for its win price among the candidate bids; a prefix sum over the
    candidate indices afterwards turns these per-index tallies into
    cumulative totals.

    NOTE(review): binary_search is defined outside this function; it is
    assumed to return the index of the first bid >= win_price (and the list
    length when there is none), with candidate bids in ascending order --
    confirm.

    paras:
        bid_history_files: list of join file paths
        the_ctr_model: CTR model, used through predict_ctr(feature_values)
        bid_model: dict keyed by (campaign_id, adgroup_id); values provide
            get_bids_auc(ctr=, variable_paras=), bid_strategy_type and
            fixed_parameter
        candidate_paras: dict keyed by (campaign_id, adgroup_id) holding the
            candidate parameter list; empty lists are ignored
    return:
        paras_perf[camp_grp][para] = {'impression':,'avg_pctr':,'click':,'cost':,'ctr':,'ecpc':,'cpm':}
    '''
    # NOTE(review): an earlier definition of history_repeat exists above in
    # this file; this later one replaces it when the module loads.
    candidate_paras = {
        camp_grp: paras
        for camp_grp, paras in candidate_paras.items()
        if len(paras) != 0
    }
    bpo_logger.info('**************history_repeat function start**************')
    bpo_logger.info('files:%s' % ','.join(bid_history_files))
    bpo_logger.info('************** bid_model: %s**************' % (bid_model.keys()))
    bpo_logger.info('************** candidate_paras: %s**************' % (candidate_paras))

    summed_metrics = ['impression', 'avg_pctr', 'click', 'cost']

    # One counter dict per candidate parameter.  Ad groups without any
    # candidate are skipped: they were filtered out of candidate_paras above,
    # so indexing it unconditionally would raise KeyError.
    result = {}
    for camp_grp in bid_model.keys():
        if camp_grp not in candidate_paras:
            continue
        result[camp_grp] = [
            {'impression': 0, 'avg_pctr': 0, 'click': 0, 'cost': 0}
            for _ in range(len(candidate_paras[camp_grp]))
        ]

    req_parser = join.Parser()
    for bid_history_file in bid_history_files:
        # The context manager closes each history file deterministically.
        with open(bid_history_file) as history:
            for line in history:
                req_parser.feed(line)
                field_dict = req_parser.get_all()
                if field_dict is None:
                    continue
                win_price = float(field_dict['win_price'])
                camp_grp = (field_dict['campaign_id'], field_dict['adgroup_id'])
                # result only holds ad groups that have both a bid model and
                # candidates; records of any other ad group are skipped.
                if camp_grp not in result or win_price <= 0:
                    continue

                pctr = the_ctr_model.predict_ctr(field_dict['feature_values'])
                candidate_bid_prices = bid_model[camp_grp].get_bids_auc(
                    ctr=pctr, variable_paras=candidate_paras[camp_grp])
                first_ge_idx = binary_search(candidate_bid_prices, win_price)

                # Cumulative approach: tally the record once, at the returned
                # index.  Faster than crediting every later candidate, but
                # incompatible with resource caps.
                if first_ge_idx < len(result[camp_grp]):
                    slot = result[camp_grp][first_ge_idx]
                    slot['impression'] += 1
                    slot['avg_pctr'] += pctr
                    slot['click'] += (1 if field_dict['click_flag'] == True else 0)
                    slot['cost'] += win_price

    # Prefix sum: each candidate also receives everything tallied at the
    # lower indices.
    for camp_grp in result:
        for idx in range(1, len(result[camp_grp])):
            for tmp_metric in summed_metrics:
                result[camp_grp][idx][tmp_metric] += result[camp_grp][idx - 1][tmp_metric]

    # Derived metrics: ctr / avg_pctr / ecpc / cpm.
    for camp_grp in result:
        for slot in result[camp_grp]:
            slot['cost'] /= 1000  # unit change
            impression = slot['impression']
            click = slot['click']
            slot['ctr'] = (click + 0.0) / impression if impression != 0 else 0
            slot['avg_pctr'] = (slot['avg_pctr'] + 0.0) / impression if impression != 0 else 0
            slot['ecpc'] = slot['cost'] / click if click != 0 else 0
            slot['cpm'] = 1000 * slot['cost'] / impression if impression != 0 else 0

    # Key the results by the actual parameter value instead of its index.
    paras_perf = {
        camp_grp: dict(zip(candidate_paras[camp_grp], result[camp_grp]))
        for camp_grp in result.keys()
    }

    # Log the results, one report per ad group.
    bpo_logger.info('**************history_repeat function end**************')
    formatter = 'paras_performance\ncampaign id:{camp}\nadgroup id:{grp}\nbid strategy:{strategy}\nbid strategy fixed parameter:{para}\n'
    for camp_grp in paras_perf.keys():
        log_parts = [
            'bid_history_files:%s\n' % ','.join(bid_history_files),
            formatter.format(
                camp=camp_grp[0],
                grp=camp_grp[1],
                strategy=bid_model[camp_grp].bid_strategy_type,
                para=bid_model[camp_grp].fixed_parameter),
            'para click imp cost cpc ctr avg_pctr cpm\n',
            'unit: US dollar\n',
        ]
        for para in sorted(paras_perf[camp_grp].keys()):
            tmp_dict = paras_perf[camp_grp][para]
            log_parts.append(
                str(para) +
                ' %(click)s %(impression)s %(cost)s %(ecpc)s %(ctr)s %(avg_pctr)s %(cpm)s\n' % tmp_dict)
        bpo_logger.info('************parameter performance start************')
        bpo_logger.info(''.join(log_parts))
        bpo_logger.info('************parameter performance end************')
    return paras_perf
def history_repeat_conditional(
    bid_history_files, the_ctr_model, bid_model, candidate_paras, condition_type, stat_result
):
    """
    Replay bid history for every candidate parameter of every ad group while
    enforcing a per-ad-group resource cap (half of the historical cost or half
    of the historical clicks).

    For each usable record the candidate bids are computed and
    ``binary_search`` locates ``first_ge_idx`` in them.  Every candidate from
    that index up to (but excluding) the ad group's current stop index is
    credited with the impression.  The stop index is then recomputed with
    ``binary_search`` over the accumulated cost (or click) values against half
    of ``stat_result``.

    NOTE(review): binary_search is defined outside this function; it is
    assumed to return the index of the first element >= the target (and the
    list length when there is none) -- confirm.

    paras:
        bid_history_files: list of join file paths
        the_ctr_model: CTR model, used through predict_ctr(feature_values)
        bid_model: dict keyed by (campaign_id, adgroup_id); values provide
            get_bids_auc(ctr=, variable_paras=), bid_strategy_type and
            fixed_parameter
        candidate_paras: dict keyed by (campaign_id, adgroup_id) holding the
            candidate parameter list; empty lists are ignored
        condition_type: dict keyed by (campaign_id, adgroup_id) with value
            'cost' or 'click'; any other value means no cap is applied.  Ad
            groups missing from this dict are not replayed.
        stat_result: dict keyed by (campaign_id, adgroup_id) with 'cost' and
            'click' entries; half of the selected entry is the cap
    return:
        paras_perf[camp_grp][para] = {'impression':,'avg_pctr':,'click':,'cost':,'ctr':,'ecpc':,'cpm':}
    """
    # NOTE(review): an earlier definition of history_repeat_conditional exists
    # above in this file; this later one replaces it when the module loads.
    candidate_paras = {
        camp_grp: paras for camp_grp, paras in candidate_paras.items() if len(paras) != 0
    }
    bpo_logger.info("**************history_repeat function start**************")
    bpo_logger.info("files:%s" % ",".join(bid_history_files))
    bpo_logger.info("************** bid_model: %s**************" % (bid_model.keys()))
    bpo_logger.info("************** candidate_paras: %s**************" % (candidate_paras))
    bpo_logger.info("************** condition_type: %s**************" % (condition_type))

    # Pre-create one counter dict per candidate for every ad group that has a
    # bid model, at least one candidate and a condition type.
    stop_flag_idx = {}
    result = {}
    for camp_grp in bid_model.keys():
        if camp_grp not in candidate_paras or camp_grp not in condition_type:
            continue
        tmp_num = len(candidate_paras[camp_grp])
        stop_flag_idx[camp_grp] = tmp_num
        result[camp_grp] = [
            {"impression": 0, "avg_pctr": 0, "click": 0, "cost": 0} for _ in range(tmp_num)
        ]

    req_parser = join.Parser()
    for bid_history_file in bid_history_files:
        # The context manager closes each history file deterministically.
        with open(bid_history_file) as history:
            for line in history:
                req_parser.feed(line)
                field_dict = req_parser.get_all()
                if field_dict is None:
                    continue
                win_price = float(field_dict["win_price"])
                camp_grp = (field_dict["campaign_id"], field_dict["adgroup_id"])
                if win_price <= 0 or camp_grp not in result:
                    continue

                pctr = the_ctr_model.predict_ctr(field_dict["feature_values"])
                candidate_bid_prices = bid_model[camp_grp].get_bids_auc(
                    ctr=pctr, variable_paras=candidate_paras[camp_grp]
                )
                first_ge_idx = binary_search(candidate_bid_prices, win_price)

                # Non-cumulative tally: credit every candidate in
                # [first_ge_idx, stop) directly so the running totals can be
                # compared with the cap after each record.
                stop_idx = stop_flag_idx[camp_grp]
                if first_ge_idx < stop_idx:
                    clicked = 1 if field_dict["click_flag"] == True else 0
                    for tmp_idx in range(first_ge_idx, stop_idx):
                        counters = result[camp_grp][tmp_idx]
                        counters["impression"] += 1
                        counters["avg_pctr"] += pctr
                        counters["click"] += clicked
                        counters["cost"] += win_price

                    # Resource cap: 50% of historical cost or 50% of
                    # historical clicks, depending on the ad group.
                    limit_metric = condition_type[camp_grp]
                    if limit_metric in ("cost", "click"):
                        consumed = [result[camp_grp][idx][limit_metric] for idx in range(0, stop_idx)]
                        stop_flag_idx[camp_grp] = binary_search(
                            consumed, stat_result[camp_grp][limit_metric] / 2
                        )

    # Derived metrics: ctr / avg_pctr / ecpc / cpm.
    for camp_grp in result:
        for counters in result[camp_grp]:
            counters["cost"] /= 1000  # unit change; cpm is the price per thousand impressions
            impression = counters["impression"]
            click = counters["click"]
            counters["ctr"] = (click + 0.0) / impression if impression != 0 else 0
            counters["avg_pctr"] = (counters["avg_pctr"] + 0.0) / impression if impression != 0 else 0
            counters["ecpc"] = counters["cost"] / click if click != 0 else 0
            counters["cpm"] = 1000 * counters["cost"] / impression if impression != 0 else 0

    # Key the results by the actual parameter value instead of its index.
    paras_perf = {
        camp_grp: dict(zip(candidate_paras[camp_grp], result[camp_grp])) for camp_grp in result.keys()
    }

    # Log the results, one report per ad group.
    bpo_logger.info("**************history_repeat function end**************")
    formatter = "paras_performance\ncampaign id:{camp}\nadgroup id:{grp}\nbid strategy:{strategy}\nbid strategy fixed parameter:{para}\n"
    for camp_grp in paras_perf.keys():
        log_parts = [
            "bid_history_files:%s\n" % ",".join(bid_history_files),
            formatter.format(
                camp=camp_grp[0],
                grp=camp_grp[1],
                strategy=bid_model[camp_grp].bid_strategy_type,
                para=bid_model[camp_grp].fixed_parameter,
            ),
            "condition type:%s\n" % condition_type[camp_grp],
            "para click imp cost cpc ctr avg_pctr cpm\n",
            "unit: US dollar\n",
        ]
        for para in sorted(paras_perf[camp_grp].keys()):
            tmp_dict = paras_perf[camp_grp][para]
            log_parts.append(
                str(para)
                + " %(click)s %(impression)s %(cost)s %(ecpc)s %(ctr)s %(avg_pctr)s %(cpm)s\n" % tmp_dict
            )
        bpo_logger.info("************parameter performance start************")
        bpo_logger.info("".join(log_parts))
        bpo_logger.info("************parameter performance end************")
    return paras_perf