def init():
    """Configure logging and build every module-level service object.

    Reads ``../conf/ad_svr.conf`` into ``g_conf``, points the root logger at
    an hour-stamped file under ``../log/`` and then populates the globals
    ``g_adx_interface``, ``g_invert_idx_mgr``, ``g_idx_mgr``, ``g_ip_obj``,
    ``g_id_obj``, ``g_rank_bid`` and ``g_filter_obj``.

    Returns:
        list: ``[service_para, service_name, process_num]`` taken from the
        ``gearman`` config section; ``service_para`` is the ``para`` value
        split on ``,`` and ``process_num`` is converted to ``int``.
    """
    global g_adx_interface
    global g_invert_idx_mgr
    global g_idx_mgr
    global g_ip_obj
    global g_id_obj
    global g_rank_bid
    global g_filter_obj

    # The hour is captured once, so the log file name does not roll over
    # while the process keeps running.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H")
    g_conf.read("../conf/ad_svr.conf")
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
        # NOTE(review): '%m_%d' (underscore) looks like a typo for '%m-%d';
        # kept unchanged so the on-disk log format stays the same -- confirm.
        datefmt='[%Y-%m_%d %H:%M:%S]',
        filename='../log/ad_svr.' + timestamp + '.log',
        filemode='a')

    # Gearman worker settings handed back to the caller.
    service_para = g_conf.get("gearman", "para").split(',')
    service_name = g_conf.get("gearman", "name")
    process_num = int(g_conf.get("gearman", "process_num"))

    # Load the adx map.
    adx_id_obj = adx_id_map(g_conf)
    g_adx_interface = adx_interface_t(adx_id_obj, g_conf)

    # One inverted index per name listed in [index] ridx_list.
    g_invert_idx_mgr = {}
    for name in g_conf.get("index", "ridx_list").split(","):
        ridx_obj = inverted_index_t()
        ridx_obj.load_file("../index/" + name + ".ridx")
        g_invert_idx_mgr[name] = ridx_obj

    # One forward index per name listed in [index] idx_list.
    g_idx_mgr = {}
    for name in g_conf.get("index", "idx_list").split(","):
        idx_obj = index_t()
        idx_obj.load_file("../index/" + name + ".idx")
        g_idx_mgr[name] = idx_obj

    # IP region lookup.
    g_ip_obj = ip_parse_t(g_conf.get("file", "ip_table"),
                          g_conf.get("file", "ip_region"))
    # Feature-name to id mapping.
    g_id_obj = transform_id_t(g_conf.get("file", "fea_id_file"))
    # Feature extractor, prediction model and idea operator feed the ranker.
    extract_fea_obj = extract_feature_t(g_ip_obj, g_id_obj)
    model_obj = predict_model_t(g_conf.get("file", "model"))
    operator_obj = idea_operator_t(g_conf.get("file", "idea_operate"))
    g_rank_bid = rank_bid_t(extract_fea_obj, model_obj, operator_obj)

    # Filter built on top of the inverted indexes.
    g_filter_obj = filter_t(g_invert_idx_mgr, g_conf)

    # Logged only once every object above exists; previously this message was
    # emitted before the filter was built.
    logging.info("init complete")
    return [service_para, service_name, process_num]
def process(input_file, fea_output_file, ml_file):
    """Turn a tab-separated request log into a feature file and an ML file.

    For every input line the code reads column 1 as the time string,
    columns 2 and 3 as the integer click and consume values, and column 4 as
    a JSON document whose ``"request"`` entry is itself a JSON string.  The
    decoded request is passed to ``extract_feature`` and the resulting
    feature values are written to ``fea_output_file``; their ids (from
    ``g_id_obj.get_id``), sorted and rendered as ``id:1`` pairs, are written
    to ``ml_file`` prefixed by the click value.

    Lines that are too short, carry non-integer click/consume values or fail
    JSON decoding are logged as warnings and skipped.

    Side effects: rebinds the globals ``g_ip_obj`` and ``g_id_obj``.

    Args:
        input_file: path of the log to read.
        fea_output_file: path of the tab-separated feature file to write.
        ml_file: path of the ``label id:1 id:1 ...`` file to write.
    """
    global g_ip_obj
    global g_id_obj

    single_feature_list = ["region", "app", "time", "app_type", "manufacture"]
    # Combined features (e.g. "combine-region-time", "combine-app-region")
    # are currently disabled.
    combine_feature_list = []
    feature_order = single_feature_list + combine_feature_list

    # Context managers guarantee the three files are closed (and the outputs
    # flushed) even when a line raises.
    with open(input_file, "r") as input_fp, \
            open(fea_output_file, "w") as fea_fp, \
            open(ml_file, "w") as ml_fp:
        g_ip_obj = ip_parse_t("../data/ip.all", "../data/region.txt")
        g_id_obj = transform_id_t(g_conf.get("file", "fea_id_file"))

        # NOTE(review): with no combined features the header ends in a
        # trailing tab, unlike the data rows; kept as-is for compatibility.
        header = ("click\tconsume\t" + "\t".join(single_feature_list) + "\t"
                  + "\t".join(combine_feature_list) + "\n")
        fea_fp.write(header)

        for line_cnt, raw_line in enumerate(input_fp, 1):
            fields = raw_line.rstrip("\r\n").split("\t")
            try:
                time_str = fields[1]
                click = int(fields[2])
                consume = int(fields[3])
                req_json = fields[4]
            except (IndexError, ValueError):
                # Short or non-numeric line (e.g. a blank trailing line):
                # skip it instead of aborting the whole run.
                logging.warning("malformed line[%d]", line_cnt)
                continue

            try:
                req_dict = json.loads(req_json)
                fea_json_dict = json.loads(req_dict["request"])
            except (ValueError, KeyError, TypeError):
                logging.warning("load json failed[%d]", line_cnt)
                continue

            feature_value_dict = extract_feature(
                time_str, fea_json_dict, single_feature_list,
                combine_feature_list)
            # Trailing tabs (empty trailing values) are stripped, as before.
            fea_str = "\t".join(
                feature_value_dict[feature] for feature in feature_order
            ).rstrip("\t")
            fea_fp.write("%d\t%d\t%s\n" % (click, consume, fea_str))

            # Transform feature values to sorted ids.
            fea_id_list = sorted(
                g_id_obj.get_id(fea_name) for fea_name in fea_str.split("\t"))
            fea_id_str = " ".join(str(fea_id) + ":1" for fea_id in fea_id_list)
            ml_fp.write("%d %s\n" % (click, fea_id_str))
def process(input_file, fea_output_file, ml_file):
    """Turn a tab-separated request log into a feature file and an ML file.

    For every input line the code reads column 1 as the time string,
    columns 2 and 3 as the integer click and consume values, and column 4 as
    a JSON document whose ``"request"`` entry is itself a JSON string.  The
    decoded request is passed to ``extract_feature`` and the resulting
    feature values are written to ``fea_output_file``; their ids (from
    ``g_id_obj.get_id``), sorted and rendered as ``id:1`` pairs, are written
    to ``ml_file`` prefixed by the click value.

    Lines that are too short, carry non-integer click/consume values or fail
    JSON decoding are logged as warnings and skipped.

    Side effects: rebinds the globals ``g_ip_obj`` and ``g_id_obj``.

    Args:
        input_file: path of the log to read.
        fea_output_file: path of the tab-separated feature file to write.
        ml_file: path of the ``label id:1 id:1 ...`` file to write.
    """
    global g_ip_obj
    global g_id_obj

    single_feature_list = ["region", "app", "time", "app_type", "manufacture"]
    # Combined features (e.g. "combine-region-time", "combine-app-region")
    # are currently disabled.
    combine_feature_list = []
    feature_order = single_feature_list + combine_feature_list

    # Context managers guarantee the three files are closed (and the outputs
    # flushed) even when a line raises.
    with open(input_file, "r") as input_fp, \
            open(fea_output_file, "w") as fea_fp, \
            open(ml_file, "w") as ml_fp:
        g_ip_obj = ip_parse_t("../data/ip.all", "../data/region.txt")
        g_id_obj = transform_id_t(g_conf.get("file", "fea_id_file"))

        # NOTE(review): with no combined features the header ends in a
        # trailing tab, unlike the data rows; kept as-is for compatibility.
        header = ("click\tconsume\t" + "\t".join(single_feature_list) + "\t"
                  + "\t".join(combine_feature_list) + "\n")
        fea_fp.write(header)

        for line_cnt, raw_line in enumerate(input_fp, 1):
            fields = raw_line.rstrip("\r\n").split("\t")
            try:
                time_str = fields[1]
                click = int(fields[2])
                consume = int(fields[3])
                req_json = fields[4]
            except (IndexError, ValueError):
                # Short or non-numeric line (e.g. a blank trailing line):
                # skip it instead of aborting the whole run.
                logging.warning("malformed line[%d]", line_cnt)
                continue

            try:
                req_dict = json.loads(req_json)
                fea_json_dict = json.loads(req_dict["request"])
            except (ValueError, KeyError, TypeError):
                logging.warning("load json failed[%d]", line_cnt)
                continue

            feature_value_dict = extract_feature(
                time_str, fea_json_dict, single_feature_list,
                combine_feature_list)
            # Trailing tabs (empty trailing values) are stripped, as before.
            fea_str = "\t".join(
                feature_value_dict[feature] for feature in feature_order
            ).rstrip("\t")
            fea_fp.write("%d\t%d\t%s\n" % (click, consume, fea_str))

            # Transform feature values to sorted ids.
            fea_id_list = sorted(
                g_id_obj.get_id(fea_name) for fea_name in fea_str.split("\t"))
            fea_id_str = " ".join(str(fea_id) + ":1" for fea_id in fea_id_list)
            ml_fp.write("%d %s\n" % (click, fea_id_str))
def process(input_file, fea_output_file, ml_file, type):
    """Turn a tab-separated ad log into a feature file and an ML file.

    For every input line the code reads column 0 as the time string,
    column 1 as a JSON object with the keys ``click``, ``download``,
    ``install``, ``open`` and ``cost``, and column 2 as the JSON request that
    is passed to ``extract_feature``.  ``extract_feature`` is expected to
    return ``(fea_str, fea_id_str)``; the first goes to ``fea_output_file``
    together with the action values, the second to ``ml_file`` prefixed by
    the click value.

    Lines whose request or action JSON cannot be decoded (or whose action
    object lacks a required key) are logged as warnings and skipped.

    Side effects: rebinds the globals ``g_ip_obj`` and ``g_id_obj``.

    Args:
        input_file: path of the log to read.
        fea_output_file: path of the tab-separated feature file to write.
        ml_file: path of the ``label features`` file to write.
        type: when equal to ``"train"``, lines with ``download == 1`` are
            written to ``ml_file`` five times instead of once.  (The name
            shadows the builtin but is kept for caller compatibility.)
    """
    global g_ip_obj
    global g_id_obj

    # How many copies of a downloaded sample go into the training file.
    download_repeat = 5

    single_feature_list = ["region", "app", "time", "app_type", "manufacture"]
    # Combined features (e.g. "combine-region-time", "combine-app-region")
    # are currently disabled.
    combine_feature_list = []

    # Context managers guarantee the three files are closed (and the outputs
    # flushed) even when a line raises.
    with open(input_file, "r") as input_fp, \
            open(fea_output_file, "w") as fea_fp, \
            open(ml_file, "w") as ml_fp:
        g_ip_obj = ip_parse_t("../data/ip.all", "../data/region.txt")
        g_id_obj = transform_id_t(g_conf.get("file", "fea_id_file"))

        # NOTE(review): with no combined features the header ends in a
        # trailing tab; kept as-is for compatibility.
        header = ("click\tdownload\tinstall\topen\tconsume\t"
                  + "\t".join(single_feature_list) + "\t"
                  + "\t".join(combine_feature_list) + "\n")
        fea_fp.write(header)

        for line_cnt, raw_line in enumerate(input_fp, 1):
            fields = raw_line.rstrip("\r\n").split("\t")
            time_str = fields[0]
            try:
                # IndexError covers lines with fewer than three columns.
                fea_json_dict = json.loads(fields[2])
            except (IndexError, ValueError):
                logging.warning("load json failed[%d]", line_cnt)
                continue

            (fea_str, fea_id_str) = extract_feature(
                time_str, fea_json_dict, single_feature_list,
                combine_feature_list)

            try:
                ad_action_dict = json.loads(fields[1])
                click = ad_action_dict["click"]
                download = ad_action_dict["download"]
                install = ad_action_dict["install"]
                app_open = ad_action_dict["open"]
                consume = ad_action_dict["cost"]
            except (ValueError, KeyError, TypeError):
                # A malformed action column skips the line instead of
                # aborting the whole run.
                logging.warning("load ad action failed[%d]", line_cnt)
                continue

            fea_fp.write("%d\t%d\t%d\t%d\t%d\t%s\n"
                         % (click, download, install, app_open, consume,
                            fea_str))

            # Downloaded samples are repeated in the training set.
            if type == "train" and download == 1:
                count = download_repeat
            else:
                count = 1
            for _ in range(count):
                ml_fp.write("%d %s\n" % (click, fea_id_str))
def process(input_file, fea_output_file, ml_file, type):
    """Turn a tab-separated ad log into a feature file and an ML file.

    For every input line the code reads column 0 as the time string,
    column 1 as a JSON object with the keys ``click``, ``download``,
    ``install``, ``open`` and ``cost``, and column 2 as the JSON request that
    is passed to ``extract_feature``.  ``extract_feature`` is expected to
    return ``(fea_str, fea_id_str)``; the first goes to ``fea_output_file``
    together with the action values, the second to ``ml_file`` prefixed by
    the click value.

    Lines whose request or action JSON cannot be decoded (or whose action
    object lacks a required key) are logged as warnings and skipped.

    Side effects: rebinds the globals ``g_ip_obj`` and ``g_id_obj``.

    Args:
        input_file: path of the log to read.
        fea_output_file: path of the tab-separated feature file to write.
        ml_file: path of the ``label features`` file to write.
        type: when equal to ``"train"``, lines with ``download == 1`` are
            written to ``ml_file`` five times instead of once.  (The name
            shadows the builtin but is kept for caller compatibility.)
    """
    global g_ip_obj
    global g_id_obj

    # How many copies of a downloaded sample go into the training file.
    download_repeat = 5

    single_feature_list = ["region", "app", "time", "app_type", "manufacture"]
    # Combined features (e.g. "combine-region-time", "combine-app-region")
    # are currently disabled.
    combine_feature_list = []

    # Context managers guarantee the three files are closed (and the outputs
    # flushed) even when a line raises.
    with open(input_file, "r") as input_fp, \
            open(fea_output_file, "w") as fea_fp, \
            open(ml_file, "w") as ml_fp:
        g_ip_obj = ip_parse_t("../data/ip.all", "../data/region.txt")
        g_id_obj = transform_id_t(g_conf.get("file", "fea_id_file"))

        # NOTE(review): with no combined features the header ends in a
        # trailing tab; kept as-is for compatibility.
        header = ("click\tdownload\tinstall\topen\tconsume\t"
                  + "\t".join(single_feature_list) + "\t"
                  + "\t".join(combine_feature_list) + "\n")
        fea_fp.write(header)

        for line_cnt, raw_line in enumerate(input_fp, 1):
            fields = raw_line.rstrip("\r\n").split("\t")
            time_str = fields[0]
            try:
                # IndexError covers lines with fewer than three columns.
                fea_json_dict = json.loads(fields[2])
            except (IndexError, ValueError):
                logging.warning("load json failed[%d]", line_cnt)
                continue

            (fea_str, fea_id_str) = extract_feature(
                time_str, fea_json_dict, single_feature_list,
                combine_feature_list)

            try:
                ad_action_dict = json.loads(fields[1])
                click = ad_action_dict["click"]
                download = ad_action_dict["download"]
                install = ad_action_dict["install"]
                app_open = ad_action_dict["open"]
                consume = ad_action_dict["cost"]
            except (ValueError, KeyError, TypeError):
                # A malformed action column skips the line instead of
                # aborting the whole run.
                logging.warning("load ad action failed[%d]", line_cnt)
                continue

            fea_fp.write("%d\t%d\t%d\t%d\t%d\t%s\n"
                         % (click, download, install, app_open, consume,
                            fea_str))

            # Downloaded samples are repeated in the training set.
            if type == "train" and download == 1:
                count = download_repeat
            else:
                count = 1
            for _ in range(count):
                ml_fp.write("%d %s\n" % (click, fea_id_str))
def init():
    """Configure logging and build every module-level service object.

    Reads ``../conf/ad_svr.conf`` into ``g_conf``, points the root logger at
    an hour-stamped file under ``../log/`` and then populates the globals
    ``g_adx_interface``, ``g_invert_idx_mgr``, ``g_idx_mgr``, ``g_ip_obj``,
    ``g_id_obj``, ``g_rank_bid`` and ``g_filter_obj``.

    Returns:
        list: ``[service_para, service_name, process_num]`` taken from the
        ``gearman`` config section; ``service_para`` is the ``para`` value
        split on ``,`` and ``process_num`` is converted to ``int``.
    """
    global g_adx_interface
    global g_invert_idx_mgr
    global g_idx_mgr
    global g_ip_obj
    global g_id_obj
    global g_rank_bid
    global g_filter_obj

    # The hour is captured once, so the log file name does not roll over
    # while the process keeps running.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H")
    g_conf.read("../conf/ad_svr.conf")
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
        # NOTE(review): '%m_%d' (underscore) looks like a typo for '%m-%d';
        # kept unchanged so the on-disk log format stays the same -- confirm.
        datefmt='[%Y-%m_%d %H:%M:%S]',
        filename='../log/ad_svr.' + timestamp + '.log',
        filemode='a')

    # Gearman worker settings handed back to the caller.
    service_para = g_conf.get("gearman", "para").split(',')
    service_name = g_conf.get("gearman", "name")
    process_num = int(g_conf.get("gearman", "process_num"))

    # Load the adx map.
    adx_id_obj = adx_id_map(g_conf)
    g_adx_interface = adx_interface_t(adx_id_obj, g_conf)

    # One inverted index per name listed in [index] ridx_list.
    g_invert_idx_mgr = {}
    for name in g_conf.get("index", "ridx_list").split(","):
        ridx_obj = inverted_index_t()
        ridx_obj.load_file("../index/" + name + ".ridx")
        g_invert_idx_mgr[name] = ridx_obj

    # One forward index per name listed in [index] idx_list.
    g_idx_mgr = {}
    for name in g_conf.get("index", "idx_list").split(","):
        idx_obj = index_t()
        idx_obj.load_file("../index/" + name + ".idx")
        g_idx_mgr[name] = idx_obj

    # IP region lookup.
    g_ip_obj = ip_parse_t(g_conf.get("file", "ip_table"),
                          g_conf.get("file", "ip_region"))
    # Feature-name to id mapping.
    g_id_obj = transform_id_t(g_conf.get("file", "fea_id_file"))
    # Feature extractor, prediction model and idea operator feed the ranker.
    extract_fea_obj = extract_feature_t(g_ip_obj, g_id_obj)
    model_obj = predict_model_t(g_conf.get("file", "model"))
    operator_obj = idea_operator_t(g_conf.get("file", "idea_operate"))
    g_rank_bid = rank_bid_t(extract_fea_obj, model_obj, operator_obj)

    # Filter built on top of the inverted indexes.
    g_filter_obj = filter_t(g_invert_idx_mgr, g_conf)

    # Logged only once every object above exists; previously this message was
    # emitted before the filter was built.
    logging.info("init complete")
    return [service_para, service_name, process_num]