def run(Base_Dir_40p208, date, cid, bid, conf_dict, train_list, test_list):
    """Preprocess the feature sets, fit the LR model and persist its results.

    Raw feature files are read from
    ``<Base_Dir_40p208>/../data/lr/feature_set/<cid>/<bid>/<date>/`` and all
    outputs (processed sets, predictions, the textual result report) are
    written under ``<Base_Dir_40p208>/../data/lr/result/<cid>/<bid>/<date>/``.
    Both directory chains are created level by level via ``do_mkdir``.

    Args:
        Base_Dir_40p208: Base directory the data tree is resolved against.
        date: Third path component of the feature/result directories.
        cid: First path component; also forwarded to ``save_model_lr.run``.
        bid: Second path component; also forwarded to ``save_model_lr.run``.
        conf_dict: Configuration handed to ``PreProcess`` unchanged.
        train_list: Forwarded unchanged to ``save_model_lr.run``.
        test_list: Forwarded unchanged to ``save_model_lr.run``.
    """
    partition = "{0}/{1}/{2}".format(cid, bid, date)
    feature_root = Base_Dir_40p208 + "/../data/lr/feature_set"
    result_root = Base_Dir_40p208 + "/../data/lr/result"
    feature_dir = feature_root + "/" + partition
    # result_dir deliberately keeps its trailing slash; it is passed on as-is.
    result_dir = result_root + "/" + partition + "/"

    train_pos = feature_dir + "/train_pos"
    train_neg = feature_dir + "/train_neg"
    # NOTE(review): the test inputs point at the *train* files, so the model
    # is evaluated on its own training data. This looks like a copy-paste
    # slip ("test_pos"/"test_neg" were probably intended) -- confirm what the
    # upstream feature extraction writes before changing the paths.
    test_pos = feature_dir + "/train_pos"
    test_neg = feature_dir + "/train_neg"

    train_pos_processed = result_dir + "train_pos_processed"
    train_neg_processed = result_dir + "train_neg_processed"
    test_pos_processed = result_dir + "test_pos_processed"
    test_neg_processed = result_dir + "test_neg_processed"

    result_file = result_dir + "result"
    predict_file = result_dir + "predict"

    # Create each directory level in turn (cid, cid/bid, cid/bid/date), first
    # for the feature tree and then for the result tree.
    for root in (feature_root, result_root):
        do_mkdir(root + "/{0}".format(cid))
        do_mkdir(root + "/{0}/{1}".format(cid, bid))
        do_mkdir(root + "/{0}/{1}/{2}".format(cid, bid, date))

    # Local development: the paths above can instead be pointed at
    # ./data/{train,test}_{pos,neg}_dl (inputs), ./data2/*_processed
    # (processed sets) and ./result/{result_file,predict_file} (outputs).

    process = PreProcess(train_pos, train_neg, test_pos, test_neg,
                         result_dir, conf_dict)
    process_info_list, processed_length = process.run(
        train_pos_processed, train_neg_processed,
        test_pos_processed, test_neg_processed)
    coef, auc = lr.run(
        train_pos_processed, train_neg_processed,
        test_pos_processed, test_neg_processed, predict_file)

    # The context manager guarantees the report is flushed and closed even if
    # one of the writes or index lookups below raises.
    with open(result_file, "w") as writer:
        writer.write("coef:\n")
        # processed_length[k] is the number of consecutive entries of coef[0]
        # reported under process_info_list[k]; a running offset replaces
        # re-summing the length prefix on every iteration.
        start = 0
        for index, length in enumerate(processed_length):
            info = process_info_list[index]
            writer.write("-------------------- " + info[1] + " --------------------\n")
            writer.write("--- " + info[2] + " ---\n")
            stop = start + length
            for weight in coef[0][start:stop]:
                writer.write("{0}\n".format(weight))
            start = stop
        writer.write("\nauc:\n")
        writer.write("{0}\n".format(auc))

    # Store the experiment results in MySQL.
    save_model_lr.run(cid, bid, train_list, test_list, process_info_list,
                      result_dir, result_file)
def run(conf_dict):
    """Preprocess, fit and report the LR model using a local directory layout.

    All paths are resolved against the parent of the current working
    directory: inputs come from ``<parent>/data``, processed sets go to
    ``<parent>/data2`` and the report/prediction files to ``<parent>/result``.
    None of these directories is created here; they must already exist.

    Args:
        conf_dict: Configuration handed to ``PreProcess`` unchanged.
    """
    # The production layout (paths under
    # Base_Dir_40p208 + "/../data/lr/{feature_set,result}/<cid>/<bid>/<date>/"
    # plus the matching do_mkdir calls) is disabled in this variant.

    # Local development: everything lives next to the working directory.
    DIR = os.path.abspath(os.path.join(os.curdir, os.pardir))

    train_pos = '%s/data/train_pos' % DIR
    train_neg = "%s/data/train_neg" % DIR
    test_pos = "%s/data/test_pos" % DIR
    test_neg = "%s/data/test_neg" % DIR

    train_pos_processed = "%s/data2/train_pos_processed" % DIR
    train_neg_processed = "%s/data2/train_neg_processed" % DIR
    test_pos_processed = "%s/data2/test_pos_processed" % DIR
    test_neg_processed = "%s/data2/test_neg_processed" % DIR

    result_dir = "%s/result/" % DIR
    result_file = "%s/result/result_file" % DIR
    predict_file = "%s/result/predict_file" % DIR

    process = PreProcess(train_pos, train_neg, test_pos, test_neg,
                         result_dir, conf_dict)
    process_info_list, processed_length = process.run(
        train_pos_processed, train_neg_processed,
        test_pos_processed, test_neg_processed)
    coef, auc = lr.run(
        train_pos_processed, train_neg_processed,
        test_pos_processed, test_neg_processed, predict_file)

    # The context manager guarantees the report is flushed and closed even if
    # one of the writes or index lookups below raises.
    with open(result_file, "w") as writer:
        writer.write("coef:\n")
        # processed_length[k] is the number of consecutive entries of coef[0]
        # reported under process_info_list[k]; a running offset replaces
        # re-summing the length prefix on every iteration.
        start = 0
        for index, length in enumerate(processed_length):
            info = process_info_list[index]
            writer.write("-------------------- " + info[1] + " --------------------\n")
            writer.write("--- " + info[2] + " ---\n")
            stop = start + length
            for weight in coef[0][start:stop]:
                writer.write("{0}\n".format(weight))
            start = stop
        writer.write("\nauc:\n")
        # Single-argument print call: emits the same "auc: <value>" text as
        # the former print statement on Python 2 and also parses on Python 3.
        print('auc: {0}'.format(auc))
        writer.write("{0}\n".format(auc))
def run(Base_Dir_40p208, date, cid, bid, conf_dict, train_list, test_list):
    """Preprocess the feature sets, fit the LR model and persist its results.

    Raw feature files are read from
    ``<Base_Dir_40p208>/../data/lr/feature_set/<cid>/<bid>/<date>/`` and all
    outputs (processed sets, predictions, the textual result report) are
    written under ``<Base_Dir_40p208>/../data/lr/result/<cid>/<bid>/<date>/``.
    Both directory chains are created level by level via ``do_mkdir``.

    Args:
        Base_Dir_40p208: Base directory the data tree is resolved against.
        date: Third path component of the feature/result directories.
        cid: First path component; also forwarded to ``save_model_lr.run``.
        bid: Second path component; also forwarded to ``save_model_lr.run``.
        conf_dict: Configuration handed to ``PreProcess`` unchanged.
        train_list: Forwarded unchanged to ``save_model_lr.run``.
        test_list: Forwarded unchanged to ``save_model_lr.run``.
    """
    partition = "{0}/{1}/{2}".format(cid, bid, date)
    feature_root = Base_Dir_40p208 + "/../data/lr/feature_set"
    result_root = Base_Dir_40p208 + "/../data/lr/result"
    feature_dir = feature_root + "/" + partition
    # result_dir deliberately keeps its trailing slash; it is passed on as-is.
    result_dir = result_root + "/" + partition + "/"

    train_pos = feature_dir + "/train_pos"
    train_neg = feature_dir + "/train_neg"
    # NOTE(review): the test inputs point at the *train* files, so the model
    # is evaluated on its own training data. This looks like a copy-paste
    # slip ("test_pos"/"test_neg" were probably intended) -- confirm what the
    # upstream feature extraction writes before changing the paths.
    test_pos = feature_dir + "/train_pos"
    test_neg = feature_dir + "/train_neg"

    train_pos_processed = result_dir + "train_pos_processed"
    train_neg_processed = result_dir + "train_neg_processed"
    test_pos_processed = result_dir + "test_pos_processed"
    test_neg_processed = result_dir + "test_neg_processed"

    result_file = result_dir + "result"
    predict_file = result_dir + "predict"

    # Create each directory level in turn (cid, cid/bid, cid/bid/date), first
    # for the feature tree and then for the result tree.
    for root in (feature_root, result_root):
        do_mkdir(root + "/{0}".format(cid))
        do_mkdir(root + "/{0}/{1}".format(cid, bid))
        do_mkdir(root + "/{0}/{1}/{2}".format(cid, bid, date))

    # Local development: the paths above can instead be pointed at
    # ./data/{train,test}_{pos,neg}_dl (inputs), ./data2/*_processed
    # (processed sets) and ./result/{result_file,predict_file} (outputs).

    process = PreProcess(train_pos, train_neg, test_pos, test_neg,
                         result_dir, conf_dict)
    process_info_list, processed_length = process.run(
        train_pos_processed, train_neg_processed,
        test_pos_processed, test_neg_processed)
    coef, auc = lr.run(
        train_pos_processed, train_neg_processed,
        test_pos_processed, test_neg_processed, predict_file)

    # The context manager guarantees the report is flushed and closed even if
    # one of the writes or index lookups below raises.
    with open(result_file, "w") as writer:
        writer.write("coef:\n")
        # processed_length[k] is the number of consecutive entries of coef[0]
        # reported under process_info_list[k]; a running offset replaces
        # re-summing the length prefix on every iteration.
        start = 0
        for index, length in enumerate(processed_length):
            info = process_info_list[index]
            writer.write("-------------------- " + info[1] + " --------------------\n")
            writer.write("--- " + info[2] + " ---\n")
            stop = start + length
            for weight in coef[0][start:stop]:
                writer.write("{0}\n".format(weight))
            start = stop
        writer.write("\nauc:\n")
        writer.write("{0}\n".format(auc))

    # Store the experiment results in MySQL.
    save_model_lr.run(cid, bid, train_list, test_list, process_info_list,
                      result_dir, result_file)