def main(argv):
    """Run one pipeline stage of the Wide&Deep model for every action.

    The stage name is read from ``argv[1]`` and selects the per-action work:

    * ``"online_train"`` / ``"offline_train"``: train, then evaluate.
    * ``"evaluate"``: evaluate and keep the returned logits.
    * ``"submit"``: predict and keep the returned logits and timing value.

    After all actions are processed, the train/evaluate stages print the
    weighted uAUC, and the ``"evaluate"`` / ``"submit"`` stages write the
    collected logits next to the ids in
    ``<FLAGS.root_path>/<stage>/submit_<unix time>.csv``.

    Args:
        argv: Argument list; ``argv[1]`` is the stage name.
    """
    start_time = time.time()
    dnn_feature_columns, linear_feature_columns = get_feature_columns()
    stage = argv[1]
    print('Stage: %s'%stage)
    eval_dict = {}
    predict_dict = {}
    predict_time_cost = {}
    ids = None
    for action in ACTION_LIST:
        print("Action:", action)
        model = WideAndDeep(linear_feature_columns, dnn_feature_columns, stage, action)
        model.build_estimator()

        # The stage never changes inside the loop, so at most one branch runs.
        if stage in ("online_train", "offline_train"):
            # Train, then evaluate.
            model.train()
            ids, logits, action_uauc = model.evaluate()
            eval_dict[action] = action_uauc
        elif stage == "evaluate":
            # Evaluate on the offline test set: keep this action's uAUC and
            # its predictions.
            ids, logits, action_uauc = model.evaluate()
            eval_dict[action] = action_uauc
            predict_dict[action] = logits
        elif stage == "submit":
            # Predict on the online test set and keep the predictions.
            ids, logits, ts = model.predict()
            predict_time_cost[action] = ts
            predict_dict[action] = logits

    if stage in ("evaluate", "offline_train", "online_train"):
        # Weighted uAUC over all actions.
        print(eval_dict)
        weight_dict = {"read_comment": 4, "like": 3, "click_avatar": 2, "favorite": 1, "forward": 1,
                       "comment": 1, "follow": 1}
        weight_auc = compute_weighted_score(eval_dict, weight_dict)
        print("Weighted uAUC: ", weight_auc)

    if stage in ("evaluate", "submit"):
        # Put every action's predictions next to the ids and save them.
        actions = pd.DataFrame.from_dict(predict_dict)
        print("Actions:", actions)
        ids[["userid", "feedid"]] = ids[["userid", "feedid"]].astype(int)
        res = pd.concat([ids, actions], sort=False, axis=1)
        file_name = "submit_" + str(int(time.time())) + ".csv"
        output_dir = os.path.join(FLAGS.root_path, stage)
        # to_csv does not create missing directories; without this, a missing
        # output directory would discard the results of the whole run.
        os.makedirs(output_dir, exist_ok=True)
        submit_file = os.path.join(output_dir, file_name)
        print('Save to: %s'%submit_file)
        res.to_csv(submit_file, index=False)

    if stage == "submit":
        # Per-action timing values, followed by their mean across actions.
        print('不同目标行为2000条样本平均预测耗时(毫秒):')
        print(predict_time_cost)
        print('单个目标行为2000条样本平均预测耗时(毫秒):')
        print(np.mean(list(predict_time_cost.values())))
    print('Time cost: %.2f s'%(time.time()-start_time))
def main(argv):
    """Run ``lgb_ctr`` for every action in ``ACTION_LIST`` for one stage.

    The stage name is read from ``argv[1]``:

    * ``"offline_train"``: collect each action's uAUC from ``train_test()``
      and print the weighted uAUC over all actions.
    * ``"online_train"``: collect each action's logits from ``train_test()``
      and write them, next to the ids, to ``./submit_lgb_6_6.csv``.

    Any other stage prints a usage message and returns without constructing
    a model.

    Args:
        argv: Argument list; ``argv[1]`` is the stage name.
    """
    stage = argv[1]
    if stage not in ("offline_train", "online_train"):
        # Reject an unsupported stage once, before any model is constructed.
        print("stage must be in [online_train,offline_train]")
        return

    eval_dict = {}
    predict_dict = {}
    ids = None
    for action in ACTION_LIST:
        print("-------------------Action-----------------:", action)
        model = lgb_ctr(stage, action)
        if stage == "offline_train":
            # Offline: train and evaluate; only the uAUC is used afterwards.
            _, _, action_uauc = model.train_test()
            eval_dict[action] = action_uauc
        else:
            # Online: train and keep the predictions for the submit file.
            ids, logits = model.train_test()
            predict_dict[action] = logits

    if stage == "offline_train":
        # Weighted uAUC over all actions.
        print(eval_dict)
        weight_dict = {
            "read_comment": 4,
            "like": 3,
            "click_avatar": 2,
            "favorite": 1,
            "forward": 1,
            "comment": 1,
            "follow": 1,
        }
        weight_auc = compute_weighted_score(eval_dict, weight_dict)
        print("Weighted uAUC: ", weight_auc)
    else:
        # Put every action's predictions next to the ids and save them.
        actions = pd.DataFrame.from_dict(predict_dict)
        print("Actions:", actions)
        ids[["userid", "feedid"]] = ids[["userid", "feedid"]].astype(int)
        res = pd.concat([ids, actions], sort=False, axis=1)
        submit_file = "./submit_lgb_6_6.csv"
        print('Save to: %s' % submit_file)
        res.to_csv(submit_file, index=False)
Пример #3
0
    # NOTE(review): args.action is validated here, but the loop below runs every
    # action in ACTION_LIST regardless of its value — confirm this is intended.
    assert args.action in ACTION_LIST  # read_comment,forward,like,click_avatar,comment,follow,favorite

    # Value returned by main() for each action, keyed by action name.
    score = {}
    for action in ACTION_LIST:
        print(f"------------------Training {action}------------")
        # Every action is run with the same command-line settings; only
        # `action` differs between calls.
        score[action] = main(root_path=args.root_path,
                             stage=args.stage,
                             model_name=args.model_name,
                             action=action,
                             epoch=args.epoch,
                             learning_rate=args.learning_rate,
                             batch_size=args.batch_size,
                             weight_decay=args.weight_decay,
                             device=args.device,
                             save_dir=args.save_dir)
    # Combine the per-action values using the WEIGHT_AUC weights.
    wauc = compute_weighted_score(score, WEIGHT_AUC)
    print(f"weight uAUC:{wauc}")
    # Opened in append mode, so lines from earlier runs stay in result.txt.
    with open('result.txt', 'a+') as f:
        f.write(f'{args.model_name} wAUC {wauc}\n')
    print(f'test wAUC: {wauc}')

    # submit(
    #     root_path=args.root_path,
    #     stage="submit",
    #     model_name=args.model_name,
    #     action="all",
    #     epoch=args.epoch,
    #     learning_rate=args.learning_rate,
    #     batch_size=args.batch_size,
    #     weight_decay=args.weight_decay,
    #     device=args.device,