Exemplo n.º 1
0
def esmm_model_train(train_data_path):
    """Build an EsmmModel sized from the pickled id maps and train it.

    The four id-map pickles are loaded through ``read_data_pickle`` and the
    number of keys in each one becomes the feature size of the matching
    embedding. The training CSV is then read and handed to ``model.train``,
    after which ``model.clear_all`` is called.

    Args:
        train_data_path: Path of the training CSV; it is appended to the
            string returned by ``get_data_path()``.

    Returns:
        None.
    """
    # Determine the embedding layer dimensions: one id map per categorical
    # feature, loaded in the order cat, cat1, brand, pagecat.
    id_map_files = ("cat2id.pkl", "cat12id.pkl", "brand2id.pkl",
                    "pagecat2id.pkl")
    id_maps = [read_data_pickle(file_name) for file_name in id_map_files]
    cat_size, cat1_size, brand_size, pagecat_size = [
        len(id_map.keys()) for id_map in id_maps
    ]

    # Initialise the model.
    model_kwargs = {
        "cat_names_feature_size": cat_size,
        "cat_names_embedding_size": 16,
        "cat1_names_feature_size": cat1_size,
        "cat1_names_embedding_size": 8,
        "brand_names_feature_size": brand_size,
        "brand_names_embedding_size": 32,
        "pagecat_names_feature_size": pagecat_size,
        "pagecat_names_embedding_size": 64,
        "output_dir": get_data_path() + "../saved_sessions/result/model/",
    }
    model = EsmmModel(**model_kwargs)

    # Train the model (the original note says this step also saves it).
    # The id columns are forced to int when the CSV is parsed.
    id_columns = ("pagecat", "brand", "cat", "cat1")
    csv_path = get_data_path() + train_data_path
    df_train = pd.read_csv(csv_path,
                           dtype={column: int for column in id_columns})
    # NOTE(review): the same frame is passed for both arguments of train();
    # presumably they are (train, validation) - confirm this is intended.
    model.train(df_train, df_train)
    model.clear_all()
Exemplo n.º 2
0
def get_trained_model():
    """Rebuild an EsmmModel and restore its session.

    The model is constructed with feature sizes taken from the number of keys
    in the four pickled id maps, its build steps are run in a fixed order, and
    ``restore_session`` is called last.

    Returns:
        The ``EsmmModel`` instance after ``restore_session()`` has run.
    """
    # One id map per categorical feature, loaded in the order
    # cat, cat1, brand, pagecat.
    id_map_files = ("cat2id.pkl", "cat12id.pkl", "brand2id.pkl",
                    "pagecat2id.pkl")
    id_maps = [read_data_pickle(file_name) for file_name in id_map_files]
    cat_size, cat1_size, brand_size, pagecat_size = [
        len(id_map.keys()) for id_map in id_maps
    ]

    # Initialise the model.
    model_kwargs = {
        "cat_names_feature_size": cat_size,
        "cat_names_embedding_size": 16,
        "cat1_names_feature_size": cat1_size,
        "cat1_names_embedding_size": 8,
        "brand_names_feature_size": brand_size,
        "brand_names_embedding_size": 32,
        "pagecat_names_feature_size": pagecat_size,
        "pagecat_names_embedding_size": 64,
        "output_dir": get_data_path() + "../saved_sessions/result/model/",
    }
    model = EsmmModel(**model_kwargs)

    # Run the build steps in exactly this order, then restore the session.
    setup_steps = (
        "build_place_holders",
        "build_dense_weight",
        "build_deep_weight",
        "build_dense_layer",
        "build_input_layer",
        "build_logit_layer",
        "restore_session",
    )
    for step_name in setup_steps:
        getattr(model, step_name)()

    return model
Exemplo n.º 3
0
    def evaluate(self, test_data):
        """Run the model over ``test_data`` and score both prediction heads.

        All rows are fed in a single session call. Each prediction output is
        turned into a 0/1 label with a 0.5 threshold, the positive counts of
        the labels and of the predictions are logged, and a report is written
        to ``final_result.csv`` under ``get_data_path()``.

        Note that ``test_data`` is modified in place: the columns
        "purchased_pred" and "clicked_pred" are added to it.

        Args:
            test_data: Frame holding at least the "purchased" and "clicked"
                columns plus the report columns listed below.

        Returns:
            A tuple ``(loss, purchase_pred_score, click_pred_score)`` where
            the two scores come from ``self.eval_metric``.
        """

        def to_binary(scores):
            # A score of exactly 0.5 counts as a positive prediction.
            return [1 if score >= 0.5 else 0 for score in scores]

        row_count = len(test_data["purchased"])
        feed = self.feed_batch(test_data, 0, row_count)
        fetches = [self.loss, self.purchase_predictions, self.ctr_predictions]
        loss, purchase_scores, click_scores = self.sess.run(fetches,
                                                            feed_dict=feed)

        purchased_raw = np.array(test_data["purchased"].values.tolist())
        clicked_raw = np.array(test_data["clicked"].values.tolist())

        purchase_flags = to_binary(purchase_scores)
        click_flags = to_binary(click_scores)
        purchased_truth = list(map(int, purchased_raw))
        clicked_truth = list(map(int, clicked_raw))

        # Log the ground-truth positives first, then the predicted positives.
        count_reports = (
            ("actually purchased: %s", purchased_truth),
            ("actually clicked: %s", clicked_truth),
            ("predict purchased: %s", purchase_flags),
            ("predict clicked: %s", click_flags),
        )
        for template, flags in count_reports:
            log_handler.log.info(template % sum(flags))

        test_data["purchased_pred"] = np.array(purchase_flags)
        test_data["clicked_pred"] = np.array(click_flags)

        report_columns = [
            "goodssn", "keyword", "expose_total", "click_total",
            "purchase_total", "click_rate", "purchase_rate", "clicked",
            "purchased", "clicked_pred", "purchased_pred"
        ]
        report = test_data[report_columns]
        report.to_csv(get_data_path() + "final_result.csv")

        purchase_pred_score = self.eval_metric(purchased_truth, purchase_flags)
        click_pred_score = self.eval_metric(clicked_truth, click_flags)
        return loss, purchase_pred_score, click_pred_score
Exemplo n.º 4
0
def model_train():
    """Retrain the ESMM model, logging the start, the end and any failure.

    Calls ``rmdir_if_exist`` on the saved-model directory, runs
    ``esmm_model_train`` on ``FEATURE_TRAIN_SOURCE`` and then sleeps for five
    seconds.

    Returns:
        None.

    Raises:
        Exception: Any exception raised by the steps above. It is logged and
            then re-raised unchanged, so callers see the original type,
            message and traceback.
    """
    try:
        log_handler.log.info(
            "----------------Training Esmm Model----------------")
        # Clear the previous model output before training again.
        rmdir_if_exist(get_data_path() + "../saved_sessions/result/model")
        esmm_model_train(FEATURE_TRAIN_SOURCE)
        log_handler.log.info(
            "----------------Finish training Esmm Model----------------")
        # NOTE(review): the reason for this pause is not visible from here.
        time.sleep(5)
    except Exception:
        log_handler.log.info(
            "----------------Error training Esmm Model----------------")
        log_handler.log.info(str(excp_trace()))
        # Bare re-raise: `raise Exception` would replace the real error with
        # a blank Exception and hide its type and message from the caller.
        raise
Exemplo n.º 5
0
    def predict(self, test_data, result_file):
        """Score ``test_data`` and pickle the click/purchase probabilities.

        All rows are fed in a single session call. The two prediction outputs
        are stored as "click_prob" and "purchase_prob", their product as
        "overall_prob", and the resulting five-column frame is pickled.

        Note that ``test_data`` is modified in place: the columns "purchased",
        "clicked", "click_prob", "purchase_prob" and "overall_prob" are
        written to it.

        Args:
            test_data: Frame holding at least the "pagecat", "goodssn" and
                "keyword" columns.
            result_file: Output file name; it is appended to the string
                returned by ``get_data_path()``.

        Returns:
            None.
        """
        row_count = len(test_data["pagecat"])

        # Fill both label columns with zeros before building the feed.
        # NOTE(review): presumably feed_batch requires these columns even
        # though no labels exist at prediction time - confirm.
        for label_column in ("purchased", "clicked"):
            test_data[label_column] = np.zeros(row_count, dtype="float")

        feed = self.feed_batch(test_data, 0, row_count)
        fetches = [self.purchase_predictions, self.ctr_predictions]
        purchase_prob, click_prob = self.sess.run(fetches, feed_dict=feed)

        test_data["click_prob"] = click_prob
        test_data["purchase_prob"] = purchase_prob
        test_data["overall_prob"] = (test_data["purchase_prob"] *
                                     test_data["click_prob"])

        output_columns = [
            "goodssn", "keyword", "purchase_prob", "click_prob", "overall_prob"
        ]
        result = test_data[output_columns]
        pd.to_pickle(result, get_data_path() + result_file)