Example #1
import datetime

import numpy as np
import torch
from torch import optim

# DataSet, AttentionTAPITF, SinglePITF_Loss, mrr_rank_score and ndcg_score are
# assumed to come from the surrounding project; ini_embeddings is assumed to be
# a module-level variable holding pre-trained embeddings.
def train(data, test, m, gamma):
    """
    Main job of this function: define the network; define the data, the loss
    function and the optimizer; compute the key metrics; and run the training
    loop (train the network, then evaluate the metrics on the test set).
    The main parameters to tune are m and gamma.

    :return:
    """
    learnRate = 0.0001
    lam = 0.00005
    dim = 64
    iter_ = 50
    init_st = 0.01
    batch_size = 100
    n = 1000
    k = 5
    # compute numUser, numItem and numTag
    dataload = DataSet(data, test, True)
    num_user, num_item, num_tag = dataload.calc_number_of_dimensions()
    predict_user_weight, item_weight = dataload.weight_to_vector(num_user, num_item, num_tag)
    # model = AttentionTAPITF(int(num_user), int(num_item), int(num_tag), dim, init_st, m, gamma,  predict_user_weight, item_weight,ini_embeddings, True, 'tag', 'TAMLP').cuda()
    model = AttentionTAPITF(int(num_user), int(num_item), int(num_tag), dim, init_st, m, gamma,  predict_user_weight, item_weight, ini_embeddings, True, 'tag', 'GMF').cuda()
    # model = AttentionTAPITF(int(num_user), int(num_item), int(num_tag), dim, init_st, m, gamma, predict_user_weight, item_weight, False, True, 'tag', 'TAMLP').cuda()
    # torch.save(model.state_dict(), 'attention_initial_params')
    # draw negative samples for each positive sample
    loss_function = SinglePITF_Loss().cuda()
    opti = optim.SGD(model.parameters(), lr=learnRate, weight_decay=lam)
    # opti = optim.Adam(model.parameters(), lr=learnRate, weight_decay=lam)
    opti.zero_grad()
    # each sample within an epoch contains one positive example and j negative examples
    best_result = 0
    # best_result_state = model.state_dict()
    # best_file = open('Attention_best_params.txt', 'a')
    # all_data = dataload.get_sequential(num_tag, m, 10, True)
    for epoch in range(iter_):
        # file_ = open('AttentionTureParam.txt', 'a')
        all_data = dataload.get_sequential(num_tag, m, 10, True)
        all_data = all_data[:, :8 + m]
        losses = []
        print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        for i, batch in enumerate(dataload.get_batch(all_data, batch_size)):
            # print(batch)
            # input_ = dataload.draw_negative_sample(num_tag, sample, True)
            r = model(torch.LongTensor(batch).cuda())
            opti.zero_grad()
            # print(model.embedding.userVecs.weight)
            loss = loss_function(r)
            # print(loss)
            loss.backward()
            opti.step()
            losses.append(loss.item())  # .item() extracts a Python float so np.mean works
            if i % n == 0:
                print("[%02d/%d] [%03d/%d] mean_loss : %0.2f" % (
                epoch, iter_, i, len(all_data) / batch_size, np.mean(losses)))
                losses = []
        precision = 0
        recall = 0
        count = 0
        mrr = 0
        recommend_count = 0
        ndcg = 0
        validaTagSet = dataload.validaTagSet

        for u in validaTagSet.keys():
            for i in validaTagSet[u].keys():
                number = 0
                tags = validaTagSet[u][i]
                tagsNum = len(tags)
                if u in dataload.userShortMemory.keys():
                    x_t = torch.LongTensor([u, i] + list(dataload.userShortMemory[u][:m])).cuda()
                else:
                    x_t = torch.LongTensor([u, i] + list(np.zeros(m))).cuda()
                x_t = x_t.unsqueeze(0)
                y_pre = model.predict_top_k(x_t, k)
                # print(y_pre)
                for tag in y_pre:
                    if int(tag) in tags:
                        number += 1
                precision = precision + float(number / k)
                recall = recall + float(number / tagsNum)
                count += 1
                mrr = mrr + mrr_rank_score(list(y_pre), list(tags))
                # print(ndcg_score(np.array(y_pre), list(tags)))
                ndcg = ndcg + ndcg_score(np.array(y_pre), list(tags), k)
                recommend_count += tagsNum
        precision = precision / count
        recall = recall / count
        mrr = mrr / recommend_count
        ndcg = ndcg / count
        if precision == 0 and recall == 0:
            f_score = 0
        else:
            f_score = 2 * (precision * recall) / (precision + recall)
        print("Precisions: " + str(precision))
        print("Recall: " + str(recall))
        print("F1: " + str(f_score))
        print("MRR: " + str(mrr))
        print("NDCG: " + str(ndcg))
        # keep track of the best result the model achieves
        if f_score > best_result:
            best_result = f_score
            # best_result_state = model.state_dict()
        print("best result: " + str(best_result))
        print("==================================")
Example #2
import datetime

import numpy as np
import torch
from torch import optim

# DataSet, AttentionPITF and SinglePITF_Loss are assumed to come from the
# surrounding project.
def train(data, test):
    """
    Main job of this function: define the network; define the data, the loss
    function and the optimizer; compute the key metrics; and run the training
    loop (train the network, then evaluate the metrics on the test set).
    The main parameters to tune are m and gamma.

    :return:
    """
    learnRate = 0.05
    lam = 0.00005
    dim = 64
    iter_ = 100
    init_st = 0.01
    m = 5
    gamma = 0.5
    batch_size = 100
    n = 1000
    # compute numUser, numItem and numTag
    dataload = DataSet(data, test, True)
    num_user, num_item, num_tag = dataload.calc_number_of_dimensions()
    model = AttentionPITF(num_user, num_item, num_tag, dim, init_st, m,
                          gamma).cuda()
    torch.save(model.state_dict(), 'attention_initial_params')
    # draw negative samples for each positive sample
    loss_function = SinglePITF_Loss().cuda()
    opti = optim.SGD(model.parameters(), lr=learnRate, weight_decay=lam)
    opti.zero_grad()
    # each sample within an epoch contains one positive example and j negative examples
    best_result = 0
    best_result_state = model.state_dict()
    for epoch in range(iter_):
        all_data = dataload.get_sequential(num_tag, m, 100)
        all_data = all_data[:, :4 + m]
        losses = []
        print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        for i, batch in enumerate(dataload.get_batch(all_data, batch_size)):
            # print(batch)
            # input_ = dataload.draw_negative_sample(num_tag, sample, True)
            r = model(torch.LongTensor(batch).cuda())
            opti.zero_grad()
            # print(model.embedding.userVecs.weight)
            loss = loss_function(r)
            # print(loss)
            loss.backward()
            opti.step()
            losses.append(loss.item())  # .item() extracts a Python float so np.mean works
            if i % n == 0:
                print("[%02d/%d] [%03d/%d] mean_loss : %0.2f" %
                      (epoch, iter_, i, len(all_data) / batch_size,
                       np.mean(losses)))
                losses = []
        precision = 0
        recall = 0
        count = 0
        validaTagSet = dataload.validaTagSet
        validaTimeList = dataload.validaUserTimeList
        for u in validaTagSet.keys():
            for i in validaTagSet[u].keys():
                number = 0
                tags = validaTagSet[u][i]
                tagsNum = len(tags)
                x_t = torch.LongTensor(
                    [u, i] + list(dataload.userShortMemory[u][:m])).cuda()
                x_t = x_t.unsqueeze(0)
                y_pre = model.predict_top_k(x_t)
                for tag in y_pre[0]:
                    if int(tag) in tags:
                        number += 1
                precision = precision + float(number / 5)
                recall = recall + float(number / tagsNum)
                count += 1
        precision = precision / count
        recall = recall / count
        if precision == 0 and recall == 0:
            f_score = 0
        else:
            f_score = 2 * (precision * recall) / (precision + recall)
        print("Precisions: " + str(precision))
        print("Recall: " + str(recall))
        print("F1: " + str(f_score))
        # save the model state that achieves the best result
        if f_score > best_result:
            best_result = f_score
            best_result_state = model.state_dict()
        print("best result: " + str(best_result))
        print("==================================")
    # torch.save(model, "net.pkl")
    torch.save(best_result_state, "attention_net_params.pkl")
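Example #2 writes the best-performing weights to attention_net_params.pkl. A minimal sketch of restoring them for inference, assuming the model is rebuilt with the same hyperparameters as above:

import torch

model = AttentionPITF(num_user, num_item, num_tag, dim, init_st, m, gamma).cuda()
model.load_state_dict(torch.load("attention_net_params.pkl"))
model.eval()  # switch off training-only behaviour before calling predict_top_k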
Example #3
import numpy as np
import torch
from torch import optim
from torch.autograd import Variable  # legacy API, kept to match the original code

# DataSet, NeuralPITF and PITF_Loss are assumed to come from the surrounding project.
def train(data, test):
    """
    Main job of this function: define the network; define the data, the loss
    function and the optimizer; compute the key metrics; and run the training
    loop (train the network, then evaluate the metrics on the test set).
    :return:
    """
    learnRate = 0.01
    lam = 0.00005
    dim = 64
    iter_ = 100
    init_st = 0.01
    # compute numUser, numItem and numTag
    dataload = DataSet(data, test)
    num_user, num_item, num_tag = dataload.calc_number_of_dimensions()
    model = NeuralPITF(num_user, num_item, num_tag, dim, init_st).cuda()
    # draw negative samples for each positive sample
    loss_function = PITF_Loss().cuda()
    opti = optim.SGD(model.parameters(), lr=learnRate, weight_decay=lam)
    opti.zero_grad()
    # each sample within an epoch contains one positive example and j negative examples
    for epoch in range(iter_):
        losses = []
        n = 0
        for sample in data:
            n += 1
            numNeg = 10
            input_ = sample
            while numNeg > 0:
                numNeg -= 1
                neg = dataload.draw_negative_sample(num_tag, input_)
                pos = Variable(torch.LongTensor(input_)).cuda()  # avoid shadowing the loop variable `sample`
                neg = Variable(torch.LongTensor(neg)).cuda()
                r_p = model(pos)
                r_n = model(neg)
                opti.zero_grad()
                # print(model.embedding.userVecs.weight)
                loss = loss_function(r_p, r_n)
                loss.backward()
                opti.step()
                losses.append(loss.item())  # .item() extracts a Python float so np.mean works
            if n % 1000 == 0:
                print("the loss of %s sample in %s iter is : " %
                      (str(epoch), str(n)) + str(np.mean(losses)))
        precision = 0
        recall = 0
        count = 0
        validaTagSet = dataload.validaTagSet
        for u in validaTagSet.keys():
            for i in validaTagSet[u].keys():
                number = 0
                tags = validaTagSet[u][i]
                tagsNum = len(tags)
                y_pre = model.predict_top_k(u, i, 5)
                for tag in y_pre:
                    if tag in tags:
                        number += 1
                precision = precision + float(number / 5)
                recall = recall + float(number / tagsNum)
                count += 1
        precision = precision / count
        recall = recall / count
        if precision == 0 and recall == 0:
            f_score = 0
        else:
            f_score = 2 * (precision * recall) / (precision + recall)
        print("Precisions: " + str(precision))
        print("Recall: " + str(recall))
        print("F1: " + str(f_score))
        print("==================================")