Example #1
def ConvMF(res_dir,
           train_user,
           train_item,
           valid_user,
           test_user,
           R,
           CNN_X,
           vocab_size,
           init_W=None,
           give_item_weight=True,
           max_iter=50,
           lambda_u=1,
           lambda_v=100,
           dimension=50,
           dropout_rate=0.2,
           emb_dim=200,
           max_len=300,
           num_kernel_per_ws=100):
    # explicit setting
    a = 1
    b = 0

    num_user = R.shape[0]
    num_item = R.shape[1]
    PREV_LOSS = 1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    f1 = open(res_dir + '/state.log', 'w')

    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]
    Valid_R = valid_user[1]

    if give_item_weight:
        item_weight = np.array([math.sqrt(len(i)) for i in Train_R_J],
                               dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10

    cnn_module = CNN_module(dimension, vocab_size, dropout_rate, emb_dim,
                            max_len, num_kernel_per_ws, init_W)
    theta = cnn_module.get_projection_layer(CNN_X)
    np.random.seed(133)
    U = np.random.uniform(size=(num_user, dimension))
    V = theta

    endure_count = 5
    count = 0
    for iteration in range(max_iter):
        loss = 0
        tic = time.time()
        print("%d iteration\t(patience: %d)" % (iteration, count))

        VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in range(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            A = VV + (a - b) * (V_i.T.dot(V_i))
            B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)

            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)

        sub_loss = np.zeros(num_item)
        UU = b * (U.T.dot(U))
        for j in range(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]

            tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
            A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
            B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                 ).sum(0) + lambda_v * item_weight[j] * theta[j]
            V[j] = np.linalg.solve(A, B)

            sub_loss[j] = -0.5 * np.square(R_j * a).sum()
            sub_loss[j] = sub_loss[j] + a * np.sum((U_j.dot(V[j])) * R_j)
            sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])

        loss = loss + np.sum(sub_loss)
        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V, item_weight, seed)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item

        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        toc = time.time()
        elapsed = toc - tic

        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        if (val_eval < pre_val_eval):
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/U.dat', U)
            np.savetxt(res_dir + '/V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
        else:
            count = count + 1

        pre_val_eval = val_eval

        print(
            "Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f"
            % (loss, elapsed, converge, tr_eval, val_eval, te_eval))
        f1.write(
            "Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n"
            % (loss, elapsed, converge, tr_eval, val_eval, te_eval))

        if (count == endure_count):
            break

        PREV_LOSS = loss

    f1.close()
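The per-user loop above implements a closed-form ridge-regression update: with a = 1 and b = 0, each U[i] solves (V_i^T V_i + lambda_u * I) u = V_i^T R_i. A minimal, self-contained numpy sketch of that update; the sizes here are illustrative, not taken from the dataset:

import numpy as np

np.random.seed(0)
dimension, lambda_u = 5, 1.0
V_i = np.random.uniform(size=(8, dimension))          # items rated by user i
R_i = np.random.randint(1, 6, size=8).astype(float)   # user i's ratings

# with a = 1, b = 0: A = V_i^T V_i + lambda_u * I and B = V_i^T R_i
A = V_i.T.dot(V_i) + lambda_u * np.eye(dimension)
B = V_i.T.dot(R_i)
u_i = np.linalg.solve(A, B)   # the vector ConvMF assigns to U[i]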
Example #2
def DRMF(res_dir,
         train_user,
         train_item,
         valid_user,
         test_user,
         R,
         DNN_X,
         DNN_Y,
         vocab_size,
         init_W=None,
         give_weight=True,
         max_iter=50,
         lambda_u=1,
         lambda_v=100,
         dimension=50,
         dropout_rate=0.2,
         emb_dim=200,
         num_kernel_per_ws=50,
         dnn_type='CNN_GRU',
         reg_schema='Dual',
         gru_outdim=50,
         maxlen_doc=[10, 10],
         maxlen_sent=[30, 30]):
    # explicit setting (a, b are the confidence weights for observed and
    # unobserved entries; they are used below but were dropped from this
    # snippet, so restore the values used in the ConvMF examples)
    a = 1
    b = 0
    num_user = R.shape[0]
    num_item = R.shape[1]
    PREV_LOSS = 1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    f1 = open(res_dir + '/state.log', 'a')
    f1.write("### DRMF-%s-%s ###\n\n" % (reg_schema, dnn_type))
    f1.write("===Configuration===\n")
    f1.write("lambda_u=%f, lambda_v=%f\n" % (lambda_u, lambda_v))
    f1.write("maxlen_doc=[%d,%d], maxlen_sent=[%d,%d]\n" %
             (maxlen_doc[0], maxlen_doc[1], maxlen_sent[0], maxlen_sent[1]))
    f1.write(
        "emb_dim=%d, dimension=%d, num_kernel_per_ws=%d, dropout_rate=%.2f, gru_outdim=%d\n\n"
        % (emb_dim, dimension, num_kernel_per_ws, dropout_rate, gru_outdim))
    f1.write("Tr:Training, Val:Validation, Te:Test, []: [MAE, MSE, RMSE]\n")

    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]
    Valid_R = valid_user[1]

    if give_weight:
        item_weight = np.array([math.sqrt(len(i)) for i in Train_R_J],
                               dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight

        user_weight = np.array([math.sqrt(len(u)) for u in Train_R_I],
                               dtype=float)
        user_weight = (float(num_user) / user_weight.sum()) * user_weight

    else:
        item_weight = np.ones(num_item, dtype=float)
        user_weight = np.ones(num_user, dtype=float)

    pre_val_rmse = 1e10
    ENDURE_COUNT = 5  # early-stopping patience; defined as a module-level
                      # constant in the original source and restored here
    if dnn_type == 'CNN':
        if reg_schema == 'Item' or reg_schema == 'Dual':
            dnn_module_x = CNN_module(dimension, vocab_size, dropout_rate,
                                      emb_dim, maxlen_doc[0] * maxlen_sent[0],
                                      num_kernel_per_ws, init_W)
        if reg_schema == 'User' or reg_schema == 'Dual':
            dnn_module_y = CNN_module(dimension, vocab_size, dropout_rate,
                                      emb_dim, maxlen_doc[1] * maxlen_sent[1],
                                      num_kernel_per_ws, init_W)

    if dnn_type == 'CNN_GRU':
        if reg_schema == 'Item' or reg_schema == 'Dual':
            dnn_module_x = CNN_GRU_module(dimension, vocab_size, dropout_rate,
                                          emb_dim, gru_outdim, maxlen_doc[0],
                                          maxlen_sent[0], num_kernel_per_ws,
                                          init_W)
        if reg_schema == 'User' or reg_schema == 'Dual':
            dnn_module_y = CNN_GRU_module(dimension, vocab_size, dropout_rate,
                                          emb_dim, gru_outdim, maxlen_doc[1],
                                          maxlen_sent[1], num_kernel_per_ws,
                                          init_W)

    if reg_schema == 'Item' or reg_schema == 'Dual':
        theta = dnn_module_x.get_projection_layer(DNN_X)
    if reg_schema == 'User' or reg_schema == 'Dual':
        phi = dnn_module_y.get_projection_layer(DNN_Y)

    np.random.seed(133)

    if reg_schema == 'User' or reg_schema == 'Dual':
        U = phi
    else:
        U = np.random.uniform(size=(num_user, dimension))

    if reg_schema == 'Item' or reg_schema == 'Dual':
        V = theta
    else:
        V = np.random.uniform(size=(num_item, dimension))

    count = 0
    for iteration in range(max_iter):
        loss = 0
        tic = time.time()
        print("%d iteration\t(patience: %d)" % (iteration, count))
        f1.write("%d iteration\t(patience: %d)\n" % (iteration, count))

        VV = b * (V.T.dot(V))
        sub_loss = np.zeros(num_user)
        # update U
        for i in range(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]

            tmp_A = VV + (a - b) * (V_i.T.dot(V_i))
            A = tmp_A + lambda_u * user_weight[i] * np.eye(dimension)
            B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)
            if reg_schema == 'User' or reg_schema == 'Dual':
                B = B + lambda_u * user_weight[i] * phi[i]
            U[i] = np.linalg.solve(A, B)
            # -\frac{\lambda_u}{2}\sum_i u_i^Tu_i
            if reg_schema == 'Item':
                sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss += np.sum(sub_loss)

        sub_loss_dev = np.zeros(num_item)
        sub_loss = np.zeros(num_item)
        # update V
        UU = b * (U.T.dot(U))
        for j in range(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]

            tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
            A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
            B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)).sum(0)
            if reg_schema == 'Item' or reg_schema == 'Dual':
                B = B + lambda_v * item_weight[j] * theta[j]

            V[j] = np.linalg.solve(A, B)
            # -\sum_i\sum_j\frac{c_{i,j}}{2}(r_{ij}-u_i^T v_j)^2
            sub_loss_dev[j] = -0.5 * a * np.square(R_j).sum()
            sub_loss_dev[j] += a * np.sum((U_j.dot(V[j])) * R_j)
            sub_loss_dev[j] += -0.5 * np.dot(V[j].dot(tmp_A), V[j])
            # -\frac{\lambda_v}{2}\sum_jv_j^Tv_j
            if reg_schema == 'User':
                sub_loss[j] = -0.5 * lambda_v * np.dot(V[j], V[j])

        loss += np.sum(sub_loss_dev)
        loss += np.sum(sub_loss)

        seed = np.random.randint(100000)

        if reg_schema == 'Item' or reg_schema == 'Dual':
            history_x = dnn_module_x.train(DNN_X, V, item_weight, seed)
            theta = dnn_module_x.get_projection_layer(DNN_X)
            # -\frac{\lambda_v}{2}\sum_j(v_j-\theta_j)^T(v_j-\theta_j)
            cnn_loss_x = history_x.history['loss'][-1]
            loss += -0.5 * lambda_v * cnn_loss_x * num_item

        if reg_schema == 'User' or reg_schema == 'Dual':
            history_y = dnn_module_y.train(DNN_Y, U, user_weight, seed)
            phi = dnn_module_y.get_projection_layer(DNN_Y)
            # -\frac{\lambda_u}{2}\sum_i (u_i-\phi_i)^T(u_i-\phi_i)
            cnn_loss_y = history_y.history['loss'][-1]
            loss += -0.5 * lambda_u * cnn_loss_y * num_user

        tr_mae, tr_mse, tr_rmse = eval_RATING(Train_R_I, U, V, train_user[0])
        val_mae, val_mse, val_rmse = eval_RATING(Valid_R, U, V, valid_user[0])
        te_mae, te_mse, te_rmse = eval_RATING(Test_R, U, V, test_user[0])

        toc = time.time()
        elapsed = toc - tic

        if iteration == 0:
            converge = -1
        else:
            converge = abs((loss - PREV_LOSS) / PREV_LOSS)


#         if (val_rmse < pre_val_rmse):
#             if dnn_type == 'CNN':
#                 if os.path.exists(res_dir + '/drmf_cnn') is not True:
#                     os.mkdir(res_dir + '/drmf_cnn')
#                     if os.path.exists(res_dir + '/drmf_cnn/dual') is not True:
#                         os.mkdir(res_dir + '/drmf_cnn/dual')
#                     if os.path.exists(res_dir + '/drmf_cnn/user') is not True:
#                         os.mkdir(res_dir + '/drmf_cnn/user')
#                     if os.path.exists(res_dir + '/drmf_cnn/item') is not True:
#                         os.mkdir(res_dir + '/drmf_cnn/item')
#                 if reg_schema == 'Dual':
#                     np.savetxt(res_dir + '/drmf_cnn/dual/U.dat', U)
#                     np.savetxt(res_dir + '/drmf_cnn/dual/V.dat', V)
#                     np.savetxt(res_dir + '/drmf_cnn/dual/theta.dat', theta)
#                     np.savetxt(res_dir + '/drmf_cnn/dual/phi.dat', phi)
#                     dnn_module_x.save_model(res_dir + '/drmf_cnn/dual/x_weights.hdf5')
#                     dnn_module_y.save_model(res_dir + '/drmf_cnn/dual/y_weights.hdf5')
#                 if reg_schema == 'User':
#                     np.savetxt(res_dir + '/drmf_cnn/user/U.dat', U)
#                     np.savetxt(res_dir + '/drmf_cnn/user/V.dat', V)
#                     np.savetxt(res_dir + '/drmf_cnn/user/phi.dat', phi)
#                     dnn_module_y.save_model(res_dir + '/drmf_cnn/user/y_weights.hdf5')
#                 if reg_schema == 'Item':
#                     np.savetxt(res_dir + '/drmf_cnn/item/U.dat', U)
#                     np.savetxt(res_dir + '/drmf_cnn/item/V.dat', V)
#                     np.savetxt(res_dir + '/drmf_cnn/item/theta.dat', theta)
#                     dnn_module_x.save_model(res_dir + '/drmf_cnn/item/x_weights.hdf5')
#             if dnn_type == 'CNN_GRU':
#                 if os.path.exists(res_dir + '/drmf_cnn_gru') is not True:
#                     os.mkdir(res_dir + '/drmf_cnn_gru')
#                     if os.path.exists(res_dir + '/drmf_cnn_gru/dual') is not True:
#                         os.mkdir(res_dir + '/drmf_cnn_gru/dual')
#                     if os.path.exists(res_dir + '/drmf_cnn_gru/user') is not True:
#                         os.mkdir(res_dir + '/drmf_cnn_gru/user')
#                     if os.path.exists(res_dir + '/drmf_cnn_gru/item') is not True:
#                         os.mkdir(res_dir + '/drmf_cnn_gru/item')
#                 if reg_schema == 'Dual':
#                     np.savetxt(res_dir + '/drmf_cnn_gru/dual/U.dat', U)
#                     np.savetxt(res_dir + '/drmf_cnn_gru/dual/V.dat', V)
#                     np.savetxt(res_dir + '/drmf_cnn_gru/dual/theta.dat', theta)
#                     np.savetxt(res_dir + '/drmf_cnn_gru/dual/phi.dat', phi)
#                     dnn_module_x.save_model(res_dir + '/drmf_cnn_gru/dual/x_weights.hdf5')
#                     dnn_module_y.save_model(res_dir + '/drmf_cnn_gru/dual/y_weights.hdf5')
#                 if reg_schema == 'User':
#                     np.savetxt(res_dir + '/drmf_cnn_gru/user/U.dat', U)
#                     np.savetxt(res_dir + '/drmf_cnn_gru/user/V.dat', V)
#                     np.savetxt(res_dir + '/drmf_cnn_gru/user/phi.dat', phi)
#                     dnn_module_y.save_model(res_dir + '/drmf_cnn_gru/user/y_weights.hdf5')
#                 if reg_schema == 'Item':
#                     np.savetxt(res_dir + '/drmf_cnn_gru/item/U.dat', U)
#                     np.savetxt(res_dir + '/drmf_cnn_gru/item/V.dat', V)
#                     np.savetxt(res_dir + '/drmf_cnn_gru/item/theta.dat', theta)
#                     dnn_module_x.save_model(res_dir + '/drmf_cnn_gru/item/x_weights.hdf5')
#         else:
#             count = count + 1
# for fast running, without saving models
        if (val_rmse >= pre_val_rmse):
            count = count + 1
        pre_val_rmse = val_rmse

        print(
            "Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: [%.5f, %.5f, %.5f] Val: [%.5f, %.5f, %.5f] Te: [%.5f, %.5f, %.5f]"
            % (loss, elapsed, converge, tr_mae, tr_mse, tr_rmse, val_mae,
               val_mse, val_rmse, te_mae, te_mse, te_rmse))
        f1.write(
            "Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: [%.5f, %.5f, %.5f] Val: [%.5f, %.5f, %.5f] Te: [%.5f, %.5f, %.5f]\n"
            % (loss, elapsed, converge, tr_mae, tr_mse, tr_rmse, val_mae,
               val_mse, val_rmse, te_mae, te_mse, te_rmse))

        if (count == ENDURE_COUNT):
            break

        PREV_LOSS = loss

    f1.close()
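The only structural difference from ConvMF is where the network prior enters each linear system: under the 'Dual' schema both updates gain a prior term (phi[i] for users, theta[j] for items). A minimal sketch of the item update with the prior, using toy values; theta_j stands in for the CNN/GRU projection of item j:

import numpy as np

np.random.seed(0)
dimension, lambda_v, a, b = 5, 100.0, 1.0, 0.0
U_j = np.random.uniform(size=(6, dimension))        # users who rated item j
R_j = np.random.randint(1, 6, size=6).astype(float)
theta_j = np.random.uniform(size=dimension)         # network projection for item j
w_j = 1.0                                           # item_weight[j]

# with b = 0 the UU term vanishes, leaving a weighted ridge regression
A = (a - b) * U_j.T.dot(U_j) + lambda_v * w_j * np.eye(dimension)
B = a * U_j.T.dot(R_j) + lambda_v * w_j * theta_j   # the prior pulls v_j toward theta_j
v_j = np.linalg.solve(A, B)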
Example #3
def ConvMF(res_dir,
           train_user,
           train_item,
           valid_user,
           test_user,
           R,
           CNN_X,
           vocab_size,
           init_W=None,
           give_item_weight=True,
           max_iter=50,
           lambda_u=1,
           lambda_v=100,
           dimension=50,
           dropout_rate=0.2,
           emb_dim=200,
           max_len=300,
           num_kernel_per_ws=100):
    '''
    Build and train the convolutional matrix factorization (ConvMF) model.
    :param res_dir: directory for result files
    :param train_user: training-set user sparse rating vectors (libSVM format)
    :param train_item: training-set item sparse rating vectors (libSVM format)
    :param valid_user: validation-set user sparse rating vectors (libSVM format)
    :param test_user: test-set user sparse rating vectors (libSVM format)
    :param R: raw rating data, format: user id::item id::rating
    :param CNN_X: item-description word sequences
    :param vocab_size: vocabulary size
    :param init_W: pretrained word-embedding weights; if None, the embeddings
        are trained from scratch
    :param give_item_weight: if True, weight each item by the square root of
        its rating count; otherwise use uniform item weights
    :param max_iter: maximum number of iterations
    :param lambda_u: user-side regularization coefficient
    :param lambda_v: item-side regularization coefficient
    :param dimension: latent-factor dimensionality
    :param dropout_rate: dropout rate
    :param emb_dim: word-embedding dimensionality
    :param max_len: maximum length of an item's text description
    :param num_kernel_per_ws: number of CNN kernels per window size
    :return: None
    '''
    # explicit setting
    a = 1
    b = 0

    num_user = R.shape[0]  #6040
    num_item = R.shape[1]  #3544
    PREV_LOSS = 1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    f1 = open(res_dir + '/state.log', 'w')

    Train_R_I = train_user[1]  #user rating_list
    Train_R_J = train_item[1]  #item rating_list
    Test_R = test_user[1]
    Valid_R = valid_user[1]

    if give_item_weight:
        item_weight = np.array([math.sqrt(len(i)) for i in Train_R_J],
                               dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10
    ## init CNN model
    cnn_module = CNN_module(dimension, vocab_size, dropout_rate, emb_dim,
                            max_len, num_kernel_per_ws, init_W)
    theta = cnn_module.get_projection_layer(CNN_X)
    np.random.seed(133)
    # user-latent matrix
    U = np.random.uniform(size=(num_user, dimension))
    # item-latent matrix
    V = theta

    endure_count = 5  # stop training once validation fails to improve 5 times
    count = 0
    for iteration in range(max_iter):
        loss = 0
        tic = time.time()
        print("%d iteration\t(patience: %d)" % (iteration, count))

        ## update the user latent matrix and accumulate its loss term
        VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)  # b = 0, so this is lambda_u * I
        sub_loss = np.zeros(num_user)

        for i in range(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            A = VV + (a - b) * (V_i.T.dot(V_i))
            B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)

            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)
        ## update the item latent matrix and accumulate its loss term
        sub_loss = np.zeros(num_item)
        UU = b * (U.T.dot(U))
        for j in range(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]

            tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
            A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
            B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                 ).sum(0) + lambda_v * item_weight[j] * theta[j]
            V[j] = np.linalg.solve(A, B)

            sub_loss[j] = -0.5 * np.square(R_j * a).sum()
            sub_loss[j] = sub_loss[j] + a * np.sum((U_j.dot(V[j])) * R_j)
            sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])

        loss = loss + np.sum(sub_loss)
        seed = np.random.randint(100000)

        # get cnn loss
        history = cnn_module.train(CNN_X, V, item_weight, seed)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item

        # get rmse eval
        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        toc = time.time()
        elapsed = toc - tic

        converge = abs((loss - PREV_LOSS) / PREV_LOSS)
        # save U, V, theta and the CNN weights whenever validation RMSE improves
        if (val_eval < pre_val_eval):
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/U.dat', U)
            np.savetxt(res_dir + '/V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
        else:
            count = count + 1

        pre_val_eval = val_eval
        print(
            "Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f"
            % (loss, elapsed, converge, tr_eval, val_eval, te_eval))
        f1.write(
            "Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n"
            % (loss, elapsed, converge, tr_eval, val_eval, te_eval))
        if (count == endure_count):
            break
        PREV_LOSS = loss

    f1.close()
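eval_RMSE is an external helper in this codebase. Judging only from how it is called here (a list of per-user rating lists, the factor matrices, and per-user item-index lists), a plausible minimal implementation might look like the sketch below; this is an assumption about its behavior, not the project's actual code:

import numpy as np

def eval_RMSE_sketch(R_lists, U, V, item_idx_lists):
    # R_lists[i] holds user i's observed ratings for the items in item_idx_lists[i]
    sq_err, n = 0.0, 0
    for i, idx in enumerate(item_idx_lists):
        if len(idx) == 0:
            continue
        pred = U[i].dot(V[idx].T)   # predicted ratings for user i's items
        sq_err += np.square(np.asarray(R_lists[i]) - pred).sum()
        n += len(idx)
    return np.sqrt(sq_err / n)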
Example #4
def Haec(res_dir,
         train_user,
         train_item,
         valid_user,
         test_user,
         R,
         CNN_X,
         vocab_size,
         init_W=None,
         give_item_weight=True,
         max_iter=30,
         lambda_u=1,
         lambda_v=1,
         dimension=50,
         dropout_rate=0.2,
         emb_dim=200,
         max_len=300,
         num_kernel_per_ws=100):
    # explicit setting
    a = 1
    b = 0
    num_user = R.shape[0]
    num_item = R.shape[1]

    fileU = '../data/pre/ml_1m/User.npy'
    # fileR = "../Data/convmf/preprocessed/movielens_100k/R.npy"

    Uinfo = getData1(fileUser=fileU)
    PREV_LOSS = 1e-50
    PREV_TE = 1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    f1 = open(res_dir + '/state.log', 'w')
    # Train_R_I: ratings grouped per user, [[all ratings of user 1], [all ratings of user 2], ...]; length 6040
    Train_R_I = train_user[1]
    # Train_R_J: ratings grouped per item, [[all ratings of item 1], [all ratings of item 2], ...]; length 3544
    Train_R_J = train_item[1]
    Test_R = test_user[1]
    Valid_R = valid_user[1]

    if give_item_weight:
        # rationale: items with more ratings receive larger weights
        # item_weight: square root of the number of ratings each item received
        item_weight = np.array([math.sqrt(len(i)) for i in Train_R_J],
                               dtype=float)
        # rescale by (num_item / sum of the square roots), a normalization;
        # afterwards each item's weight falls roughly in (0, 5)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10

    cnn_module = CNN_module(dimension, vocab_size, dropout_rate, emb_dim,
                            max_len, num_kernel_per_ws, init_W)

    # theta: CNN projection output; a (3544, 50) array with values roughly
    # in (-0.06, 0.039)
    theta = cnn_module.get_projection_layer(CNN_X)
    V = theta
    # fix the random seed before initialization for reproducibility
    np.random.seed(133)
    # U: (6040, 50) user latent matrix; V: (3544, 50) item latent matrix
    U = np.random.uniform(size=(num_user, dimension))
    # g1 = tf.Graph()
    # with tf.Session(graph=tf.get_default_graph()) as sess:
    config = tf.ConfigProto()
    config.gpu_options.allocator_type = 'BFC'
    sess = tf.Session(config=config)
    # mlp_args: a module-level dict of USDAE hyperparameters in the original
    # source (it is referenced again below for the learning rate)
    model = USDAE1(sess, R.shape, Uinfo.shape, is_training=True, **mlp_args)
    print("build model...")
    model.build()
    u_loss, ulatent = model.train(R.toarray(), Uinfo, U,
                                  mlp_args["learning_rate"])
    U = ulatent
    endure_count = 5
    count = 0
    # mcount tracks increases in test MAE; initialized here rather than inside
    # the loop, where resetting it every iteration made the break unreachable
    mcount = 0
    for iteration in range(max_iter):
        loss = 0
        tic = time.time()
        print("%d iteration\t(patience: %d)" % (iteration, count))
        # part of Eq. 7: with b = 0, VV reduces to lambda_u * I_k
        VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)

        # set the partial derivative to zero to solve for each U[i] and
        # accumulate the U part of the loss (num_user = 6040 entries)
        sub_loss = np.zeros(num_user)
        for i in range(num_user):
            # idx_item: IDs of the movies user i has rated
            idx_item = train_user[0][i]
            # V_i: rows of V for the items user i rated, shape (#ratings of i, 50)
            V_i = V[idx_item]
            # R_i: all ratings of user i, length len(idx_item)
            R_i = Train_R_I[i]
            # A: the (50, 50) matrix inverted in Eq. 7
            A = VV + (a - b) * (V_i.T.dot(V_i))
            # B: the right-hand side of Eq. 7;
            # np.tile(A, rep) repeats A rep times to build an array

            B = (a * V_i *
                 (np.tile(R_i,
                          (dimension, 1)).T)).sum(0) + lambda_u * ulatent[i].T
            # np.linalg.solve(A, B) solves the linear system A x = B
            U[i] = np.linalg.solve(A, B)

            # U part of the loss
            sub_loss[i] = -0.5 * lambda_u * (np.sum(
                np.square(U[i] - ulatent[i])))
        loss = loss + np.sum(sub_loss)

        # set the partial derivative to zero to solve for each V[j] and
        # accumulate the V part of the loss (num_item = 3544 entries)
        sub_loss = np.zeros(num_item)
        # b is 0
        UU = b * (U.T.dot(U))
        for j in range(num_item):
            # idx_user: IDs of the users who rated item j
            idx_user = train_item[0][j]
            # U_j: rows of U for those users, shape (#ratings of item j, 50)
            U_j = U[idx_user]
            R_j = Train_R_J[j]
            tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
            # A: the matrix inverted in Eq. 8, with item_weight[j] folded in
            A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
            # B: the second half of Eq. 8; the CNN prior contributes
            # lambda_v * item_weight[j] * theta[j]
            B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                 ).sum(0) + lambda_v * item_weight[j] * theta[j]
            V[j] = np.linalg.solve(A, B)
            # the next three lines form the first term of Eq. 6, (R - UV)^2
            sub_loss[j] = -0.5 * np.square(R_j * a).sum()
            sub_loss[j] = sub_loss[j] + a * np.sum((U_j.dot(V[j])) * R_j)
            sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])
        loss = loss + np.sum(sub_loss)

        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V, item_weight, seed)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]
        u_loss, ulatent = model.train(R.toarray(), Uinfo, U,
                                      mlp_args["learning_rate"])

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item - u_loss
        tr_eval = eval_MAE(Train_R_I, U, V, train_user[0])
        val_eval = eval_MAE(Valid_R, U, V, valid_user[0])
        te_eval = eval_MAE(Test_R, U, V, test_user[0])

        toc = time.time()
        elapsed = toc - tic

        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        if te_eval > PREV_TE:
            mcount += 1
        if mcount > 2:
            break
        if (val_eval < pre_val_eval):
            # cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/U.dat', U)
            np.savetxt(res_dir + '/V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
        else:
            count = count + 1

        pre_val_eval = val_eval

        print "Loss: %.5f Elpased: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval)
        f1.write(
            "Iteration:%d Loss: %.5f Elpased: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n"
            % (iteration, loss, elapsed, converge, tr_eval, val_eval, te_eval))

        # if (count == endure_count):
        #     break
        PREV_LOSS = loss
        PREV_TE = te_eval
    f1.close()
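The sqrt-count item weighting used by give_item_weight (and give_weight in DRMF) is easy to sanity-check in isolation; a small demo with toy rating lists:

import math
import numpy as np

Train_R_J = [[5, 4], [3], [4, 4, 2, 5]]   # ratings grouped per item
num_item = len(Train_R_J)

item_weight = np.array([math.sqrt(len(r)) for r in Train_R_J], dtype=float)
item_weight = (float(num_item) / item_weight.sum()) * item_weight
# frequently rated items get larger weights; the rescaling makes the
# weights sum to num_item, so they average to exactly 1
print(item_weight, item_weight.mean())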
Example #5
def ConvMF(res_dir, train_user, train_item, valid_user, test_user,
           R, CNN_X, vocab_size, init_W=None, give_item_weight=True,
           max_iter=50, lambda_u=1, lambda_v=100, dimension=50,
           dropout_rate=0.2, emb_dim=200, max_len=300, num_kernel_per_ws=100):
    # explicit setting
    a = 1
    b = 0

    num_user = R.shape[0]
    num_item = R.shape[1]
    PREV_LOSS = 1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    f1 = open(res_dir + '/state.log', 'w')

    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]
    Valid_R = valid_user[1]

    if give_item_weight:
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10

    cnn_module = CNN_module(dimension, vocab_size, dropout_rate,
                            emb_dim, max_len, num_kernel_per_ws, init_W)
    theta = cnn_module.get_projection_layer(CNN_X)
    np.random.seed(133)
    U = np.random.uniform(size=(num_user, dimension))
    V = theta

    endure_count = 5
    count = 0
    for iteration in range(max_iter):
        loss = 0
        tic = time.time()
        print("%d iteration\t(patience: %d)" % (iteration, count))

        VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in range(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            A = VV + (a - b) * (V_i.T.dot(V_i))
            B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)

            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)

        sub_loss = np.zeros(num_item)
        UU = b * (U.T.dot(U))
        for j in range(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]

            tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
            A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
            B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                 ).sum(0) + lambda_v * item_weight[j] * theta[j]
            V[j] = np.linalg.solve(A, B)

            sub_loss[j] = -0.5 * np.square(R_j * a).sum()
            sub_loss[j] = sub_loss[j] + a * np.sum((U_j.dot(V[j])) * R_j)
            sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])

        loss = loss + np.sum(sub_loss)
        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V, item_weight, seed)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item

        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        toc = time.time()
        elapsed = toc - tic

        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        if (val_eval < pre_val_eval):
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/U.dat', U)
            np.savetxt(res_dir + '/V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
        else:
            count = count + 1

        pre_val_eval = val_eval

        print "Loss: %.5f Elpased: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval)
        f1.write("Loss: %.5f Elpased: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval))

        if (count == endure_count):
            break

        PREV_LOSS = loss

    f1.close()
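All of these variants build the right-hand side B with np.tile; that expression is just a broadcast spelling of a matrix-vector product, which is worth knowing when reading or vectorizing this code. A quick equivalence check:

import numpy as np

np.random.seed(0)
dimension = 5
V_i = np.random.uniform(size=(8, dimension))
R_i = np.random.uniform(1, 5, size=8)

B_tile = (V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)
B_mat = V_i.T.dot(R_i)   # the same vector, computed as one matrix product
assert np.allclose(B_tile, B_mat)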
Example #6
def main():
    #cnn_cae_transfer
    # exp_dir = '/home/wanliz/data/Extended_ctr/convmf/citeulike_a_extended/results/5-8_in-matrix_no-val_0.01-100_w_cnn-100_cae-50_transfer-2_no-sc//fold-4/'
    exp_dir = '/home/wanliz/data/Extended_ctr/convmf/citeulike_a_extended/results/6-9_in-matrix-200_no-val_0.01-100-w-cnn-100_cae-25_transfer-noSC/fold-4/'
    exp_dir = '/home/wanliz/data/Extended_ctr/convmf/citeulike_a_extended/results/7-9_in-matrix-200_no-val_0.01-100-w-cnn-50_cae-150_transfer-noSC/fold-1'

    #cnn_cae_concat
    # exp_dir ='/home/wanliz/data/Extended_ctr/convmf/citeulike_a_extended/results/6-8_out-of-matrix_no-val_1-1000-w-cnn-100_cae-50_concat/fold-1/'
    #cnn
    # exp_dir = '/home/wanliz/data/Extended_ctr/convmf/citeulike_a_extended/results/6-8_in-matrix_no-val_0.01-100_w_cnn/fold-1'
    fc_weights_file = os.path.join(exp_dir, 'FC_weights.npy')
    module = 'cnn_cae_transfer'
    ''' Network parameters'''
    nb_filters = 50
    cae_N_hidden = 150
    nb_features = 23
    if module == 'cnn_cae_transfer' or module == 'cnn_cae_concat':
        weights_file = os.path.join(exp_dir, 'CNN_CAE_weights.hdf5')
    elif module == 'cnn':
        weights_file = os.path.join(exp_dir, 'CNN_weights.hdf5')
    if not os.path.exists(fc_weights_file):
        if module == 'cnn_cae_transfer':
            model = CNN_CAE_transfer_module(output_dimesion=200,
                                            vocab_size=8001,
                                            dropout_rate=0.2,
                                            emb_dim=200,
                                            max_len=300,
                                            nb_filters=nb_filters,
                                            init_W=None,
                                            cae_N_hidden=cae_N_hidden,
                                            nb_features=nb_features)
        elif module == 'cnn_cae_concat':
            model = CNN_CAE_module(output_dimesion=200,
                                   vocab_size=8001,
                                   dropout_rate=0.2,
                                   emb_dim=200,
                                   max_len=300,
                                   nb_filters=nb_filters,
                                   init_W=None,
                                   cae_N_hidden=cae_N_hidden,
                                   nb_features=17)
        elif module == 'cnn':
            model = CNN_module(output_dimesion=200,
                               vocab_size=8001,
                               dropout_rate=0.2,
                               emb_dim=200,
                               max_len=300,
                               nb_filters=nb_filters,
                               init_W=None)
        model.load_model(weights_file)
        if module == 'cnn_cae_concat':
            layer_name = 'joint_output'
        else:
            layer_name = 'fully_connect'
        weights = model.model.get_layer(layer_name).get_weights()
        bias = weights[1]
        fc_weights = weights[0]
        np.save(fc_weights_file, fc_weights)
    else:
        model = None  # cached weights exist, so the Keras model is never built
        fc_weights = np.load(fc_weights_file)
    # Normalized Data
    normalized = (fc_weights - np.min(fc_weights)) / (np.max(fc_weights) -
                                                      np.min(fc_weights))
    cnn_output = {}

    # nb_filters = cae_N_hidden
    if module == 'cnn_cae_concat':
        cnn_vector_length = 2
    else:
        cnn_vector_length = 3  #normalized.shape[0] / nb_filters
    for i in range(cnn_vector_length):
        print(i * nb_filters, i * nb_filters + (nb_filters - 1))
        cnn_output[i] = np.sum(normalized[i * nb_filters:i * nb_filters +
                                          nb_filters - 1],
                               axis=0,
                               keepdims=True)
    agg_output = np.vstack(cnn_output.values())
    if fc_weights.shape[0] > cnn_vector_length * nb_filters:
        #CAE output
        cae_output = np.sum(normalized[cnn_vector_length * nb_filters:],
                            axis=0,
                            keepdims=True)
        agg_output = np.vstack((agg_output, cae_output))
    plot_weights(agg_output, exp_dir)

    if model is not None:
        # plotting needs the model object, which only exists when the
        # weights were extracted in this run rather than loaded from cache
        plot_model(model.model,
                   to_file='/home/wanliz/model.png',
                   show_layer_names=True,
                   show_shapes=True)
    print('')
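The weight-inspection logic above boils down to min-max normalizing a fully-connected weight matrix and summing it in per-filter blocks. A self-contained sketch of that aggregation with illustrative shapes (note that the original slice ends at nb_filters - 1 and so drops the last row of each block, which looks like an off-by-one; the sketch sums the full block):

import numpy as np

np.random.seed(0)
nb_filters, n_blocks, out_dim = 50, 3, 200
fc_weights = np.random.randn(nb_filters * n_blocks, out_dim)

# min-max normalize to [0, 1]
normalized = (fc_weights - fc_weights.min()) / (fc_weights.max() - fc_weights.min())

# sum each contiguous block of nb_filters rows into a single row
agg_output = np.vstack([
    normalized[i * nb_filters:(i + 1) * nb_filters].sum(axis=0, keepdims=True)
    for i in range(n_blocks)
])
print(agg_output.shape)   # (3, 200)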
Example #7
def ConvMF(res_dir, state_log_dir, train_user, train_item, valid_user, test_user,
           R, CNN_X, vocab_size, init_W=None, give_item_weight=False,
           max_iter=50, lambda_u=1, lambda_v=100, dimension=50,
           dropout_rate=0.2, emb_dim=200, max_len=300, num_kernel_per_ws=100):
    # implicit-feedback setting: each observed rating r gets confidence 1 + alpha * r
    a = 1
    b = 0.01
    alpha = 40
    num_user = R.shape[0]
    num_item = R.shape[1]
    PREV_LOSS = -1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    os.chdir(res_dir)
    # f1 = open(res_dir + '/state.log', 'w')
    if not os.path.exists(state_log_dir):
        os.makedirs(state_log_dir)
    f1 = open(state_log_dir + '/state.log', 'w')
    # log metrics into tf.summary
    log_dir_name = os.path.basename(os.path.dirname(state_log_dir + '/'))
    log_dir = os.path.join(state_log_dir, log_dir_name)
    logger_tb = Tb_Logger(log_dir)

    # indicate folder to save, plus other options
    tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=0,
                              write_graph=False, write_images=False)
    # save it in your callback list, where you can include other callbacks
    callbacks_list = [tensorboard]
    # then pass to fit as callback, remember to use validation_data also

    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]

    no_validation = False
    if valid_user:
        Valid_R = valid_user[1]
    else:
        no_validation = True

    if give_item_weight:
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
        item_weight[item_weight == 0] = 1
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10

    cnn_module = CNN_module(dimension, vocab_size, dropout_rate,
                            emb_dim, max_len, num_kernel_per_ws, init_W)
    theta = cnn_module.get_projection_layer(CNN_X)
    np.random.seed(133)
    U = np.random.uniform(size=(num_user, dimension))
    V = theta

    print('Training CNN-MF ...')

    endure_count = 5
    count = 0
    converge_threshold = 1e-4
    converge = 1.0
    iteration = 0
    min_iter = 5  # lower bound on iterations; a module-level constant in the
                  # original source, restored here with an assumed value
    # initialize the returned metrics in case no iteration improves the loss
    best_train_rmse = best_test_rmse = best_val_rmse = None
    while (iteration < max_iter and converge > converge_threshold) or iteration < min_iter:
        loss = 0
        tic = time.time()
        print "%d iteration\t(patience: %d)" % (iteration, count)

        # VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        VV = (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in range(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            # A = VV + (a - b) * (V_i.T.dot(V_i))
            # B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)
            C_i = np.diag(alpha * R_i)
            A = VV + V_i.T.dot(C_i).dot(V_i)
            B = V_i.T.dot(C_i + np.eye(len(idx_item))).dot(R_i)
            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)

        sub_loss = np.zeros(num_item)
        # UU = b * (U.T.dot(U))
        UU = (U.T.dot(U))
        for j in range(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]
            C_j = np.diag(alpha * R_j)
            if len(U_j) > 0:
                # tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
                tmp_A = UU + (U_j.T.dot(C_j).dot(U_j))
                A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
                B = U_j.T.dot(C_j + np.eye(len(idx_user))).dot(R_j) + lambda_v * item_weight[j] * theta[j]
                # B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                #      ).sum(0) + lambda_v * item_weight[j] * theta[j]
                V[j] = np.linalg.solve(A, B)

                # sub_loss[j] = -0.5 * np.square(R_j * a).sum()
                # sub_loss[j] = sub_loss[j] + a * np.sum((U_j.dot(V[j])) * R_j)
                sub_loss[j] = -0.5 * np.square(R_j * C_j).sum()
                sub_loss[j] = sub_loss[j] + np.sum(C_j * (U_j.dot(V[j])) * R_j)
                sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])
            else:
                V[j] = theta[j]

        loss = loss + np.sum(sub_loss)
        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V, item_weight, seed, callbacks_list)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item

        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        if not no_validation:
            val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        else:
            val_eval = -1
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        logger_tb.log_scalar('train_rmse', tr_eval, iteration)
        if not no_validation:
            logger_tb.log_scalar('eval_rmse', val_eval, iteration)
        logger_tb.log_scalar('test_rmse', te_eval, iteration)
        logger_tb.writer.flush()

        toc = time.time()
        elapsed = toc - tic

        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        # if (val_eval < pre_val_eval):

        if (loss > PREV_LOSS):
            # count = 0

            print ("likelihood is increasing!")
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/final-U.dat', U)
            np.savetxt(res_dir + '/final-V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
            best_train_rmse = tr_eval
            best_test_rmse = te_eval
            best_val_rmse = val_eval

        else:
            count = count + 1
        # if (val_eval < pre_val_eval):
        # count = 0

        #     cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
        #     np.savetxt(res_dir + '/final-U.dat', U)
        #     np.savetxt(res_dir + '/final-V.dat', V)
        #     np.savetxt(res_dir + '/theta.dat', theta)
        # else:
        #     count = count + 1

        pre_val_eval = val_eval

        print "Loss: %.5f Elpased: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval)
        f1.write("Loss: %.5f Elpased: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval))

        if (count >= endure_count and iteration > min_iter):
            # if (count == endure_count):
            break
        elif (iteration < min_iter):
            count = 0

        PREV_LOSS = loss
        iteration += 1
    f1.close()
    return best_train_rmse, best_test_rmse, best_val_rmse
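Examples #7 and #8 replace the explicit-feedback system with the weighted one used for implicit feedback, where each observed entry carries confidence 1 + alpha * r. A minimal sketch of the user update they implement (toy shapes; VV is precomputed once per sweep exactly as in the loop above):

import numpy as np

np.random.seed(0)
dimension, lambda_u, alpha = 5, 1.0, 40.0
V = np.random.uniform(size=(20, dimension))       # all item vectors
idx_item = np.array([1, 4, 7])                    # items user i interacted with
V_i = V[idx_item]
R_i = np.random.uniform(1, 5, size=len(idx_item))

VV = V.T.dot(V) + lambda_u * np.eye(dimension)    # shared across users
C_i = np.diag(alpha * R_i)                        # extra confidence on observed entries

A = VV + V_i.T.dot(C_i).dot(V_i)
B = V_i.T.dot(C_i + np.eye(len(idx_item))).dot(R_i)
u_i = np.linalg.solve(A, B)                       # the vector assigned to U[i]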
Example #8
def Raw_att_CNN_concat(res_dir, state_log_dir, train_user, train_item, valid_user, test_user,
                       R, attributes_X, CNN_X, vocab_size, init_W, max_iter, lambda_u, lambda_v,
                       dimension, use_CAE,
                       dropout_rate=0.2, emb_dim=200, max_len=300, num_kernel_per_ws=100,
                       a=1, b=0.01, give_item_weight=False):
    # explicit setting
    # a = 1
    # b = 0.01
    alpha = 40
    # confidence_matrix = get_confidence_matrix(R,'user-dependant',alpha=40)
    num_user = R.shape[0]
    num_item = R.shape[1]

    num_features = attributes_X.shape[1]

    '''prepare path to store results and log'''
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    os.chdir(res_dir)
    if not os.path.exists(state_log_dir):
        os.makedirs(state_log_dir)
    f1 = open(state_log_dir + '/state.log', 'w')

    '''log metrics using tf.summary '''
    log_dir_name = os.path.basename(os.path.dirname(state_log_dir + '/'))
    log_dir = os.path.join(state_log_dir, log_dir_name)
    logger_tb = Tb_Logger(log_dir)
    # indicate folder to save, plus other options
    tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=0,
                              write_graph=False, write_images=False)
    # save it in your callback list, where you can include other callbacks
    callbacks_list = [tensorboard]
    # then pass to fit as callback, remember to use validation_data also

    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]

    # check if the dataset has validation set
    no_validation = False
    if valid_user:
        Valid_R = valid_user[1]
    else:
        no_validation = True

    # assign weights to each item according to the number of time the item was rated
    if give_item_weight:
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
        item_weight[item_weight == 0] = 1
    else:
        item_weight = np.ones(num_item, dtype=float)

    '''initialize'''
    cnn_output_dim = 150
    att_output_dim = dimension - cnn_output_dim
    cnn_module = CNN_module(cnn_output_dim, vocab_size, dropout_rate,
                            emb_dim, max_len, num_kernel_per_ws, init_W)
    if use_CAE:
        att_module = CAE_module(att_output_dim, cae_N_hidden=att_output_dim, nb_features=num_features)

    else:
        att_module = Stacking_NN_CNN_CAE(input_dim=num_features,output_dimesion=att_output_dim,
                                         num_layers=1, hidden_dim=num_features * 2)

    theta = cnn_module.get_projection_layer(CNN_X)
    gamma = att_module.get_projection_layer(attributes_X)
    delta = np.concatenate((gamma, theta), axis=1)
    if not (theta.shape[1] + gamma.shape[1] == dimension):
        sys.exit("theta and gamma shapes are wrong")
    np.random.seed(133)
    U = np.random.uniform(size=(num_user, dimension))
    V = delta

    print('Training CNN-CAE-MF ...')
    pre_val_eval = -1e10
    PREV_LOSS = -1e-50
    endure_count = 5
    count = 0
    converge_threshold = 1e-4
    converge = 1.0
    iteration = 0
    min_iter = 5  # assumed value; min_iter is a module-level constant in the original source
    # initialize the returned metrics in case no iteration improves the loss
    best_train_rmse = best_test_rmse = best_val_rmse = None
    while (iteration < max_iter and converge > converge_threshold) or iteration < min_iter:
        loss = 0
        tic = time.time()
        print "%d iteration\t(patience: %d)" % (iteration, count)

        # Update U
        # VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        VV = (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in range(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            C_i = np.diag(alpha * R_i)
            # A = VV + (a - b) * (V_i.T.dot(V_i))
            # B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)
            A = VV + V_i.T.dot(C_i).dot(V_i)
            B = V_i.T.dot(C_i + np.eye(len(idx_item))).dot(R_i)
            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)

        # Update V
        sub_loss = np.zeros(num_item)
        # UU = b * (U.T.dot(U))
        UU = (U.T.dot(U))

        for j in range(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]
            C_j = np.diag(alpha * R_j)
            if len(U_j) > 0:
                # tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
                tmp_A = UU + (U_j.T.dot(C_j).dot(U_j))
                A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
                # B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                #      ).sum(0) + lambda_v * item_weight[j] * delta[j]
                B = U_j.T.dot(C_j + np.eye(len(idx_user))).dot(R_j) + lambda_v * item_weight[j] * delta[j]
                V[j] = np.linalg.solve(A, B)

                sub_loss[j] = -0.5 * np.square(R_j * C_j).sum()
                sub_loss[j] = sub_loss[j] + np.sum(C_j * ((U_j.dot(V[j])) * R_j))
                sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])
            else:
                # in case the item has no ratings
                V[j] = delta[j]
        loss = loss + np.sum(sub_loss)

        # Update theta
        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V[:, att_output_dim:], item_weight=item_weight,
                                   seed=seed, callbacks_list=callbacks_list)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        # update gamma
        history = att_module.train(attributes_X, V[:, :att_output_dim], item_weight, seed, callbacks_list)
        gamma = att_module.get_projection_layer(attributes_X)
        att_loss = history.history['loss'][-1]
        # update delta
        delta = np.concatenate((gamma, theta), axis=1)
        loss = loss - 0.5 * lambda_v * (cnn_loss + att_loss) * num_item

        toc = time.time()
        elapsed = toc - tic

        '''calculate RMSE'''
        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        if not no_validation:
            val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        else:
            val_eval = -1
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        ''' write tf.summary'''
        logger_tb.log_scalar('train_rmse', tr_eval, iteration)
        if not no_validation:
            logger_tb.log_scalar('eval_rmse', val_eval, iteration)
        logger_tb.log_scalar('test_rmse', te_eval, iteration)
        logger_tb.writer.flush()

        '''Calculate converge and stor best values of U,V,theta'''
        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        # if (val_eval < pre_val_eval):
        if (loss > PREV_LOSS):
            # count = 0
            print ("likelihood is increasing!")
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            # was cnn_module.save_model, which saved the CNN weights twice
            att_module.save_model(res_dir + '/Att_weights.hdf5')
            np.savetxt(res_dir + '/final-U.dat', U)
            np.savetxt(res_dir + '/final-V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
            np.savetxt(res_dir + '/gamma.dat', gamma)

            best_train_rmse = tr_eval
            best_test_rmse = te_eval
            best_val_rmse = val_eval

        else:
            count = count + 1

        pre_val_eval = val_eval

        print "Loss: %.5f Elpased: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval)
        f1.write("Loss: %.5f Elpased: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval))
        if (count >= endure_count and iteration > min_iter):
            # if (count == endure_count):
            break
        elif (iteration < min_iter):
            count = 0

        PREV_LOSS = loss
        iteration += 1
    f1.close()
    return best_train_rmse, best_test_rmse, best_val_rmse
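In Raw_att_CNN_concat the item prior delta is a column-wise concatenation of two network outputs, and each network is then trained against its own slice of V. The column bookkeeping is easy to get wrong, so here is a small shape check mirroring the code above:

import numpy as np

num_item, dimension, cnn_output_dim = 10, 200, 150
att_output_dim = dimension - cnn_output_dim

gamma = np.random.uniform(size=(num_item, att_output_dim))   # attribute module output
theta = np.random.uniform(size=(num_item, cnn_output_dim))   # CNN module output
delta = np.concatenate((gamma, theta), axis=1)
assert delta.shape == (num_item, dimension)

V = delta.copy()
target_for_att = V[:, :att_output_dim]   # gamma's columns
target_for_cnn = V[:, att_output_dim:]   # theta's columns
assert target_for_cnn.shape[1] == cnn_output_dim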