import math
import os
import time

import numpy as np

# Repo-local helpers used below (CNN_module, CNN_GRU_module, CAE_module,
# eval_RMSE, eval_MAE, eval_RATING, Tb_Logger, ...) are assumed to be imported
# from this project's own modules.


def ConvMF(res_dir, train_user, train_item, valid_user, test_user,
           R, CNN_X, vocab_size, init_W=None, give_item_weight=True,
           max_iter=50, lambda_u=1, lambda_v=100, dimension=50,
           dropout_rate=0.2, emb_dim=200, max_len=300, num_kernel_per_ws=100):
    # explicit setting
    a = 1
    b = 0

    num_user = R.shape[0]
    num_item = R.shape[1]
    PREV_LOSS = 1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    f1 = open(res_dir + '/state.log', 'w')

    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]
    Valid_R = valid_user[1]

    if give_item_weight is True:
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10

    cnn_module = CNN_module(dimension, vocab_size, dropout_rate, emb_dim,
                            max_len, num_kernel_per_ws, init_W)
    theta = cnn_module.get_projection_layer(CNN_X)
    np.random.seed(133)
    U = np.random.uniform(size=(num_user, dimension))
    V = theta

    endure_count = 5
    count = 0
    for iteration in range(max_iter):
        loss = 0
        tic = time.time()
        print("%d iteration\t(patience: %d)" % (iteration, count))

        VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in range(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            A = VV + (a - b) * (V_i.T.dot(V_i))
            B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)

            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)

        sub_loss = np.zeros(num_item)
        UU = b * (U.T.dot(U))
        for j in range(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]

            tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
            A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
            B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                 ).sum(0) + lambda_v * item_weight[j] * theta[j]
            V[j] = np.linalg.solve(A, B)

            sub_loss[j] = -0.5 * np.square(R_j * a).sum()
            sub_loss[j] = sub_loss[j] + a * np.sum((U_j.dot(V[j])) * R_j)
            sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])

        loss = loss + np.sum(sub_loss)

        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V, item_weight, seed)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item

        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        toc = time.time()
        elapsed = toc - tic

        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        if val_eval < pre_val_eval:
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/U.dat', U)
            np.savetxt(res_dir + '/V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
        else:
            count = count + 1

        pre_val_eval = val_eval

        print("Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f"
              % (loss, elapsed, converge, tr_eval, val_eval, te_eval))
        f1.write("Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n"
                 % (loss, elapsed, converge, tr_eval, val_eval, te_eval))

        if count == endure_count:
            break

        PREV_LOSS = loss

    f1.close()
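
# The two inner loops above are the coordinate-ascent (ALS) updates obtained by
# setting the gradient of the log-posterior to zero.  In the notation of the
# code (a = 1, b = 0, theta_j = CNN projection of item j's text, w_j = item_weight[j]):
#
#   u_i = (V_{I_i}^T V_{I_i} + \lambda_u I_K)^{-1} V_{I_i}^T r_i
#   v_j = (U_{J_j}^T U_{J_j} + \lambda_v w_j I_K)^{-1} (U_{J_j}^T r_j + \lambda_v w_j \theta_j)
#
# where V_{I_i} stacks the latent vectors of the items rated by user i and r_i
# is the corresponding rating vector.  Below is a minimal self-contained sketch
# of one such user update on synthetic data (an illustration of the linear
# solve only, not the repo's data pipeline):

def _als_user_update_demo():
    rng = np.random.RandomState(0)
    dimension, lambda_u = 50, 1.0
    V_i = rng.uniform(size=(7, dimension))          # latent vectors of the 7 items user i rated
    R_i = rng.randint(1, 6, size=7).astype(float)   # user i's ratings of those items
    A = V_i.T.dot(V_i) + lambda_u * np.eye(dimension)
    B = V_i.T.dot(R_i)            # same value as the np.tile expression used above
    u_i = np.linalg.solve(A, B)   # closed-form ridge-regression solution
    return u_i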
def DRMF(res_dir, train_user, train_item, valid_user, test_user,
         R, DNN_X, DNN_Y, vocab_size, init_W=None, give_weight=True,
         max_iter=50, lambda_u=1, lambda_v=100, dimension=50,
         dropout_rate=0.2, emb_dim=200, num_kernel_per_ws=50,
         dnn_type='CNN_GRU', reg_schema='Dual', gru_outdim=50,
         maxlen_doc=[10, 10], maxlen_sent=[30, 30]):
    # explicit setting (same values as in ConvMF above)
    a = 1
    b = 0

    num_user = R.shape[0]
    num_item = R.shape[1]
    PREV_LOSS = 1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    f1 = open(res_dir + '/state.log', 'a')
    f1.write("### DRMF-%s-%s ###\n\n" % (reg_schema, dnn_type))
    f1.write("===Configuration===\n")
    f1.write("lambda_u=%f, lambda_v=%f\n" % (lambda_u, lambda_v))
    f1.write("maxlen_doc=[%d,%d], maxlen_sent=[%d,%d]\n"
             % (maxlen_doc[0], maxlen_doc[1], maxlen_sent[0], maxlen_sent[1]))
    f1.write("emb_dim=%d, dimension=%d, num_kernel_per_ws=%d, dropout_rate=%.2f, gru_outdim=%d\n\n"
             % (emb_dim, dimension, num_kernel_per_ws, dropout_rate, gru_outdim))
    f1.write("Tr:Training, Val:Validation, Te:Test, []: [MAE, MSE, RMSE]\n")

    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]
    Valid_R = valid_user[1]

    if give_weight is True:
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
        user_weight = np.array([math.sqrt(len(u))
                                for u in Train_R_I], dtype=float)
        user_weight = (float(num_user) / user_weight.sum()) * user_weight
    else:
        item_weight = np.ones(num_item, dtype=float)
        user_weight = np.ones(num_user, dtype=float)

    pre_val_rmse = 1e10

    if dnn_type == 'CNN':
        if reg_schema == 'Item' or reg_schema == 'Dual':
            dnn_module_x = CNN_module(dimension, vocab_size, dropout_rate, emb_dim,
                                      maxlen_doc[0] * maxlen_sent[0],
                                      num_kernel_per_ws, init_W)
        if reg_schema == 'User' or reg_schema == 'Dual':
            dnn_module_y = CNN_module(dimension, vocab_size, dropout_rate, emb_dim,
                                      maxlen_doc[1] * maxlen_sent[1],
                                      num_kernel_per_ws, init_W)
    if dnn_type == 'CNN_GRU':
        if reg_schema == 'Item' or reg_schema == 'Dual':
            dnn_module_x = CNN_GRU_module(dimension, vocab_size, dropout_rate, emb_dim,
                                          gru_outdim, maxlen_doc[0], maxlen_sent[0],
                                          num_kernel_per_ws, init_W)
        if reg_schema == 'User' or reg_schema == 'Dual':
            dnn_module_y = CNN_GRU_module(dimension, vocab_size, dropout_rate, emb_dim,
                                          gru_outdim, maxlen_doc[1], maxlen_sent[1],
                                          num_kernel_per_ws, init_W)

    if reg_schema == 'Item' or reg_schema == 'Dual':
        theta = dnn_module_x.get_projection_layer(DNN_X)
    if reg_schema == 'User' or reg_schema == 'Dual':
        phi = dnn_module_y.get_projection_layer(DNN_Y)

    np.random.seed(133)
    if reg_schema == 'User' or reg_schema == 'Dual':
        U = phi
    else:
        U = np.random.uniform(size=(num_user, dimension))
    if reg_schema == 'Item' or reg_schema == 'Dual':
        V = theta
    else:
        V = np.random.uniform(size=(num_item, dimension))

    count = 0
    for iteration in range(max_iter):
        loss = 0
        tic = time.time()
        print("%d iteration\t(patience: %d)" % (iteration, count))
        f1.write("%d iteration\t(patience: %d)\n" % (iteration, count))

        VV = b * (V.T.dot(V))
        sub_loss = np.zeros(num_user)

        # update U
        for i in range(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            tmp_A = VV + (a - b) * (V_i.T.dot(V_i))
            A = tmp_A + lambda_u * user_weight[i] * np.eye(dimension)
            B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)
            if reg_schema == 'User' or reg_schema == 'Dual':
                B = B + lambda_u * user_weight[i] * phi[i]
            U[i] = np.linalg.solve(A, B)

            # -\frac{\lambda_u}{2}\sum_i u_i^Tu_i
            if reg_schema == 'Item':
                sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss += np.sum(sub_loss)

        sub_loss_dev = np.zeros(num_item)
        sub_loss = np.zeros(num_item)

        # update V
        UU = b * (U.T.dot(U))
        for j in range(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]
            tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
            A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
            B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)).sum(0)
            if reg_schema == 'Item' or reg_schema == 'Dual':
                B = B + lambda_v * item_weight[j] * theta[j]
            V[j] = np.linalg.solve(A, B)

            # -\sum_i\sum_j\frac{c_{i,j}}{2}(r_{ij}-u_i^T v_j)^2
            sub_loss_dev[j] = -0.5 * a * np.square(R_j).sum()
            sub_loss_dev[j] += a * np.sum((U_j.dot(V[j])) * R_j)
            sub_loss_dev[j] += -0.5 * np.dot(V[j].dot(tmp_A), V[j])

            # -\frac{\lambda_v}{2}\sum_jv_j^Tv_j
            if reg_schema == 'User':
                sub_loss[j] = -0.5 * lambda_v * np.dot(V[j], V[j])

        loss += np.sum(sub_loss_dev)
        loss += np.sum(sub_loss)

        seed = np.random.randint(100000)
        if reg_schema == 'Item' or reg_schema == 'Dual':
            history_x = dnn_module_x.train(DNN_X, V, item_weight, seed)
            theta = dnn_module_x.get_projection_layer(DNN_X)
            # -\frac{\lambda_v}{2}\sum_j(v_j-\theta_j)^T(v_j-\theta_j)
            cnn_loss_x = history_x.history['loss'][-1]
            loss += -0.5 * lambda_v * cnn_loss_x * num_item
        if reg_schema == 'User' or reg_schema == 'Dual':
            history_y = dnn_module_y.train(DNN_Y, U, user_weight, seed)
            phi = dnn_module_y.get_projection_layer(DNN_Y)
            # -\frac{\lambda_u}{2}\sum_i (u_i-\phi_i)^T(u_i-\phi_i)
            cnn_loss_y = history_y.history['loss'][-1]
            loss += -0.5 * lambda_u * cnn_loss_y * num_user

        tr_mae, tr_mse, tr_rmse = eval_RATING(Train_R_I, U, V, train_user[0])
        val_mae, val_mse, val_rmse = eval_RATING(Valid_R, U, V, valid_user[0])
        te_mae, te_mse, te_rmse = eval_RATING(Test_R, U, V, test_user[0])

        toc = time.time()
        elapsed = toc - tic

        if iteration == 0:
            converge = -1
        else:
            converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        # if (val_rmse < pre_val_rmse):
        #     if dnn_type == 'CNN':
        #         if os.path.exists(res_dir + '/drmf_cnn') is not True:
        #             os.mkdir(res_dir + '/drmf_cnn')
        #         if os.path.exists(res_dir + '/drmf_cnn/dual') is not True:
        #             os.mkdir(res_dir + '/drmf_cnn/dual')
        #         if os.path.exists(res_dir + '/drmf_cnn/user') is not True:
        #             os.mkdir(res_dir + '/drmf_cnn/user')
        #         if os.path.exists(res_dir + '/drmf_cnn/item') is not True:
        #             os.mkdir(res_dir + '/drmf_cnn/item')
        #         if reg_schema == 'Dual':
        #             np.savetxt(res_dir + '/drmf_cnn/dual/U.dat', U)
        #             np.savetxt(res_dir + '/drmf_cnn/dual/V.dat', V)
        #             np.savetxt(res_dir + '/drmf_cnn/dual/theta.dat', theta)
        #             np.savetxt(res_dir + '/drmf_cnn/dual/phi.dat', phi)
        #             dnn_module_x.save_model(res_dir + '/drmf_cnn/dual/x_weights.hdf5')
        #             dnn_module_y.save_model(res_dir + '/drmf_cnn/dual/y_weights.hdf5')
        #         if reg_schema == 'User':
        #             np.savetxt(res_dir + '/drmf_cnn/user/U.dat', U)
        #             np.savetxt(res_dir + '/drmf_cnn/user/V.dat', V)
        #             np.savetxt(res_dir + '/drmf_cnn/user/phi.dat', phi)
        #             dnn_module_y.save_model(res_dir + '/drmf_cnn/user/y_weights.hdf5')
        #         if reg_schema == 'Item':
        #             np.savetxt(res_dir + '/drmf_cnn/item/U.dat', U)
        #             np.savetxt(res_dir + '/drmf_cnn/item/V.dat', V)
        #             np.savetxt(res_dir + '/drmf_cnn/item/theta.dat', theta)
        #             dnn_module_x.save_model(res_dir + '/drmf_cnn/item/x_weights.hdf5')
        #     if dnn_type == 'CNN_GRU':
        #         if os.path.exists(res_dir + '/drmf_cnn_gru') is not True:
        #             os.mkdir(res_dir + '/drmf_cnn_gru')
        #         if os.path.exists(res_dir + '/drmf_cnn_gru/dual') is not True:
        #             os.mkdir(res_dir + '/drmf_cnn_gru/dual')
        #         if os.path.exists(res_dir + '/drmf_cnn_gru/user') is not True:
        #             os.mkdir(res_dir + '/drmf_cnn_gru/user')
        #         if os.path.exists(res_dir + '/drmf_cnn_gru/item') is not True:
        #             os.mkdir(res_dir + '/drmf_cnn_gru/item')
        #         if reg_schema == 'Dual':
        #             np.savetxt(res_dir + '/drmf_cnn_gru/dual/U.dat', U)
        #             np.savetxt(res_dir + '/drmf_cnn_gru/dual/V.dat', V)
        #             np.savetxt(res_dir + '/drmf_cnn_gru/dual/theta.dat', theta)
        #             np.savetxt(res_dir + '/drmf_cnn_gru/dual/phi.dat', phi)
        #             dnn_module_x.save_model(res_dir + '/drmf_cnn_gru/dual/x_weights.hdf5')
        #             dnn_module_y.save_model(res_dir + '/drmf_cnn_gru/dual/y_weights.hdf5')
        #         if reg_schema == 'User':
        #             np.savetxt(res_dir + '/drmf_cnn_gru/user/U.dat', U)
        #             np.savetxt(res_dir + '/drmf_cnn_gru/user/V.dat', V)
        #             np.savetxt(res_dir + '/drmf_cnn_gru/user/phi.dat', phi)
        #             dnn_module_y.save_model(res_dir + '/drmf_cnn_gru/user/y_weights.hdf5')
        #         if reg_schema == 'Item':
        #             np.savetxt(res_dir + '/drmf_cnn_gru/item/U.dat', U)
        #             np.savetxt(res_dir + '/drmf_cnn_gru/item/V.dat', V)
        #             np.savetxt(res_dir + '/drmf_cnn_gru/item/theta.dat', theta)
        #             dnn_module_x.save_model(res_dir + '/drmf_cnn_gru/item/x_weights.hdf5')
        # else:
        #     count = count + 1

        # for fast running, without saving models
        if (val_rmse >= pre_val_rmse):
            count = count + 1

        pre_val_rmse = val_rmse

        print("Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: [%.5f, %.5f, %.5f] Val: [%.5f, %.5f, %.5f] Te: [%.5f, %.5f, %.5f]"
              % (loss, elapsed, converge, tr_mae, tr_mse, tr_rmse,
                 val_mae, val_mse, val_rmse, te_mae, te_mse, te_rmse))
        f1.write("Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: [%.5f, %.5f, %.5f] Val: [%.5f, %.5f, %.5f] Te: [%.5f, %.5f, %.5f]\n"
                 % (loss, elapsed, converge, tr_mae, tr_mse, tr_rmse,
                    val_mae, val_mse, val_rmse, te_mae, te_mse, te_rmse))

        # ENDURE_COUNT is assumed to be a module-level patience constant
        if (count == ENDURE_COUNT):
            break

        PREV_LOSS = loss

    f1.close()
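
# In the 'Dual' schema the two solves above correspond to maximizing
# (up to constants) the log-posterior
#
#   L(U, V) = -\sum_{i,j} \frac{c_{ij}}{2}(r_{ij} - u_i^T v_j)^2
#             - \frac{\lambda_u}{2} \sum_i s_i (u_i - \phi_i)^T (u_i - \phi_i)
#             - \frac{\lambda_v}{2} \sum_j w_j (v_j - \theta_j)^T (v_j - \theta_j)
#
# where \theta_j and \phi_i are the item/user document projections produced by
# dnn_module_x and dnn_module_y, w_j = item_weight[j], and s_i = user_weight[i].
# Under the 'Item' or 'User' schema the missing deep term degenerates to a
# plain L2 penalty on v_j or u_i, which matches the sub_loss terms recorded above.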
def ConvMF(res_dir, train_user, train_item, valid_user, test_user,
           R, CNN_X, vocab_size, init_W=None, give_item_weight=True,
           max_iter=50, lambda_u=1, lambda_v=100, dimension=50,
           dropout_rate=0.2, emb_dim=200, max_len=300, num_kernel_per_ws=100):
    '''
    Build and train the convolutional matrix factorization (ConvMF) model.

    :param res_dir: directory for result files
    :param train_user: training-set user sparse rating vectors (libSVM format)
    :param train_item: training-set item sparse rating vectors (libSVM format)
    :param valid_user: validation-set user sparse rating vectors (libSVM format)
    :param test_user: test-set user sparse rating vectors (libSVM format)
    :param R: raw rating data, format: user id::item id::rating
    :param CNN_X: item description word sequences
    :param vocab_size: vocabulary size
    :param init_W: pretrained word-embedding weights; if None, the embeddings are trained from scratch
    :param give_item_weight: if True, weight each item by the square root of its rating count; otherwise use uniform item weights
    :param max_iter: maximum number of iterations
    :param lambda_u: user-side regularization coefficient
    :param lambda_v: item-side regularization coefficient
    :param dimension: latent dimensionality
    :param dropout_rate: dropout rate
    :param emb_dim: word-embedding dimensionality
    :param max_len: maximum length of an item description sequence
    :param num_kernel_per_ws: number of CNN kernels per window size
    :return: None
    '''
    # explicit setting
    a = 1
    b = 0

    num_user = R.shape[0]  # 6040
    num_item = R.shape[1]  # 3544
    PREV_LOSS = 1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    f1 = open(res_dir + '/state.log', 'w')

    Train_R_I = train_user[1]  # per-user rating lists
    Train_R_J = train_item[1]  # per-item rating lists
    Test_R = test_user[1]
    Valid_R = valid_user[1]

    if give_item_weight is True:
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10

    # init CNN model
    cnn_module = CNN_module(dimension, vocab_size, dropout_rate, emb_dim,
                            max_len, num_kernel_per_ws, init_W)
    theta = cnn_module.get_projection_layer(CNN_X)
    np.random.seed(133)
    # user latent matrix
    U = np.random.uniform(size=(num_user, dimension))
    # item latent matrix
    V = theta

    endure_count = 5  # stop after 5 iterations without validation improvement
    count = 0
    for iteration in range(max_iter):
        loss = 0
        tic = time.time()
        print("%d iteration\t(patience: %d)" % (iteration, count))

        # update the user latent matrix and accumulate its part of the loss
        VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in range(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            A = VV + (a - b) * (V_i.T.dot(V_i))
            B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)

            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)

        # update the item latent matrix and accumulate its part of the loss
        sub_loss = np.zeros(num_item)
        UU = b * (U.T.dot(U))
        for j in range(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]

            tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
            A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
            B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                 ).sum(0) + lambda_v * item_weight[j] * theta[j]
            V[j] = np.linalg.solve(A, B)

            sub_loss[j] = -0.5 * np.square(R_j * a).sum()
            sub_loss[j] = sub_loss[j] + a * np.sum((U_j.dot(V[j])) * R_j)
            sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])

        loss = loss + np.sum(sub_loss)

        seed = np.random.randint(100000)
        # train the CNN on the current V and record its loss
        history = cnn_module.train(CNN_X, V, item_weight, seed)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item

        # RMSE evaluation
        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        toc = time.time()
        elapsed = toc - tic

        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        # save U, V and the CNN weights whenever validation RMSE improves
        if val_eval < pre_val_eval:
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/U.dat', U)
            np.savetxt(res_dir + '/V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
        else:
            count = count + 1

        pre_val_eval = val_eval

        print("Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f"
              % (loss, elapsed, converge, tr_eval, val_eval, te_eval))
        f1.write("Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n"
                 % (loss, elapsed, converge, tr_eval, val_eval, te_eval))

        if count == endure_count:
            break

        PREV_LOSS = loss

    f1.close()
def Haec(res_dir, train_user, train_item, valid_user, test_user,
         R, CNN_X, vocab_size, init_W=None, give_item_weight=True,
         max_iter=30, lambda_u=1, lambda_v=1, dimension=50,
         dropout_rate=0.2, emb_dim=200, max_len=300, num_kernel_per_ws=100):
    # explicit setting
    a = 1
    b = 0

    num_user = R.shape[0]
    num_item = R.shape[1]
    # print '=====R.all========'
    # print num_user, num_item
    fileU = '../data/pre/ml_1m/User.npy'
    # fileR = "../Data/convmf/preprocessed/movielens_100k/R.npy"
    Uinfo = getData1(fileUser=fileU)

    PREV_LOSS = 1e-50
    PREV_TE = 1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    f1 = open(res_dir + '/state.log', 'w')

    # Train_R_I: ratings grouped by user, [[all ratings of user 1], [all ratings of user 2], ...], length 6040
    Train_R_I = train_user[1]
    # Train_R_J: ratings grouped by item, [[all ratings of item 1], [all ratings of item 2], ...], length 3544
    Train_R_J = train_item[1]
    Test_R = test_user[1]
    Valid_R = valid_user[1]

    if give_item_weight is True:
        # idea: the more ratings an item has, the larger its weight
        # item_weight: square root of the number of ratings of each item
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        # item_weight = item_weight * (num_item / sum of the square roots), a kind of normalization;
        # afterwards each item's weight is a float roughly in (0, 5)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10

    cnn_module = CNN_module(dimension, vocab_size, dropout_rate, emb_dim,
                            max_len, num_kernel_per_ws, init_W)
    # theta: CNN projection output, one row of length `dimension` (50) per item (3544 rows)
    theta = cnn_module.get_projection_layer(CNN_X)
    V = theta

    # fix the random seed before initialization for reproducibility
    np.random.seed(133)
    # U: (num_user, dimension) = (6040, 50); V: (num_item, dimension) = (3544, 50)
    U = np.random.uniform(size=(num_user, dimension))

    # g1 = tf.Graph()
    # with tf.Session(graph=tf.get_default_graph()) as sess:
    config = tf.ConfigProto()
    config.gpu_options.allocator_type = 'BFC'
    sess = tf.Session(config=config)
    # mlp_args is assumed to be a module-level dict of USDAE1 hyperparameters
    model = USDAE1(sess, R.shape, Uinfo.shape, is_training=True, **mlp_args)
    print("build model...")
    model.build()
    u_loss, ulatent = model.train(R.toarray(), Uinfo, U, mlp_args["learning_rate"])
    U = ulatent

    endure_count = 5
    count = 0
    for iteration in xrange(max_iter):
        loss = 0
        mcount = 0
        tic = time.time()
        print "%d iteration\t(patience: %d)" % (iteration, count)

        # part of Eq. (7); b is set to 0, so VV reduces to lambda_u * I_k
        VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)

        # set the partial derivative to zero to obtain each U[i], and accumulate the U part of the loss
        # per-user loss terms, num_user (6040) entries
        sub_loss = np.zeros(num_user)
        for i in xrange(num_user):
            # idx_item: IDs of the items rated by user i
            idx_item = train_user[0][i]
            # V_i: rows of V for the items rated by user i, shape (num ratings of user i, dimension)
            V_i = V[idx_item]
            # R_i: all ratings of user i, length len(idx_item)
            R_i = Train_R_I[i]
            # A: the matrix inverted in Eq. (7), shape (dimension, dimension)
            A = VV + (a - b) * (V_i.T.dot(V_i))
            # B: the right-hand side of Eq. (7), length `dimension`;
            # np.tile(A, reps) repeats A `reps` times to build the array
            B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0) + lambda_u * ulatent[i].T
            # np.linalg.solve(A, B) solves the linear system A x = B
            U[i] = np.linalg.solve(A, B)

            # U part of the loss
            sub_loss[i] = -0.5 * lambda_u * (np.sum(np.square(U[i] - ulatent[i])))

        loss = loss + np.sum(sub_loss)

        # set the partial derivative to zero to obtain each V[j], and accumulate the V part of the loss
        # per-item loss terms, num_item (3544) entries
        sub_loss = np.zeros(num_item)
        # b is 0
        UU = b * (U.T.dot(U))
        for j in xrange(num_item):
            # idx_user: IDs of the users who rated item j
            idx_user = train_item[0][j]
            # U_j: rows of U for the users who rated item j, shape (num ratings of item j, dimension)
            U_j = U[idx_user]
            R_j = Train_R_J[j]
            # tmp_A = (U_j.T.dot(U_j))
            tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
            # A: the matrix inverted in Eq. (8), with the extra item_weight[j] factor
            A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
            # B: the right-hand side of Eq. (8); the CNN part contributes lambda_v * item_weight[j] * theta[j]
            B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                 ).sum(0) + lambda_v * item_weight[j] * theta[j]
            # print '================num_item=============', num_item
            # print len(theta[j])
            # print 'A.shape:', A.shape
            # print 'B.shape:', B.shape
            V[j] = np.linalg.solve(A, B)

            # the next three lines accumulate the first term of Eq. (6), the (R - U V)^2 part
            sub_loss[j] = -0.5 * np.square(R_j * a).sum()
            sub_loss[j] = sub_loss[j] + a * np.sum((U_j.dot(V[j])) * R_j)
            sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])

        loss = loss + np.sum(sub_loss)

        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V, item_weight, seed)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        u_loss, ulatent = model.train(R.toarray(), Uinfo, U, mlp_args["learning_rate"])

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item - u_loss

        tr_eval = eval_MAE(Train_R_I, U, V, train_user[0])
        val_eval = eval_MAE(Valid_R, U, V, valid_user[0])
        te_eval = eval_MAE(Test_R, U, V, test_user[0])

        toc = time.time()
        elapsed = toc - tic

        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        if te_eval > PREV_TE:
            mcount += 1
        if mcount > 2:
            break

        if (val_eval < pre_val_eval):
            # cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/U.dat', U)
            np.savetxt(res_dir + '/V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
        else:
            count = count + 1

        pre_val_eval = val_eval

        print "Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval)
        f1.write("Iteration:%d Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n"
                 % (iteration, loss, elapsed, converge, tr_eval, val_eval, te_eval))

        # if (count == endure_count):
        #     break

        PREV_LOSS = loss
        PREV_TE = te_eval

    f1.close()
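
# Haec differs from plain ConvMF only on the user side: a user autoencoder
# (USDAE1, fed with the rating matrix and the side-information matrix Uinfo)
# produces latent codes `ulatent`, and the user update pulls u_i toward that
# code instead of toward zero.  With a = 1, b = 0 the solve above is
#
#   u_i = (V_{I_i}^T V_{I_i} + \lambda_u I_K)^{-1} (V_{I_i}^T r_i + \lambda_u \, ulatent_i)
#
# mirroring how the item update pulls v_j toward the CNN projection \theta_j.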
def ConvMF(res_dir, train_user, train_item, valid_user, test_user,
           R, CNN_X, vocab_size, init_W=None, give_item_weight=True,
           max_iter=50, lambda_u=1, lambda_v=100, dimension=50,
           dropout_rate=0.2, emb_dim=200, max_len=300, num_kernel_per_ws=100):
    # explicit setting
    a = 1
    b = 0

    num_user = R.shape[0]
    num_item = R.shape[1]
    PREV_LOSS = 1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    f1 = open(res_dir + '/state.log', 'w')

    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]
    Valid_R = valid_user[1]

    if give_item_weight is True:
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10

    cnn_module = CNN_module(dimension, vocab_size, dropout_rate, emb_dim,
                            max_len, num_kernel_per_ws, init_W)
    theta = cnn_module.get_projection_layer(CNN_X)
    np.random.seed(133)
    U = np.random.uniform(size=(num_user, dimension))
    V = theta

    endure_count = 5
    count = 0
    for iteration in xrange(max_iter):
        loss = 0
        tic = time.time()
        print "%d iteration\t(patience: %d)" % (iteration, count)

        VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in xrange(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            A = VV + (a - b) * (V_i.T.dot(V_i))
            B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)

            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)

        sub_loss = np.zeros(num_item)
        UU = b * (U.T.dot(U))
        for j in xrange(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]

            tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
            A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
            B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                 ).sum(0) + lambda_v * item_weight[j] * theta[j]
            V[j] = np.linalg.solve(A, B)

            sub_loss[j] = -0.5 * np.square(R_j * a).sum()
            sub_loss[j] = sub_loss[j] + a * np.sum((U_j.dot(V[j])) * R_j)
            sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])

        loss = loss + np.sum(sub_loss)

        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V, item_weight, seed)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item

        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        toc = time.time()
        elapsed = toc - tic

        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        if (val_eval < pre_val_eval):
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/U.dat', U)
            np.savetxt(res_dir + '/V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
        else:
            count = count + 1

        pre_val_eval = val_eval

        print "Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval)
        f1.write("Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval))

        if (count == endure_count):
            break

        PREV_LOSS = loss

    f1.close()
def main():
    # cnn_cae_transfer
    # exp_dir = '/home/wanliz/data/Extended_ctr/convmf/citeulike_a_extended/results/5-8_in-matrix_no-val_0.01-100_w_cnn-100_cae-50_transfer-2_no-sc//fold-4/'
    exp_dir = '/home/wanliz/data/Extended_ctr/convmf/citeulike_a_extended/results/6-9_in-matrix-200_no-val_0.01-100-w-cnn-100_cae-25_transfer-noSC/fold-4/'
    exp_dir = '/home/wanliz/data/Extended_ctr/convmf/citeulike_a_extended/results/7-9_in-matrix-200_no-val_0.01-100-w-cnn-50_cae-150_transfer-noSC/fold-1'

    # cnn_cae_concat
    # exp_dir = '/home/wanliz/data/Extended_ctr/convmf/citeulike_a_extended/results/6-8_out-of-matrix_no-val_1-1000-w-cnn-100_cae-50_concat/fold-1/'

    # cnn
    # exp_dir = '/home/wanliz/data/Extended_ctr/convmf/citeulike_a_extended/results/6-8_in-matrix_no-val_0.01-100_w_cnn/fold-1'

    fc_weights_file = os.path.join(exp_dir, 'FC_weights.npy')
    module = 'cnn_cae_transfer'

    ''' Network parameters'''
    nb_filters = 50
    cae_N_hidden = 150
    nb_features = 23

    if module == 'cnn_cae_transfer' or module == 'cnn_cae_concat':
        weights_file = os.path.join(exp_dir, 'CNN_CAE_weights.hdf5')
    elif module == 'cnn':
        weights_file = os.path.join(exp_dir, 'CNN_weights.hdf5')

    if not os.path.exists(fc_weights_file):
        if module == 'cnn_cae_transfer':
            model = CNN_CAE_transfer_module(output_dimesion=200, vocab_size=8001,
                                            dropout_rate=0.2, emb_dim=200, max_len=300,
                                            nb_filters=nb_filters, init_W=None,
                                            cae_N_hidden=cae_N_hidden,
                                            nb_features=nb_features)
        elif module == 'cnn_cae_concat':
            model = CNN_CAE_module(output_dimesion=200, vocab_size=8001,
                                   dropout_rate=0.2, emb_dim=200, max_len=300,
                                   nb_filters=nb_filters, init_W=None,
                                   cae_N_hidden=cae_N_hidden, nb_features=17)
        elif module == 'cnn':
            model = CNN_module(output_dimesion=200, vocab_size=8001, dropout_rate=0.2,
                               emb_dim=200, max_len=300, nb_filters=nb_filters,
                               init_W=None)
        model.load_model(weights_file)

        if module == 'cnn_cae_concat':
            layer_name = 'joint_output'
        else:
            layer_name = 'fully_connect'
        weights = model.model.get_layer(layer_name).get_weights()
        bias = weights[1]
        fc_weights = weights[0]
        np.save(fc_weights_file, fc_weights)
    else:
        fc_weights = np.load(fc_weights_file)

    # Normalized Data
    normalized = (fc_weights - np.min(fc_weights)) / (np.max(fc_weights) - np.min(fc_weights))

    cnn_output = {}
    # nb_filters = cae_N_hidden
    if module == 'cnn_cae_concat':
        cnn_vector_length = 2
    else:
        cnn_vector_length = 3  # normalized.shape[0] / nb_filters
    for i in range(cnn_vector_length):
        print(i * nb_filters, i * nb_filters + (nb_filters - 1))
        cnn_output[i] = np.sum(normalized[i * nb_filters:i * nb_filters + nb_filters - 1],
                               axis=0, keepdims=True)

    agg_output = np.vstack(cnn_output.values())
    if fc_weights.shape[0] > cnn_vector_length * nb_filters:
        # CAE output
        cae_output = np.sum(normalized[cnn_vector_length * nb_filters:], axis=0, keepdims=True)
        agg_output = np.vstack((agg_output, cae_output))

    plot_weights(agg_output, exp_dir)
    plot_model(model.model, to_file='/home/wanliz/model.png',
               show_layer_names=True, show_shapes=True)
    print('')
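
# The aggregation in main() collapses the fully-connected weight matrix into
# one row per input block: one block of `nb_filters` rows per CNN window size,
# plus any trailing CAE rows.  A small self-contained sketch of that step with
# synthetic weights (shapes only, not the repo's trained values; it sums whole
# blocks, whereas main() slices nb_filters - 1 rows per block):

def _aggregate_fc_weights_demo():
    rng = np.random.RandomState(0)
    nb_filters, n_blocks, cae_N_hidden, out_dim = 50, 3, 150, 200
    fc_weights = rng.randn(n_blocks * nb_filters + cae_N_hidden, out_dim)
    # min-max normalize to [0, 1]
    normalized = (fc_weights - fc_weights.min()) / (fc_weights.max() - fc_weights.min())
    # sum each block of nb_filters rows -> one row per window size
    blocks = [normalized[k * nb_filters:(k + 1) * nb_filters].sum(axis=0, keepdims=True)
              for k in range(n_blocks)]
    # remaining rows belong to the CAE part
    blocks.append(normalized[n_blocks * nb_filters:].sum(axis=0, keepdims=True))
    return np.vstack(blocks)  # shape (n_blocks + 1, out_dim)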
def ConvMF(res_dir, state_log_dir, train_user, train_item, valid_user, test_user,
           R, CNN_X, vocab_size, init_W=None, give_item_weight=False,
           max_iter=50, lambda_u=1, lambda_v=100, dimension=50,
           dropout_rate=0.2, emb_dim=200, max_len=300, num_kernel_per_ws=100):
    # explicit setting
    a = 1
    b = 0.01
    alpha = 40

    num_user = R.shape[0]
    num_item = R.shape[1]
    PREV_LOSS = -1e-50
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    os.chdir(res_dir)
    # f1 = open(res_dir + '/state.log', 'w')
    if not os.path.exists(state_log_dir):
        os.makedirs(state_log_dir)
    f1 = open(state_log_dir + '/state.log', 'w')

    # log metrics into tf.summary
    log_dir_name = os.path.basename(os.path.dirname(state_log_dir + '/'))
    log_dir = os.path.join(state_log_dir, log_dir_name)
    logger_tb = Tb_Logger(log_dir)
    # indicate folder to save, plus other options
    tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=0,
                              write_graph=False, write_images=False)
    # save it in your callback list, where you can include other callbacks
    callbacks_list = [tensorboard]
    # then pass to fit as callback, remember to use validation_data also

    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]
    no_validation = False
    if valid_user:
        Valid_R = valid_user[1]
    else:
        no_validation = True

    if give_item_weight is True:
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
        item_weight[item_weight == 0] = 1
    else:
        item_weight = np.ones(num_item, dtype=float)

    pre_val_eval = 1e10

    cnn_module = CNN_module(dimension, vocab_size, dropout_rate, emb_dim,
                            max_len, num_kernel_per_ws, init_W)
    theta = cnn_module.get_projection_layer(CNN_X)
    np.random.seed(133)
    U = np.random.uniform(size=(num_user, dimension))
    V = theta

    print('Training CNN-MF ...')
    endure_count = 5
    count = 0
    converge_threshold = 1e-4
    converge = 1.0
    iteration = 0
    # min_iter is assumed to be a module-level constant
    while (iteration < max_iter and converge > converge_threshold) or iteration < min_iter:
        # for iteration in xrange(max_iter):
        loss = 0
        tic = time.time()
        print "%d iteration\t(patience: %d)" % (iteration, count)

        # VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        VV = (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in xrange(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            # A = VV + (a - b) * (V_i.T.dot(V_i))
            # B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)
            C_i = np.diag(alpha * R_i)
            A = VV + V_i.T.dot(C_i).dot(V_i)
            B = V_i.T.dot(C_i + np.eye(len(idx_item))).dot(R_i)
            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)

        sub_loss = np.zeros(num_item)
        # UU = b * (U.T.dot(U))
        UU = (U.T.dot(U))
        for j in xrange(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]
            C_j = np.diag(alpha * R_j)
            if len(U_j) > 0:
                # tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
                tmp_A = UU + (U_j.T.dot(C_j).dot(U_j))
                A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
                B = U_j.T.dot(C_j + np.eye(len(idx_user))).dot(R_j) + lambda_v * item_weight[j] * theta[j]
                # B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                #      ).sum(0) + lambda_v * item_weight[j] * theta[j]
                V[j] = np.linalg.solve(A, B)

                # sub_loss[j] = -0.5 * np.square(R_j * a).sum()
                # sub_loss[j] = sub_loss[j] + a * np.sum((U_j.dot(V[j])) * R_j)
                sub_loss[j] = -0.5 * np.square(R_j * C_j).sum()
                sub_loss[j] = sub_loss[j] + np.sum(C_j * (U_j.dot(V[j])) * R_j)
                sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])
            else:
                V[j] = theta[j]

        loss = loss + np.sum(sub_loss)

        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V, item_weight, seed, callbacks_list)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        loss = loss - 0.5 * lambda_v * cnn_loss * num_item

        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        if not no_validation:
            val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        else:
            val_eval = -1
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        logger_tb.log_scalar('train_rmse', tr_eval, iteration)
        if not no_validation:
            logger_tb.log_scalar('eval_rmse', val_eval, iteration)
        logger_tb.log_scalar('test_rmse', te_eval, iteration)
        logger_tb.writer.flush()

        toc = time.time()
        elapsed = toc - tic

        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        # if (val_eval < pre_val_eval):
        if (loss > PREV_LOSS):
            # count = 0
            print("likelihood is increasing!")
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            np.savetxt(res_dir + '/final-U.dat', U)
            np.savetxt(res_dir + '/final-V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
            best_train_rmse = tr_eval
            best_test_rmse = te_eval
            best_val_rmse = val_eval
        else:
            count = count + 1

        # if (val_eval < pre_val_eval):
        #     count = 0
        #     cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
        #     np.savetxt(res_dir + '/final-U.dat', U)
        #     np.savetxt(res_dir + '/final-V.dat', V)
        #     np.savetxt(res_dir + '/theta.dat', theta)
        # else:
        #     count = count + 1

        pre_val_eval = val_eval

        print "Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval)
        f1.write("Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval))

        if (count >= endure_count and iteration > min_iter):
            # if (count == endure_count):
            break
        elif (iteration < min_iter):
            count = 0

        PREV_LOSS = loss
        iteration += 1

    f1.close()
    return best_train_rmse, best_test_rmse, best_val_rmse
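
# This variant replaces the a/b weighting of ConvMF with confidence-weighted
# ALS in the style of implicit-feedback MF: every (user, item) pair carries a
# base confidence of 1 and observed pairs get confidence 1 + alpha * r_ij
# (C_i = diag(alpha * R_i) holds only the "+ alpha * r" part).  The solves
# above correspond to
#
#   u_i = (V^T V + V_{I_i}^T C_i V_{I_i} + \lambda_u I_K)^{-1} V_{I_i}^T (C_i + I) r_i
#   v_j = (U^T U + U_{J_j}^T C_j U_{J_j} + \lambda_v w_j I_K)^{-1}
#         (U_{J_j}^T (C_j + I) r_j + \lambda_v w_j \theta_j)
#
# which is also why an item with no ratings is simply set to its prior mean,
# V[j] = theta[j].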
def Raw_att_CNN_concat(res_dir, state_log_dir, train_user, train_item, valid_user, test_user,
                       R, attributes_X, CNN_X, vocab_size, init_W,
                       max_iter, lambda_u, lambda_v, dimension, use_CAE,
                       dropout_rate=0.2, emb_dim=200, max_len=300,
                       num_kernel_per_ws=100, a=1, b=0.01, give_item_weight=False):
    # explicit setting
    # a = 1
    # b = 0.01
    alpha = 40
    # confidence_matrix = get_confidence_matrix(R, 'user-dependant', alpha=40)

    num_user = R.shape[0]
    num_item = R.shape[1]
    num_features = attributes_X.shape[1]

    '''prepare path to store results and log'''
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    os.chdir(res_dir)
    if not os.path.exists(state_log_dir):
        os.makedirs(state_log_dir)
    f1 = open(state_log_dir + '/state.log', 'w')

    '''log metrics using tf.summary'''
    log_dir_name = os.path.basename(os.path.dirname(state_log_dir + '/'))
    log_dir = os.path.join(state_log_dir, log_dir_name)
    logger_tb = Tb_Logger(log_dir)
    # indicate folder to save, plus other options
    tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=0,
                              write_graph=False, write_images=False)
    # save it in your callback list, where you can include other callbacks
    callbacks_list = [tensorboard]
    # then pass to fit as callback, remember to use validation_data also

    Train_R_I = train_user[1]
    Train_R_J = train_item[1]
    Test_R = test_user[1]
    # check if the dataset has a validation set
    no_validation = False
    if valid_user:
        Valid_R = valid_user[1]
    else:
        no_validation = True

    # assign a weight to each item according to how many times it was rated
    if give_item_weight is True:
        item_weight = np.array([math.sqrt(len(i))
                                for i in Train_R_J], dtype=float)
        item_weight = (float(num_item) / item_weight.sum()) * item_weight
        item_weight[item_weight == 0] = 1
    else:
        item_weight = np.ones(num_item, dtype=float)

    '''initialize'''
    cnn_output_dim = 150
    att_output_dim = dimension - cnn_output_dim
    cnn_module = CNN_module(cnn_output_dim, vocab_size, dropout_rate, emb_dim,
                            max_len, num_kernel_per_ws, init_W)
    if use_CAE:
        att_module = CAE_module(att_output_dim, cae_N_hidden=att_output_dim,
                                nb_features=num_features)
    else:
        att_module = Stacking_NN_CNN_CAE(input_dim=num_features,
                                         output_dimesion=att_output_dim,
                                         num_layers=1, hidden_dim=num_features * 2)

    theta = cnn_module.get_projection_layer(CNN_X)
    gamma = att_module.get_projection_layer(attributes_X)
    delta = np.concatenate((gamma, theta), axis=1)
    if not (theta.shape[1] + gamma.shape[1] == dimension):
        sys.exit("theta and gamma shapes are wrong")

    np.random.seed(133)
    U = np.random.uniform(size=(num_user, dimension))
    V = delta

    print('Training CNN-CAE-MF ...')
    pre_val_eval = -1e10
    PREV_LOSS = -1e-50
    endure_count = 5
    count = 0

    converge_threshold = 1e-4
    converge = 1.0
    iteration = 0
    while (iteration < max_iter and converge > converge_threshold) or iteration < min_iter:
        # for iteration in xrange(max_iter):
        loss = 0
        tic = time.time()
        print "%d iteration\t(patience: %d)" % (iteration, count)

        # Update U
        # VV = b * (V.T.dot(V)) + lambda_u * np.eye(dimension)
        VV = (V.T.dot(V)) + lambda_u * np.eye(dimension)
        sub_loss = np.zeros(num_user)

        for i in xrange(num_user):
            idx_item = train_user[0][i]
            V_i = V[idx_item]
            R_i = Train_R_I[i]
            C_i = np.diag(alpha * R_i)
            # A = VV + (a - b) * (V_i.T.dot(V_i))
            # B = (a * V_i * (np.tile(R_i, (dimension, 1)).T)).sum(0)
            A = VV + V_i.T.dot(C_i).dot(V_i)
            B = V_i.T.dot(C_i + np.eye(len(idx_item))).dot(R_i)
            U[i] = np.linalg.solve(A, B)

            sub_loss[i] = -0.5 * lambda_u * np.dot(U[i], U[i])

        loss = loss + np.sum(sub_loss)

        # Update V
        sub_loss = np.zeros(num_item)
        # UU = b * (U.T.dot(U))
        UU = (U.T.dot(U))
        for j in xrange(num_item):
            idx_user = train_item[0][j]
            U_j = U[idx_user]
            R_j = Train_R_J[j]
            C_j = np.diag(alpha * R_j)
            if len(U_j) > 0:
                # tmp_A = UU + (a - b) * (U_j.T.dot(U_j))
                tmp_A = UU + (U_j.T.dot(C_j).dot(U_j))
                A = tmp_A + lambda_v * item_weight[j] * np.eye(dimension)
                # B = (a * U_j * (np.tile(R_j, (dimension, 1)).T)
                #      ).sum(0) + lambda_v * item_weight[j] * delta[j]
                B = U_j.T.dot(C_j + np.eye(len(idx_user))).dot(R_j) + lambda_v * item_weight[j] * delta[j]
                V[j] = np.linalg.solve(A, B)

                sub_loss[j] = -0.5 * np.square(R_j * C_j).sum()
                sub_loss[j] = sub_loss[j] + np.sum(C_j * ((U_j.dot(V[j])) * R_j))
                sub_loss[j] = sub_loss[j] - 0.5 * np.dot(V[j].dot(tmp_A), V[j])
            else:
                # in case the item has no ratings
                V[j] = delta[j]

        loss = loss + np.sum(sub_loss)

        # Update theta
        seed = np.random.randint(100000)
        history = cnn_module.train(CNN_X, V[:, att_output_dim:], item_weight=item_weight,
                                   seed=seed, callbacks_list=callbacks_list)
        theta = cnn_module.get_projection_layer(CNN_X)
        cnn_loss = history.history['loss'][-1]

        # update gamma
        history = att_module.train(attributes_X, V[:, :att_output_dim], item_weight, seed,
                                   callbacks_list)
        gamma = att_module.get_projection_layer(attributes_X)
        att_loss = history.history['loss'][-1]

        # update delta
        delta = np.concatenate((gamma, theta), axis=1)

        loss = loss - 0.5 * lambda_v * (cnn_loss + att_loss) * num_item

        toc = time.time()
        elapsed = toc - tic

        '''calculate RMSE'''
        tr_eval = eval_RMSE(Train_R_I, U, V, train_user[0])
        if not no_validation:
            val_eval = eval_RMSE(Valid_R, U, V, valid_user[0])
        else:
            val_eval = -1
        te_eval = eval_RMSE(Test_R, U, V, test_user[0])

        '''write tf.summary'''
        logger_tb.log_scalar('train_rmse', tr_eval, iteration)
        if not no_validation:
            logger_tb.log_scalar('eval_rmse', val_eval, iteration)
        logger_tb.log_scalar('test_rmse', te_eval, iteration)
        logger_tb.writer.flush()

        '''calculate convergence and store the best values of U, V, theta'''
        converge = abs((loss - PREV_LOSS) / PREV_LOSS)

        # if (val_eval < pre_val_eval):
        if (loss > PREV_LOSS):
            # count = 0
            print("likelihood is increasing!")
            cnn_module.save_model(res_dir + '/CNN_weights.hdf5')
            att_module.save_model(res_dir + '/Att_weights.hdf5')
            np.savetxt(res_dir + '/final-U.dat', U)
            np.savetxt(res_dir + '/final-V.dat', V)
            np.savetxt(res_dir + '/theta.dat', theta)
            np.savetxt(res_dir + '/gamma.dat', gamma)
            best_train_rmse = tr_eval
            best_test_rmse = te_eval
            best_val_rmse = val_eval
        else:
            count = count + 1

        pre_val_eval = val_eval

        print "Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval)
        f1.write("Loss: %.5f Elapsed: %.4fs Converge: %.6f Tr: %.5f Val: %.5f Te: %.5f\n" % (
            loss, elapsed, converge, tr_eval, val_eval, te_eval))

        if (count >= endure_count and iteration > min_iter):
            # if (count == endure_count):
            break
        elif (iteration < min_iter):
            count = 0

        PREV_LOSS = loss
        iteration += 1

    f1.close()
    return best_train_rmse, best_test_rmse, best_val_rmse
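
# In Raw_att_CNN_concat each item's latent vector is split into two blocks:
# the first att_output_dim coordinates are regularized toward the attribute
# projection gamma_j and the remaining cnn_output_dim coordinates toward the
# text projection theta_j, i.e. the prior mean is the concatenation
#
#   \delta_j = [\gamma_j ; \theta_j],  with penalty  \frac{\lambda_v w_j}{2} \|v_j - \delta_j\|^2
#
# which is why cnn_module is trained against V[:, att_output_dim:] and
# att_module against V[:, :att_output_dim].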