def train(args, data_info, show_loss):
    train_data = data_info[0]
    eval_data = data_info[1]
    test_data = data_info[2]
    n_entity = data_info[3]
    n_relation = data_info[4]
    ripple_set = data_info[5]

    model = RippleNet(args, n_entity, n_relation)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(args.n_epoch):
            # training
            np.random.shuffle(train_data)
            start = 0
            while start < train_data.shape[0]:
                _, loss = model.train(
                    sess, get_feed_dict(args, model, train_data, ripple_set, start, start + args.batch_size))
                start += args.batch_size
                if show_loss:
                    print('%.1f%% %.4f' % (start / train_data.shape[0] * 100, loss))

            # evaluation
            train_auc, train_acc = evaluation(sess, args, model, train_data, ripple_set, args.batch_size)
            eval_auc, eval_acc = evaluation(sess, args, model, eval_data, ripple_set, args.batch_size)
            test_auc, test_acc = evaluation(sess, args, model, test_data, ripple_set, args.batch_size)

            print('epoch %d train auc: %.4f acc: %.4f eval auc: %.4f acc: %.4f test auc: %.4f acc: %.4f'
                  % (step, train_auc, train_acc, eval_auc, eval_acc, test_auc, test_acc))

            # case-study recommendation evaluation on the test split
            case_rec_evaluation(sess, args, model, test_data, ripple_set, args.batch_size)
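# `get_feed_dict` is called by the session-based variants in this section but
# is not part of the listing. A minimal sketch, assuming each row of `data` is
# a (user, item, label) triple and the model exposes `items`, `labels`, and
# per-hop `memories_h` / `memories_r` / `memories_t` placeholders:
def get_feed_dict(args, model, data, ripple_set, start, end):
    feed_dict = dict()
    feed_dict[model.items] = data[start:end, 1]
    feed_dict[model.labels] = data[start:end, 2]
    # one (head, relation, tail) memory list per propagation hop
    for i in range(args.n_hop):
        feed_dict[model.memories_h[i]] = [ripple_set[user][i][0] for user in data[start:end, 0]]
        feed_dict[model.memories_r[i]] = [ripple_set[user][i][1] for user in data[start:end, 0]]
        feed_dict[model.memories_t[i]] = [ripple_set[user][i][2] for user in data[start:end, 0]]
    return feed_dict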
def train(args, data_info, show_loss, config):
    train_data = data_info[0]
    eval_data = data_info[1]
    test_data = data_info[2]
    n_entity = data_info[3]
    n_relation = data_info[4]
    ripple_set = data_info[5]

    logger = Logger()
    logger.create_session_folder(SESSION_LOG_PATH)
    logger.set_default_filename(SESSION_LOG_PATH + "log.txt")
    logger.log(str(args))  # log training and model hyperparameters

    model = RippleNet(args, n_entity, n_relation)

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=None)
        for step in range(args.n_epoch):
            np.random.shuffle(train_data)
            # training
            for i in tqdm(range(0, train_data.shape[0], args.batch_size)):
                _, loss = model.train(
                    sess, _get_feed_dict(args, model, train_data, ripple_set, i, i + args.batch_size))
                if show_loss:
                    print('%.1f%% %.4f' % (i / train_data.shape[0] * 100, loss))
                    logger.log('%.1f%% %.4f' % (i / train_data.shape[0] * 100, loss))

            # evaluation
            train_auc, train_acc = _evaluation(sess, args, model, train_data, ripple_set)
            eval_auc, eval_acc = _evaluation(sess, args, model, eval_data, ripple_set)
            test_auc, test_acc = _evaluation(sess, args, model, test_data, ripple_set)

            # save the variables to disk after every epoch
            saver.save(sess, SESSION_LOG_PATH + "models/epoch_{}".format(step))

            print('epoch %d train auc: %.4f acc: %.4f eval auc: %.4f acc: %.4f test auc: %.4f acc: %.4f'
                  % (step, train_auc, train_acc, eval_auc, eval_acc, test_auc, test_acc))
            logger.log('epoch %d train auc: %.4f acc: %.4f eval auc: %.4f acc: %.4f test auc: %.4f acc: %.4f'
                       % (step, train_auc, train_acc, eval_auc, eval_acc, test_auc, test_acc))
def train(args, data_info, show_loss, config):
    train_data = data_info[0]
    eval_data = data_info[1]
    test_data = data_info[2]
    n_entity = data_info[3]
    n_relation = data_info[4]
    ripple_set = data_info[5]

    # `logger` and `session_log_path` are assumed to be module-level globals here
    logger.log(str(args))

    model = RippleNet(args, n_entity, n_relation)

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=None)
        for step in range(args.n_epoch):
            # training
            np.random.shuffle(train_data)
            for start in tqdm(range(0, train_data.shape[0], args.batch_size)):
                _, loss = model.train(
                    sess, get_feed_dict(args, model, train_data, ripple_set, start, start + args.batch_size))
                if show_loss:
                    print('%.1f%% %.4f' % (start / train_data.shape[0] * 100, loss))
                    logger.log('%.1f%% %.4f' % (start / train_data.shape[0] * 100, loss))

            # evaluation
            train_auc, train_acc = evaluation(sess, args, model, train_data, ripple_set, args.batch_size)
            eval_auc, eval_acc = evaluation(sess, args, model, eval_data, ripple_set, args.batch_size)
            test_auc, test_acc = evaluation(sess, args, model, test_data, ripple_set, args.batch_size)

            # save the variables to disk
            saver.save(sess, session_log_path + "models/epoch_{}".format(step))

            print('epoch %d train auc: %.4f acc: %.4f eval auc: %.4f acc: %.4f test auc: %.4f acc: %.4f'
                  % (step, train_auc, train_acc, eval_auc, eval_acc, test_auc, test_acc))
            logger.log('epoch %d train auc: %.4f acc: %.4f eval auc: %.4f acc: %.4f test auc: %.4f acc: %.4f'
                       % (step, train_auc, train_acc, eval_auc, eval_acc, test_auc, test_acc))
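# The `evaluation` helper used by the session-based variants is likewise not
# shown. A minimal sketch, assuming `model.eval` returns a per-batch
# (auc, acc) pair and the module-level numpy import `np`, with the epoch score
# taken as the mean over batches:
def evaluation(sess, args, model, data, ripple_set, batch_size):
    auc_list, acc_list = [], []
    start = 0
    while start < data.shape[0]:
        auc, acc = model.eval(sess, get_feed_dict(args, model, data, ripple_set, start, start + batch_size))
        auc_list.append(auc)
        acc_list.append(acc)
        start += batch_size
    return float(np.mean(auc_list)), float(np.mean(acc_list))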
def train(args, data_info, show_loss):
    # data_info comes from the load_data call in main
    train_data = data_info[0]
    eval_data = data_info[1]
    test_data = data_info[2]
    n_entity = data_info[3]
    n_relation = data_info[4]
    ripple_set = data_info[5]
    adj_entity = data_info[6]
    adj_relation = data_info[7]

    # the model instance used throughout train
    model = RippleNet(args, n_entity, n_relation, adj_entity, adj_relation)
    if args.use_cuda:
        model.cuda()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                                 args.lr, weight_decay=args.l2_weight)

    for step in range(args.n_epoch):
        # training
        np.random.shuffle(train_data)
        start = 0
        while start < train_data.shape[0]:
            return_dict = model(*get_feed_dict(args, model, train_data, ripple_set, start, start + args.batch_size))
            loss = return_dict["loss"]

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            start += args.batch_size
            if show_loss:
                print('%.1f%% %.4f' % (start / train_data.shape[0] * 100, loss.item()))

        # evaluation
        train_auc, train_acc = evaluation(args, model, train_data, ripple_set, args.batch_size)
        eval_auc, eval_acc = evaluation(args, model, eval_data, ripple_set, args.batch_size)
        test_auc, test_acc = evaluation(args, model, test_data, ripple_set, args.batch_size)

        print('epoch %d train auc: %.4f acc: %.4f eval auc: %.4f acc: %.4f test auc: %.4f acc: %.4f'
              % (step, train_auc, train_acc, eval_auc, eval_acc, test_auc, test_acc))
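# The PyTorch variants call `evaluation` without a session argument; its body
# is not in this listing. A minimal sketch, assuming the forward pass returns
# predicted scores under return_dict["scores"] and labels sit in column 2 of
# `data`; `roc_auc_score` is scikit-learn's:
from sklearn.metrics import roc_auc_score

def evaluation(args, model, data, ripple_set, batch_size):
    model.eval()
    auc_list, acc_list = [], []
    with torch.no_grad():
        start = 0
        while start < data.shape[0]:
            return_dict = model(*get_feed_dict(args, model, data, ripple_set, start, start + batch_size))
            scores = return_dict["scores"].cpu().numpy()
            labels = data[start:start + batch_size, 2]
            auc_list.append(roc_auc_score(y_true=labels, y_score=scores))
            # accuracy under a 0.5 decision threshold
            acc_list.append(float(np.mean((scores >= 0.5).astype(int) == labels)))
            start += batch_size
    model.train()
    return float(np.mean(auc_list)), float(np.mean(acc_list))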
def train(args, data, show_loss):
    train_set = data[0]
    test_set = data[1]
    num_entity = data[2]
    num_rel = data[3]
    ripple_set = data[4]
    sample_data = data[5]

    model = RippleNet(args, num_entity, num_rel)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        train_accs = []
        test_accs = []
        for s in range(args.n_epoch):
            np.random.shuffle(train_set)
            st = 0
            while st < train_set.shape[0]:
                _, loss = model.train(
                    sess, get_feed_dict(args, model, train_set, ripple_set, st, st + args.batch_size))
                st += args.batch_size
                if show_loss:
                    print('%.1f%% %.4f' % (st / train_set.shape[0] * 100, loss))

            # evaluation
            sample = True
            train_auc, train_acc = evaluation(sess, args, model, train_set, ripple_set, args.batch_size, not sample)
            test_auc, test_acc = evaluation(sess, args, model, test_set, ripple_set, args.batch_size, not sample)
            sample_auc, sample_acc = evaluation(sess, args, model, sample_data, ripple_set, sample_data.shape[0], sample)
            train_accs.append(train_acc)
            test_accs.append(test_acc)

            print('epoch %d train auc: %.4f acc: %.4f test auc: %.4f acc: %.4f'
                  % (s, train_auc, train_acc, test_auc, test_acc))

        # plot train (red) and test (green) accuracy curves over epochs
        x_axis = list(range(len(train_accs)))
        axes = plt1.gca()
        axes.set_ylim([0, 1])
        plt1.plot(x_axis, train_accs, 'r')
        plt1.plot(x_axis, test_accs, 'g')
        plt1.savefig('test2png.png', dpi=100)
def train(args, data_info, show_loss):
    train_data_just_ui = data_info[0]
    eval_data = data_info[1]
    test_data = data_info[2]
    n_entity = data_info[3]
    n_relation = data_info[4]
    ripple_set = data_info[5]
    uidx_negs = data_info[6]

    # record user-item interactions; needed for per-epoch negative sampling below
    uidxs, iidxs, u_is = get_u_is(train_data_just_ui)

    model = RippleNet(args, n_entity, n_relation)
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(args.n_epoch):
            # draw fresh negative samples each epoch
            start_time = time.time()
            train_data = generate_neg_sampling(u_is, iidxs, train_data_just_ui)
            print("negative sampling done. %f s" % (time.time() - start_time))

            np.random.shuffle(train_data)
            eval_data = train_data[-4096:, :]  # hold out the last 4096 rows for quick evaluation

            start = 0
            while start < train_data.shape[0]:
                feed_dict = get_feed_dict(args, model, train_data, ripple_set, start, start + args.batch_size)
                feed_dict[model.global_step] = step
                _, loss = model.train(sess, feed_dict)
                start += args.batch_size
                if show_loss and start % 102400 == 0:
                    print('%.1f%% %.4f' % (start / train_data.shape[0] * 100, loss))

            # evaluation
            eval_acc = evaluation(sess, args, model, eval_data, ripple_set, args.batch_size)
            test(sess, args, model, test_data, uidx_negs, ripple_set, args.batch_size)
            print('epoch %d eval acc: %.4f' % (step, eval_acc))
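# `get_u_is` and `generate_neg_sampling` are not part of this listing. A rough
# sketch of the negative-sampling step, assuming `u_is` maps each user to the
# set of items they interacted with: every positive (user, item) pair yields a
# label-1 row plus one uninteracted item as a label-0 row.
def generate_neg_sampling(u_is, iidxs, positive_data):
    rows = []
    for user, item in positive_data[:, :2]:
        rows.append((user, item, 1))
        neg = np.random.choice(iidxs)
        while neg in u_is[user]:  # resample until the item is truly unseen
            neg = np.random.choice(iidxs)
        rows.append((user, neg, 0))
    return np.array(rows)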
def train(args, data_info, show_loss):
    train_data = data_info[0]
    eval_data = data_info[1]
    test_data = data_info[2]
    n_entity = data_info[3]
    n_relation = data_info[4]
    ripple_set = data_info[5]

    model = RippleNet(args, n_entity, n_relation)
    if args.use_cuda:
        model.cuda()
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        args.lr,
    )

    for step in range(args.n_epoch):
        # training
        np.random.shuffle(train_data)
        start = 0
        while start < train_data.shape[0]:
            return_dict = model(*get_feed_dict(args, model, train_data, ripple_set, start, start + args.batch_size))
            loss = return_dict["loss"]

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            start += args.batch_size
            if show_loss:
                print('%.1f%% %.4f' % (start / train_data.shape[0] * 100, loss.item()))

        # evaluation (train/eval metrics disabled; only top-k test metrics are reported)
        # train_auc, train_acc, train_precision, train_recall, train_ndcg = evaluation(args, model, train_data, ripple_set, args.batch_size)
        # eval_auc, eval_acc, eval_precision, eval_recall, eval_ndcg = evaluation(args, model, eval_data, ripple_set, args.batch_size)
        test_precision, test_recall, test_ndcg = evaluation(args, model, test_data, ripple_set, args.batch_size)

        print('epoch %d test precision: %.4f recall: %.4f ndcg: %.4f'
              % (step, test_precision, test_recall, test_ndcg))
def train(args, data_info, show_loss):
    train_data = data_info[0]
    eval_data = data_info[1]
    test_data = data_info[2]
    n_entity = data_info[3]
    n_relation = data_info[4]
    ripple_set = data_info[5]

    model = RippleNet(args, n_entity, n_relation)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(args.n_epoch):
            # training
            np.random.shuffle(train_data)
            start = 0
            while start < train_data.shape[0]:
                _, loss = model.train(
                    sess, get_feed_dict(args, model, train_data, ripple_set, start, start + args.batch_size))
                start += args.batch_size
                if show_loss:
                    print('%.1f%% %.4f' % (start / train_data.shape[0] * 100, loss), end='\r')

            # evaluation (only the train split is scored each epoch)
            train_auc, train_acc = evaluation(sess, args, model, train_data, ripple_set, args.batch_size)
            # eval_auc, eval_acc = evaluation(sess, args, model, eval_data, ripple_set, args.batch_size)
            # test_auc, test_acc = evaluation(sess, args, model, test_data, ripple_set, args.batch_size)
            print('epoch %d train auc: %.4f acc: %.4f' % (step, train_auc, train_acc))

        # test: score the test split once after training, then persist the predictions
        user_list, item_list, score_list = test(sess, args, model, test_data, ripple_set, args.batch_size)
        persistence(user_list, item_list, score_list, args.dataset)

        return model
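# `persistence` stores the test-time predictions; its implementation is not in
# this listing. A minimal sketch that writes one (user, item, score) row per
# prediction to a CSV named after the dataset (the file layout is an assumption):
import csv

def persistence(user_list, item_list, score_list, dataset):
    with open('scores_{}.csv'.format(dataset), 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['user', 'item', 'score'])
        writer.writerows(zip(user_list, item_list, score_list))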
def train(args, data_info, logger):
    train_data, eval_data, test_data = data_info[0], data_info[1], data_info[2]
    n_item, n_user = data_info[3], data_info[4]
    n_entity, n_relation = data_info[5], data_info[6]
    ripple_set = data_info[7]

    if args.show_save_dataset_info:
        print(f'train({len(train_data)}), eval({len(eval_data)}), test({len(test_data)})')

    if args.topk_eval:
        _, eval_record, test_record, topk_data = topk_settings(args, train_data, eval_data, test_data, n_item)

    # create datasets
    train_dataset = get_dataset(train_data, ripple_set, n_hop=args.n_hop, batch_size=args.batch_size)
    eval_dataset = get_dataset(eval_data, ripple_set, n_hop=args.n_hop, batch_size=args.batch_size)
    test_dataset = get_dataset(test_data, ripple_set, n_hop=args.n_hop, batch_size=args.batch_size)
    if args.topk_eval:
        topk_dataset = get_dataset(topk_data, ripple_set, n_hop=args.n_hop, batch_size=args.batch_size)

    # init early stop controller
    early_stop = Early_stop_info(args)

    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = args.gpu_fract

    with tf.Session(config=config) as sess:
        model = RippleNet(args, n_entity, n_relation, train_dataset)
        init = tf.global_variables_initializer()
        sess.run(init)

        # load emb from previous stage
        if args.load_emb:
            print('load pretrained emb ...')
            model.initialize_pretrained_embeddings(sess)

        for epoch in range(args.n_epoch):
            scores = {t: {} for t in ['train', 'eval', 'test']}

            train_dataset.shuffle(buffer_size=1024)
            model.iter_init(sess, train_dataset)

            # train until the dataset iterator is exhausted
            t_start = time()
            try:
                while True:
                    model.train(sess)
            except tf.errors.OutOfRangeError:
                pass
            t_flag = time()

            # evaluation
            scores['train'] = evaluation(sess, model, train_dataset)
            scores['eval'] = evaluation(sess, model, eval_dataset)
            scores['test'] = evaluation(sess, model, test_dataset)

            early_stop_score = 0.
            if args.topk_eval:
                # topk evaluation
                topk_scores = topk_evaluation(sess, model, topk_dataset, eval_record, test_record, args.k_list)
                for t in ['eval', 'test']:
                    for m in ['p', 'r', 'ndcg']:
                        scores[t][m] = topk_scores[t][m]
                early_stop_score = scores['eval']['r'][-1]
            else:
                early_stop_score = scores['eval']['auc']

            logger.update_score(epoch, scores)

            print('training time: %.1fs' % (t_flag - t_start), end='')
            print(', total: %.1fs.' % (time() - t_start))

            if early_stop_score >= early_stop.best_score:
                print('save embs ...', end='\r')
                model.save_pretrained_emb(sess)
            if early_stop.update_score(epoch, early_stop_score):
                break

    tf.reset_default_graph()
def train(args, data_info, logger):
    train_data, eval_data, test_data = data_info[0], data_info[1], data_info[2]
    n_item, n_user = data_info[3], data_info[4]
    n_entity, n_relation = data_info[5], data_info[6]
    ripple_set, item_set_most_pop = data_info[7], data_info[-1]

    if args.show_save_dataset_info:
        print(f'train({len(train_data)}), eval({len(eval_data)}), test({len(test_data)})')

    if args.topk_eval:
        user_list, train_record, eval_record, test_record, item_set, k_list = topk_settings(
            args, train_data, eval_data, test_data, n_item)

    # init early stop controller
    early_stop = Early_stop_info(args)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = args.gpu_fract

    with tf.Session(config=config) as sess:
        model = RippleNet(args, n_entity, n_relation)
        init = tf.global_variables_initializer()
        sess.run(init)

        for epoch in range(80):
            scores = {t: {} for t in ['train', 'eval', 'test']}

            np.random.shuffle(train_data)
            start = 0
            t_start = time()
            while start < train_data.shape[0]:
                _, loss = model.train(
                    sess, get_feed_dict(args, model, train_data, ripple_set, start, start + args.batch_size))
                start += args.batch_size
            t_flag = time()

            # evaluation
            scores['train'] = evaluation(sess, args, model, train_data, ripple_set, args.batch_size)
            scores['eval'] = evaluation(sess, args, model, eval_data, ripple_set, args.batch_size)
            scores['test'] = evaluation(sess, args, model, test_data, ripple_set, args.batch_size)

            early_stop_score = 0.
            if args.topk_eval:
                # topk evaluation on the eval split
                precision, recall, ndcg, MAP, hit_ratio = topk_eval(
                    sess, args, ripple_set, model, user_list, train_record, eval_record, test_record,
                    item_set_most_pop, k_list, args.batch_size, mode='eval')
                scores['eval']['p'] = [round(i, 6) for i in precision]
                scores['eval']['r'] = [round(i, 6) for i in recall]
                scores['eval']['ndcg'] = [round(i, 6) for i in ndcg]

                # topk evaluation on the test split
                precision, recall, ndcg, MAP, hit_ratio = topk_eval(
                    sess, args, ripple_set, model, user_list, train_record, eval_record, test_record,
                    item_set_most_pop, k_list, args.batch_size, mode='test')
                scores['test']['p'] = [round(i, 6) for i in precision]
                scores['test']['r'] = [round(i, 6) for i in recall]
                scores['test']['ndcg'] = [round(i, 6) for i in ndcg]

                early_stop_score = scores['eval']['r'][2]
            else:
                early_stop_score = scores['eval']['auc']

            logger.update_score(epoch, scores)

            print('training time: %.1fs' % (t_flag - t_start), end='')
            print(', total: %.1fs.' % (time() - t_start))

            if early_stop.update_score(epoch, early_stop_score):
                break

    tf.reset_default_graph()
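# `Early_stop_info` is not part of this listing either. A minimal sketch of a
# patience-based controller consistent with the call sites above; the
# `args.patience` hyperparameter is an assumption:
class Early_stop_info:
    def __init__(self, args):
        self.patience = args.patience
        self.best_score = 0.
        self.best_epoch = 0

    def update_score(self, epoch, score):
        if score > self.best_score:
            self.best_score = score
            self.best_epoch = epoch
        # signal a stop once no improvement has been seen for `patience` epochs
        return epoch - self.best_epoch >= self.patience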