def run(args):
    """Train or evaluate a GRU4Rec model according to ``args``.

    Loads the train/test splits, fills in the data-dependent hyper-parameters
    on ``args``, then either fits the model (``args.is_training == 1``) or
    evaluates Recall@20 / MRR@20 on the validation split.

    Args:
        args: configuration object carrying paths, training flags and
            hyper-parameters (mutated in place: ``n_items``,
            ``dropout_p_hidden``).
    """
    # NOTE(review): the original body began with the Python-2 statement
    # ``print 1`` — a SyntaxError under Python 3 and an obvious debug
    # leftover; it has been removed.
    data_loader = DataHelper(args, args.data_path, args.save_dir)
    data_loader.choose_data()
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64})
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})
    args.n_items = len(data['ItemId'].unique())
    # Keep-probability 1.0 disables dropout entirely when only evaluating.
    args.dropout_p_hidden = 1.0 if args.is_training == 0 else args.dropout
    print(args.n_epochs)
    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True  # claim GPU memory on demand
    with tf.Session(config=gpu_config) as sess:
        gru = train.GRU4Rec(sess, args)
        if args.is_training == 1:
            gru.fit(data)
        else:
            res = evaluation.evaluate_sessions_batch(gru, data, valid)
            print('Recall@20: {}\tMRR@20: {}'.format(res[0], res[1]))
def main():
    """Entry point: parse CLI options, build the config, train or evaluate."""
    opts = parseArgs()
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64})
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})
    # Hyper-parameters: copy the command-line options onto the config object.
    args = Args()
    args.n_items = len(data['ItemId'].unique())
    for dst, src in (('layers', 'layer'), ('rnn_size', 'size'),
                     ('n_epochs', 'epoch'), ('learning_rate', 'lr'),
                     ('is_training', 'train'), ('test_model', 'test'),
                     ('hidden_act', 'hidden_act'), ('final_act', 'final_act'),
                     ('loss', 'loss')):
        setattr(args, dst, getattr(opts, src))
    # No dropout (keep-probability 1.0) when running in evaluation mode.
    args.dropout_p_hidden = 1.0 if args.is_training == 0 else opts.dropout
    print(args.dropout_p_hidden)
    # Print dataset/session statistics before running.
    data_stat = data_utils.DataUtils(data, valid, args)
    data_stat.session_stat()
    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        gru = model.GRU4Rec(sess, args)
        if args.is_training:
            gru.fit(data)
        else:
            res = evaluation.evaluate_sessions_batch(gru, data, valid)
            print('Recall@20: {}\tMRR@20: {}'.format(res[0], res[1]))
gru.fit(data) res = evaluation.evaluate_sessions_batch(gru, valid, None) print('Recall@20: {}'.format(res[0])) print('MRR@20: {}'.format(res[1])) ''' #Reproducing results from "Recurrent Neural Networks with Top-k Gains for Session-based Recommendations" on RSC15 (http://arxiv.org/abs/1706.03847) print('Training GRU4Rec with 256 hidden units') #gru = gru4rec.GRU4Rec(loss='bpr', final_act='linear', hidden_act='tanh', layers=[100], batch_size=32, dropout_p_hidden=0.0, learning_rate=0.2, n_sample=0, sample_alpha=0, time_sort=True, n_epochs=10) # useful result gru = gru4rec.GRU4Rec(loss='bpr', final_act='linear', hidden_act='tanh', layers=[256], batch_size=32, dropout_p_hidden=0.0, learning_rate=0.2, n_sample=0, sample_alpha=0, time_sort=True, n_epochs=10, embedding=256) #gru = gru4rec.GRU4Rec(loss='bpr-max-0.5', final_act='linear', hidden_act='tanh', layers=[256], batch_size=200, dropout_p_hidden=0.0, learning_rate=0.2, momentum=0.5, n_sample=2048, sample_alpha=0, time_sort=True) gru.fit(data) gru.save_ItemEmbedding(data) res = evaluation.evaluate_sessions_batch(gru, valid, None) #print('Recall@20: {}'.format(res[0])) #print('MRR@20: {}'.format(res[1]))
if __name__ == '__main__':
    # Parse command-line options and load the train/test splits.
    command_line = parseArgs()
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64})
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})

    # Assemble the model configuration from the CLI options.
    args = Args()
    args.n_items = len(data['ItemId'].unique())
    args.layers = command_line.layer
    args.rnn_size = command_line.size
    args.n_epochs = command_line.epoch
    args.learning_rate = command_line.lr
    args.is_training = command_line.train
    args.test_model = command_line.test
    args.hidden_act = command_line.hidden_act
    args.final_act = command_line.final_act
    args.loss = command_line.loss
    # Evaluation runs with dropout disabled (keep-probability 1.0).
    args.dropout_p_hidden = 1.0 if args.is_training == 0 else command_line.dropout
    print(args.dropout_p_hidden)

    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)

    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        gru = model.GRU4Rec(sess, args)
        if args.is_training:
            gru.fit(data)
        else:
            res = evaluation.evaluate_sessions_batch(gru, data, valid)
            print('Recall@20: {}\tMRR@20: {}'.format(res[0], res[1]))
# -*- coding: utf-8 -*-
"""
Created on Wed Apr  6 18:14:46 2016

@author: Balázs Hidasi
"""
import sys

sys.path.append('../..')

import numpy as np
import pandas as pd

import gru4rec
import evaluation

PATH_TO_TRAIN = '/path/to/rsc15_train_full.txt'
PATH_TO_TEST = '/path/to/rsc15_test.txt'

if __name__ == '__main__':
    # Load train/test session data; ItemId is forced to int64.
    train_set = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64})
    test_set = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})

    print('Training GRU4Rec with 100 hidden units')
    gru = gru4rec.GRU4Rec(
        layers=[100],
        loss='top1',
        batch_size=50,
        dropout_p_hidden=0.5,
        learning_rate=0.01,
        momentum=0.0,
    )
    gru.fit(train_set)

    res = evaluation.evaluate_sessions_batch(gru, test_set, None)
    print('Recall@20: {}'.format(res[0]))
    print('MRR@20: {}'.format(res[1]))
os.mkdir(ARGS.checkpoint_dir)
GPU_CONFIG = tf.ConfigProto()
#GPU_CONFIG.gpu_options.allow_growth = True
with tf.Session(config=GPU_CONFIG) as sess:
    GRU = model.GRU4Rec(sess, ARGS)
    START_TIME = time.time()
    if ARGS.is_training:
        # Training run: log results and report wall-clock training time.
        OUTPUT = open('train_results.txt', 'w')
        GRU.fit(DATA)
        OUTPUT.close()
        TRAINING_TIME = time.time()
        print("Training time =", TRAINING_TIME - START_TIME, "seconds")
    else:
        # Evaluation run: write Recall/MRR at several cutoffs to a report file.
        # NOTE(review): TEST_OUTPUT is never closed in this visible chunk —
        # confirm it is closed (or use a `with` block) further down the file.
        TEST_OUTPUT = open('test_results.txt', 'w')
        print("\n\nEvaluating Model....\n", file=TEST_OUTPUT)
        RES = evaluation.evaluate_sessions_batch(GRU, DATA, DATA)
        # RES[0] holds recalls, RES[1] holds MRRs, indexed by cutoff position.
        for idx, k in enumerate((1, 2, 5, 10, 20, 50)):
            print('Recall@{}: {}'.format(k, RES[0][idx]), file=TEST_OUTPUT)
            print('MRR@{}: {}'.format(k, RES[1][idx]), file=TEST_OUTPUT)
# Build the model configuration from the parsed command-line options.
args = Args()
args.n_items = len(data['product_id'].unique())
for dst, src in (('layers', 'layer'), ('rnn_size', 'size'),
                 ('n_epochs', 'epoch'), ('learning_rate', 'lr'),
                 ('is_training', 'train'), ('test_model', 'test'),
                 ('hidden_act', 'hidden_act'), ('final_act', 'final_act'),
                 ('loss', 'loss')):
    setattr(args, dst, getattr(command_line, src))
# Dropout is disabled (keep-probability 1.0) when only evaluating.
args.dropout_p_hidden = 1.0 if args.is_training == 0 else command_line.dropout
print(args.dropout_p_hidden)
print(args.loss)

if not os.path.exists(args.checkpoint_dir):
    os.mkdir(args.checkpoint_dir)

gpu_config = tf.ConfigProto()
gpu_config.gpu_options.allow_growth = True
with tf.Session(config=gpu_config) as sess:
    gru = model.GRU4Rec(sess, args)
    if args.is_training:
        gru.fit(data, args.loss)
    else:
        print("Testing")
        rec, mrr, topN = evaluation.evaluate_sessions_batch(
            gru, data, valid,
            session_key='cookie_id', item_key='product_id', time_key='timestamp')
        print('Recall@20: {}\tMRR@20: {}'.format(rec, mrr))
        print(topN)
        # Store the Top-20 products for every user session in a CSV file.
        topN = pd.DataFrame(topN)
        topN.to_csv('/home/nick/Desktop/thesis/datasets/pharmacy-data/api-data/rnn-data/top1-top20_2.csv')
feed_dict[self.state[i]] = self.predict_state[i] preds, self.predict_state = self.sess.run(fetches, feed_dict) preds = np.asarray(preds).T return pd.DataFrame(data=preds, index=itemidmap.index) if __name__ == '__main__': defaults = Defaults() data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64}) valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64}) defaults.n_items = len(data['ItemId'].unique()) defaults.dropout_p_hidden = 1.0 if defaults.is_training == 0 else 0.5 if not os.path.exists(defaults.checkpoint_dir): os.mkdir(defaults.checkpoint_dir) gpu_config = tf.ConfigProto() gpu_config.gpu_options.allow_growth = True with tf.Session(config=gpu_config) as session: predictor = Session4RecPredictor(defaults, session) if defaults.is_training: print('Start session4rec training...') predictor.train(data) else: res = evaluation.evaluate_sessions_batch(predictor, data, valid) print('Recall@20: {}\tMRR@20: {}'.format(res[0], res[1]))
# Build and train the knowledge-base-augmented GRU4Rec model.
gru = MN4rec.GRU4Rec(
    loss='bpr', final_act='linear', hidden_act=args.activation,
    layers=[256], batch_size=args.batch_size, embedding=length,
    KBembedding=KBlength, dropout_p_hidden=args.dropout,
    n_sample=args.num_neg, learning_rate=args.lr, momentum=0.1,
    sample_alpha=0, time_sort=True, n_epochs=args.epochs,
    train_random_order=True, out_dim=args.out_dim,
    MN_nfactors=r_matrix.shape[0], MN_dims=r_matrix.shape[1])
gru.fit(data, ItemEmbedding, KBItemEmbedding, r_matrix)
print("Training time is")
# FIX: was ``start_time - time.time()`` which printed a negative duration.
print(time.time() - start_time)

# Export the learned item embeddings, then evaluate while also dumping the
# per-user embeddings.
ItemFile = 'item_embedding'
gru.save_ItemEmbedding(data, ItemFile)  # 'item.embedding'
UserFile = 'user.embedding'
evaluation.evaluate_sessions_batch(
    gru, valid, None, SaveUserFile=UserFile)  # 'user.embedding'

end_time = time.time()
# FIX: was the Python-2 statement ``print start_time, end_time`` (a
# SyntaxError on Python 3) followed by a negated elapsed time.
print(start_time, end_time)
print(end_time - start_time)
momentum=0.0, time_sort=False) gru.fit(data) #cf=baselines.ItemKNN() #cf.fit(data) #mf=baselines.BPR() #mf.fit(data) #acc_gru=[] #acc_cf=[] #acc_mf=[] mrr_gru = [] #mrr_cf=[] #mrr_mf=[] for i in top: res_gru = evaluation.evaluate_sessions_batch(gru, valid, None, cut_off=i + 1) #res_cf = evaluation.evaluate_sessions(cf, valid, data, None,cut_off=i+1) # res_mf = evaluation.evaluate_sessions(mf, valid, data, None,cut_off=i+1) mrr_gru.append(res_gru[1]) print mrr_gru # mrr_gru.append(res_gru[1]) # acc_cf.append(res_cf[0]) # mrr_cf.append(res_cf[1]) # acc_mf.append(res_mf[0]) # mrr_mf.append(res_mf[1]) #bar_width=0.6 #X=top #Y_gru=acc_gru #Y_cf=acc_cf #Y_mf=acc_mf
args.latent_size = command_line.latent_size
args.optimizer = command_line.optimizer
# Evaluation mode runs with dropout disabled (keep-probability 1.0).
args.dropout_p_hidden = 1.0 if args.is_training == 0 else command_line.dropout
print(args.dropout_p_hidden)

if not os.path.exists(args.checkpoint_dir):
    os.mkdir(args.checkpoint_dir)

gpu_config = tf.ConfigProto()
#gpu_config=tf.compat.v1.ConfigProto()
gpu_config.gpu_options.allow_growth = True
with tf.Session(config=gpu_config) as sess:
    gru = model.GRU4Rec(sess, args)
    if args.is_training:
        gru.fit(data)
    else:
        print(args.test_model)
        # Report Recall/MRR/NDCG at each cutoff; res = (recall, mrr, ndcg).
        for cutoff in (5, 10, 20):
            res = evaluation.evaluate_sessions_batch(
                gru, data, valid, cut_off=cutoff, batch_size=args.batch_size)
            print('Recall@{}: {}'.format(cutoff, res[0]))
            print('MRR@{}: {}'.format(cutoff, res[1]))
            print('NDCG@{}: {}'.format(cutoff, res[2]))
if __name__ == '__main__':
    command_line = parseArgs()
    # NOTE(review): ``train_data``/``test_data`` are read as if they already
    # hold file paths — presumably defined earlier in this file; verify.
    train_data = pd.read_csv(train_data, index_col=0)
    test_data = pd.read_csv(test_data, index_col=0)

    # Copy the CLI options onto the model configuration.
    args = Args()
    args.n_items = len(train_data['ItemId'].unique())
    for dst, src in (('layers', 'layer'), ('rnn_size', 'size'),
                     ('n_epochs', 'epoch'), ('learning_rate', 'lr'),
                     ('is_training', 'train'), ('test_model', 'test'),
                     ('hidden_act', 'hidden_act'), ('final_act', 'final_act'),
                     ('loss', 'loss')):
        setattr(args, dst, getattr(command_line, src))
    # Keep-probability 1.0 (no dropout) when only evaluating.
    args.dropout_p_hidden = 1.0 if args.is_training == 0 else command_line.dropout

    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)

    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        gru = model.GRU4Rec(sess, args)
        if args.is_training:
            gru.fit(train_data)
        else:
            res = evaluation.evaluate_sessions_batch(
                gru, train_data, test_data, batch_size=args.batch_size)
            print('Recall@20: {}\tMRR@20: {}'.format(res[0], res[1]))
layers=[100], adapt='adagrad', n_epochs=10, batch_size=32, dropout_p_embed=0, dropout_p_hidden=0, learning_rate=0.2, momentum=0.3, n_sample=128, sample_alpha=0, bpreg=1, constrained_embedding=False, dwell_time=100) gru.fit(data) res = evaluation.evaluate_sessions_batch(gru, valid, cut_off=5, dwell_time=100) result1[0, 0] = res[0] result1[0, 1] = res[1] result1[0, 2] = res[2] print('bpr-max-sample45') print('HR@5: {}'.format(res[0])) print('MRR@5: {}'.format(res[1])) print('NDCG@5: {}'.format(res[2])) res = evaluation.evaluate_sessions_batch(gru, valid, cut_off=10, dwell_time=100) result1[1, 0] = res[0] result1[1, 1] = res[1]
gru = model_RBP.GRU4Rec(sess, args)
if args.is_training:
    gru.fit(data)
else:
    # Evaluate at several cutoffs; res = (recall, mrr, ...).
    for cutoff in (3, 15, 20):
        res = evaluation.evaluate_sessions_batch(gru, data, valid, cut_off=cutoff)
        print('Recall@{}: {}\tMRR@{}: {}'.format(cutoff, res[0], cutoff, res[1]))
    # NOTE: disabled rating/ID export to CSV (res[2]/res[3] via
    # PATH_TO_PROCESSED_DATA + 'eikon_pred_test{_ids,}.csv') removed here.
if __name__ == '__main__':
    command_line = parseArgs()
    # ``train_data``/``test_data`` are expected to hold CSV paths here.
    train_data = pd.read_csv(train_data)
    test_data = pd.read_csv(test_data)

    # Transfer the command-line options onto the configuration object.
    args = Args()
    args.n_items = len(train_data['ItemId'].unique())
    args.layers = command_line.layer
    args.rnn_size = command_line.size
    args.n_epochs = command_line.epoch
    args.learning_rate = command_line.lr
    args.is_training = command_line.train
    args.test_model = command_line.test
    args.hidden_act = command_line.hidden_act
    args.final_act = command_line.final_act
    args.loss = command_line.loss
    # Dropout off (keep-probability 1.0) in evaluation mode.
    args.dropout_p_hidden = 1.0 if args.is_training == 0 else command_line.dropout

    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)

    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        gru = model.GRU4Rec(sess, args)
        if args.is_training:
            gru.fit(train_data)
        else:
            res = evaluation.evaluate_sessions_batch(gru, train_data, test_data)
            print('Recall@20: {}\tMRR@20: {}'.format(res[0], res[1]))
final_act='softmax', hidden_act='tanh', layers=[100], adapt='adagrad', n_epochs=10, batch_size=32, dropout_p_embed=0, dropout_p_hidden=0.3, learning_rate=0.1, momentum=0.7, n_sample=2048, sample_alpha=0, bpreg=0, constrained_embedding=False) gru.fit(data) res = evaluation.evaluate_sessions_batch(gru, valid, cut_off=5) print('cross-entropy-sample0') print('HR@5: {}'.format(res[0])) print('MRR@5: {}'.format(res[1])) print('MAP@5: {}'.format(res[2])) print('NDCG@5: {}'.format(res[3])) print('PRECISION@5: {}'.format(res[4])) print('F1-SCORE@5: {}'.format(res[5])) res = evaluation.evaluate_sessions_batch(gru, valid, cut_off=10) print('HR@10: {}'.format(res[0])) print('MRR@10: {}'.format(res[1])) print('MAP@10: {}'.format(res[2])) print('NDCG@10: {}'.format(res[3])) print('PRECISION@10: {}'.format(res[4])) print('F1-SCORE@10: {}'.format(res[5]))