def test(args):
    """Evaluate a saved Caser checkpoint with top-k ranking metrics.

    Loads the interactions found under ``args.test_root``, converts them to
    sequences, restores the latest checkpoint from ``args.check_dir`` and
    scores the test instances in full batches of ``args.batch_size``.
    Instances that do not fill a complete batch are not evaluated.

    Parameters
    ----------

    args: args,
        Run configuration. This function reads ``test_root``, ``L``, ``T``,
        ``batch_size``, ``check_dir`` and ``top_k``; the whole object is
        also handed to ``Caser``.

    Returns
    -------

    tuple of float, or None
        ``(HR, NDCG, MRR)`` averaged over the evaluated batches.
        ``(0.0, 0.0, 0.0)`` when there is not a single full batch, and
        ``None`` when no checkpoint could be restored.
    """
    data = Interactions(args.test_root)
    data.to_sequence(args.L, args.T)

    sequences_np = data.sequences.sequences
    targets_np = data.sequences.targets
    users_np = data.sequences.user_ids.reshape(-1, 1)

    n_test = sequences_np.shape[0]
    print('total test instances: %d' % n_test)

    num_users = data.num_users
    num_items = data.num_items
    batch_size = args.batch_size

    # One row per batch slot, each row listing every item id. The float64
    # dtype matches the array the previous np.zeros-based construction fed
    # to the model.
    item_ids = np.tile(np.arange(num_items, dtype=np.float64),
                       (batch_size, 1))

    test_batches = n_test // batch_size

    model = Caser(num_users, num_items, args)
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(args.check_dir)
        if not (ckpt and ckpt.model_checkpoint_path):
            print('Restore model from {} failed!'.format(args.check_dir))
            return None
        saver.restore(sess, ckpt.model_checkpoint_path)
        print('Restore model from {} successfully!'.format(args.check_dir))

        if test_batches == 0:
            # Averaging over zero batches would divide by zero.
            print('no full test batch (instances=%d, batch_size=%d); '
                  'returning zero metrics' % (n_test, batch_size))
            return 0.0, 0.0, 0.0

        HR, NDCG, MRR = 0.0, 0.0, 0.0
        for batch_idx in range(test_batches):
            start = batch_idx * batch_size
            stop = start + batch_size
            sequences = sequences_np[start:stop]
            targets = targets_np[start:stop]
            users = users_np[start:stop]

            _, top_k_index = model.predict(sess, sequences, users, item_ids)

            hr, ndcg, mrr = 0.0, 0.0, 0.0
            for row in range(batch_size):
                ranked = top_k_index[row]
                # Only the first target of each instance is scored.
                target = targets[row][0]
                for rank in range(args.top_k):
                    if target == ranked[rank]:
                        hr += 1
                        # Float literals keep true division on Python 2.
                        mrr += 1.0 / (1 + rank)
                        # With a single relevant item the ideal DCG is
                        # 1 / log2(2) == 1, so NDCG equals the DCG term.
                        ndcg += 1.0 / np.log2(rank + 2)
                        break

            HR += hr / batch_size
            NDCG += ndcg / batch_size
            MRR += mrr / batch_size

    return HR / test_batches, NDCG / test_batches, MRR / test_batches
class Recommender(object):
    """
    Contains attributes and methods that needed to train a sequential
    recommendation model. Models are trained by many tuples of
    (users, sequences, targets, negatives) and negatives are from negative
    sampling: for any known tuple of (user, sequence, targets), one or more
    items are randomly sampled to act as negatives.

    Parameters
    ----------

    args: args,
        Model-related arguments, like latent dimensions. ``batch_size``,
        ``n_iter`` and ``neg_samples`` are read in the constructor, so the
        ``None`` default cannot actually be used.
    """

    def __init__(self, args=None):
        # model related
        self._num_items = None
        self._num_users = None
        self._net = None
        self.args = args

        # learning related
        self._batch_size = self.args.batch_size
        self._n_iter = self.args.n_iter
        self._neg_samples = self.args.neg_samples

        # rank evaluation related
        self.test_sequence = None
        # user id -> list of item ids that may be sampled as negatives
        self._candidate = dict()

    @property
    def _initialized(self):
        """Whether the network has been created by ``_initialize``."""
        return self._net is not None

    def _initialize(self, interactions):
        """Build the Caser network and a session with initialized variables.

        Parameters
        ----------

        interactions: :class:`spotlight.interactions.Interactions`
            supplies ``num_items``, ``num_users`` and ``test_sequences``
        """
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users

        self.test_sequence = interactions.test_sequences

        self._net = Caser(self._num_users, self._num_items, self.args)
        self._net.build_model()

        self.sess = tf.Session()
        init = tf.global_variables_initializer()
        self.sess.run(init)

    def fit(self, train, test, verbose=False):
        """
        The general training loop to fit the model

        Parameters
        ----------

        train: :class:`spotlight.interactions.Interactions`
            training instances, also contains test sequences
        test: :class:`spotlight.interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            print the logs
        """

        # convert to sequences, targets and users
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        n_train = sequences_np.shape[0]

        output_str = 'total training instances: %d' % n_train
        print(output_str)

        if not self._initialized:
            self._initialize(train)

        start_epoch = 0

        for epoch_num in range(start_epoch, self._n_iter):

            t1 = time()

            users_np, sequences_np, targets_np = shuffle(users_np,
                                                         sequences_np,
                                                         targets_np)

            negatives_np = self._generate_negative_samples(
                users_np, train, n=self._neg_samples)

            epoch_loss = 0.0
            # Counted explicitly so an epoch without any minibatch does not
            # reference an unbound loop variable.
            n_batches = 0
            batches = minibatch(users_np, sequences_np, targets_np,
                                negatives_np, batch_size=self._batch_size)
            for (batch_users, batch_sequences,
                 batch_targets, batch_negatives) in batches:
                # targets first, sampled negatives after, along axis 1
                items_to_predict = np.concatenate(
                    (batch_targets, batch_negatives), 1)
                loss = self._net.train(self.sess, batch_sequences,
                                       batch_users, items_to_predict)
                epoch_loss += loss
                n_batches += 1

            if n_batches:
                epoch_loss /= n_batches

            t2 = time()
            if verbose and (epoch_num + 1) % 10 == 0:
                precision, recall, mean_aps = evaluate_ranking(
                    self, test, train, k=[1, 5, 10])
                output_str = ("Epoch %d [%.1f s]\tloss=%.4f, map=%.4f, "
                              "prec@1=%.4f, prec@5=%.4f, prec@10=%.4f, "
                              "recall@1=%.4f, recall@5=%.4f, recall@10=%.4f, "
                              "[%.1f s]" % (epoch_num + 1,
                                            t2 - t1,
                                            epoch_loss,
                                            mean_aps,
                                            np.mean(precision[0]),
                                            np.mean(precision[1]),
                                            np.mean(precision[2]),
                                            np.mean(recall[0]),
                                            np.mean(recall[1]),
                                            np.mean(recall[2]),
                                            time() - t2))
                print(output_str)
            else:
                output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (
                    epoch_num + 1, t2 - t1, epoch_loss, time() - t2)
                print(output_str)

    def _generate_negative_samples(self, users, interactions, n):
        r"""
        Sample negative from a candidate set of each user. The
        candidate set of each user is defined by:
        {All Items} \ {Items Rated by User}

        The candidate sets are built once, on the first call, and cached
        on the instance.

        Parameters
        ----------

        users: array of np.int64
            sequence users
        interactions: :class:`spotlight.interactions.Interactions`
            training instances, used for generate candidates
        n: int
            total number of negatives to sample for each sequence

        Returns
        -------

        np.ndarray of np.int64, shape (len(users), n)
            sampled negative item ids, one row per entry of ``users``
        """

        # reshape(-1) rather than squeeze(): squeeze() turns a single-row
        # input into a 0-d array, which has no shape[0] and is not iterable.
        users_ = users.reshape(-1)
        negative_samples = np.zeros((users_.shape[0], n), np.int64)

        if not self._candidate:
            # 0 for padding
            all_items = set(np.arange(interactions.num_items - 1) + 1)
            train = interactions.tocsr()
            for user, row in enumerate(train):
                self._candidate[user] = list(all_items - set(row.indices))

        for i, u in enumerate(users_):
            candidates = self._candidate[u]
            for j in range(n):
                negative_samples[i, j] = candidates[
                    np.random.randint(len(candidates))]

        return negative_samples

    def predict(self, user_id, item_ids=None):
        """
        Make predictions for evaluation: given a user id, it will
        first retrieve the test sequence associated with that user
        and compute the recommendation scores for items.

        Parameters
        ----------

        user_id: int
            users id for which prediction scores needed.
        item_ids: array, optional
            Array containing the item ids for which prediction scores
            are desired. If not supplied, predictions for all items
            will be computed.

        Returns
        -------

        Whatever the network's ``predict`` returns for this user.
        """

        sequences_np = self.test_sequence.sequences[user_id, :]
        sequences_np = np.atleast_2d(sequences_np)

        if item_ids is None:
            item_ids = np.arange(self._num_items).reshape(-1, 1)

        out = self._net.predict(self.sess, sequences_np, user_id, item_ids)

        return out
class Recommender(object):
    """
    Trains a Caser sequential recommendation model with negative sampling
    and periodically validates it with top-k ranking metrics.

    Parameters
    ----------

    args: args,
        Model-related arguments, like latent dimensions. ``batch_size``,
        ``n_iter``, ``neg_samples`` and ``top_k`` are read in the
        constructor (so the ``None`` default cannot actually be used);
        ``check_dir`` is read when a checkpoint is saved.
    """

    def __init__(self, args=None):
        # model related
        self._num_items = None
        self._num_users = None
        self._net = None
        self.args = args

        # learning related
        self._batch_size = self.args.batch_size
        self._n_iter = self.args.n_iter
        self._neg_samples = self.args.neg_samples

        # rank evaluation related
        self.test_sequence = None
        # user id -> list of item ids that may be sampled as negatives
        self._candidate = dict()
        self._top_k = args.top_k

    @property
    def _initialized(self):
        """Whether the network has been created by ``_initialize``."""
        return self._net is not None

    def _initialize(self, interactions):
        """Create the Caser network and a session with initialized variables.

        Parameters
        ----------

        interactions: :class:`spotlight.interactions.Interactions`
            supplies ``num_items``, ``num_users`` and ``test_sequences``
        """
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users

        self.test_sequence = interactions.test_sequences

        self._net = Caser(self._num_users, self._num_items, self.args)

        self.sess = tf.Session()
        init = tf.global_variables_initializer()
        self.sess.run(init)

    def fit(self, train, val, verbose=False):
        """
        The general training loop to fit the model

        Every time the step counter returned by the network is a multiple
        of 1000 the running mean loss of the current epoch is printed. With
        ``verbose`` set, every multiple of 10000 also runs validation and
        saves a checkpoint whenever the hit ratio beats the best value seen
        so far in this call.

        Parameters
        ----------

        train: :class:`spotlight.interactions.Interactions`
            training instances, also contains test sequences
        val: :class:`spotlight.interactions.Interactions`
            validation instances, passed to :meth:`predict`
        verbose: bool, optional
            run validation, print its metrics and save checkpoints
        """
        # Function-scope import: the path join below is the only user.
        import os

        # convert to sequences, targets and users
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        n_train = sequences_np.shape[0]
        print('total training instances: %d' % n_train)

        if not self._initialized:
            self._initialize(train)

        start_epoch = 0
        # Tracked across epochs; resetting it per epoch would let a worse
        # model overwrite the "best" checkpoint.
        best_HR = 0.0

        for epoch_num in range(start_epoch, self._n_iter):

            t1 = time()
            users_np, sequences_np, targets_np = shuffle(
                users_np, sequences_np, targets_np)

            negatives_np = self._generate_negative_samples(
                users_np, train, n=self._neg_samples)

            step_loss = 0.0
            batches = minibatch(users_np, sequences_np, targets_np,
                                negatives_np, batch_size=self._batch_size)
            for minibatch_num, (batch_users, batch_sequences,
                                batch_targets,
                                batch_negatives) in enumerate(batches):
                # targets first, sampled negatives after, along axis 1
                items_to_predict = np.concatenate(
                    (batch_targets, batch_negatives), 1)
                loss, global_step = self._net.train(
                    self.sess, batch_sequences, batch_users,
                    items_to_predict)
                step_loss += loss
                # step_loss restarts every epoch, so average over the steps
                # of this epoch, not over the step counter returned by the
                # network (which may be zero or span several epochs).
                mean_loss = step_loss / (minibatch_num + 1)

                if global_step % 1000 == 0:
                    print('epoch-{}\tstep-{}\tloss-{:.6f}'.format(
                        epoch_num + 1, global_step, mean_loss))

                if verbose and global_step % 10000 == 0:
                    t2 = time()
                    HR, NDCG, MRR = self.predict(val)
                    k = self._top_k
                    output_str = ("Epoch %d step %d [%.1f s]\tloss=%.6f,"
                                  "HR@%d=%.6f, NDCG@%d=%.6f, MRR@%d=%.6f,"
                                  "[%.1f s] " % (epoch_num + 1,
                                                 global_step,
                                                 t2 - t1,
                                                 mean_loss,
                                                 k, HR,
                                                 k, NDCG,
                                                 k, MRR,
                                                 time() - t2))
                    print(output_str)
                    if HR > best_HR:
                        best_HR = HR
                        # join instead of '+' so check_dir works with or
                        # without a trailing path separator
                        ckpt_path = os.path.join(self.args.check_dir,
                                                 'model.ckpt')
                        self._net.saver.save(self.sess, ckpt_path,
                                             global_step=global_step)
                        print("model saved to {}".format(ckpt_path))

    def _generate_negative_samples(self, users, interactions, n):
        r"""
        Sample negative from a candidate set of each user. The
        candidate set of each user is defined by:
        {All Items} \ {Items Rated by User}

        The candidate sets are built once, on the first call, and cached
        on the instance.

        Parameters
        ----------

        users: array of np.int64
            sequence users
        interactions: :class:`spotlight.interactions.Interactions`
            training instances, used for generate candidates
        n: int
            total number of negatives to sample for each sequence

        Returns
        -------

        np.ndarray of np.int64, shape (len(users), n)
            sampled negative item ids, one row per entry of ``users``
        """

        # reshape(-1) rather than squeeze(): squeeze() turns a single-row
        # input into a 0-d array, which has no shape[0] and is not iterable.
        users_ = users.reshape(-1)
        negative_samples = np.zeros((users_.shape[0], n), np.int64)

        if not self._candidate:
            # 0 for padding
            all_items = set(np.arange(interactions.num_items - 1) + 1)
            train = interactions.tocsr()
            for user, row in enumerate(train):
                self._candidate[user] = list(all_items - set(row.indices))

        for i, u in enumerate(users_):
            candidates = self._candidate[u]
            for j in range(n):
                negative_samples[i, j] = candidates[
                    np.random.randint(len(candidates))]

        return negative_samples

    def predict(self, val, item_ids=None):
        """
        Score the validation instances and return averaged top-k metrics.

        Instances are processed in full batches of the training batch size;
        instances that do not fill a complete batch are not evaluated. For
        each instance only the first target is compared against the top-k
        indices returned by the network.

        Parameters
        ----------

        val: :class:`spotlight.interactions.Interactions`
            validation instances; ``val.sequences`` must provide
            ``sequences``, ``targets`` and ``user_ids``.
        item_ids: array, optional
            Accepted for interface compatibility but not used: scores are
            always requested for all items.

        Returns
        -------

        tuple of float
            ``(HR, NDCG, MRR)`` averaged over the evaluated batches, or
            ``(0.0, 0.0, 0.0)`` when there is not a single full batch.
        """

        sequences_np = val.sequences.sequences
        targets_np = val.sequences.targets
        users_np = val.sequences.user_ids.reshape(-1, 1)
        n_val = sequences_np.shape[0]
        print('total validation instances: %d' % n_val)

        batch_size = self._batch_size
        # One row per batch slot, each row listing every item id. The
        # float64 dtype matches the array the previous np.zeros-based
        # construction fed to the network.
        item_ids = np.tile(np.arange(self._num_items, dtype=np.float64),
                           (batch_size, 1))

        valid_batches = n_val // batch_size
        if valid_batches == 0:
            # Averaging over zero batches would divide by zero.
            print('no full validation batch (instances=%d, batch_size=%d); '
                  'returning zero metrics' % (n_val, batch_size))
            return 0.0, 0.0, 0.0

        NDCG, HR, MRR = 0.0, 0.0, 0.0
        for batch_idx in range(valid_batches):
            start = batch_idx * batch_size
            stop = start + batch_size
            sequences = sequences_np[start:stop]
            targets = targets_np[start:stop]
            users = users_np[start:stop]

            _, top_k_index = self._net.predict(self.sess, sequences,
                                               users, item_ids)

            hr, ndcg, mrr = 0.0, 0.0, 0.0
            for row in range(batch_size):
                ranked = top_k_index[row]
                target = targets[row][0]
                for rank in range(self._top_k):
                    if target == ranked[rank]:
                        hr += 1
                        # Float literals keep true division on Python 2.
                        mrr += 1.0 / (1 + rank)
                        # With a single relevant item the ideal DCG is
                        # 1 / log2(2) == 1, so NDCG equals the DCG term.
                        ndcg += 1.0 / np.log2(rank + 2)
                        break

            HR += hr / batch_size
            NDCG += ndcg / batch_size
            MRR += mrr / batch_size

        return HR / valid_batches, NDCG / valid_batches, MRR / valid_batches