def fit(self):
    # train with early stopping on validation mAP
    valid_aps = 0
    for e in range(self.args.n_epochs):
        t1 = time()
        avg_loss = self.train_one_epoch()
        t2 = time()
        if e % 5 == 0 or e == self.args.n_epochs - 1:
            precision, recall, mean_aps = evaluate_ranking(self, self.test, self.train, k=[1, 5, 10])
            precs = [np.mean(p) for p in precision]
            recalls = [np.mean(r) for r in recall]
            output_str = f"Epoch {e+1} [{t2-t1:.1f}s]\tloss={avg_loss:.4f}, map={mean_aps:.4f}, " \
                         f"prec@1={precs[0]:.4f}, prec@5={precs[1]:.4f}, prec@10={precs[2]:.4f}, " \
                         f"recall@1={recalls[0]:.4f}, recall@5={recalls[1]:.4f}, recall@10={recalls[2]:.4f}, [{time()-t2:.1f}s]"
            print(output_str)
            # early stopping: keep the best validation mAP so far, stop once it degrades
            # (the original assigned `mean_aps = valid_aps`, which clobbers the metric
            # and never updates the running best)
            if mean_aps >= valid_aps:
                valid_aps = mean_aps
            else:
                break
    return {
        'epochs': e,
        'loss': avg_loss,
        'mAP': mean_aps,
        'prec1': precs[0], 'prec5': precs[1], 'prec10': precs[2],
        'recall1': recalls[0], 'recall5': recalls[1], 'recall10': recalls[2]
    }
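# The variant above delegates to a `train_one_epoch` helper that is not shown.
# A minimal sketch, assuming the model exposes `self._net`, `self._optimizer`,
# pre-built `self.users`/`self.sequences`/`self.targets`/`self.negatives`
# tensors, and the `minibatch` and `sigmoid_log_loss` utilities used by the
# other variants in this file (all of these names are assumptions, not the
# original implementation):
def train_one_epoch(self):
    self._net.train()
    epoch_loss = 0.0
    n_batches = 0
    for batch_users, batch_sequences, batch_targets, batch_negatives in minibatch(
            self.users, self.sequences, self.targets, self.negatives,
            batch_size=self._batch_size):
        # score targets and negatives in one forward pass
        items_to_predict = torch.cat((batch_targets, batch_negatives), 1)
        items_prediction = self._net(batch_sequences, batch_users, items_to_predict)
        targets_prediction, negatives_prediction = torch.split(
            items_prediction,
            [batch_targets.size(1), batch_negatives.size(1)], dim=1)

        self._optimizer.zero_grad()
        loss = sigmoid_log_loss(targets_prediction, negatives_prediction)
        loss.backward()
        self._optimizer.step()

        epoch_loss += loss.item()
        n_batches += 1
    return epoch_loss / max(n_batches, 1)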
def fit(self, train, test, verbose=False):
    """
    The general training loop to fit the model

    Parameters
    ----------
    train: :class:`interactions.Interactions`
        training instances, also contains test sequences
    test: :class:`interactions.Interactions`
        only contains targets for test sequences
    verbose: bool, optional
        print the logs
    """
    # convert sequences, targets and users to numpy arrays
    sequences_np = train.sequences.sequences
    targets_np = train.sequences.targets
    users_np = train.sequences.user_ids.reshape(-1, 1)

    L, T = train.sequences.L, train.sequences.T

    n_train = sequences_np.shape[0]
    print('total training instances: %d' % n_train)

    if not self._initialized:
        self._initialize(train)

    start_epoch = 1
    if self.checkpoint:
        print("loading checkpoint from %s" % self.checkpoint)
        checkpoint = torch.load(self.checkpoint)
        start_epoch = checkpoint['epoch_num'] + 1  # resume after the saved epoch
        self._net.load_state_dict(checkpoint['state_dict'])
        self._optimizer.load_state_dict(checkpoint['optimizer'])
        print("loaded checkpoint %s (epoch %d)" % (self.checkpoint, checkpoint['epoch_num']))

    # compute number of parameters
    print("Number of params: %d" % compute_model_size(self._net))

    for epoch_num in range(start_epoch, self._n_iter + 1):
        t1 = time()

        # set model to training mode and move it to the corresponding device
        self._net.train()
        self._net = self._net.to(self._device)

        users_np, sequences_np, targets_np = shuffle(users_np, sequences_np, targets_np)

        negatives_np = self._generate_negative_samples(users_np, train, n=self._neg_samples)

        # convert numpy arrays to PyTorch tensors and move them to the corresponding device
        users, sequences, targets, negatives = (torch.from_numpy(users_np).long(),
                                                torch.from_numpy(sequences_np).long(),
                                                torch.from_numpy(targets_np).long(),
                                                torch.from_numpy(negatives_np).long())
        users, sequences, targets, negatives = (users.to(self._device),
                                                sequences.to(self._device),
                                                targets.to(self._device),
                                                negatives.to(self._device))

        epoch_loss = 0.0
        for (minibatch_num,
             (batch_users, batch_sequences,
              batch_targets, batch_negatives)) in enumerate(
                  minibatch(users, sequences, targets, negatives,
                            batch_size=self._batch_size)):
            # concatenate all variables to get predictions in one run
            items_to_predict = torch.cat((batch_targets, batch_negatives), 1)
            items_prediction = self._net(batch_sequences, batch_users, items_to_predict)
            (targets_prediction, negatives_prediction) = torch.split(
                items_prediction,
                [batch_targets.size(1), batch_negatives.size(1)], dim=1)

            self._optimizer.zero_grad()
            # compute the binary cross-entropy loss
            loss = sigmoid_log_loss(targets_prediction, negatives_prediction)
            epoch_loss += loss.item()

            loss.backward()
            self._optimizer.step()

        epoch_loss /= minibatch_num + 1

        t2 = time()
        if verbose and epoch_num % 10 == 0:
            precision, recall, ndcg, mean_aps = evaluate_ranking(self, test, train, k=[3, 5, 10])
            str_precs = "precisions=%.4f,%.4f,%.4f" % tuple([np.mean(a) for a in precision])
            str_recalls = "recalls=%.4f,%.4f,%.4f" % tuple([np.mean(a) for a in recall])
            str_ndcgs = "ndcgs=%.4f,%.4f,%.4f" % tuple([np.mean(a) for a in ndcg])
            output_str = "Epoch %d [%.1f s]\tloss=%.4f, " \
                         "map=%.4f, %s, %s, %s[%.1f s]" % (epoch_num, t2 - t1, epoch_loss,
                                                           mean_aps, str_precs, str_recalls,
                                                           str_ndcgs, time() - t2)
            print(output_str)
        else:
            output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (epoch_num, t2 - t1,
                                                                    epoch_loss, time() - t2)
            print(output_str)

        _save_checkpoint(
            {
                'epoch_num': epoch_num,
                'state_dict': self._net.state_dict(),
                'optimizer': self._optimizer.state_dict(),
            }, 'checkpoints/gowalla-caser-dim=%d.pth.tar' % self.model_args.d)
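# The loop above calls two helpers that are not shown here. Minimal sketches,
# assuming the semantics implied elsewhere in this file: `sigmoid_log_loss`
# matches the inline binary cross-entropy used by the other `fit` variants,
# and `compute_model_size` matches the manual parameter count in the
# distillation variant further down.
def sigmoid_log_loss(positive_prediction, negative_prediction):
    # -log sigma(pos) - log(1 - sigma(neg)), averaged over the batch
    positive_loss = -torch.mean(torch.log(torch.sigmoid(positive_prediction)))
    negative_loss = -torch.mean(torch.log(1 - torch.sigmoid(negative_prediction)))
    return positive_loss + negative_loss

def compute_model_size(net):
    # total number of parameters in the network
    return sum(param.view(-1).size(0) for param in net.parameters())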
def fit(self, train, test, verbose=False):
    """
    The general training loop to fit the model

    Parameters
    ----------
    train: :class:`spotlight.interactions.Interactions`
        training instances, also contains test sequences
    test: :class:`spotlight.interactions.Interactions`
        only contains targets for test sequences
    verbose: bool, optional
        print the logs
    """
    # convert to sequences, targets and users
    sequences = train.sequences.sequences
    targets = train.sequences.targets
    users = train.sequences.user_ids.reshape(-1, 1)

    L, T = train.sequences.L, train.sequences.T

    n_train = sequences.shape[0]
    output_str = 'total training instances: %d' % n_train
    print(output_str)

    if not self._initialized:
        self._initialize(train)

    start_epoch = 0
    for epoch_num in range(start_epoch, self._n_iter):
        t1 = time()

        # set model to training mode
        self._net.train()

        users, sequences, targets = shuffle(users, sequences, targets)
        negative_samples = self._generate_negative_samples(users, train, n=self._neg_samples * T)

        sequences_tensor = gpu(torch.from_numpy(sequences), self._use_cuda)
        user_tensor = gpu(torch.from_numpy(users), self._use_cuda)
        item_target_tensor = gpu(torch.from_numpy(targets), self._use_cuda)
        item_negative_tensor = gpu(torch.from_numpy(negative_samples), self._use_cuda)

        epoch_loss = 0.0
        for minibatch_num, \
            (batch_sequence, batch_user, batch_target, batch_negative) \
                in enumerate(minibatch(sequences_tensor, user_tensor,
                                       item_target_tensor, item_negative_tensor,
                                       batch_size=self._batch_size)):
            # Variable is a legacy (pre-0.4) PyTorch wrapper; it is a no-op on
            # modern versions and is kept only for fidelity to the original
            sequence_var = Variable(batch_sequence)
            user_var = Variable(batch_user)
            item_target_var = Variable(batch_target)
            item_negative_var = Variable(batch_negative)

            target_prediction = self._net(sequence_var, user_var, item_target_var)
            negative_prediction = self._net(sequence_var, user_var, item_negative_var,
                                            use_cache=True)

            self._optimizer.zero_grad()
            # compute the binary cross-entropy loss
            positive_loss = -torch.mean(torch.log(torch.sigmoid(target_prediction)))
            negative_loss = -torch.mean(torch.log(1 - torch.sigmoid(negative_prediction)))
            loss = positive_loss + negative_loss
            epoch_loss += loss.item()  # loss.data[0] only works on pre-0.4 PyTorch

            loss.backward()
            self._optimizer.step()

        epoch_loss /= minibatch_num + 1

        t2 = time()
        if verbose and (epoch_num + 1) % 1 == 0:
            precision, recall, mean_aps = evaluate_ranking(self, test, train, k=[1, 5, 10])
            output_str = "Epoch %d [%.1f s]\tloss=%.4f, map=%.4f, " \
                         "prec@1=%.4f, prec@5=%.4f, prec@10=%.4f, " \
                         "recall@1=%.4f, recall@5=%.4f, recall@10=%.4f, [%.1f s]" % (
                             epoch_num + 1, t2 - t1, epoch_loss, mean_aps,
                             np.mean(precision[0]), np.mean(precision[1]), np.mean(precision[2]),
                             np.mean(recall[0]), np.mean(recall[1]), np.mean(recall[2]),
                             time() - t2)
            print(output_str)
        else:
            output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (epoch_num + 1, t2 - t1,
                                                                    epoch_loss, time() - t2)
            print(output_str)
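# Every variant iterates with a `minibatch` utility that is not shown. A
# minimal sketch, assuming the usual aligned-slicing helper from the Caser
# codebase (the exact original may differ):
def minibatch(*tensors, batch_size=128):
    # yield aligned slices of every tensor, batch_size rows at a time
    for i in range(0, len(tensors[0]), batch_size):
        yield tuple(t[i:i + batch_size] for t in tensors)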
def fit(self, train, test, verbose=False):
    sequences_np = train.sequences.sequences
    targets_np = train.sequences.targets
    users_np = train.sequences.user_ids.reshape(-1, 1)

    L, T = train.sequences.L, train.sequences.T

    n_train = sequences_np.shape[0]
    output_str = 'total training instances: %d' % n_train
    print(output_str)

    if not self._initialized:
        self._initialize(train)

    start_epoch = 0
    best_p1, best_p5, best_p10, best_r1, best_r5, best_r10, \
        best_map, best_n5, best_h5, best_f5 = [0 for _ in range(10)]

    for epoch_num in range(start_epoch, self._n_iter):
        t1 = time()

        self._net.train()

        users_np, sequences_np, targets_np = shuffle(users_np, sequences_np, targets_np)
        negatives_np = self._generate_negative_samples(users_np, train, n=self._neg_samples)

        users, sequences, targets, negatives = (torch.from_numpy(users_np).long(),
                                                torch.from_numpy(sequences_np).long(),
                                                torch.from_numpy(targets_np).long(),
                                                torch.from_numpy(negatives_np).long())
        users, sequences, targets, negatives = (users.to(self._device),
                                                sequences.to(self._device),
                                                targets.to(self._device),
                                                negatives.to(self._device))

        epoch_loss = 0.0
        for (minibatch_num,
             (batch_users, batch_sequences,
              batch_targets, batch_negatives)) in enumerate(
                  minibatch(users, sequences, targets, negatives,
                            batch_size=self._batch_size)):
            items_to_predict = torch.cat((batch_targets, batch_negatives), 1)
            items_prediction = self._net(batch_sequences, batch_users, items_to_predict)
            (targets_prediction, negatives_prediction) = torch.split(
                items_prediction,
                [batch_targets.size(1), batch_negatives.size(1)], dim=1)

            self._optimizer.zero_grad()
            positive_loss = -torch.mean(torch.log(torch.sigmoid(targets_prediction)))
            negative_loss = -torch.mean(torch.log(1 - torch.sigmoid(negatives_prediction)))
            loss = positive_loss + negative_loss
            epoch_loss += loss.item()

            loss.backward()
            self._optimizer.step()

        epoch_loss /= minibatch_num + 1

        t2 = time()
        if verbose and (epoch_num + 1) % 1 == 0:
            precision, recall, mean_aps, ndcgs, hrs, f1s = evaluate_ranking(
                self, test, train, k=[1, 5, 10])
            output_str = "Epoch %d [%.1f s]\tloss=%.5f, map=%.5f, " \
                         "NDCG@5=%.5f, HR@5=%.5f, f1@5=%.5f, " \
                         "prec@5=%.5f, recall@5=%.5f, [%.1f s]" % (
                             epoch_num + 1, t2 - t1, epoch_loss, mean_aps,
                             np.mean(ndcgs[1]), np.mean(hrs[1]), np.mean(f1s[1]),
                             np.mean(precision[1]), np.mean(recall[1]), time() - t2)
            print(output_str)

            # keep the best value of each metric seen so far
            best_p1 = max(best_p1, np.mean(precision[0]))
            best_p5 = max(best_p5, np.mean(precision[1]))
            best_p10 = max(best_p10, np.mean(precision[2]))
            best_r1 = max(best_r1, np.mean(recall[0]))
            best_r5 = max(best_r5, np.mean(recall[1]))
            best_r10 = max(best_r10, np.mean(recall[2]))
            best_map = max(best_map, mean_aps)
            best_n5 = max(best_n5, np.mean(ndcgs[1]))
            best_h5 = max(best_h5, np.mean(hrs[1]))
            best_f5 = max(best_f5, np.mean(f1s[1]))
        else:
            output_str = "Epoch %d [%.1f s]\tloss=%.5f [%.1f s]" % (
                epoch_num + 1, t2 - t1, epoch_loss, time() - t2)
            print(output_str)

    best_str = "best_p5=%.5f, best_r5=%.5f, best_f5=%.5f, best_n5=%.5f, best_h5=%.5f" % (
        best_p5, best_r5, best_f5, best_n5, best_h5)
    print(best_str)
def fit(self, train, test, verbose=False):
    """
    The general training loop to fit the model

    Parameters
    ----------
    train: :class:`spotlight.interactions.Interactions`
        training instances, also contains test sequences
    test: :class:`spotlight.interactions.Interactions`
        only contains targets for test sequences
    verbose: bool, optional
        print the logs
    """
    # convert to sequences, targets and users
    sequences_np = train.sequences.sequences
    targets_np = train.sequences.targets
    users_np = train.sequences.user_ids.reshape(-1, 1)

    L, T = train.sequences.L, train.sequences.T

    n_train = sequences_np.shape[0]
    output_str = 'total training instances: %d' % n_train
    print(output_str)

    if not self._initialized:
        self._initialize(train)

    start_epoch = 0
    for epoch_num in range(start_epoch, self._n_iter):
        t1 = time()

        # set model to training mode
        self._net.train()

        users_np, sequences_np, targets_np = shuffle(users_np, sequences_np, targets_np)
        negatives_np = self._generate_negative_samples(users_np, train, n=self._neg_samples)

        # convert numpy arrays to PyTorch tensors and move them to the corresponding device
        users, sequences, targets, negatives = (torch.from_numpy(users_np).long(),
                                                torch.from_numpy(sequences_np).long(),
                                                torch.from_numpy(targets_np).long(),
                                                torch.from_numpy(negatives_np).long())
        users, sequences, targets, negatives = (users.to(self._device),
                                                sequences.to(self._device),
                                                targets.to(self._device),
                                                negatives.to(self._device))

        epoch_loss = 0.0
        for (minibatch_num,
             (batch_users, batch_sequences,
              batch_targets, batch_negatives)) in enumerate(
                  minibatch(users, sequences, targets, negatives,
                            batch_size=self._batch_size)):
            items_to_predict = torch.cat((batch_targets, batch_negatives), 1)
            items_prediction = self._net(batch_sequences, batch_users, items_to_predict)
            (targets_prediction, negatives_prediction) = torch.split(
                items_prediction,
                [batch_targets.size(1), batch_negatives.size(1)], dim=1)

            self._optimizer.zero_grad()
            # compute the binary cross-entropy loss
            positive_loss = -torch.mean(torch.log(torch.sigmoid(targets_prediction)))
            negative_loss = -torch.mean(torch.log(1 - torch.sigmoid(negatives_prediction)))
            loss = positive_loss + negative_loss
            epoch_loss += loss.item()

            loss.backward()
            self._optimizer.step()

        epoch_loss /= minibatch_num + 1

        t2 = time()
        if verbose and (epoch_num + 1) % 10 == 0:
            precision, recall, mean_aps = evaluate_ranking(self, test, train, k=[1, 5, 10])
            output_str = "Epoch %d [%.1f s]\tloss=%.4f, map=%.4f, " \
                         "prec@1=%.4f, prec@5=%.4f, prec@10=%.4f, " \
                         "recall@1=%.4f, recall@5=%.4f, recall@10=%.4f, [%.1f s]" % (
                             epoch_num + 1, t2 - t1, epoch_loss, mean_aps,
                             np.mean(precision[0]), np.mean(precision[1]), np.mean(precision[2]),
                             np.mean(recall[0]), np.mean(recall[1]), np.mean(recall[2]),
                             time() - t2)
            print(output_str)
        else:
            output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (epoch_num + 1, t2 - t1,
                                                                    epoch_loss, time() - t2)
            print(output_str)
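# `_generate_negative_samples` is assumed to draw, for each training instance,
# `n` items the user has not interacted with in `train`. A minimal sketch of
# one plausible implementation (uniform sampling with rejection; the original
# sampler may differ, e.g. in how it treats padding ids or popularity):
def _generate_negative_samples(self, users, interactions, n):
    users_ = users.squeeze()
    negative_samples = np.zeros((users_.shape[0], n), np.int64)
    train_matrix = interactions.tocsr()  # sparse user-item matrix of seen items
    for i, u in enumerate(users_):
        seen = set(train_matrix[u].indices)
        for j in range(n):
            # item ids are assumed to start at 1 (0 reserved for padding)
            x = np.random.randint(1, self._num_items)
            while x in seen:
                x = np.random.randint(1, self._num_items)
            negative_samples[i, j] = x
    return negative_samples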
def fit(self, train, test, verbose=False):
    """
    The general training loop to fit the model

    Parameters
    ----------
    train: :class:`spotlight.interactions.Interactions`
        training instances, also contains test sequences
    test: :class:`spotlight.interactions.Interactions`
        only contains targets for test sequences
    verbose: bool, optional
        print the logs
    """
    # convert to sequences, targets and users
    sequences = train.sequences.sequences
    targets = train.sequences.targets
    users = train.sequences.user_ids.reshape(-1, 1)

    L, T = train.sequences.L, train.sequences.T

    n_train = sequences.shape[0]
    output_str = 'total training instances: %d' % n_train
    print(output_str)

    if not self._initialized:
        self._initialize(train)

    start_epoch = 0
    for epoch_num in range(start_epoch, self._n_iter):
        t1 = time()

        # set model to training mode
        self._net.train()

        users, sequences, targets = shuffle(users, sequences, targets)
        negative_samples = self._generate_negative_samples(users, train, n=self._neg_samples * T)

        sequences_tensor = gpu(torch.from_numpy(sequences), self._use_cuda)
        user_tensor = gpu(torch.from_numpy(users), self._use_cuda)
        item_target_tensor = gpu(torch.from_numpy(targets), self._use_cuda)
        item_negative_tensor = gpu(torch.from_numpy(negative_samples), self._use_cuda)

        epoch_loss = 0.0
        for minibatch_num, \
            (batch_sequence, batch_user, batch_target, batch_negative) \
                in enumerate(minibatch(sequences_tensor, user_tensor,
                                       item_target_tensor, item_negative_tensor,
                                       batch_size=self._batch_size)):
            sequence_var = Variable(batch_sequence)
            user_var = Variable(batch_user)
            item_target_var = Variable(batch_target)
            item_negative_var = Variable(batch_negative)

            # concatenate all variables to get predictions in one run
            items_var = torch.cat((item_target_var, item_negative_var), 1)
            items_prediction = self._net(sequence_var, user_var, items_var)
            (positive_prediction, negative_prediction) = torch.split(
                items_prediction,
                [item_target_var.size(1), item_negative_var.size(1)], dim=1)

            self._optimizer.zero_grad()
            # compute the binary cross-entropy loss
            loss = sigmoid_log_loss(positive_prediction, negative_prediction)
            epoch_loss += loss.item()  # loss.data[0] only works on pre-0.4 PyTorch

            loss.backward()
            self._optimizer.step()

        epoch_loss /= minibatch_num + 1

        t2 = time()
        if verbose and (epoch_num + 1) % 10 == 0:
            precision, recall, ndcg, mean_aps = evaluate_ranking(self, test, train, k=[3, 5, 10])
            str_precs = "precisions=%.4f,%.4f,%.4f" % tuple([np.mean(a) for a in precision])
            str_recalls = "recalls=%.4f,%.4f,%.4f" % tuple([np.mean(a) for a in recall])
            str_ndcgs = "ndcgs=%.4f,%.4f,%.4f" % tuple([np.mean(a) for a in ndcg])
            output_str = "Epoch %d [%.1f s]\tloss=%.4f, " \
                         "map=%.4f, %s, %s, %s[%.1f s]" % (epoch_num + 1, t2 - t1, epoch_loss,
                                                           mean_aps, str_precs, str_recalls,
                                                           str_ndcgs, time() - t2)
            print(output_str)
        else:
            output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (epoch_num + 1, t2 - t1,
                                                                    epoch_loss, time() - t2)
            print(output_str)

        self._save_checkpoint({
            'epoch_num': epoch_num + 1,
            'state_dict': self._net.state_dict(),
            'optimizer': self._optimizer.state_dict(),
        }, 'gcaser-%d-%d-L5T1.pth.tar' % (self.model_args.d, epoch_num + 1))
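# `_save_checkpoint` (and the module-level `_save_checkpoint` used by an
# earlier variant) is assumed to just serialize the passed state dict with
# `torch.save`, matching the `torch.load` resume path above. A minimal sketch;
# the original may additionally create directories or keep only the best file:
def _save_checkpoint(self, state, filename):
    # state holds 'epoch_num', 'state_dict' and 'optimizer' entries
    torch.save(state, filename)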
def fit(self, train, test, verbose=False):
    """
    The general training loop to fit the model

    Parameters
    ----------
    train: :class:`interactions.Interactions`
        training instances, also contains test sequences
    test: :class:`interactions.Interactions`
        only contains targets for test sequences
    verbose: bool, optional
        print the logs
    """
    # convert sequences, targets and users to numpy arrays
    sequences_np = train.sequences.sequences
    targets_np = train.sequences.targets
    users_np = train.sequences.user_ids.reshape(-1, 1)

    self.L, self.T = train.sequences.L, train.sequences.T

    n_train = sequences_np.shape[0]
    output_str = 'total training instances: %d' % n_train
    print(output_str)

    if not self._teacher_initialized:
        self._initialize_teacher(train)
    if not self._student_initialized:
        self._initialize_student(train)

    # precompute the teacher's top-K ranking for each training instance for
    # faster training; if the ranking is too large to keep in memory, it has
    # to be computed on the fly instead
    if os.path.isfile(self._teacher_topk_path):
        print('found teacher topk file, loading..')
        teacher_ranking = np.load(self._teacher_topk_path)
    else:
        print('teacher topk file not found, generating.. ')
        teacher_ranking = self._get_teacher_topk(sequences_np, users_np, targets_np, k=self._K)

    # initialize static weight (position importance weight)
    weight_static = np.array(range(1, self._K + 1), dtype=np.float32)
    weight_static = np.exp(-weight_static / self._lambda)
    weight_static = weight_static / np.sum(weight_static)
    weight_static = torch.from_numpy(weight_static).to(self._device)
    weight_static = weight_static.unsqueeze(0)

    # initialize dynamic weight (ranking discrepancy weight)
    weight_dynamic = None

    # count number of parameters
    print("Number of params in teacher model: %d" % compute_model_size(self._teacher_net))
    print("Number of params in student model: %d" % compute_model_size(self._student_net))

    indices = np.arange(n_train)
    start_epoch = 1
    for epoch_num in range(start_epoch, self._n_iter + 1):
        t1 = time()

        # set teacher model to evaluation mode and move it to the corresponding device
        self._teacher_net.eval()
        self._teacher_net = self._teacher_net.to(self._device)
        # set student model to training mode and move it to the corresponding device
        self._student_net.train()
        self._student_net = self._student_net.to(self._device)

        (users_np, sequences_np, targets_np), shuffle_indices = shuffle(
            users_np, sequences_np, targets_np, indices=True)
        # keep indices so the teacher's cached top-K ranking can be retrieved
        indices = indices[shuffle_indices]

        negatives_np = self._generate_negative_samples(users_np, train, n=self._neg_samples)
        dynamic_samples_np = self._generate_negative_samples(users_np, train,
                                                             n=self._num_dynamic_samples)

        # convert numpy arrays to PyTorch tensors and move them to the corresponding device
        users, sequences, targets, negatives = (torch.from_numpy(users_np).long(),
                                                torch.from_numpy(sequences_np).long(),
                                                torch.from_numpy(targets_np).long(),
                                                torch.from_numpy(negatives_np).long())
        users, sequences, targets, negatives = (users.to(self._device),
                                                sequences.to(self._device),
                                                targets.to(self._device),
                                                negatives.to(self._device))
        dynamic_samples = torch.from_numpy(dynamic_samples_np).long().to(self._device)

        epoch_loss = 0.0
        epoch_regular_loss = 0.0
        for (minibatch_num,
             (batch_indices, batch_users, batch_sequences,
              batch_targets, batch_negatives, batch_dynamics)) in enumerate(
                  minibatch(indices, users, sequences, targets, negatives,
                            dynamic_samples, batch_size=self._batch_size)):
            # retrieve the teacher's top-K ranking for the given indices
            batch_candidates = torch.from_numpy(
                teacher_ranking[batch_indices, :]).long().to(self._device)

            # concatenate all variables to get predictions in one run
            items_to_predict = torch.cat(
                (batch_targets, batch_negatives, batch_candidates, batch_dynamics), 1)
            items_prediction = self._student_net(batch_sequences, batch_users, items_to_predict)
            (targets_prediction, negatives_prediction,
             candidates_prediction, dynamics_prediction) = torch.split(
                 items_prediction,
                 [batch_targets.size(1), batch_negatives.size(1),
                  batch_candidates.size(1), batch_dynamics.size(1)], dim=1)

            self._optimizer.zero_grad()

            if epoch_num > self._dynamic_start_epoch:
                # compute the dynamic (ranking discrepancy) weight
                dynamic_weights = list()
                for col in range(self._K):
                    col_prediction = candidates_prediction[:, col].unsqueeze(1)
                    # estimate each candidate's rank from a small set of sampled items
                    num_smaller_than = torch.sum(col_prediction < dynamics_prediction,
                                                 dim=1).float()
                    relative_rank = num_smaller_than / self._num_dynamic_samples
                    predicted_rank = torch.floor(self._num_items * relative_rank)

                    dynamic_weight = torch.tanh(self._mu * (predicted_rank - col))
                    dynamic_weight = torch.clamp(dynamic_weight, min=0.0)
                    dynamic_weights.append(dynamic_weight)
                weight_dynamic = torch.stack(dynamic_weights, 1)

                # hybrid of the two weights
                weight = weight_dynamic * weight_static
                if self._weight_renormalize:
                    weight = F.normalize(weight, p=1, dim=1)
            else:
                weight = weight_static

            # detach the weight to stop the gradient from flowing into it
            weight = weight.detach()

            loss, regular_loss = weighted_sigmoid_log_loss(targets_prediction,
                                                           negatives_prediction,
                                                           candidates_prediction,
                                                           weight, self._teach_alpha)
            epoch_loss += loss.item()
            epoch_regular_loss += regular_loss.item()

            loss.backward()
            self._optimizer.step()

        epoch_loss /= minibatch_num + 1
        epoch_regular_loss /= minibatch_num + 1

        t2 = time()
        if verbose and epoch_num % 10 == 0:
            precision, recall, ndcg, mean_aps = evaluate_ranking(self, test, train, k=[3, 5, 10])
            str_precs = "precisions=%.4f,%.4f,%.4f" % tuple([np.mean(a) for a in precision])
            str_recalls = "recalls=%.4f,%.4f,%.4f" % tuple([np.mean(a) for a in recall])
            str_ndcgs = "ndcgs=%.4f,%.4f,%.4f" % tuple([np.mean(a) for a in ndcg])
            output_str = "Epoch %d [%.1f s]\tloss=%.4f, regular_loss=%.4f, " \
                         "map=%.4f, %s, %s, %s[%.1f s]" % (epoch_num, t2 - t1, epoch_loss,
                                                           epoch_regular_loss, mean_aps,
                                                           str_precs, str_recalls, str_ndcgs,
                                                           time() - t2)
            print(output_str)
        else:
            output_str = "Epoch %d [%.1f s]\tloss=%.4f, regular_loss=%.4f[%.1f s]" % (
                epoch_num, t2 - t1, epoch_loss, epoch_regular_loss, time() - t2)
            print(output_str)
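# `weighted_sigmoid_log_loss` is assumed to combine the plain binary
# cross-entropy on targets/negatives with a weighted distillation term that
# pushes the teacher's top-K candidates up, scaled by `teach_alpha`, and to
# return both the total and the plain (regular) part, matching how the two
# running losses above are tracked. A minimal sketch of that reading; the
# original implementation may differ:
def weighted_sigmoid_log_loss(targets_prediction, negatives_prediction,
                              candidates_prediction, weight, teach_alpha):
    # regular binary cross-entropy on observed targets and sampled negatives
    regular_loss = (-torch.mean(torch.log(torch.sigmoid(targets_prediction)))
                    - torch.mean(torch.log(1 - torch.sigmoid(negatives_prediction))))
    # weighted distillation term on the teacher's top-K candidates
    distill_loss = -torch.mean(
        torch.sum(weight * torch.log(torch.sigmoid(candidates_prediction)), dim=1))
    loss = regular_loss + teach_alpha * distill_loss
    return loss, regular_loss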
def fit(self, train, test, verbose=False):
    """
    The general training loop to fit the model

    Parameters
    ----------
    train: :class:`spotlight.interactions.Interactions`
        training instances, also contains test sequences
    test: :class:`spotlight.interactions.Interactions`
        only contains targets for test sequences
    verbose: bool, optional
        print the logs
    """
    # convert to sequences, targets and users
    sequences_np = train.sequences.sequences
    targets_np = train.sequences.targets
    users_np = train.sequences.user_ids.reshape(-1, 1)

    L, T = train.sequences.L, train.sequences.T

    n_train = sequences_np.shape[0]
    output_str = 'total training instances: %d' % n_train
    print(output_str)

    if not self._initialized:
        self._initialize(train)

    start_epoch = 0
    for epoch_num in range(start_epoch, self._n_iter):
        t1 = time()

        users_np, sequences_np, targets_np = shuffle(users_np, sequences_np, targets_np)
        negatives_np = self._generate_negative_samples(users_np, train, n=self._neg_samples)

        epoch_loss = 0.0
        for (minibatch_num,
             (batch_users, batch_sequences,
              batch_targets, batch_negatives)) in enumerate(
                  minibatch(users_np, sequences_np, targets_np, negatives_np,
                            batch_size=self._batch_size)):
            items_to_predict = np.concatenate((batch_targets, batch_negatives), 1)
            loss = self._net.train(self.sess, batch_sequences, batch_users, items_to_predict)
            epoch_loss += loss

        epoch_loss /= minibatch_num + 1

        t2 = time()
        if verbose and (epoch_num + 1) % 10 == 0:
            precision, recall, mean_aps = evaluate_ranking(self, test, train, k=[1, 5, 10])
            output_str = "Epoch %d [%.1f s]\tloss=%.4f, map=%.4f, " \
                         "prec@1=%.4f, prec@5=%.4f, prec@10=%.4f, " \
                         "recall@1=%.4f, recall@5=%.4f, recall@10=%.4f, [%.1f s]" % (
                             epoch_num + 1, t2 - t1, epoch_loss, mean_aps,
                             np.mean(precision[0]), np.mean(precision[1]), np.mean(precision[2]),
                             np.mean(recall[0]), np.mean(recall[1]), np.mean(recall[2]),
                             time() - t2)
            print(output_str)
        else:
            output_str = "Epoch %d [%.1f s]\tloss=%.4f [%.1f s]" % (epoch_num + 1, t2 - t1,
                                                                    epoch_loss, time() - t2)
            print(output_str)
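# Every loop relies on a `shuffle` utility that permutes several aligned
# arrays together and, when called with `indices=True`, also returns the
# permutation (the distillation variants use it to map batches back into the
# cached teacher ranking). A minimal sketch of that contract:
def shuffle(*arrays, indices=False):
    # one common random permutation applied to every array
    shuffle_indices = np.arange(len(arrays[0]))
    np.random.shuffle(shuffle_indices)
    shuffled = tuple(x[shuffle_indices] for x in arrays)
    if indices:
        return shuffled, shuffle_indices
    return shuffled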
def fit(self, train, test, verbose=False):
    sequences = train.sequences.sequences
    targets = train.sequences.targets
    users = train.sequences.user_ids.reshape(-1, 1)

    self.L, self.T = train.sequences.L, train.sequences.T

    n_train = sequences.shape[0]
    output_str = 'total training instances: %d' % n_train
    print(output_str)

    if not self._teacher_initialized:
        self._initialize_teacher(train)
    if not self._student_initialized:
        self._initialize_student(train)

    # make teacher top-K ranking
    candidates = self._get_teacher_topk(sequences, users, targets, self._K,
                                        self._teacher_topk_path)

    # initialize static weight
    weight_static = np.array(range(1, self._K + 1), dtype=np.float32)
    weight_static = np.exp(-weight_static / self._lambda)
    weight_static = weight_static / np.sum(weight_static)
    weight_static = Variable(gpu(torch.from_numpy(weight_static), self._use_cuda)).unsqueeze(0)

    # initialize dynamic weight
    weight_warp = None

    # count number of parameters
    num_params = 0
    for param in self._net.parameters():
        num_params += param.view(-1).size()[0]
    print("Number of params: %d" % num_params)

    indices = np.arange(n_train)
    start_epoch = 0
    for epoch_num in range(start_epoch, self._n_iter):
        t1 = time()

        # set model to training mode
        self._net.train()

        (users, sequences, targets), shuffle_indices = shuffle(users, sequences, targets,
                                                               indices=True)
        # keep indices so the teacher's top-K ranking can be retrieved
        indices = indices[shuffle_indices]

        negative_samples = self._generate_negative_samples(users, train,
                                                           n=self._neg_samples * self.T)
        dynamic_samples = self._generate_samples(users, n=self._dynamic_samples)

        sequences_tensor = gpu(torch.from_numpy(sequences), self._use_cuda)
        user_tensor = gpu(torch.from_numpy(users), self._use_cuda)
        item_target_tensor = gpu(torch.from_numpy(targets), self._use_cuda)
        item_negative_tensor = gpu(torch.from_numpy(negative_samples), self._use_cuda)
        dynamic_sample_tensor = gpu(torch.from_numpy(dynamic_samples), self._use_cuda)

        epoch_loss = 0.0
        epoch_regular_loss = 0.0
        for minibatch_num, \
            (batch_indices, batch_sequence, batch_user, batch_target,
             batch_negative, batch_dynamic) \
                in enumerate(minibatch(indices, sequences_tensor, user_tensor,
                                       item_target_tensor, item_negative_tensor,
                                       dynamic_sample_tensor,
                                       batch_size=self._batch_size)):
            sequence_var = Variable(batch_sequence)
            user_var = Variable(batch_user)
            item_target_var = Variable(batch_target)
            item_negative_var = Variable(batch_negative)
            dynamic_sample_var = Variable(batch_dynamic)

            # retrieve the teacher's top-K ranking for the given indices
            teacher_topk_var = Variable(gpu(torch.from_numpy(candidates[batch_indices, :]),
                                            self._use_cuda))

            # concatenate all variables to get predictions in one run
            items_var = torch.cat((item_target_var, item_negative_var,
                                   teacher_topk_var, dynamic_sample_var), 1)
            items_prediction = self._net(sequence_var, user_var, items_var)
            (positive_prediction, negative_prediction,
             teacher_topk_prediction, dynamic_sample_prediction) = torch.split(
                 items_prediction,
                 [item_target_var.size(1), item_negative_var.size(1),
                  teacher_topk_var.size(1), dynamic_sample_var.size(1)], dim=1)

            self._optimizer.zero_grad()

            # compute dynamic weight
            dynamic_weights = list()
            for col in range(self._K):
                col_prediction = teacher_topk_prediction[:, col].unsqueeze(1)
                # estimate each candidate's rank from the dynamic samples
                _dynamic_weight = torch.sum(col_prediction < dynamic_sample_prediction,
                                            dim=1).float() / self._dynamic_samples
                _dynamic_weight = torch.floor(self._num_items * _dynamic_weight)

                dynamic_weight = torch.tanh(self._mu * (_dynamic_weight - col))
                dynamic_weight = torch.clamp(dynamic_weight, min=0.0)
                dynamic_weights.append(dynamic_weight)
            weight_dynamic = torch.stack(dynamic_weights, 1)

            if epoch_num + 1 >= self._dynamic_start_epoch:
                weight = weight_dynamic * weight_static
                weight = F.normalize(weight, p=1, dim=1)
            else:
                weight = weight_dynamic
            weight = weight.detach()

            loss, regular_loss = self._loss_func(positive_prediction, negative_prediction,
                                                 teacher_topk_prediction, weight,
                                                 self._teach_alpha)
            epoch_loss += loss.item()  # loss.data[0] only works on pre-0.4 PyTorch
            epoch_regular_loss += regular_loss.item()

            loss.backward()
            self._optimizer.step()

        epoch_loss /= minibatch_num + 1
        epoch_regular_loss /= minibatch_num + 1

        t2 = time()
        if verbose and (epoch_num + 1) % 10 == 0:
            precision, recall, ndcg, mean_aps = evaluate_ranking(self, test, train, k=[3, 5, 10])
            str_precs = "precisions=%.4f,%.4f,%.4f" % tuple([np.mean(a) for a in precision])
            str_recalls = "recalls=%.4f,%.4f,%.4f" % tuple([np.mean(a) for a in recall])
            str_ndcgs = "ndcgs=%.4f,%.4f,%.4f" % tuple([np.mean(a) for a in ndcg])
            output_str = "Epoch %d [%.1f s]\tloss=%.4f, regular_loss=%.4f, " \
                         "map=%.4f, %s, %s, %s[%.1f s]" % (epoch_num + 1, t2 - t1, epoch_loss,
                                                           epoch_regular_loss, mean_aps,
                                                           str_precs, str_recalls, str_ndcgs,
                                                           time() - t2)
            print(output_str)
        else:
            output_str = "Epoch %d [%.1f s]\tloss=%.4f, regular_loss=%.4f[%.1f s]" % (
                epoch_num + 1, t2 - t1, epoch_loss, epoch_regular_loss, time() - t2)
            print(output_str)
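# Both distillation variants use the same two weighting schemes over the
# teacher's K candidates; in math form (position k = 1..K, estimated student
# rank \hat{r}_k of candidate k):
#
#   static (position importance):   w^s_k = exp(-k / lambda) / sum_j exp(-j / lambda)
#   dynamic (rank discrepancy):     w^d_k = max(0, tanh(mu * (\hat{r}_k - k)))
#
# where \hat{r}_k is estimated by counting how many of the sampled items the
# student scores above candidate k and rescaling that fraction to the full
# item catalogue. Candidates the student already ranks at or above the
# teacher's position get zero dynamic weight.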
def fit(self, train, test, verbose=False):
    """
    The general training loop to fit the model

    Parameters
    ----------
    train: :class:`spotlight.interactions.Interactions`
        training instances, also contains test sequences
    test: :class:`spotlight.interactions.Interactions`
        only contains targets for test sequences
    verbose: bool, optional
        print the logs
    """
    # convert to sequences, targets and users
    sequences_np = train.sequences.sequences
    targets_np = train.sequences.targets
    users_np = train.sequences.user_ids.reshape(-1, 1)

    L, T = train.sequences.L, train.sequences.T

    n_train = sequences_np.shape[0]
    output_str = 'total training instances: %d' % n_train
    print(output_str)

    if not self._initialized:
        self._initialize(train)

    start_epoch = 0
    best_map = 0

    ### create the checkpoint directory if it does not exist
    save_dir = args.save_root + args.dataset + '/'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    results = pd.DataFrame()

    for epoch_num in range(start_epoch, self._n_iter):
        t1 = time()

        # set model to training mode
        self._net.train()

        users_np, sequences_np, targets_np = shuffle(users_np, sequences_np, targets_np)
        negatives_np = self._generate_negative_samples(users_np, train, n=self._neg_samples)

        # convert numpy arrays to PyTorch tensors and move them to the corresponding device
        users, sequences, targets, negatives = (torch.from_numpy(users_np).long(),
                                                torch.from_numpy(sequences_np).long(),
                                                torch.from_numpy(targets_np).long(),
                                                torch.from_numpy(negatives_np).long())
        users, sequences, targets, negatives = (users.to(self._device),
                                                sequences.to(self._device),
                                                targets.to(self._device),
                                                negatives.to(self._device))

        epoch_loss = 0.0
        for (minibatch_num,
             (batch_users, batch_sequences,
              batch_targets, batch_negatives)) in enumerate(
                  minibatch(users, sequences, targets, negatives,
                            batch_size=self._batch_size)):
            items_to_predict = torch.cat((batch_targets, batch_negatives), 1)
            items_prediction = self._net(batch_sequences, batch_users, items_to_predict)
            (targets_prediction, negatives_prediction) = torch.split(
                items_prediction,
                [batch_targets.size(1), batch_negatives.size(1)], dim=1)

            self._optimizer.zero_grad()
            # compute the binary cross-entropy loss
            positive_loss = -torch.mean(torch.log(torch.sigmoid(targets_prediction)))
            negative_loss = -torch.mean(torch.log(1 - torch.sigmoid(negatives_prediction)))
            loss = positive_loss + negative_loss
            epoch_loss += loss.item()

            loss.backward()
            self._optimizer.step()

        epoch_loss /= minibatch_num + 1

        parameterset = {}
        t2 = time()
        if verbose:
            precision, recall, mean_aps = evaluate_ranking(self, test, train, k=[1, 5, 10])
            output_str = "Epoch %d [%.1f s]\tloss=%.4f, map=%.4f, " \
                         "prec@1=%.4f, prec@5=%.4f, prec@10=%.4f, " \
                         "recall@1=%.4f, recall@5=%.4f, recall@10=%.4f, " \
                         "f1_score@1=%.4f, f1_score@5=%.4f, f1_score@10=%.4f, [%.1f s]" % (
                             epoch_num + 1, t2 - t1, epoch_loss, mean_aps,
                             np.mean(precision[0]), np.mean(precision[1]), np.mean(precision[2]),
                             np.mean(recall[0]), np.mean(recall[1]), np.mean(recall[2]),
                             f1_score(np.mean(precision[0]), np.mean(recall[0])),
                             f1_score(np.mean(precision[1]), np.mean(recall[1])),
                             f1_score(np.mean(precision[2]), np.mean(recall[2])),
                             time() - t2)

            parameterset["Epoch"] = epoch_num + 1
            parameterset["time1"] = t2 - t1
            parameterset["loss"] = epoch_loss
            parameterset["map"] = mean_aps
            parameterset["prec@1"] = np.mean(precision[0])
            parameterset["prec@5"] = np.mean(precision[1])
            parameterset["prec@10"] = np.mean(precision[2])
            parameterset["recall@1"] = np.mean(recall[0])
            parameterset["recall@5"] = np.mean(recall[1])
            parameterset["recall@10"] = np.mean(recall[2])
            parameterset["f1_score@1"] = f1_score(np.mean(precision[0]), np.mean(recall[0]))
            parameterset["f1_score@5"] = f1_score(np.mean(precision[1]), np.mean(recall[1]))
            parameterset["f1_score@10"] = f1_score(np.mean(precision[2]), np.mean(recall[2]))
            parameterset["time2"] = time() - t2
            # DataFrame.append was removed in pandas 2.0; concat a one-row frame instead
            results = pd.concat([results, pd.DataFrame([parameterset])], ignore_index=True)
            print(output_str)

            # keep a checkpoint of the model with the best mAP so far
            if mean_aps > best_map:
                best_map = mean_aps
                checkpoint_name = "best_model.pth.tar"
                save_checkpoint({
                    'epoch': epoch_num + 1,
                    'state_dict': self._net.state_dict(),
                    'optimizer': self._optimizer.state_dict(),
                }, checkpoint_name, save_dir)

    print('***** Best map: {0:.4f} *****'.format(best_map))
    results.to_csv("results/ml1m_hold", index=False)