def __init__(self, opt):
    """Set up the MF model, its optimizer and the LR scheduler from config ``opt``."""
    super(MFBPRFactorizer, self).__init__(opt)
    self.opt = opt
    self.model = MF(opt)
    # Push the model onto the configured device when CUDA is enabled
    # (self.use_cuda is expected to be set by the parent constructor).
    if self.use_cuda:
        use_cuda(True, opt['device_id'])
        self.model.cuda()
    self.optimizer = use_optimizer(self.model, opt)
    # Exponentially decay the learning rate at the configured rate.
    self.scheduler = ExponentialLR(self.optimizer, gamma=opt['lr_exp_decay'])
def init_episode(self):
    """Reset training state for a new episode: fresh model, optimizer and scheduler."""
    opt = self.opt
    self._train_step_idx = 0
    self.model = MF(opt)
    # Same device placement as in __init__.
    if self.use_cuda:
        use_cuda(True, opt['device_id'])
        self.model.cuda()
    self.optimizer = use_optimizer(self.model, opt)
    self.scheduler = ExponentialLR(self.optimizer, gamma=opt['lr_exp_decay'])
    # Snapshot the freshly initialised parameters (read back by the
    # adaptive-regularization path in update()).
    self.param = [p.data.clone() for p in self.model.parameters()]
class MFBPRFactorizer(BPR_Factorizer):
    """BPR factorizer backed by a matrix-factorization (MF) model.

    Owns the MF model, its optimizer and an exponential learning-rate
    scheduler; ``update`` performs one BPR training step on a (u, i, j)
    triple batch and optionally snapshots gradients / Adam state.
    """

    def __init__(self, opt):
        # `opt` is a dict-like config; keys used here: 'device_id',
        # 'lr_exp_decay' (plus whatever use_optimizer/MF read).
        super(MFBPRFactorizer, self).__init__(opt)
        self.model = MF(opt)
        self.opt = opt
        # self.use_cuda — presumably set by the parent constructor; confirm.
        if self.use_cuda:
            use_cuda(True, opt['device_id'])
            self.model.cuda()
        self.optimizer = use_optimizer(self.model, opt)
        self.scheduler = ExponentialLR(self.optimizer, gamma=opt['lr_exp_decay'])

    def init_episode(self):
        # Rebuild model/optimizer from scratch so each episode starts from a
        # fresh initialization, and reset the step counter.
        opt = self.opt
        self.model = MF(opt)
        self._train_step_idx = 0
        if self.use_cuda:
            use_cuda(True, opt['device_id'])
            self.model.cuda()
        self.optimizer = use_optimizer(self.model, opt)
        self.scheduler = ExponentialLR(self.optimizer, gamma=opt['lr_exp_decay'])
        # Cache a copy of the initial parameters (see the adaptive-method
        # branch at the end of update()).
        self.param = [p.data.clone() for p in self.model.parameters()]

    def update(self, sampler, l2_lambda):
        """Update MF model parameters given one (u, i, j) training batch.

        Args:
            sampler: provides ``get_sample('train')`` -> (u, i, j)
                LongTensors of user / positive-item / negative-item ids.
            l2_lambda: pytorch Tensor, dimension-wise lambda passed through
                to ``model.l2_penalty``.

        Returns:
            float: the non-regularized (preference) loss of this batch.
        """
        super(MFBPRFactorizer, self).update(sampler, l2_lambda)
        u, i, j = sampler.get_sample('train')
        assert isinstance(u, torch.LongTensor)
        assert isinstance(i, torch.LongTensor)
        assert isinstance(j, torch.LongTensor)
        # BPR target: every sampled triple is a "preferred" pair (label 1).
        preference = torch.ones(u.size()[0])
        if self.use_cuda:
            u, i, j = u.cuda(), i.cuda(), j.cuda()
            preference = preference.cuda()
        prob_preference = self.model.forward_triple(u, i, j)
        # Both loss terms are averaged over the batch size.
        non_reg_loss = self.criterion(prob_preference, preference) / (u.size()[0])
        l2_reg_loss = self.model.l2_penalty(l2_lambda, u, i, j) / (u.size()[0])
        loss = non_reg_loss + l2_reg_loss
        # NOTE(review): no optimizer.zero_grad() here — presumably the parent
        # update() clears gradients; confirm, otherwise they accumulate
        # across calls.
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
        if (self.opt['optimizer'] == 'adam') and self.is_assumed:
            # Snapshot gradients and Adam moment estimates *before* stepping,
            # since optimizer.step() mutates both parameters and state.
            param_grad = []
            for p in self.model.parameters(
            ):  # Note: deepcopy model lose the gradients
                if p.grad is not None:
                    param_grad.append(p.grad.data.clone())
                else:
                    # Parameter received no gradient this step; stand in a
                    # zero tensor so the snapshot stays aligned.
                    print('\tNo gradient!')
                    param_grad.append(torch.zeros_like(p.data))
            self.param_grad = param_grad
            # Adam per-parameter state: step count and first/second moment
            # estimates (only entries that have been populated).
            self.optim_status = {
                'step': [
                    v['step'] for _, v in self.optimizer.state.items()
                    if len(v) > 0
                ],
                'exp_avg': [
                    v['exp_avg'].data.clone() for _, v in
                    self.optimizer.state.items() if len(v) > 0
                ],
                'exp_avg_sq': [
                    v['exp_avg_sq'].data.clone() for _, v in
                    self.optimizer.state.items() if len(v) > 0
                ]
            }
        self.optimizer.step()
        if 'alter' in self.opt['regularizer']:
            # only adaptive methods need to cache current param
            self.param = [p.data.clone() for p in self.model.parameters()]
        # Expose the (scalar) regularization penalty for logging/inspection.
        self.l2_penalty = l2_reg_loss.item()
        return non_reg_loss.item()