def optimizer_reset(self, learning_rate):
    """Rebuild the dense (and, if present, sparse) optimizers at a new learning rate."""
    self.learning_rate = learning_rate
    if self.optimizer_type == "Adam":
        # L2 regularization is applied to the dense parameters via weight decay.
        self.optimizer = optim.Adam(
            self.dense_parameters,
            lr=learning_rate,
            weight_decay=self.l2_lambda)
        if len(self.sparse_parameters) > 0:
            # Sparse gradients (e.g. from nn.Embedding(sparse=True)) require
            # SparseAdam rather than plain Adam.
            self.sparse_optimizer = optim.SparseAdam(
                self.sparse_parameters, lr=learning_rate)
        else:
            self.sparse_optimizer = None
    elif self.optimizer_type == "SGD":
        self.optimizer = optim.SGD(
            self.dense_parameters,
            lr=learning_rate,
            weight_decay=self.l2_lambda)
        if len(self.sparse_parameters) > 0:
            # SGD handles sparse gradients directly; weight decay is still
            # skipped for the sparse parameters.
            self.sparse_optimizer = optim.SGD(
                self.sparse_parameters, lr=learning_rate)
        else:
            self.sparse_optimizer = None

    if the_gpu() >= 0:
        # Freshly built optimizer state lives on the CPU; move it to the GPU.
        recursively_set_device(self.optimizer.state_dict(), the_gpu())
        if self.sparse_optimizer is not None:
            recursively_set_device(self.sparse_optimizer.state_dict(), the_gpu())
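# A minimal usage sketch (an assumption, not part of the original file):
# rebuilding the optimizers with a decayed learning rate during training.
# The names `trainer`, `base_lr`, `decay_rate`, and `decay_interval` are
# hypothetical.
#
#     new_lr = base_lr * decay_rate ** (trainer.step // decay_interval)
#     trainer.optimizer_reset(new_lr)
#
# Rebuilding (rather than mutating param_groups) resets any accumulated
# optimizer state, such as Adam's moment estimates.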
def save(self, filename):
    """Serialize training state to `filename`, always storing Tensors on the CPU."""
    if the_gpu() >= 0:
        # Always send Tensors to the CPU before serializing, so the
        # checkpoint can be loaded on machines without a GPU.
        recursively_set_device(self.model.state_dict(), gpu=-1)
        recursively_set_device(self.optimizer.state_dict(), gpu=-1)
    save_dict = {
        'step': self.step,
        'best_dev_error': self.best_dev_error,
        'best_dev_step': self.best_dev_step,
        'model_state_dict': self.model.state_dict(),
        'optimizer_state_dict': self.optimizer.state_dict(),
        'vocabulary': self.vocabulary
    }
    if self.sparse_optimizer is not None:
        save_dict['sparse_optimizer_state_dict'] = \
            self.sparse_optimizer.state_dict()
    torch.save(save_dict, filename)
    if the_gpu() >= 0:
        # Move the live model and optimizer state back onto the GPU.
        recursively_set_device(self.model.state_dict(), gpu=the_gpu())
        recursively_set_device(self.optimizer.state_dict(), gpu=the_gpu())
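# A hypothetical counterpart to save(), sketched from the checkpoint keys
# written above; it is not part of the original file. It assumes the model
# and optimizers have already been constructed with matching shapes.
def load(self, filename):
    # map_location='cpu' keeps loading GPU-free; tensors can be moved back
    # to the GPU afterwards (e.g. via recursively_set_device) if needed.
    checkpoint = torch.load(filename, map_location='cpu')
    self.model.load_state_dict(checkpoint['model_state_dict'])
    self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    if self.sparse_optimizer is not None and \
            'sparse_optimizer_state_dict' in checkpoint:
        self.sparse_optimizer.load_state_dict(
            checkpoint['sparse_optimizer_state_dict'])
    self.step = checkpoint['step']
    self.best_dev_error = checkpoint['best_dev_error']
    self.best_dev_step = checkpoint['best_dev_step']
    self.vocabulary = checkpoint['vocabulary']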