def _test_restore_with_val_losses(self, checkpointer, val_losses, best_epoch):
    generator = some_data_generator(BestModelRestoreTest.batch_size)
    best_epoch_weights = None
    checkpointer.set_params({'epochs': len(val_losses), 'steps': 1})
    checkpointer.set_model(self.model)
    checkpointer.on_train_begin({})
    for epoch, val_loss in enumerate(val_losses, 1):
        checkpointer.on_epoch_begin(epoch, {})
        checkpointer.on_batch_begin(1, {})
        loss = self._update_model(generator)
        checkpointer.on_batch_end(1, {
            'batch': 1,
            'size': BestModelRestoreTest.batch_size,
            'loss': loss
        })
        checkpointer.on_epoch_end(epoch, {
            'epoch': epoch,
            'loss': loss,
            'val_loss': val_loss
        })
        if epoch == best_epoch:
            best_epoch_weights = torch_to_numpy(self.model.get_weight_copies())
    checkpointer.on_train_end({})
    final_weights = torch_to_numpy(self.model.get_weight_copies())
    self.assertEqual(best_epoch_weights, final_weights)
def acc(y_pred_tensor, y_true_tensor):
    y_pred = torch_to_numpy(y_pred_tensor)
    y_true = torch_to_numpy(y_true_tensor)
    predictions = list()
    for yp, yt in zip(y_pred, y_true):
        predictions.append(np.argmax(yp) == yt)
    return y_pred_tensor.data.new([np.mean(predictions) * 100])
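# Usage sketch for the ``acc`` metric above (illustrative, not from the source;
# the tensors and the expected value are made up). It assumes ``torch_to_numpy``
# and ``np`` (numpy) are in scope, as in the surrounding module. ``acc`` takes
# per-sample class scores and integer labels and returns the accuracy as a
# percentage wrapped in a tensor of the same type as ``y_pred_tensor``.
import torch

logits = torch.tensor([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])  # 3 samples, 2 classes
labels = torch.tensor([1, 0, 0])                             # the last prediction is wrong
print(acc(logits, labels))  # -> tensor([66.6667]): 2 of 3 predictions are correct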
def _test_restore_best(self, val_losses):
    final_weights = torch_to_numpy(self.model.get_weight_copies())
    epoch = val_losses.index(min(val_losses)) + 1
    best_epoch_filename = self.checkpoint_filename.format(epoch=epoch)
    self.model.load_weights(best_epoch_filename)
    best_weights = torch_to_numpy(self.model.get_weight_copies())
    self.assertEqual(best_weights, final_weights)
def acc(y_pred_tensor, y_true_tensor):
    y_pred_tensor = y_pred_tensor.view(y_pred_tensor.shape[0] * y_pred_tensor.shape[1], -1)
    y_true_tensor = y_true_tensor.view(y_true_tensor.shape[0] * y_true_tensor.shape[1])
    y_pred = torch_to_numpy(y_pred_tensor)
    y_true = torch_to_numpy(y_true_tensor)
    predictions = list()
    for yp, yt in zip(y_pred, y_true):
        if yt != 0:
            predictions.append(np.argmax(yp) == yt)
    return y_pred_tensor.data.new([np.mean(predictions) * 100])
def f1(y_pred_tensor, y_true_tensor):
    y_pred_tensor = y_pred_tensor.view(y_pred_tensor.shape[0] * y_pred_tensor.shape[1], -1)
    y_true_tensor = y_true_tensor.view(y_true_tensor.shape[0] * y_true_tensor.shape[1])
    y_pred = torch_to_numpy(y_pred_tensor)
    y_true = torch_to_numpy(y_true_tensor)
    predictions = list()
    truths = list()
    for yp, yt in zip(y_pred, y_true):
        if yt != 0:
            predictions.append(np.argmax(yp))
            truths.append(yt)
    return torch.FloatTensor([f1_score(truths, predictions, average='macro')])
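# Usage sketch for the sequence-level ``acc`` and ``f1`` metrics above
# (illustrative, not from the source; the tensors are made up). Both metrics
# flatten (batch, seq_len, n_classes) scores and (batch, seq_len) labels and
# skip positions whose true label is 0, which is assumed here to be the padding
# index. Requires ``torch_to_numpy``, ``np`` and sklearn's ``f1_score`` to be in
# scope, as in the surrounding module.
import torch

scores = torch.randn(2, 5, 4)            # batch of 2, sequences of length 5, 4 tags
tags = torch.tensor([[1, 2, 3, 0, 0],    # trailing zeros are padding and are
                     [2, 2, 1, 3, 0]])   # ignored by both metrics
print(acc(scores, tags))  # accuracy in percent over the non-padded positions
print(f1(scores, tags))   # macro-averaged F1 over the non-padded positions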
def predict_generator(self, generator, *, steps=None):
    """
    Returns the predictions of the network given batches of samples ``x``,
    where the tensors are converted into Numpy arrays.

    Args:
        generator: Generator-like object for the dataset. The generator must
            yield a batch of samples. See the ``fit_generator()`` method for
            details on the types of generators supported.
        steps (int, optional): Number of iterations done on ``generator``.
            (Defaults to the number of steps needed to see the entire dataset)

    Returns:
        List of the predictions of each batch with tensors converted into
        Numpy arrays.
    """
    self.model.eval()
    if steps is None and hasattr(generator, '__len__'):
        steps = len(generator)
    pred_y = []
    with torch.no_grad():
        for _, x in _get_step_iterator(steps, generator):
            x = self._process_input(x)
            pred_y.append(torch_to_numpy(self.model(x)))
    return pred_y
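# Usage sketch for ``predict_generator`` above (illustrative, not from the
# source). It assumes ``model`` is an already-constructed instance of the
# wrapper class defining the method, and that the loader yields batches of
# inputs only. A PyTorch DataLoader has ``__len__()``, so ``steps`` can be
# left at its default; each batch is forwarded through the network and
# converted to a Numpy array.
#
#     loader = some_dataloader_yielding_x_batches          # hypothetical loader
#     per_batch_preds = model.predict_generator(loader)    # one Numpy array per batch
#     all_preds = np.concatenate(per_batch_preds, axis=0)  # merge into a single array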
def _test_checkpointer(self, checkpointer, lr_scheduler):
    scheduler_states = {}
    generator = some_data_generator(OptimizerCheckpointTest.batch_size)
    checkpointer.set_params({
        'epochs': OptimizerCheckpointTest.epochs,
        'steps': 1
    })
    checkpointer.set_model(self.model)
    checkpointer.on_train_begin({})
    for epoch in range(1, OptimizerCheckpointTest.epochs + 1):
        checkpointer.on_epoch_begin(epoch, {})
        checkpointer.on_batch_begin(1, {})
        loss = self._update_model(generator)
        checkpointer.on_batch_end(1, {
            'batch': 1,
            'size': OptimizerCheckpointTest.batch_size,
            'loss': loss
        })
        checkpointer.on_epoch_end(epoch, {
            'epoch': epoch,
            'loss': loss,
            'val_loss': 1
        })
        filename = self.checkpoint_filename.format(epoch=epoch)
        self.assertTrue(os.path.isfile(filename))
        scheduler_states[epoch] = torch_to_numpy(lr_scheduler.scheduler.state_dict(), copy=True)
    checkpointer.on_train_end({})
    self._test_checkpoint(scheduler_states, lr_scheduler)
def _test_checkpoint(self, optimizer_states):
    for epoch, epoch_optimizer_state in optimizer_states.items():
        filename = self.checkpoint_filename.format(epoch=epoch)
        self.model.load_optimizer_state(filename)
        saved_optimizer_state = torch_to_numpy(self.optimizer.state_dict())
        self.assertEqual(epoch_optimizer_state, saved_optimizer_state)
def _test_checkpoint(self, scheduler_states, lr_scheduler):
    for epoch, epoch_scheduler_state in scheduler_states.items():
        filename = self.checkpoint_filename.format(epoch=epoch)
        lr_scheduler.load_state(filename)
        saved_scheduler_state = torch_to_numpy(lr_scheduler.scheduler.state_dict())
        self.assertEqual(epoch_scheduler_state, saved_scheduler_state)
def _compute_loss_and_metrics(self, x, y, return_loss_tensor=False, return_pred=False):
    x, y = self._process_input(x, y)
    pred_y = self.model(x)
    loss = self.loss_function(pred_y, y)
    if not return_loss_tensor:
        loss = float(loss)
    with torch.no_grad():
        metrics = self._compute_metrics(pred_y, y)
    pred_y = torch_to_numpy(pred_y) if return_pred else None
    return loss, metrics, pred_y
def _loss_and_metrics_tensors_to_numpy(self, loss_tensor, metrics_tensors, pred_y=None):
    loss = float(loss_tensor)
    metrics = np.array([float(metric_tensor) for metric_tensor in metrics_tensors])
    ret = (loss, metrics)
    if pred_y is not None:
        pred_y = torch_to_numpy(pred_y)
        ret = ret + (pred_y,)
    return ret
def predict_embeddings(model, loader):
    model.model.eval()
    predicted_embeddings = {}
    for x, y in loader:
        x = tensors_to_variables(x)
        embeddings = torch_to_numpy(model.model(x))
        for label, embedding in zip(y, embeddings):
            if label in predicted_embeddings:
                predicted_embeddings[label].append(embedding)
            else:
                predicted_embeddings[label] = [embedding]
    return predicted_embeddings
def predict_on_batch(self, x):
    """
    Returns the predictions of the network given a batch ``x``, where the
    tensors are converted into Numpy arrays.

    Args:
        x (Union[Tensor, np.ndarray]): Batch for which to predict.

    Returns:
        The predictions with tensors converted into Numpy arrays.
    """
    self.model.eval()
    with torch.no_grad():
        x = self._process_input(x)
        return torch_to_numpy(self.model(x))
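# Usage sketch for ``predict_on_batch`` above (illustrative, not from the
# source). It assumes ``model`` is an already-constructed instance of the
# wrapper class defining the method; the single batch is forwarded through the
# network under ``torch.no_grad()`` and returned as a Numpy array.
#
#     batch = torch.randn(8, 10)              # hypothetical batch of 8 inputs
#     preds = model.predict_on_batch(batch)   # Numpy array of per-sample outputs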
def predict_mean_embeddings(model, loader):
    model.model.eval()
    predicted_embeddings = {}
    for x, y in loader:
        x = tensors_to_variables(x)
        embeddings = torch_to_numpy(model.model(x))
        for label, embedding in zip(y, embeddings):
            if label in predicted_embeddings:
                predicted_embeddings[label].append(embedding)
            else:
                predicted_embeddings[label] = [embedding]
    mean_pred_embeddings = {}
    for label in predicted_embeddings:
        mean_pred_embeddings[label] = np.mean(np.array(predicted_embeddings[label]), axis=0)
    return mean_pred_embeddings
def predict_OOV(model, char_to_idx, OOV_path, filename):
    OOVs = load_vocab(OOV_path)
    vectorizer = Vectorizer(char_to_idx)
    examples = [(vectorizer.vectorize_sequence(word), word) for word in OOVs]
    loader = DataLoader(examples, collate_fn=collate_x, use_gpu=False, batch_size=1)
    model.model.eval()
    predicted_embeddings = {}
    for x, y in loader:
        x = tensors_to_variables(x)
        embeddings = torch_to_numpy(model.model(x))
        for label, embedding in zip(y, embeddings):
            predicted_embeddings[label] = embedding
    save_embeddings(predicted_embeddings, filename)
def predict_generator(self, generator, steps=None):
    """
    Returns the predictions of the network given batches of samples ``x``,
    where the tensors are converted into Numpy arrays.

    Args:
        generator: Generator-like object for the dataset. The generator must
            yield a batch of samples. If the generator does not have a method
            ``__len__()``, the ``steps`` argument must be provided. Notice that
            a generator made using the python keyword ``yield`` does not have
            such a method. However, a PyTorch DataLoader object has such a
            method.

            The method ``__iter__()`` on the generator is called and the method
            ``__next__()`` is called for each step on the resulting object
            returned by ``__iter__()``. Notice that a call to ``__iter__()`` on
            a generator made using the python keyword ``yield`` returns the
            generator itself.
        steps (int, optional): Number of iterations done on ``generator``.
            (Defaults to the number of steps needed to see the entire dataset)

    Returns:
        List of the predictions of each batch with tensors converted into
        Numpy arrays.
    """
    self.model.eval()
    if steps is None:
        steps = len(generator)
    pred_y = []
    iterator = iter(generator)
    with torch.no_grad():
        for _ in range(steps):
            x = self._process_input(next(iterator))
            pred_y.append(torch_to_numpy(self.model(x)))
    return pred_y
def euclidean_distance(y_pred_tensor, y_true_tensor):
    y_pred = torch_to_numpy(y_pred_tensor)
    y_true = torch_to_numpy(y_true_tensor)
    dist = np.linalg.norm((y_true - y_pred), axis=1).mean()
    return torch.FloatTensor([dist.tolist()])
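# Usage sketch for the ``euclidean_distance`` metric above (illustrative, not
# from the source; the tensors and the expected value are made up). It assumes
# ``torch_to_numpy`` and ``np`` are in scope, as in the surrounding module. The
# metric returns the mean row-wise Euclidean distance between predicted and
# target vectors, wrapped in a FloatTensor.
import torch

pred = torch.tensor([[0.0, 0.0], [1.0, 1.0]])
target = torch.tensor([[3.0, 4.0], [1.0, 1.0]])
print(euclidean_distance(pred, target))  # -> tensor([2.5000]): distances are 5.0 and 0.0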
def save_char_embeddings(model, char_to_idx, filename='mimick_char_embeddings'):
    char_embeddings = {}
    for char, idx in char_to_idx.items():
        char_embeddings[char] = torch_to_numpy(model.model.mimick_lstm.embeddings.weight.data[idx])
    save_embeddings(char_embeddings, filename)