def evaluate_by_epochs(self, dataloader):
    """Evaluate dataset using the averaged models.

    In each epoch, each process loads the models and averages them. The
    averaged model is used to evaluate the train / validation dataset.

    Args:
        dataloader (:obj:`torch.utils.data.DataLoader`): The dataset to be evaluated.

    Returns:
        list: list of stats of models in each epoch.
    """
    stats_list = []
    for epoch in range(self.epochs):
        # Same model for all workers.
        model = self._load_model(epoch)
        model.eval()

        stats = {"epoch": epoch, "count": 0, "total_loss": 0}
        for metric in self.metrics:
            stats["total_" + metric.name] = 0

        data_iter = iterate_dataloader(
            dataloader, self.dtype, self.max_batch_per_epoch, self.use_cuda
        )

        with torch.no_grad():
            for i, (data, target) in enumerate(data_iter):
                output = model(data)

                # Compute loss and metrics.
                count = len(target)
                stats["count"] += count
                stats["total_loss"] += self.loss_function(output, target) * count
                for metric in self.metrics:
                    stats["total_" + metric.name] += metric(output, target) * count

                logger.info(
                    "E{:4}B{:4}: avg loss={:10.3e}".format(
                        epoch, i, stats["total_loss"] / stats["count"]
                    )
                )

        # Keep globally averaged loss / metrics, etc.
        stats["loss"] = global_average(stats["total_loss"], stats["count"]).item()
        for metric in self.metrics:
            stats[metric.name] = global_average(
                stats["total_" + metric.name], stats["count"]
            ).item()
            del stats["total_" + metric.name]
        del stats["count"], stats["total_loss"]

        stats_list.append(stats)
    return stats_list
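# All of the evaluation routines in this module aggregate worker-local sums
# with a global_average helper. The following is a minimal sketch of such a
# helper, assuming torch.distributed is initialized; the actual mlbench_core
# implementation may differ. It all-reduces the local (sum, count) pair and
# returns their ratio as a scalar tensor, which is why callers do `.item()`.
import torch
import torch.distributed as dist


def global_average(total, count):
    """All-reduce `total` and `count` across workers and return total / count."""
    pair = torch.tensor([float(total), float(count)])
    if dist.is_available() and dist.is_initialized():
        dist.all_reduce(pair, op=dist.ReduceOp.SUM)
    return pair[0] / pair[1]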
def _validate(
    dataloader,
    model,
    loss_function,
    metrics,
    dtype,
    transform_target_type=None,
    use_cuda=False,
    max_batch_per_epoch=None,
):
    """Evaluate the model on the validation dataset.

    Args:
        dataloader (:obj:`torch.utils.data.DataLoader`): The validation set
        model (:obj:`torch.nn.Module`): The model to evaluate
        loss_function (:obj:`torch.nn.Module`): The loss function
        metrics (list): List of metrics to track
        dtype (str): The datatype to use, one of `fp32` or `fp64`
        transform_target_type (str): Datatype to convert the target to,
            default: `None`
        use_cuda (bool): Whether to use GPU, default: `False`
        max_batch_per_epoch (int): Maximum number of batches to validate
            per epoch, default: `None` (all batches)

    Returns:
        (dict, float): Dictionary of each metric's average, and the average loss
    """
    # Initialize the accumulators for loss and metrics
    losses = AverageMeter()
    for metric in metrics:
        metric.reset()

    # Each worker computes its own losses and metrics
    with torch.no_grad():
        data_iter = iterate_dataloader(
            dataloader, dtype, max_batch_per_epoch, use_cuda, transform_target_type
        )

        for data, target in data_iter:
            # Inference
            output = model(data)

            # Compute loss
            loss = loss_function(output, target)

            # Update loss
            losses.update(loss.item(), data.size(0))

            # Update metrics
            for metric in metrics:
                metric_value = metric(loss, output, target)
                metric.update(metric_value, data.size(0))

    # Aggregate metrics and loss for all workers
    metrics_averages = {metric: metric.average().item() for metric in metrics}
    loss_average = global_average(losses.sum, losses.count).item()

    return metrics_averages, loss_average
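# _validate above consumes its dataloader through an iterate_dataloader helper.
# A minimal sketch of what such a helper could look like, under the assumption
# that it caps the number of batches, casts inputs to the requested precision,
# and moves tensors to the GPU; the real mlbench_core helper may do more.
import itertools

import torch


def iterate_dataloader(
    dataloader, dtype, max_batch_per_epoch=None, use_cuda=False,
    transform_target_type=None,
):
    """Yield (data, target) pairs, converted and truncated as requested."""
    if max_batch_per_epoch is not None:
        dataloader = itertools.islice(dataloader, max_batch_per_epoch)

    for data, target in dataloader:
        data = data.to(torch.float64 if dtype == "fp64" else torch.float32)
        if transform_target_type:
            # Assumption: the target is cast to the same precision as the data.
            target = target.to(data.dtype)
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        yield data, target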
def validation_round(
    val_loader,
    metrics,
    model,
    loss_func,
    iter_size,
    translator,
    tracker=None,
    use_cuda=False,
):
    """Performs one validation round for a translation model."""
    # Set tracker and model in eval mode
    model.eval()
    if tracker:
        tracker.validation()
        tracker.validation_start()

    losses = AverageMeter()

    # Reset metrics
    for metric in metrics:
        metric.reset()

    with torch.no_grad():
        for data, target in val_loader:
            data, target = prepare_batch(data, target, use_cuda=use_cuda)
            output = compute_model_output(model, data, target)

            # Compute loss
            loss, loss_per_token = compute_loss(
                data, target, output, loss_func, iter_size
            )

            # Update loss
            losses.update(loss_per_token, 1)

            # Update metrics
            translated, targets = translator.translate(data, target)
            for metric in metrics:
                metric_value = metric(translated, targets)
                size = data[0].shape[1]  # Number of translated sentences
                metric.update(metric_value, size)

    metrics_averages = {metric: metric.average().item() for metric in metrics}
    loss_average = global_average(losses.sum, losses.count).item()

    if tracker:
        tracker.validation_end()
    return metrics_averages, loss_average
def record_validation_stats(metrics_values, loss, tracker=None, rank=0):
    """Records the stats of a previously run validation round.

    Args:
        metrics_values (dict): Dictionary of each metric's average.
        loss (float): Validation loss
        tracker (:obj:`mlbench_core.utils.Tracker`, optional): Tracker object to use.
        rank (int): Current distributed rank

    Returns:
        (bool): Whether this validation round is the best so far
    """
    if len(metrics_values) > 0:
        # Save metrics
        if tracker:
            for metric, value in metrics_values.items():
                tracker.record_metric(metric, value, log_to_api=rank == 0)
                tracker.record_stat(
                    "global_{}".format(metric.name), value, log_to_api=rank == 0
                )

        if rank == 0 and tracker:
            logger.info(
                "{} for rank {} (best epoch {}, current epoch {}): {:.3f}".format(
                    tracker.primary_metric.name,
                    tracker.rank,
                    tracker.best_epoch,
                    tracker.current_epoch,
                    tracker.best_metric_value,
                )
            )
    else:
        if rank == 0:
            logger.info("Validation loss={:.3f}".format(loss))

        if tracker:
            tracker.record_loss(loss, log_to_api=True)

            global_loss = global_average(loss, 1).item()
            if rank == 0:
                tracker.record_stat("global_loss", global_loss, log_to_api=True)

    return tracker.is_best() if tracker else False
def validation_round(loader, metrics, criterion, translator, tracker, use_cuda=False):
    """Performs one round of validation for the Transformer model

    Args:
        loader (:obj:`torch.utils.data.DataLoader`): Data loader
        metrics (list): List of metrics for evaluation
        criterion (:obj:`torch.nn.Module`): Loss function
        translator (:obj:`mlbench_core.models.pytorch.transformer.SequenceGenerator`):
            Translator module
        tracker (:obj:`mlbench_core.utils.Tracker`): Current Tracker
        use_cuda (bool): Use GPU acceleration. Default: `False`.

    Returns:
        (dict of :obj:`mlbench_core.evaluation.pytorch.MLBenchMetric`: float, float):
            The metrics averages over all workers, and the loss average.
    """
    model = translator.model
    model.eval()

    tracker.validation()
    tracker.validation_start()

    losses = AverageMeter()
    for metric in metrics:
        metric.reset()

    with torch.no_grad():
        for batch in loader:
            batch = prepare_batch(batch, use_cuda=use_cuda)
            output = model(**batch["net_input"])

            loss, sample_size = compute_loss(batch, output, criterion)
            losses.update(loss.item() / sample_size, 1)

            translated, targets = translator.translate_batch(batch)
            for metric in metrics:
                metric_value = metric(translated, targets)
                size = batch["target"].size(0)  # Number of translated sentences
                metric.update(metric_value, size)

    metric_averages = {metric: metric.average().item() for metric in metrics}
    loss_average = global_average(losses.sum, losses.count)

    tracker.validation_end()
    return metric_averages, loss_average
def validate(self, dataloader):
    r"""Validate the quality of the model in terms of loss and metrics.

    Args:
        dataloader (:obj:`torch.utils.data.DataLoader`): The validation set

    Returns:
        (dict, float): Dictionary of the average of each metric,
            and the average validation loss
    """
    # Turn on evaluation mode for the model
    self.model.eval()

    # Initialize the accumulators for loss and metrics
    losses = AverageMeter()
    for metric in self.metrics:
        metric.reset()

    # Each worker computes its own losses and metrics
    with torch.no_grad():
        data_iter = iterate_dataloader(
            dataloader,
            self.dtype,
            self.max_batch_per_epoch,
            self.use_cuda,
            self.transform_target_type,
        )

        for data, target in data_iter:
            # Inference
            output = self.model(data)

            # Compute loss
            loss = self.loss_function(output, target)

            # Update loss
            losses.update(loss.item(), data.size(0))

            # Update metrics
            for metric in self.metrics:
                metric_value = metric(output, target)
                metric.update(metric_value, data.size(0))

    # Aggregate metrics and loss for all workers
    metrics_averages = {
        metric.name: metric.average().item() for metric in self.metrics
    }
    loss_average = global_average(losses.sum, losses.count).item()

    return metrics_averages, loss_average
def validation_round(loader, metrics, criterion, translator, tracker=None, use_cuda=False):
    """Performs one round of validation for the Transformer model."""
    model = translator.model
    model.eval()

    if tracker:
        tracker.validation()
        tracker.validation_start()

    losses = AverageMeter()
    for metric in metrics:
        metric.reset()

    with torch.no_grad():
        for batch in loader:
            batch = prepare_batch(batch, use_cuda=use_cuda)
            output = model(**batch["net_input"])

            loss, sample_size = compute_loss(batch, output, criterion)
            losses.update(loss.item() / sample_size, 1)

            translated, targets = translator.translate_batch(batch)
            for metric in metrics:
                metric_value = metric(loss.item(), translated, targets)
                size = batch["target"].size(0)  # Number of translated sentences
                metric.update(metric_value, size)

    metric_averages = {metric: metric.average().item() for metric in metrics}
    loss_average = global_average(losses.sum, losses.count)

    if tracker:
        tracker.validation_end()

    return metric_averages, loss_average
def average(self):
    """Average stats."""
    return global_average(self.top.sum, self.top.count)
def average(self):
    """Average stats across all workers."""
    return global_average(self.average_meter.sum, self.average_meter.count)
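# Both average() methods above, and the `losses` accumulators used throughout,
# assume an AverageMeter with .sum and .count attributes. A minimal sketch of
# such a meter (the actual mlbench_core class may track more statistics):
class AverageMeter(object):
    """Keeps a running sum, count, and average of a streamed value."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, value, n=1):
        # `value` is an average over `n` samples, so weight it by n.
        self.sum += value * n
        self.count += n
        self.avg = self.sum / self.count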
def validation_round(
    dataloader,
    model,
    loss_function,
    metrics,
    run_id,
    rank,
    dtype,
    transform_target_type=None,
    use_cuda=False,
    max_batch_per_epoch=None,
    tracker=None,
):
    """Handles one full iteration of validation on the whole validation set.

    Args:
        dataloader (:obj:`torch.utils.data.DataLoader`): The validation set
        model (:obj:`torch.nn.Module`): The model to evaluate
        loss_function (:obj:`torch.nn.Module`): The loss function
        metrics (list): List of metrics to track
        run_id (int): The id of the current run
        rank (int): The rank of the current worker node
        dtype (str): The datatype to use, one of `fp32` or `fp64`
        transform_target_type (str): Datatype to convert the target to,
            default: `None`
        use_cuda (bool): Whether to use GPU, default: `False`
        max_batch_per_epoch (int): Maximum number of batches to validate
            per epoch, default: `None` (all batches)
        tracker (:obj:`mlbench_core.utils.Tracker`, optional): Tracker object to use

    Returns:
        (bool): Whether this validation round is the best so far
    """
    model.eval()

    if tracker:
        tracker.validation()
        tracker.validation_start()

    metrics_values, loss = _validate(
        dataloader,
        model,
        loss_function,
        metrics,
        dtype,
        transform_target_type,
        use_cuda,
        max_batch_per_epoch,
    )

    if tracker:
        tracker.validation_end()

    if len(metrics_values) > 0:
        # Save metrics
        if tracker:
            for metric, value in metrics_values.items():
                tracker.record_metric(metric, value, log_to_api=True)
                global_metric_value = global_average(value, 1).item()

                if rank == 0:
                    tracker.record_stat(
                        "global_{}".format(metric.name),
                        global_metric_value,
                        log_to_api=True,
                    )

        if rank == 0 and tracker:
            logger.info(
                "{} for rank {} (best epoch {}, current epoch {}): {:.3f}".format(
                    tracker.primary_metric.name,
                    tracker.rank,
                    tracker.best_epoch,
                    tracker.current_epoch,
                    tracker.best_metric_value,
                )
            )
    else:
        if rank == 0:
            logger.info("Validation loss={:.3f}".format(loss))

        if tracker:
            tracker.record_loss(loss, log_to_api=True)

            global_loss = global_average(loss, 1).item()
            if rank == 0:
                tracker.record_stat("global_loss", global_loss, log_to_api=True)

    return tracker.is_best() if tracker else False
def validation_round(
    dataloader,
    model,
    loss_function,
    metrics,
    dtype,
    tracker=None,
    transform_target_type=False,
    use_cuda=False,
    max_batches=None,
):
    """Evaluate the model on the validation dataset.

    Args:
        dataloader (:obj:`torch.utils.data.DataLoader`): The validation set
        model (:obj:`torch.nn.Module`): The model to evaluate
        loss_function (:obj:`torch.nn.Module`): The loss function
        metrics (list): List of metrics to track
        dtype (str): The datatype to use, one of `fp32` or `fp64`
        tracker (:obj:`mlbench_core.utils.Tracker` | None): Tracker object to use
        transform_target_type (bool): Convert target to `dtype`. Default `False`
        use_cuda (bool): Whether to use GPU, default: `False`
        max_batches (int | None): Maximum number of batches to validate on

    Returns:
        (dict, float): Dictionary of the average of each metric,
            and the average validation loss
    """
    model.eval()

    if tracker:
        tracker.validation()
        tracker.validation_start()

    # Initialize the accumulators for loss and metrics
    losses = AverageMeter()
    for metric in metrics:
        metric.reset()

    # Each worker computes its own losses and metrics
    with torch.no_grad():
        data_iter = iterate_dataloader(
            dataloader, dtype, max_batches, use_cuda, transform_target_type
        )

        for data, target in data_iter:
            output = model(data)

            # Compute loss
            loss = loss_function(output, target)

            # Update loss
            losses.update(loss.item(), data.size(0))

            # Update metrics
            for metric in metrics:
                metric_value = metric(output, target)
                metric.update(metric_value, data.size(0))

    # Aggregate metrics and loss for all workers
    metrics_averages = {metric: metric.average().item() for metric in metrics}
    loss_average = global_average(losses.sum, losses.count).item()

    if tracker:
        tracker.validation_end()

    return metrics_averages, loss_average
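# A hypothetical end-to-end use of the pieces above: run one validation round,
# then hand the results to record_validation_stats for aggregation and logging.
# `val_loader`, `model`, `loss_function`, `metrics`, `tracker`, and `rank` are
# assumed to be set up by the caller.
metrics_values, loss = validation_round(
    val_loader,
    model,
    loss_function,
    metrics,
    dtype="fp32",
    tracker=tracker,
    use_cuda=True,
)
is_best = record_validation_stats(metrics_values, loss, tracker=tracker, rank=rank)
if is_best and rank == 0:
    logger.info("New best model at epoch {}".format(tracker.current_epoch))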
def validation_round(
    val_set, model, batch_size, metrics, loss_function, tracker, use_cuda=False
):
    """Performs a validation round

    Args:
        val_set: Validation set
        model (:obj:`torch.nn.Module`): Model to evaluate
        batch_size (int): Validation batch size
        metrics (list): List of metrics to compute
        loss_function (:obj:`torch.nn.Module`): Loss function
        tracker (:obj:`mlbench_core.utils.Tracker`): Tracker object
        use_cuda (bool): Use GPU acceleration

    Returns:
        (dict, float): Metric averages and total loss average
    """
    # One training epoch has finished; run validation on the model.
    tracker.validation()
    tracker.validation_start()

    # Each worker evaluates its own replica of the model.
    model.eval()

    losses = AverageMeter()
    for metric in metrics:
        metric.reset()

    # Each worker computes its own losses and metrics
    with torch.no_grad():
        hidden = model.init_hidden(batch_size)

        num_batches = val_set.num_batches()
        for batch_idx in range(num_batches):
            data, target = val_set.get_batch(batch_idx, cuda=use_cuda)
            batch_seq_len = data.size(0)

            # Inference
            output, hidden = model(data, hidden)

            # Compute loss
            loss = loss_function(output, target)

            # Update loss, weighted by the sequence length of the batch
            losses.update(loss.item(), batch_seq_len)

            # Detach the hidden state so the graph is not kept across batches
            hidden = repackage_hidden(hidden)

            # Update metrics
            for metric in metrics:
                metric_value = metric(output, target)
                metric.update(metric_value, 1)

    # Aggregate metrics and loss for all workers; perplexity is derived
    # from the globally averaged loss.
    loss_average = global_average(losses.sum, losses.count)
    metrics_averages = {
        metric: torch.exp(loss_average).item()
        if metric.name == "Perplexity"
        else metric.average().item()
        for metric in metrics
    }

    logger.info(
        "Got loss {}, avg metric={}".format(
            loss_average,
            [m.average().item() for m in metrics if m.name == "Perplexity"][0],
        )
    )

    tracker.validation_end()
    return metrics_averages, loss_average.item()
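# The language-model validation_round above calls repackage_hidden to stop
# gradients from flowing across batches. A minimal sketch following the common
# PyTorch idiom of detaching recurrent hidden state (assumption: the actual
# helper may differ, e.g. for nested state structures):
import torch


def repackage_hidden(h):
    """Detach hidden states from their computation history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)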