def get_metrics(model: Model) -> Dict[str, float]:
    """
    Gets the metrics from the model's decoded output. Setting ``"loss"`` to
    the total loss divided by ``num_batches`` (i.e. "average loss per batch")
    is currently disabled, since ``total_loss`` and ``num_batches`` are not
    available in this scope.
    """
    model.decode()
    metrics = model.output_dict['metrics']
    # metrics["loss"] = float(total_loss / num_batches) if num_batches > 0 else 0.0
    return metrics
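# A minimal sketch (not part of the original API) of the behaviour the
# docstring above describes, assuming the caller tracks ``total_loss`` and
# ``num_batches`` itself, since neither is available inside ``get_metrics``.
def get_metrics_with_average_loss(model: Model,
                                  total_loss: float,
                                  num_batches: int) -> Dict[str, float]:
    metrics = get_metrics(model)
    # Average loss per batch; guard against an empty dataset.
    metrics["loss"] = float(total_loss / num_batches) if num_batches > 0 else 0.0
    return metrics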
def get_model_predictions(model: Model,
                          instances: Iterable[Instance],
                          data_iterator: DataIterator,
                          cuda_device: int) -> Tuple[Dict[str, Any], List]:
    model.eval()
    model_predictions = []
    iterator = data_iterator(instances,
                             num_epochs=1,
                             cuda_device=cuda_device,
                             for_training=False)
    logger.info("Iterating over dataset")
    generator_tqdm = Tqdm.tqdm(iterator,
                               total=data_iterator.get_num_batches(instances))
    # Disable gradient tracking during inference to save memory.
    with torch.no_grad():
        for batch in generator_tqdm:
            result = model(**batch)
            predictions = model.decode(result)
            model_predictions.extend(predictions["tags"])
    return model.get_metrics(), model_predictions
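# Example usage (a sketch, assuming an AllenNLP 0.x ``BasicIterator`` and an
# already-trained ``model`` with an indexed vocabulary; ``test_instances`` and
# the batch size are illustrative, not part of this module):
#
#     from allennlp.data.iterators import BasicIterator
#
#     iterator = BasicIterator(batch_size=32)
#     iterator.index_with(model.vocab)
#     metrics, tags = get_model_predictions(model, test_instances, iterator,
#                                           cuda_device=-1)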
def get_predictions(self,
                    instances: List[Instance],
                    model: Model,
                    cuda_device: int = -1,
                    prediction_file: Optional[str] = None,
                    visualization_file: Optional[str] = None,
                    verbose: bool = False) -> List[Dict]:
    """
    Gets predictions for the given instances.

    We use a basic iterator, since a bucket iterator shuffles data even
    when ``shuffle=False``.

    Arguments:
        instances (List[Instance]) : The list of instances for inference
        model (Model) : The model being used for predictions
        cuda_device (int) : The cuda device being used for processing
        prediction_file (Optional[str]) : If provided, dump the predictions
            to this file as JSON
        visualization_file (Optional[str]) : If provided (and visualization
            is enabled), write the visualization to this file
        verbose (bool) : Log accuracies and other metrics

    Returns:
        predictions (List[Dict]) : The predictions. Each contains the
        following keys:
            * text (List[str]) : The tokens
            * pred (List[Tuple[str, float]]) : The predicted labels and their
              probabilities. Can potentially contain multiple predicted labels.
            * gold (List[str]) : The gold labels. Can potentially contain
              multiple gold labels.
            * pred_labels (List[str]) : Predicted labels for segmentation.
              Note that this method is implemented by the base classes.
            * attn (Dict[str, List[float]]) : A dictionary mapping tags to
              attention values
            * gold_labels (List[str]) : The gold labels for segmentation

    Additionally, this method stores the predictions in ``prediction_file``
    and the visualization in ``visualization_file``, if those are provided
    and visualization is enabled.
    """
    iterator = self._iterator(instances,
                              num_epochs=1,
                              shuffle=False,
                              cuda_device=cuda_device,
                              for_training=False)
    model.eval()
    num_batches = self._iterator.get_num_batches(instances)
    inference_generator_tqdm = Tqdm.tqdm(iterator, total=num_batches)
    predictions = []
    index = 0
    # Per-tag confusion counts, accumulated over all instances.
    matrix = {
        self._indexer.ix2tags[ix]: {
            "tp": 0., "fp": 0., "fn": 0., "tn": 0.
        }
        for ix in range(len(self._indexer.ix2tags))
    }
    for batch in inference_generator_tqdm:
        # Currently we don't support multi-gpu data parallel.
        output_dict = model.decode(model(**batch))
        for ix in range(len(output_dict["preds"])):
            text = self._get_text_from_instance(instances[index])
            pred = output_dict["preds"][ix]
            gold = [
                self._indexer.get_tag(label)
                for label in instances[index].fields['labels'].labels
            ]
            attn = output_dict["attentions"][ix]
            gold_labels = instances[index].fields['tags'].labels
            assert all(len(attn[x]) == len(text) for x in attn)
            gold_labels = self._indexer.extract_relevant(gold_labels)
            pred_labels = self.get_segmentation_from_prediction(
                text=text, preds_probs=pred, attns=attn)
            assert len(pred_labels) == len(gold_labels) == len(text)
            gold_set = set(gold)
            # ``pred`` is a list of (label, prob) pairs; unzipping it gives
            # the set of predicted labels (the probabilities are discarded).
            pred_set, _ = [set(x) for x in zip(*pred)]
            for tag in matrix:
                if tag in gold_set and tag in pred_set:
                    matrix[tag]["tp"] += 1.
                elif tag not in gold_set and tag in pred_set:
                    matrix[tag]["fp"] += 1.
                elif tag in gold_set and tag not in pred_set:
                    matrix[tag]["fn"] += 1.
                else:
                    matrix[tag]["tn"] += 1.
            preds = [[x[0], float(x[1])] for x in pred]
            prediction = {
                "text": text,
                "pred": preds,
                "gold": gold,
                "attn": attn,
                "pred_labels": pred_labels,
                "gold_labels": gold_labels
            }
            predictions.append(prediction)
            index += 1
    if prediction_file is not None and prediction_file != "":
        with open(prediction_file, "w") as f:
            json.dump(predictions, f, ensure_ascii=True, indent=4)
    if visualization_file is not None and self._visualize and \
            visualization_file != "":
        self.visualize(predictions, visualization_file)
    if verbose:
        accs = []
        for tag in matrix:
            acc = (matrix[tag]["tp"] + matrix[tag]["tn"]) / \
                sum(matrix[tag].values()) * 100.
            logger.info(f"Tag: {tag}, Acc: {acc:.2f}")
            accs.append(acc)
        avg_acc = sum(accs) / len(accs)
        logger.info(f"Average ACC: {avg_acc:.2f}")
        p, r, f = fscore_from_preds(predictions, False)
        logger.info(f"P: {p:.2f}, R: {r:.2f}, F: {f:.2f}")
    return predictions
def evaluate(
    model: Model,
    instances: Iterable[Instance],
    data_iterator: DataIterator,
    cuda_device: int,
    batch_weight_key: str,
) -> Dict[str, Any]:
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))

        # Number of batches in instances.
        batch_count = 0
        # Number of batches where the model produces a loss.
        loss_count = 0
        # Cumulative weighted loss.
        total_loss = 0.0
        # Cumulative weight across all batches.
        total_weight = 0.0

        for batch in generator_tqdm:
            batch_count += 1
            batch = nn_util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")

            model.decode()
            metrics = model.output_dict['metrics']

            if loss is not None:
                loss_count += 1
                if batch_weight_key:
                    weight = output_dict[batch_weight_key].item()
                else:
                    weight = 1.0

                total_weight += weight
                total_loss += loss.item() * weight
                # Report the average loss so far.
                metrics["loss"] = total_loss / total_weight

            if (not HasBeenWarned.tqdm_ignores_underscores and
                    any(metric_name.startswith("_")
                        for metric_name in metrics)):
                logger.warning('Metrics with names beginning with "_" will '
                               "not be logged to the tqdm progress bar.")
                HasBeenWarned.tqdm_ignores_underscores = True
            description = (", ".join([
                "%s: %.2f" % (name, value)
                for name, value in metrics.items()
                if not name.startswith("_")
            ]) + " ||")
            generator_tqdm.set_description(description, refresh=False)

        model.decode(reset=True)
        final_metrics = model.output_dict['metrics']

        # Sanity check: only report a loss if every batch produced one;
        # a partial loss would make the average misleading. This also avoids
        # dividing by a zero ``total_weight`` when no batch produced a loss.
        if loss_count > 0:
            if loss_count != batch_count:
                raise RuntimeError(
                    "The model you are trying to evaluate only sometimes "
                    "produced a loss!")
            final_metrics["loss"] = total_loss / total_weight

        return final_metrics
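# Example usage (a sketch; ``test_instances`` and the iterator setup are
# assumptions, as is running on CPU with ``cuda_device=-1``). Passing an empty
# ``batch_weight_key`` gives every batch a weight of 1.0, so the reported
# loss is the plain average loss per batch:
#
#     iterator = BasicIterator(batch_size=32)
#     iterator.index_with(model.vocab)
#     final_metrics = evaluate(model, test_instances, iterator,
#                              cuda_device=-1, batch_weight_key="")
#     logger.info(final_metrics)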