def add(self, outputs: np.ndarray, targets: np.ndarray):
    """
    Feed one batch of per-pixel predictions and labels into the evaluator.

    :param outputs: 1D np.ndarray of predicted semantic classes, one per pixel
    :param targets: 1D np.ndarray of ground truth semantic classes, one per pixel

    Both inputs must be flattened before they are passed in. As an example,
    with a batch size of 32 and 520 x 480 images, a model could produce:

        batch_output.shape
        >> (32, 21, 520, 480)  # class scores live in the second dimension

        batch_target.shape
        >> (32, 520, 480)

    Reduce the class dimension with an argmax and flatten both arrays:

        flattened_batch_output.shape
        >> (7987200)  # e.g. batch_output.argmax(1).flatten() in torch

        flattened_batch_target.shape
        >> (7987200)  # e.g. batch_target.flatten() in torch

    Each entry is then a class id for one pixel, for instance:

        flattened_batch_output
        >> array([6, 6, 6, 6, 6, ...])

        flattened_batch_target
        >> array([6, 6, 6, 6, 6, ...])

    Here the first five pixels are predicted as class 6 and labelled as
    class 6, so the model is correct on them. Pass the flattened arrays on:

    .. code-block:: python

        my_evaluator.add(outputs=flattened_batch_output,
                         targets=flattened_batch_target)

    :return: None - updates self.ade20k_evaluator, appends to self.targets
        and self.outputs and, for the first batch only, sets self.batch_hash
    """
    # The wrapped evaluator takes the ground truth first, i.e. the reverse of
    # this method's own argument order.
    self.ade20k_evaluator.update(targets, outputs)

    self.targets = np.append(self.targets, targets)
    self.outputs = np.append(self.outputs, outputs)

    if self.first_batch_processed:
        return

    # First batch only: hash the rounded batch together with the rounded
    # global accuracy and mean IU reported by the wrapped evaluator.
    acc_global, _, iu = self.ade20k_evaluator.compute()
    rounded_batch = np.append(np.around(targets, 3), np.around(outputs, 3))
    rounded_metrics = np.around(
        np.array([acc_global.item(), iu.mean().item()]), 3)
    self.batch_hash = calculate_batch_hash(
        np.append(rounded_batch, rounded_metrics))
    self.first_batch_processed = True
def add(self, answers: Dict[str, str]):
    """
    Record newly translated segments in the evaluator.

    :param answers: a dict mapping source segment ids to translated segments
        (a segment id is the document id and the original segment id
        joined by `#`.)

    Examples:
        Add three translated segments:

        .. code-block:: python

            my_evaluator.add({
                'bbc.381790#1': 'Waliser AMs sorgen sich um "Aussehen wie Muppets"',
                'bbc.381790#2': 'Unter einigen AMs herrscht Bestürzung über einen...',
                'bbc.381790#3': 'Sie ist aufgrund von Plänen entstanden, den Namen...'
            })

    .. seealso:: `source_segments`
    """
    self.metrics.add(answers)

    # Nothing more to do once the hash has been taken, or while the metrics
    # object still reports that it holds no data.
    if self.first_batch_processed or not self.metrics.has_data:
        return

    snapshot = self.cache_values(
        answers=self.metrics.answers,
        metrics=self.metrics.get_results(ignore_missing=True),
    )
    self.batch_hash = calculate_batch_hash(snapshot)
    self.first_batch_processed = True
def add(self, answers: Dict[str, str]):
    """
    Record new answers in the evaluator.

    :param answers: a dictionary mapping question ids to text answers.
        An unanswerable question (SQuAD v2.0) is represented by an empty
        string.

    Examples:
        Add two answers, the second one for an unanswerable question:

        .. code-block:: python

            my_evaluator.add({
                "57296d571d04691400779413": "itself",
                "5a89117e19b91f001a626f2d": ""
            })
    """
    self.metrics.add(answers)

    # The hash is taken exactly once: on the first call after which the
    # metrics object reports that it holds data.
    needs_hash = not self.first_batch_processed and self.metrics.has_data
    if needs_hash:
        cached = self.cache_values(
            answers=self.metrics.answers,
            metrics=self.metrics.get_results(ignore_missing=True),
        )
        self.batch_hash = calculate_batch_hash(cached)
        self.first_batch_processed = True
def add(self, answers: Dict[str, str]):
    """
    Record new answers in the evaluator.

    :param answers: a dict with string keys and string values; it is passed
        unchanged to ``self.metrics.add``.
    :return: None - updates self.metrics and, the first time the metrics
        object reports data, sets self.batch_hash and
        self.first_batch_processed
    """
    self.metrics.add(answers)

    already_hashed = self.first_batch_processed
    if already_hashed or not self.metrics.has_data:
        return

    # Hash the accumulated answers together with the metrics computed so far.
    payload = self.cache_values(
        answers=self.metrics.answers,
        metrics=self.metrics.get_results(ignore_missing=True),
    )
    self.batch_hash = calculate_batch_hash(payload)
    self.first_batch_processed = True
def add(self, pairIds, predictions):
    """
    Record predictions for one or more pair ids.

    :param pairIds: a single pair id (str) or a collection of pair ids
    :param predictions: the prediction for that pair id, or a collection of
        predictions aligned with ``pairIds``
    :return: None - forwards the data to both self.matched and
        self.mismatched and may set self.batch_hash
    """
    # A lone string id is treated as a single pair: wrap the id and its
    # prediction so the splits always receive list arguments in that case.
    if isinstance(pairIds, str):
        pairIds, predictions = [pairIds], [predictions]

    self.matched.add(pairIds, predictions)
    self.mismatched.add(pairIds, predictions)

    if self.batch_hash is not None:
        return

    # The hash is only taken once more than 100 answers in total have been
    # counted across the two splits.
    answered = self.matched.count + self.mismatched.count
    if answered > 100:
        snapshot = self.cache_values(
            matched=self.matched.answers,
            mismatched=self.mismatched.answers,
        )
        self.batch_hash = calculate_batch_hash(snapshot)
        # TODO: do we need this if we have self.batch_hash
        self.first_batch_processed = True
def add(self, output_dict: dict):
    """
    Updates the evaluator with new results

    :param output_dict: (dict) Where keys are image IDs, and each value should be an 1D np.ndarray of size 1000
        containing logits for that image ID.
    :return: void - updates self.outputs with the new IDs and predictions, updates the running
        top-1 / top-5 meters and, for the first batch only, sets self.batch_hash.
        ``output_dict`` itself is left unmodified.

    Examples:
        Update the evaluator with two results:

        .. code-block:: python

            my_evaluator.add({'ILSVRC2012_val_00000293': np.array([1.04243, ...]),
                         'ILSVRC2012_val_00000294': np.array([-2.3677, ...])})
    """
    if not output_dict:
        print('Empty output_dict; will not process')
        return

    # Merge the new predictions; a repeated image ID overwrites the old entry.
    self.outputs = {**self.outputs, **output_dict}

    for image_id, logits in output_dict.items():
        target = self.targets[image_id]
        # The scorer is given a batch of one prediction.
        scores = np.array([logits])
        prec1 = top_k_accuracy_score(y_true=target, y_pred=scores, k=1)
        prec5 = top_k_accuracy_score(y_true=target, y_pred=scores, k=5)
        self.top1.update(prec1, 1)
        self.top5.update(prec5, 1)

    if not self.first_batch_processed:
        # Hash a shallow copy: adding the accuracy entries to the caller's
        # dict would leak two non-image keys back to the caller, which would
        # then break a later call that reuses the same dict.
        hash_dict = dict(output_dict)
        hash_dict['Top 1 Accuracy'] = self.top1.avg
        hash_dict['Top 5 Accuracy'] = self.top5.avg
        self.batch_hash = calculate_batch_hash(hash_dict)
        self.first_batch_processed = True
def add(self, detections: list):
    """
    Feed a new batch of detections into the evaluator.

    :param detections: list of detection dictionaries, in the form consumed
        by the COCO.loadRes method of the pycocotools API. A single
        detection looks like this:

        {'image_id': 397133, 'bbox': [386.1628112792969, 69.48855590820312,
        110.14895629882812, 278.2847595214844], 'score': 0.999152421951294,
        'category_id': 1}

    :return: None - extends self.detections, updates self.coco_evaluator
        and, for the first batch that contains predictions, sets
        self.batch_hash

    Examples:
        Add a batch holding one detection:

        .. code-block:: python

            my_evaluator.add([{'image_id': 397133, 'bbox': [386.1628112792969, 69.48855590820312,
            110.14895629882812, 278.2847595214844], 'score': 0.999152421951294, 'category_id': 1}])
    """
    self.detections.extend(detections)
    self.coco_evaluator.update(detections)

    if self.first_batch_processed:
        return

    self.coco_evaluator.evaluate()
    self.coco_evaluator.accumulate()

    # We can only hash if we have predictions. Every detection is read
    # up front, so each one must carry a "bbox" key.
    boxes = [entry["bbox"] for entry in detections]
    if not any(boxes):
        return

    snapshot = self.cache_values(
        annotations=detections,
        metrics=get_coco_metrics(self.coco_evaluator),
    )
    self.batch_hash = calculate_batch_hash(snapshot)
    self.first_batch_processed = True
def add(self, log_probs, targets):
    """
    Updates the evaluator with new results

    :param log_probs: `np.ndarray` or `torch.tensor` with log probability of target tokens can be either:
        - a Python float or a 0d tensor holding the summed log probability of all `targets` tokens, or
        - a 2d tensor [bs x seq_len] log probabilities of each target token, the shape of `log_probs`, `targets` must match.
        - a 3d tensor [bs x seq_len x vocab_size] distribution of log probabilities for each position in the sequence,
          we will gather the probabilities of target tokens for you.
    :param targets: a `np.ndarray` or `torch.tensor` with ids of ground truth tokens.
    :return: the value of ``self.results`` after the update
    :raises AssertionError: when `log_probs` is neither a scalar, nor shaped like `targets`,
        nor shaped like `targets` plus one trailing dimension

    Examples:
        Update the evaluator with a result for a sentence with 10 tokens:

        .. code-block:: python

            targets = np.array([[ 32, 582, 2731, 19, 1, 786, 5, 98693, 55362, 5 ]])
            log_probs = np.array([[ -9.8461, -9.3343, -17.8042, -11.2006, -22.3345, -14.4665, -2.0055, -14.2044,
            -14.7545, -5.7888]])
            my_evaluator.add(log_probs, targets)
    """
    if isinstance(log_probs, float):
        log_probs = np.array([log_probs])  # for sum to work
    elif getattr(log_probs, "ndim", None) == 0:
        # Documented 0d case: turn the scalar tensor into a one-element
        # vector so it skips the shape checks below and can be indexed with
        # [0] when the first batch is hashed.
        log_probs = log_probs.reshape(1)
    elif log_probs.shape[:-1] == targets.shape:
        # One extra trailing dimension: treat it as the vocabulary axis.
        log_probs, targets = _gather_probs(log_probs, targets)
    else:
        assert log_probs.shape == targets.shape, f"log_probs have to be ether gathered log probabilities of targets or all probabilites, received {log_probs.shape} {repr(log_probs)}"

    self._neglogloss += - float(log_probs.sum())
    self._data_set_size += int(np.prod(list(targets.shape)))

    if not self.first_batch_processed:
        # Only the first row of the first batch goes into the hash.
        content = self.cache_values(
            probs=_to_numpy(log_probs)[0].reshape(-1),
            api_version=3)
        self.batch_hash = calculate_batch_hash(content)
        self.first_batch_processed = True
    return self.results