Python calculate_batch_hash示例，sotabencheval.utils.calculate_batch_hash Python示例

示例#1

0

显示文件

    def add(self, outputs: np.ndarray, targets: np.ndarray):
        """
        Feed one batch of flattened predictions and labels into the evaluator.

        :param outputs: 1D np.ndarray holding the predicted semantic class of every pixel
        :param targets: 1D np.ndarray holding the ground truth semantic class of every pixel

        Both arguments are expected to be flattened already. As an illustration, with a
        batch size of 32 and 520 x 480 images, raw model scores usually have a shape like
        (32, num_classes, 520, 480) and the labels (32, 520, 480). Reducing the scores to
        the winning class per pixel and flattening both gives two vectors of
        32 * 520 * 480 = 7987200 entries:

        .. code-block:: python

            flattened_batch_output = batch_output.argmax(1).flatten()  # torch, class on dim 1
            flattened_batch_target = batch_target.flatten()

            my_evaluator.add(outputs=flattened_batch_output,
                             targets=flattened_batch_target)

        Wherever the two vectors hold the same class id at the same position, the
        prediction for that pixel is correct.

        :return: None - forwards the batch to self.ade20k_evaluator, appends it to
            self.targets / self.outputs and, on the first call only, stores self.batch_hash
        """
        # The wrapped evaluator takes the ground truth first and the predictions second.
        self.ade20k_evaluator.update(targets, outputs)

        self.targets = np.append(self.targets, targets)
        self.outputs = np.append(self.outputs, outputs)

        # Only the very first batch is fingerprinted.
        if self.first_batch_processed:
            return

        acc_global, _, iu = self.ade20k_evaluator.compute()

        # The fingerprint covers the rounded batch followed by two rounded summary metrics.
        rounded_batch = np.append(np.around(targets, 3), np.around(outputs, 3))
        rounded_metrics = np.around(
            np.array([acc_global.item(), iu.mean().item()]), 3)

        self.batch_hash = calculate_batch_hash(
            np.append(rounded_batch, rounded_metrics))
        self.first_batch_processed = True

示例#2

0

显示文件

文件： wmt.py 项目： zhiwuya/sotabench-eval

    def add(self, answers: Dict[str, str]):
        """
        Record a batch of translated segments.

        :param answers: a dict mapping source segment ids to translated segments
            (a segment id is the document id and the original segment id joined by `#`.)

        The first time the underlying metrics object reports that it holds data, a hash
        of the accumulated answers and the current results is stored in
        ``self.batch_hash`` and ``self.first_batch_processed`` is set.

        Examples:
            Update the evaluator with three results:

            .. code-block:: python

                my_evaluator.add({
                    'bbc.381790#1': 'Waliser AMs sorgen sich um "Aussehen wie Muppets"',
                    'bbc.381790#2': 'Unter einigen AMs herrscht Bestürzung über einen...',
                    'bbc.381790#3': 'Sie ist aufgrund von Plänen entstanden, den Namen...'
                })

        .. seealso:: `source_segments`
        """
        self.metrics.add(answers)

        # Nothing to fingerprint once a hash exists, or while no data has been recorded.
        if self.first_batch_processed or not self.metrics.has_data:
            return

        cached = self.cache_values(
            answers=self.metrics.answers,
            metrics=self.metrics.get_results(ignore_missing=True),
        )
        self.batch_hash = calculate_batch_hash(cached)
        self.first_batch_processed = True

示例#3

0

显示文件

    def add(self, answers: Dict[str, str]):
        """
        Record a batch of question answers.

        :param answers: a dictionary keyed by question id whose values are the answer texts.
            For unanswerable questions (SQuAD v2.0) the answer should be an empty string.

        After the answers are handed to ``self.metrics``, the first batch that leaves the
        metrics object with data is fingerprinted into ``self.batch_hash``.

        Examples:
            Update the evaluator with two results:

            .. code-block:: python

                my_evaluator.add({
                    "57296d571d04691400779413": "itself",
                    "5a89117e19b91f001a626f2d": ""
                })
        """
        self.metrics.add(answers)

        # has_data is only consulted while no batch has been fingerprinted yet.
        needs_hash = not self.first_batch_processed and self.metrics.has_data
        if needs_hash:
            collected_answers = self.metrics.answers
            partial_results = self.metrics.get_results(ignore_missing=True)
            snapshot = self.cache_values(answers=collected_answers,
                                         metrics=partial_results)
            self.batch_hash = calculate_batch_hash(snapshot)
            self.first_batch_processed = True

示例#4

0

显示文件

    def add(self, answers: Dict[str, str]):
        """
        Pass a batch of answers to the metrics object and fingerprint the first batch.

        :param answers: dict of string keys to string answers, forwarded unchanged
            to ``self.metrics.add``
        :return: None - may set ``self.batch_hash`` and ``self.first_batch_processed``
        """
        self.metrics.add(answers)

        if not self.first_batch_processed:
            # Wait until the metrics object actually holds data before hashing.
            if self.metrics.has_data:
                to_hash = self.cache_values(
                    answers=self.metrics.answers,
                    metrics=self.metrics.get_results(ignore_missing=True),
                )
                self.batch_hash = calculate_batch_hash(to_hash)
                self.first_batch_processed = True

示例#5

0

显示文件

    def add(self, pairIds, predictions):
        """
        Record predictions in both the matched and the mismatched tracker.

        :param pairIds: a single pair id (str) or a collection of pair ids
        :param predictions: the prediction for that id, or the predictions
            corresponding to ``pairIds``

        While no batch hash exists and the two trackers together report a count
        above 100, the cached answers of both trackers are hashed into
        ``self.batch_hash``.
        """
        # A lone string id denotes a single example: wrap both arguments in lists.
        if isinstance(pairIds, str):
            pairIds, predictions = [pairIds], [predictions]

        for tracker in (self.matched, self.mismatched):
            tracker.add(pairIds, predictions)

        # The counts are only read while no hash has been stored.
        if self.batch_hash is not None:
            return

        if self.matched.count + self.mismatched.count > 100:
            content = self.cache_values(matched=self.matched.answers,
                                        mismatched=self.mismatched.answers)
            self.batch_hash = calculate_batch_hash(content)
            self.first_batch_processed = True  #TODO: do we need this if we have self.batch_hash

示例#6

0

显示文件

    def add(self, output_dict: dict):
        """
        Updates the evaluator with new results

        :param output_dict: (dict) Where keys are image IDs, and each value should be an 1D np.ndarray of size 1000
            containing logits for that image ID. The dict is not modified.
        :return: None - merges the new IDs and predictions into self.outputs, updates the
            running top-1 / top-5 meters and, on the first non-empty batch, stores self.batch_hash

        An empty ``output_dict`` is reported on stdout and otherwise ignored.
        Every key of ``output_dict`` must also be a key of ``self.targets``.

        Examples:
            Update the evaluator with two results:

            .. code-block:: python

                my_evaluator.add({'ILSVRC2012_val_00000293': np.array([1.04243, ...]),
                'ILSVRC2012_val_00000294': np.array([-2.3677, ...])})
        """
        if not output_dict:
            print('Empty output_dict; will not process')
            return

        # Build a fresh dict; for a repeated image ID the newest prediction wins.
        merged = dict(self.outputs)
        merged.update(output_dict)
        self.outputs = merged

        for image_id, output in output_dict.items():
            target = self.targets[image_id]
            prec1 = top_k_accuracy_score(y_true=target,
                                         y_pred=np.array([output]),
                                         k=1)
            prec5 = top_k_accuracy_score(y_true=target,
                                         y_pred=np.array([output]),
                                         k=5)
            self.top1.update(prec1, 1)
            self.top5.update(prec5, 1)

        if not self.first_batch_processed:
            # Hash a shallow copy: adding the accuracy entries to output_dict itself
            # would leak two non-image keys into the caller's dictionary.
            hash_dict = dict(output_dict)
            hash_dict['Top 1 Accuracy'] = self.top1.avg
            hash_dict['Top 5 Accuracy'] = self.top5.avg
            self.batch_hash = calculate_batch_hash(hash_dict)
            self.first_batch_processed = True

示例#7

0

显示文件

文件： coco.py 项目： xperience-ai/sotabench-eval

    def add(self, detections: list):
        """
        Register a batch of detections with the evaluator.

        :param detections: (list) detections in the form consumed by the COCO.loadRes method of the
            pycocotools API, i.e. a list of dictionaries such as:

            {'image_id': 397133, 'bbox': [386.1628112792969, 69.48855590820312, 110.14895629882812, 278.2847595214844],
            'score': 0.999152421951294, 'category_id': 1}

        :return: None - extends self.detections and updates self.coco_evaluator; until a batch has
            been fingerprinted, also runs evaluate/accumulate and stores self.batch_hash as soon
            as a batch contains at least one non-empty 'bbox'

        Examples:
            Update the evaluator with one result:

            .. code-block:: python

                my_evaluator.add([{'image_id': 397133, 'bbox': [386.1628112792969, 69.48855590820312,
                110.14895629882812, 278.2847595214844], 'score': 0.999152421951294, 'category_id': 1}])
        """
        self.detections.extend(detections)
        self.coco_evaluator.update(detections)

        if self.first_batch_processed:
            return

        self.coco_evaluator.evaluate()
        self.coco_evaluator.accumulate()

        # we can only hash if we have predictions
        boxes = [detection["bbox"] for detection in detections]
        if not any(boxes):
            return

        cached = self.cache_values(
            annotations=detections,
            metrics=get_coco_metrics(self.coco_evaluator),
        )
        self.batch_hash = calculate_batch_hash(cached)
        self.first_batch_processed = True

示例#8

0

显示文件

文件： wikitext.py 项目： PiotrCzapla/sotabench-eval

    def add(self, log_probs, targets):
        """
        Updates the evaluator with new results

        :param log_probs: `np.ndarray` or `torch.tensor` with log probability of target tokens can be either:
            - a python float or a 0d tensor
                summed log probability of all `targets` tokens, or
            - a 2d tensor [bs x seq_len]
                log probabilities of each target token, the shape of `log_probs`, `targets` must match.
            - a 3d tensor [bs x seq_len x vocab_size]
                 distribution of log probabilities for each position in the sequence,
                 we will gather the probabilities of target tokens for you.
        :param targets: a `np.ndarray` or `torch.tensor`  with ids of ground truth tokens.
        :return: the current value of `self.results`
        :raises AssertionError: if `log_probs` is none of the accepted forms for the given `targets`

        Examples:
            Update the evaluator with a result for a sentence with 10 tokens:

            .. code-block:: python

                targets = np.array([[ 32, 582, 2731, 19, 1, 786,  5, 98693, 55362, 5 ]])
                log_probs = np.array([[ -9.8461,  -9.3343, -17.8042, -11.2006, -22.3345, -14.4665,  -2.0055,
                                      -14.2044, -14.7545,  -5.7888]])
                my_evaluator.add(log_probs, targets)
        """
        if isinstance(log_probs, float):
            log_probs = np.array([log_probs]) #  for sum to work
        elif len(log_probs.shape) == 0:
            # A 0d array/tensor is an already summed log probability. Give it one
            # dimension so that it is handled exactly like the python float above
            # (both .sum() and the [0] indexing used for the batch hash need it).
            log_probs = log_probs.reshape(1)
        elif log_probs.shape[:-1] == targets.shape:
            # A trailing vocabulary dimension is present: pick the target entries.
            log_probs, targets = _gather_probs(log_probs, targets)
        else:
            assert log_probs.shape == targets.shape, f"log_probs have to be ether gathered log probabilities of targets or all probabilites, received {log_probs.shape} {repr(log_probs)}"
        self._neglogloss += - float(log_probs.sum())
        self._data_set_size += int(np.prod(list(targets.shape)))

        if not self.first_batch_processed:
            # Only the first row of the first batch goes into the fingerprint.
            content = self.cache_values(
                probs=_to_numpy(log_probs)[0].reshape(-1),
                api_version=3)
            self.batch_hash = calculate_batch_hash(content)
            self.first_batch_processed = True
        return self.results