Example #1
    def benchmark(cls,
                  model,
                  input_transform=None,
                  target_transform=None,
                  model_output_transform=None,
                  send_data_to_device=None,
                  device: str = 'cuda',
                  data_root: str = './.data/vision/stl10',
                  num_workers: int = 4,
                  batch_size: int = 128,
                  num_gpu: int = 1,
                  paper_model_name: str = None,
                  paper_arxiv_id: str = None,
                  paper_pwc_id: str = None,
                  paper_results: dict = None,
                  pytorch_hub_url: str = None) -> BenchmarkResult:

        config = locals()
        model, device = send_model_to_device(model,
                                             device=device,
                                             num_gpu=num_gpu)
        model.eval()

        if not input_transform:
            input_transform = cls.input_transform

        if not send_data_to_device:
            send_data_to_device = cls.send_data_to_device

        test_dataset = cls.dataset(data_root,
                                   split='test',
                                   transform=input_transform,
                                   target_transform=target_transform,
                                   download=True)
        test_loader = DataLoader(test_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=num_workers,
                                 pin_memory=True)
        test_results, run_hash = evaluate_classification(
            model=model,
            test_loader=test_loader,
            model_output_transform=model_output_transform,
            send_data_to_device=send_data_to_device,
            device=device)

        print(' * Acc@1 {top1:.3f} Acc@5 {top5:.3f}'.format(
            top1=test_results['Top 1 Accuracy'],
            top5=test_results['Top 5 Accuracy']))

        return BenchmarkResult(task=cls.task,
                               config=config,
                               dataset=cls.dataset.__name__,
                               results=test_results,
                               pytorch_hub_id=pytorch_hub_url,
                               model=paper_model_name,
                               arxiv_id=paper_arxiv_id,
                               pwc_id=paper_pwc_id,
                               paper_results=paper_results,
                               run_hash=run_hash)
Example #2
    def save(self, **kwargs):
        """
        Calculate results and then put them into a BenchmarkResult object.

        On the sotabench.com server, this will serialise the results to a JSON file and record them
        on the platform.

        :return: BenchmarkResult object with results and metadata
        """

        # recalculate to ensure no mistakes made during batch-by-batch metric calculation
        self.get_results()

        return BenchmarkResult(
            task=self.task,
            config={},
            results=self.results,
            speed_mem_metrics=self.speed_mem_metrics,
            model=self.model_name,
            model_description=self.model_description,
            arxiv_id=self.paper_arxiv_id,
            pwc_id=self.paper_pwc_id,
            paper_results=self.paper_results,
            run_hash=self.batch_hash,
            **kwargs,
        )
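
As a usage illustration for the save() pattern above: predictions are first accumulated with the evaluator's add() method, and save() then recomputes the metrics and packages them. The sketch below assumes the sotabencheval ImageNetEvaluator; the model, the data loader and the exact output post-processing are placeholders.

    from sotabencheval.image_classification import ImageNetEvaluator

    evaluator = ImageNetEvaluator(model_name='ResNeXt-101-32x8d',
                                  paper_arxiv_id='1611.05431')

    evaluator.reset_time()

    for images, image_ids in data_loader:   # placeholder data loader
        output = model(images)              # placeholder model forward pass
        # add() takes a dict mapping each image id to its class probabilities
        evaluator.add(dict(zip(image_ids, output.cpu().numpy())))

    evaluator.save()  # recomputes the metrics and returns a BenchmarkResult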
Example #3
    def benchmark(cls,
                  model,
                  input_transform=None,
                  target_transform=None,
                  model_output_transform=None,
                  device: str = 'cuda',
                  data_root: str = './.data/vision/cifar10',
                  num_workers: int = 4,
                  batch_size: int = 8,
                  num_gpu: int = 1,
                  paper_model_name: str = None,
                  paper_arxiv_id: str = None,
                  paper_pwc_id: str = None,
                  paper_results: dict = None,
                  pytorch_hub_url: str = None) -> BenchmarkResult:

        config = locals()
        model, device = send_model_to_device(model,
                                             device=device,
                                             num_gpu=num_gpu)

        if hasattr(model, 'eval'):
            model.eval()

        if not input_transform:
            input_transform = cls.input_transform

        test_dataset = cls.dataset(data_root,
                                   train=False,
                                   transform=input_transform,
                                   target_transform=target_transform,
                                   download=True)
        test_loader = DataLoader(test_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=num_workers,
                                 pin_memory=True)
        test_results = evaluate_image_generation_gan(
            model=model,
            model_output_transform=model_output_transform,
            test_loader=test_loader,
            device=device)

        print(test_results)

        return BenchmarkResult(task=cls.task,
                               config=config,
                               dataset=cls.dataset.__name__,
                               results=test_results,
                               pytorch_hub_id=pytorch_hub_url,
                               model=paper_model_name,
                               arxiv_id=paper_arxiv_id,
                               pwc_id=paper_pwc_id,
                               paper_results=paper_results)
Example #4
    def save(self, **kwargs):
        """
        Calculate results and then put them into a BenchmarkResult object.

        On the sotabench.com server, this will serialise the results to sotabench_results.json and record
        them on the platform.

        Users should save once all predictions are added, for instance:

        .. code-block:: python

            from sotabencheval.question_answering import SQuADEvaluator, SQuADVersion

            evaluator = SQuADEvaluator(model_name='SpanBERT', paper_arxiv_id='1907.10529',
                version=SQuADVersion.V20)

            # processing/setup logic here

            evaluator.reset_time()

            for i, (input, target) in enumerate(data_loader):
                ...
                output = model(input)
                # potentially formatting of the output here
                evaluator.add(output)

            evaluator.save()

        Once all the predictions have been added to the evaluator, calling .save() evaluates them and, if
        running on the server, serialises the results and records them on the platform.

        :return: BenchmarkResult object with results and metadata
        """
        # recalculate to ensure no mistakes made during batch-by-batch metric calculation
        self.get_results()

        return BenchmarkResult(
            task=self.task,
            config={},
            results=self.results,
            speed_mem_metrics=self.speed_mem_metrics,
            model=self.model_name,
            model_description=self.model_description,
            arxiv_id=self.paper_arxiv_id,
            pwc_id=self.paper_pwc_id,
            paper_results=self.paper_results,
            run_hash=self.batch_hash,
            **kwargs,
        )
Example #5
    def save(self):
        """
        Calculate results and then put them into a BenchmarkResult object.

        On the sotabench.com server, this will serialise the results to a JSON file and record them
        on the platform.

        :return: BenchmarkResult object with results and metadata
        """
        # recalculate to ensure no mistakes made during batch-by-batch metric calculation
        self.get_results()

        # If this is the first time the model is run, then we record evaluation time information

        if not self.cached_results:
            unique_image_ids = set([d["image_id"] for d in self.detections])
            exec_speed = time.time() - self.init_time
            self.speed_mem_metrics["Tasks / Evaluation Time"] = (
                len(unique_image_ids) / exec_speed
            )
            self.speed_mem_metrics["Tasks"] = len(unique_image_ids)
            self.speed_mem_metrics["Evaluation Time"] = exec_speed
        else:
            self.speed_mem_metrics["Tasks / Evaluation Time"] = None
            self.speed_mem_metrics["Tasks"] = None
            self.speed_mem_metrics["Evaluation Time"] = None

        return BenchmarkResult(
            task=self.task,
            config={},
            dataset="COCO minival",
            results=self.results,
            speed_mem_metrics=self.speed_mem_metrics,
            model=self.model_name,
            model_description=self.model_description,
            arxiv_id=self.paper_arxiv_id,
            pwc_id=self.paper_pwc_id,
            paper_results=self.paper_results,
            run_hash=self.batch_hash,
        )
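
As a usage illustration: the detections referenced above are accumulated in COCO results format (one dict per detection with image_id, category_id, bbox and score) before save() is called. A minimal sketch, assuming the sotabencheval COCOEvaluator; the model, the data loader and the shape of its predictions are placeholders.

    from sotabencheval.object_detection import COCOEvaluator

    evaluator = COCOEvaluator(model_name='Mask R-CNN (ResNet-50-FPN)',
                              paper_arxiv_id='1703.06870')

    evaluator.reset_time()

    for image, image_id in data_loader:       # placeholder data loader
        boxes, labels, scores = model(image)  # placeholder model output
        # one dict per detection, using the same keys as self.detections above
        evaluator.add([
            {'image_id': image_id,
             'category_id': int(label),
             'bbox': [float(v) for v in box],  # [x, y, width, height]
             'score': float(score)}
            for box, label, score in zip(boxes, labels, scores)
        ])

    evaluator.save()  # computes box AP and the evaluation-time metrics above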
Example #6
    def benchmark(cls, model, input_transform=None, target_transform=None, transforms=None,
                  model_output_transform=None, collate_fn=None, send_data_to_device=None,
                  device: str = 'cuda', data_root: str = './.data/vision/camvid', num_workers: int = 4,
                  batch_size: int = 32, num_gpu: int = 1, paper_model_name: str = None,
                  paper_arxiv_id: str = None, paper_pwc_id: str = None, paper_results: dict = None,
                  pytorch_hub_url: str = None) -> BenchmarkResult:

        config = locals()
        model, device = send_model_to_device(model, device=device, num_gpu=num_gpu)
        model.eval()

        # fall back to the class default transforms only if none were supplied
        if not (input_transform or target_transform or transforms):
            transforms = cls.transforms

        if not model_output_transform:
            model_output_transform = cls.model_output_transform

        if not send_data_to_device:
            send_data_to_device = cls.send_data_to_device

        if not collate_fn:
            collate_fn = cls.collate_fn

        test_dataset = cls.dataset(root=data_root, split='val', transform=input_transform,
                                   target_transform=target_transform, transforms=transforms)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                                 num_workers=num_workers, pin_memory=True, collate_fn=collate_fn)
        test_loader.no_classes = 12  # Number of classes for CamVid
        test_results = evaluate_segmentation(model=model, test_loader=test_loader,
                                             model_output_transform=model_output_transform,
                                             send_data_to_device=send_data_to_device, device=device)

        print(test_results)

        return BenchmarkResult(task=cls.task, config=config, dataset=cls.dataset.__name__,
                               results=test_results, pytorch_hub_id=pytorch_hub_url,
                               model=paper_model_name, arxiv_id=paper_arxiv_id,
                               pwc_id=paper_pwc_id, paper_results=paper_results)
Example #7
    def save(self):
        """
        Calculate results and then put them into a BenchmarkResult object.

        On the sotabench.com server, this will serialise the results to a JSON file and record them
        on the platform.

        :return: BenchmarkResult object with results and metadata
        """

        # recalculate to ensure no mistakes made during batch-by-batch metric calculation
        self.get_results()

        # If this is the first time the model is run, then we record evaluation time information

        if not self.cached_results:
            self.speed_mem_metrics['Tasks / Evaluation Time'] = None
            self.speed_mem_metrics['Tasks'] = None
            self.speed_mem_metrics['Evaluation Time'] = (time.time() - self.init_time)
        else:
            self.speed_mem_metrics['Tasks / Evaluation Time'] = None
            self.speed_mem_metrics['Tasks'] = None
            self.speed_mem_metrics['Evaluation Time'] = None

        return BenchmarkResult(
            task=self.task,
            config={},
            dataset='ADE20K val',
            results=self.results,
            speed_mem_metrics=self.speed_mem_metrics,
            model=self.model_name,
            model_description=self.model_description,
            arxiv_id=self.paper_arxiv_id,
            pwc_id=self.paper_pwc_id,
            paper_results=self.paper_results,
            run_hash=self.batch_hash,
        )
Example #8
    def benchmark(cls,
                  model,
                  model_description=None,
                  input_transform=None,
                  target_transform=None,
                  transforms=None,
                  model_output_transform=None,
                  collate_fn=None,
                  send_data_to_device=None,
                  device: str = "cuda",
                  data_root: str = "./.data/vision/pascalcontext",
                  num_workers: int = 4,
                  batch_size: int = 32,
                  num_gpu: int = 1,
                  paper_model_name: str = None,
                  paper_arxiv_id: str = None,
                  paper_pwc_id: str = None,
                  paper_results: dict = None,
                  pytorch_hub_url: str = None,
                  force: bool = False) -> BenchmarkResult:

        config = locals()
        model, device = send_model_to_device(model,
                                             device=device,
                                             num_gpu=num_gpu)
        model.eval()

        # fall back to the class default transforms only if none were supplied
        if not (input_transform or target_transform or transforms):
            transforms = cls.transforms

        if not model_output_transform:
            model_output_transform = cls.model_output_transform

        if not send_data_to_device:
            send_data_to_device = cls.send_data_to_device

        if not collate_fn:
            collate_fn = cls.collate_fn

        test_dataset = cls.dataset(
            root=data_root,
            split="val",
            transform=input_transform,
            target_transform=target_transform,
            transforms=transforms,
        )
        test_loader = DataLoader(
            test_dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers,
            pin_memory=True,
            collate_fn=collate_fn,
        )
        test_loader.no_classes = 59  # Number of classes for PASCALContext
        test_results, speed_mem_metrics, run_hash = evaluate_segmentation(
            model=model,
            test_loader=test_loader,
            model_output_transform=model_output_transform,
            send_data_to_device=send_data_to_device,
            device=device,
            force=force)

        print(test_results)

        return BenchmarkResult(
            task=cls.task,
            config=config,
            dataset=cls.dataset.__name__,
            results=test_results,
            speed_mem_metrics=speed_mem_metrics,
            pytorch_hub_id=pytorch_hub_url,
            model=paper_model_name,
            model_description=model_description,
            arxiv_id=paper_arxiv_id,
            pwc_id=paper_pwc_id,
            paper_results=paper_results,
        )
Example #9
    def benchmark(cls,
                  model,
                  model_description=None,
                  input_transform=None,
                  target_transform=None,
                  model_output_transform=None,
                  send_data_to_device=None,
                  device: str = "cuda",
                  data_root: str = "./.data/vision/mnist",
                  num_workers: int = 4,
                  batch_size: int = 128,
                  num_gpu: int = 1,
                  paper_model_name: str = None,
                  paper_arxiv_id: str = None,
                  paper_pwc_id: str = None,
                  paper_results: dict = None,
                  pytorch_hub_url: str = None,
                  force: bool = False) -> BenchmarkResult:

        config = locals()
        model, device = send_model_to_device(model,
                                             device=device,
                                             num_gpu=num_gpu)
        model.eval()

        if not input_transform:
            input_transform = cls.input_transform

        if not send_data_to_device:
            send_data_to_device = cls.send_data_to_device

        test_dataset = cls.dataset(
            data_root,
            train=False,
            transform=input_transform,
            target_transform=target_transform,
            download=True,
        )
        test_loader = DataLoader(
            test_dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers,
            pin_memory=True,
        )
        test_results, speed_mem_metrics, run_hash = evaluate_classification(
            model=model,
            test_loader=test_loader,
            model_output_transform=model_output_transform,
            send_data_to_device=send_data_to_device,
            device=device,
            force=force)

        print(" * Acc@1 {top1:.3f} Acc@5 {top5:.3f}".format(
            top1=test_results["Top 1 Accuracy"],
            top5=test_results["Top 5 Accuracy"],
        ))

        return BenchmarkResult(
            task=cls.task,
            config=config,
            dataset=cls.dataset.__name__,
            results=test_results,
            speed_mem_metrics=speed_mem_metrics,
            pytorch_hub_id=pytorch_hub_url,
            model=paper_model_name,
            model_description=model_description,
            arxiv_id=paper_arxiv_id,
            pwc_id=paper_pwc_id,
            paper_results=paper_results,
            run_hash=run_hash,
        )
Example #10
    def benchmark(
        cls,
        model,
        model_description=None,
        encoder=None,
        context_length: int = 1024,
        model_output_transform=None,
        device: str = "cuda",
        data_root: str = "./.data/nlp/wikitext-103",
        num_workers: int = 4,
        batch_size: int = 8,
        num_gpu: int = 1,
        paper_model_name: str = None,
        paper_arxiv_id: str = None,
        paper_pwc_id: str = None,
        paper_results: dict = None,
        pytorch_hub_url: str = None,
        force: bool = False
    ) -> BenchmarkResult:

        config = locals()
        model, device = send_model_to_device(
            model, device=device, num_gpu=num_gpu
        )
        model.eval()

        if not encoder:
            raise ValueError(
                "Please provide an encoder to evaluate on this benchmark!"
            )

        # Test Split

        test_dataset = cls.dataset(
            data_root,
            split="test",
            context_length=context_length,
            encoder=encoder,
            download=True,
        )
        test_loader = DataLoader(
            test_dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers,
            pin_memory=True,
        )
        test_results, run_hash = evaluate_language_model(
            model=model,
            model_output_transform=model_output_transform,
            send_data_to_device=cls.send_data_to_device,
            test_loader=test_loader,
            device=device,
            force=force
        )

        # Valid Split

        valid_dataset = cls.dataset(
            data_root,
            split="valid",
            context_length=context_length,
            encoder=encoder,
            download=True,
        )
        valid_loader = DataLoader(
            valid_dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers,
            pin_memory=True,
        )
        valid_results, valid_run_hash = evaluate_language_model(
            model=model,
            model_output_transform=model_output_transform,
            send_data_to_device=cls.send_data_to_device,
            test_loader=valid_loader,
            device=device,
            force=force
        )

        # Get final results
        if "Test perplexity" in test_results:
            final_results = valid_results  # hashed
        else:
            final_results = {
                "Test perplexity": test_results["Perplexity"],
                "Validation perplexity": valid_results["Perplexity"],
            }

        print(final_results)

        return BenchmarkResult(
            task=cls.task,
            config=config,
            dataset=cls.dataset.__name__,
            results=final_results,
            pytorch_hub_id=pytorch_hub_url,
            model=paper_model_name,
            model_description=model_description,
            arxiv_id=paper_arxiv_id,
            pwc_id=paper_pwc_id,
            paper_results=paper_results,
            run_hash=run_hash,
        )
Example #11
    def benchmark(
        cls,
        model,
        model_description=None,
        dataset_year="2007",
        input_transform=None,
        target_transform=None,
        transforms=None,
        model_output_transform=None,
        collate_fn=None,
        send_data_to_device=None,
        device: str = "cuda",
        data_root: str = "./.data/vision/voc",
        num_workers: int = 4,
        batch_size: int = 32,
        num_gpu: int = 1,
        paper_model_name: str = None,
        paper_arxiv_id: str = None,
        paper_pwc_id: str = None,
        paper_results: dict = None,
        pytorch_hub_url: str = None,
    ) -> BenchmarkResult:

        config = locals()
        model, device = send_model_to_device(
            model, device=device, num_gpu=num_gpu
        )
        model.eval()

        # fall back to the class default transforms only if none were supplied
        if not (input_transform or target_transform or transforms):
            input_transform = cls.input_transform
            target_transform = cls.target_transform

        if not model_output_transform:
            model_output_transform = cls.model_output_transform

        if not send_data_to_device:
            send_data_to_device = cls.send_data_to_device

        if not collate_fn:
            collate_fn = cls.collate_fn

        test_dataset = cls.dataset(
            root=data_root,
            image_set="val",
            year=dataset_year,
            transform=input_transform,
            target_transform=target_transform,
            transforms=transforms,
            download=True,
        )
        test_loader = DataLoader(
            test_dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers,
            pin_memory=True,
            collate_fn=collate_fn,
        )
        test_loader.no_classes = 21  # Number of classes for PASCALVoc
        test_results = evaluate_detection_voc(
            model=model,
            test_loader=test_loader,
            model_output_transform=model_output_transform,
            send_data_to_device=send_data_to_device,
            device=device,
        )
        print(test_results)

        return BenchmarkResult(
            task=cls.task,
            config=config,
            dataset=cls.dataset.__name__,
            results=test_results,
            pytorch_hub_id=pytorch_hub_url,
            model=paper_model_name,
            model_description=model_description,
            arxiv_id=paper_arxiv_id,
            pwc_id=paper_pwc_id,
            paper_results=paper_results,
        )
Example #12
    def benchmark(cls,
                  model,
                  dataset_year='2017',
                  input_transform=None,
                  target_transform=None,
                  transforms=None,
                  model_output_transform=None,
                  collate_fn=None,
                  send_data_to_device=None,
                  device: str = 'cuda',
                  data_root: str = './.data/vision/coco',
                  num_workers: int = 4,
                  batch_size: int = 1,
                  num_gpu: int = 1,
                  paper_model_name: str = None,
                  paper_arxiv_id: str = None,
                  paper_pwc_id: str = None,
                  paper_results: dict = None,
                  pytorch_hub_url: str = None) -> BenchmarkResult:

        config = locals()
        model, device = send_model_to_device(model,
                                             device=device,
                                             num_gpu=num_gpu)
        model.eval()

        # fall back to the class default transforms only if none were supplied
        if not (input_transform or target_transform or transforms):
            transforms = cls.transforms

        if not model_output_transform:
            model_output_transform = cls.model_output_transform

        if not send_data_to_device:
            send_data_to_device = cls.send_data_to_device

        if not collate_fn:
            collate_fn = cls.collate_fn

        test_dataset = cls.dataset(
            root=os.path.join(data_root, 'val%s' % dataset_year),
            annFile=os.path.join(
                data_root, 'annotations/instances_val%s.json' % dataset_year),
            transform=input_transform,
            target_transform=target_transform,
            transforms=transforms,
            download=True)
        test_loader = DataLoader(test_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=num_workers,
                                 pin_memory=True,
                                 collate_fn=collate_fn)
        test_loader.no_classes = 91  # Number of classes for COCO Detection
        test_results = evaluate_detection_coco(
            model=model,
            test_loader=test_loader,
            model_output_transform=model_output_transform,
            send_data_to_device=send_data_to_device,
            device=device)

        print(test_results)

        return BenchmarkResult(task=cls.task,
                               config=config,
                               dataset=cls.dataset.__name__,
                               results=test_results,
                               pytorch_hub_id=pytorch_hub_url,
                               model=paper_model_name,
                               arxiv_id=paper_arxiv_id,
                               pwc_id=paper_pwc_id,
                               paper_results=paper_results)
Example #13
    def benchmark(cls,
                  model,
                  model_description=None,
                  input_transform=None,
                  target_transform=None,
                  transforms=None,
                  model_output_transform=None,
                  collate_fn=None,
                  send_data_to_device=None,
                  dataset_year="2017",
                  device: str = "cuda",
                  data_root: str = "./.data/vision/coco",
                  num_workers: int = 4,
                  batch_size: int = 1,
                  pin_memory: bool = True,
                  num_gpu: int = 1,
                  paper_model_name: str = None,
                  paper_arxiv_id: str = None,
                  paper_pwc_id: str = None,
                  paper_results: dict = None,
                  pytorch_hub_url: str = None,
                  force: bool = False) -> BenchmarkResult:
        """Benchmarking function.

        Args:
            model: a PyTorch module, (e.g. a ``nn.Module`` object), that takes
                in COCO inputs and outputs COCO predictions.
            model_description (str, optional): Optional model description.
            input_transform (transforms.Compose, optional): Composition of the
                transforms applied to the input images, e.g. resizing
                (``transforms.Resize``), center cropping, conversion to tensor
                and normalization.
            target_transform (torchvision.transforms.Compose, optional):
                Composing any transforms used to transform the target.
            transforms (torchbench.object_detection.transforms.Compose, optional):
                Applies a joint transform to the input and the target; see the
                torchbench.object_detection.transforms file for more information.
            model_output_transform (callable, optional): An optional function
                that takes in model output (after being passed through your
                ``model`` forward pass) and transforms it. Afterwards, the
                output will be passed into an evaluation function.
            collate_fn (callable, optional): How the dataset is collated - an
                optional callable passed into the DataLoader.
            send_data_to_device (callable, optional): An optional function
                specifying how the model is sent to a device; see
                ``torchbench.utils.send_model_to_device`` for the default
                treatment.
            dataset_year (str, optional): The dataset year for COCO to use; the
                default (2017) creates the 'minival' validation set.
            device (str): Default is 'cuda' - this is the device that the model
                is sent to in the default treatment.
            data_root (str): The location of the COCO dataset - change this
                parameter when evaluating locally if your COCO data is
                located in a different folder (or alternatively if you want to
                download to an alternative location).
            num_workers (int): The number of workers to use for the DataLoader.
            batch_size (int): The batch size to use for evaluation; if you get
                memory errors, reduce this (halving it each time) until your
                model fits onto the GPU.
            num_gpu (int): Number of GPUs - note that sotabench.com workers
                only support 1 GPU for now.
            paper_model_name (str, optional): The name of the model from the
                paper - if you want to link your build to a machine learning
                paper. See the COCO benchmark page for model names,
                https://www.sotabench.com/benchmark/coco-minival, e.g. on the paper
                leaderboard tab.
            paper_arxiv_id (str, optional): Optional linking to ArXiv if you
                want to link to papers on the leaderboard; put in the
                corresponding paper's ArXiv ID, e.g. '1611.05431'.
            paper_pwc_id (str, optional): Optional linking to Papers With Code;
                put in the corresponding papers with code URL slug, e.g.
                'u-gat-it-unsupervised-generative-attentional'
            paper_results (dict, optional): If the paper you are reproducing
                does not have model results on sotabench.com, you can specify
                the paper results yourself through this argument, where keys
                are metric names, values are metric values. e.g::

                    {'box AP': 0.349, 'AP50': 0.592, ...}.

                Ensure that the metric names match those on the sotabench
                leaderboard - for COCO it should be 'box AP', 'AP50',
                'AP75', 'APS', 'APM', 'APL'
            pytorch_hub_url (str, optional): Optional linking to the PyTorch Hub
                URL if your model is linked there; e.g.:
                'nvidia_deeplearningexamples_waveglow'.
        """

        config = locals()
        model, device = send_model_to_device(model,
                                             device=device,
                                             num_gpu=num_gpu)
        model.eval()

        # fall back to the class default transforms only if none were supplied
        if not (input_transform or target_transform or transforms):
            transforms = cls.transforms

        if not model_output_transform:
            model_output_transform = cls.model_output_transform

        if not send_data_to_device:
            send_data_to_device = cls.send_data_to_device

        if not collate_fn:
            collate_fn = cls.collate_fn

        test_dataset = cls.dataset(
            root=os.path.join(data_root, "val%s" % dataset_year),
            annFile=os.path.join(
                data_root, "annotations/instances_val%s.json" % dataset_year),
            transform=input_transform,
            target_transform=target_transform,
            transforms=transforms,
            download=True,
        )
        test_loader = DataLoader(
            test_dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers,
            pin_memory=pin_memory,
            collate_fn=collate_fn,
        )
        test_loader.no_classes = 91  # Number of classes for COCO Detection
        test_results, speed_mem_metrics, run_hash = evaluate_detection_coco(
            model=model,
            test_loader=test_loader,
            model_output_transform=model_output_transform,
            send_data_to_device=send_data_to_device,
            device=device,
            force=force)

        print(test_results)

        return BenchmarkResult(
            task=cls.task,
            config=config,
            dataset='COCO minival',
            results=test_results,
            speed_mem_metrics=speed_mem_metrics,
            pytorch_hub_id=pytorch_hub_url,
            model=paper_model_name,
            model_description=model_description,
            arxiv_id=paper_arxiv_id,
            pwc_id=paper_pwc_id,
            paper_results=paper_results,
            run_hash=run_hash,
        )
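
For context, a submission script would call the benchmark above roughly as follows. This is a hedged sketch: it assumes the torchbench object_detection COCO entry point and torchvision's maskrcnn_resnet50_fpn, and it leaves the transform, collate and device-handling arguments at their class defaults, which may or may not suit a given model.

    from torchbench.object_detection import COCO
    from torchvision.models.detection import maskrcnn_resnet50_fpn

    model = maskrcnn_resnet50_fpn(pretrained=True)

    # runs the evaluation loop above and reports the detection results
    COCO.benchmark(
        model=model,
        paper_model_name='Mask R-CNN (ResNet-50-FPN)',
        paper_arxiv_id='1703.06870',
        batch_size=1,
        num_gpu=1,
    )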
Example #14
    def benchmark(cls,
                  model,
                  model_description=None,
                  input_transform=None,
                  target_transform=None,
                  model_output_transform=None,
                  send_data_to_device=None,
                  device: str = "cuda",
                  data_root: str = "./.data/vision/imagenet",
                  num_workers: int = 4,
                  batch_size: int = 128,
                  pin_memory: bool = False,
                  num_gpu: int = 1,
                  paper_model_name: str = None,
                  paper_arxiv_id: str = None,
                  paper_pwc_id: str = None,
                  paper_results: dict = None,
                  pytorch_hub_url: str = None,
                  force: bool = False) -> BenchmarkResult:
        """Benchmarking function.

        Args:
            model: a PyTorch module, (e.g. a ``nn.Module`` object), that takes
                in ImageNet inputs and outputs ImageNet predictions.
            model_description (str, optional): Optional model description.
            input_transform (transforms.Compose, optional): Composition of the
                transforms applied to the input images, e.g. resizing
                (``transforms.Resize``), center cropping, conversion to tensor
                and normalization.
            target_transform (torchvision.transforms.Compose, optional):
                Composing any transforms used to transform the target. This is
                usually not used for ImageNet.
            model_output_transform (callable, optional): An optional function
                that takes in model output (after being passed through your
                ``model`` forward pass) and transforms it. Afterwards, the
                output will be passed into an evaluation function.
            send_data_to_device (callable, optional): An optional function
                specifying how the model is sent to a device; see
                ``torchbench.utils.send_model_to_device`` for the default
                treatment.
            device (str): Default is 'cuda' - this is the device that the model
                is sent to in the default treatment.
            data_root (str): The location of the ImageNet dataset - change this
                parameter when evaluating locally if your ImageNet data is
                located in a different folder (or alternatively if you want to
                download to an alternative location).
            num_workers (int): The number of workers to use for the DataLoader.
            batch_size (int): The batch size to use for evaluation; if you get
                memory errors, reduce this (halving it each time) until your
                model fits onto the GPU.
            num_gpu (int): Number of GPUs - note that sotabench.com workers
                only support 1 GPU for now.
            paper_model_name (str, optional): The name of the model from the
                paper - if you want to link your build to a machine learning
                paper. See the ImageNet benchmark page for model names,
                https://www.sotabench.com/benchmark/imagenet, e.g. on the paper
                leaderboard tab.
            paper_arxiv_id (str, optional): Optional linking to ArXiv if you
                want to link to papers on the leaderboard; put in the
                corresponding paper's ArXiv ID, e.g. '1611.05431'.
            paper_pwc_id (str, optional): Optional linking to Papers With Code;
                put in the corresponding papers with code URL slug, e.g.
                'u-gat-it-unsupervised-generative-attentional'
            paper_results (dict, optional): If the paper you are reproducing
                does not have model results on sotabench.com, you can specify
                the paper results yourself through this argument, where keys
                are metric names, values are metric values. e.g::

                    {'Top 1 Accuracy': 0.543, 'Top 5 Accuracy': 0.654}.

                Ensure that the metric names match those on the sotabench
                leaderboard - for ImageNet it should be 'Top 1 Accuracy' and
                'Top 5 Accuracy'.
            pytorch_hub_url (str, optional): Optional linking to the PyTorch Hub
                URL if your model is linked there; e.g.:
                'nvidia_deeplearningexamples_waveglow'.
        """

        print("Benchmarking on ImageNet...")

        config = locals()
        model, device = send_model_to_device(model,
                                             device=device,
                                             num_gpu=num_gpu)
        model.eval()

        if not input_transform:
            input_transform = cls.input_transform

        if not send_data_to_device:
            send_data_to_device = cls.send_data_to_device

        # try to download the dataset; fall back to a local copy if that fails
        try:
            test_dataset = cls.dataset(
                data_root,
                split="val",
                transform=input_transform,
                target_transform=target_transform,
                download=True,
            )
        except Exception:
            test_dataset = cls.dataset(
                data_root,
                split="val",
                transform=input_transform,
                target_transform=target_transform,
                download=False,
            )

        test_loader = DataLoader(
            test_dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers,
            pin_memory=pin_memory,
        )
        test_results, speed_mem_metrics, run_hash = evaluate_classification(
            model=model,
            test_loader=test_loader,
            model_output_transform=model_output_transform,
            send_data_to_device=send_data_to_device,
            device=device,
            force=force)

        print(" * Acc@1 {top1:.3f} Acc@5 {top5:.3f}".format(
            top1=test_results["Top 1 Accuracy"],
            top5=test_results["Top 5 Accuracy"],
        ))

        return BenchmarkResult(
            task=cls.task,
            config=config,
            dataset=cls.dataset.__name__,
            results=test_results,
            speed_mem_metrics=speed_mem_metrics,
            pytorch_hub_id=pytorch_hub_url,
            model=paper_model_name,
            model_description=model_description,
            arxiv_id=paper_arxiv_id,
            pwc_id=paper_pwc_id,
            paper_results=paper_results,
            run_hash=run_hash,
        )
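
For context, the corresponding submission script would look roughly like the following. A minimal sketch, assuming the torchbench image_classification ImageNet entry point and torchvision's resnext101_32x8d; the transform shown is the standard ImageNet evaluation preprocessing.

    from torchbench.image_classification import ImageNet
    from torchvision.models.resnet import resnext101_32x8d
    import torchvision.transforms as transforms

    # standard ImageNet evaluation preprocessing
    input_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    # runs the evaluation loop above and reports Top 1 / Top 5 Accuracy
    ImageNet.benchmark(
        model=resnext101_32x8d(pretrained=True),
        paper_model_name='ResNeXt-101-32x8d',
        paper_arxiv_id='1611.05431',
        input_transform=input_transform,
        batch_size=256,
        num_gpu=1,
    )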