def benchmark(cls, model, input_transform=None, target_transform=None,
              model_output_transform=None, send_data_to_device=None,
              device: str = 'cuda', data_root: str = './.data/vision/stl10',
              num_workers: int = 4, batch_size: int = 128, num_gpu: int = 1,
              paper_model_name: str = None, paper_arxiv_id: str = None,
              paper_pwc_id: str = None, paper_results: dict = None,
              pytorch_hub_url: str = None) -> BenchmarkResult:

    config = locals()
    model, device = send_model_to_device(model, device=device, num_gpu=num_gpu)
    model.eval()

    if not input_transform:
        input_transform = cls.input_transform

    if not send_data_to_device:
        send_data_to_device = cls.send_data_to_device

    test_dataset = cls.dataset(data_root, split='test', transform=input_transform,
                               target_transform=target_transform, download=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                             num_workers=num_workers, pin_memory=True)
    test_results, run_hash = evaluate_classification(
        model=model,
        test_loader=test_loader,
        model_output_transform=model_output_transform,
        send_data_to_device=send_data_to_device,
        device=device)

    print(' * Acc@1 {top1:.3f} Acc@5 {top5:.3f}'.format(
        top1=test_results['Top 1 Accuracy'],
        top5=test_results['Top 5 Accuracy']))

    return BenchmarkResult(task=cls.task, config=config, dataset=cls.dataset.__name__,
                           results=test_results, pytorch_hub_id=pytorch_hub_url,
                           model=paper_model_name, arxiv_id=paper_arxiv_id,
                           pwc_id=paper_pwc_id, paper_results=paper_results,
                           run_hash=run_hash)
def save(self, **kwargs):
    """
    Calculate results and then put into a BenchmarkResult object

    On the sotabench.com server, this will produce a JSON file serialisation and
    results will be recorded on the platform.

    :return: BenchmarkResult object with results and metadata
    """
    # recalculate to ensure no mistakes made during batch-by-batch metric calculation
    self.get_results()

    return BenchmarkResult(
        task=self.task,
        config={},
        results=self.results,
        speed_mem_metrics=self.speed_mem_metrics,
        model=self.model_name,
        model_description=self.model_description,
        arxiv_id=self.paper_arxiv_id,
        pwc_id=self.paper_pwc_id,
        paper_results=self.paper_results,
        run_hash=self.batch_hash,
        **kwargs,
    )
def benchmark(cls, model, input_transform=None, target_transform=None,
              model_output_transform=None, device: str = 'cuda',
              data_root: str = './.data/vision/cifar10', num_workers: int = 4,
              batch_size: int = 8, num_gpu: int = 1, paper_model_name: str = None,
              paper_arxiv_id: str = None, paper_pwc_id: str = None,
              paper_results: dict = None,
              pytorch_hub_url: str = None) -> BenchmarkResult:

    config = locals()
    model, device = send_model_to_device(model, device=device, num_gpu=num_gpu)

    if hasattr(model, 'eval'):
        model.eval()

    if not input_transform:
        input_transform = cls.input_transform

    test_dataset = cls.dataset(data_root, train=False, transform=input_transform,
                               target_transform=target_transform, download=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                             num_workers=num_workers, pin_memory=True)
    test_results = evaluate_image_generation_gan(
        model=model,
        model_output_transform=model_output_transform,
        test_loader=test_loader,
        device=device)

    print(test_results)

    return BenchmarkResult(task=cls.task, config=config, dataset=cls.dataset.__name__,
                           results=test_results, pytorch_hub_id=pytorch_hub_url,
                           model=paper_model_name, arxiv_id=paper_arxiv_id,
                           pwc_id=paper_pwc_id, paper_results=paper_results)
def save(self, **kwargs):
    """
    Calculate results and then put into a BenchmarkResult object

    On the sotabench.com server, this will produce a JSON file serialisation in
    sotabench_results.json and results will be recorded on the platform.

    Users should save once all predictions are added, for instance:

    .. code-block:: python

        from sotabencheval.question_answering import SQuADEvaluator, SQuADVersion

        evaluator = SQuADEvaluator(model_name='SpanBERT',
                                   paper_arxiv_id='1907.10529',
                                   version=SQuADVersion.V20)

        # processing/setup logic here

        evaluator.reset_time()

        for i, (input, target) in enumerate(data_loader):
            ...
            output = model(input)
            # potentially formatting of the output here
            evaluator.add(output)

        evaluator.save()

    Here, once we have added all the predictions to the evaluator, we call
    ``.save()`` to run the evaluation and, if on the server, serialise the
    results and record them on the platform.

    :return: BenchmarkResult object with results and metadata
    """
    # recalculate to ensure no mistakes made during batch-by-batch metric calculation
    self.get_results()

    return BenchmarkResult(
        task=self.task,
        config={},
        results=self.results,
        speed_mem_metrics=self.speed_mem_metrics,
        model=self.model_name,
        model_description=self.model_description,
        arxiv_id=self.paper_arxiv_id,
        pwc_id=self.paper_pwc_id,
        paper_results=self.paper_results,
        run_hash=self.batch_hash,
        **kwargs,
    )
def save(self):
    """
    Calculate results and then put into a BenchmarkResult object

    On the sotabench.com server, this will produce a JSON file serialisation and
    results will be recorded on the platform.

    :return: BenchmarkResult object with results and metadata
    """
    # recalculate to ensure no mistakes made during batch-by-batch metric calculation
    self.get_results()

    # If this is the first time the model is run, then we record evaluation time
    # information
    if not self.cached_results:
        unique_image_ids = set([d["image_id"] for d in self.detections])
        exec_speed = time.time() - self.init_time
        self.speed_mem_metrics["Tasks / Evaluation Time"] = (
            len(unique_image_ids) / exec_speed
        )
        self.speed_mem_metrics["Tasks"] = len(unique_image_ids)
        self.speed_mem_metrics["Evaluation Time"] = exec_speed
    else:
        self.speed_mem_metrics["Tasks / Evaluation Time"] = None
        self.speed_mem_metrics["Tasks"] = None
        self.speed_mem_metrics["Evaluation Time"] = None

    return BenchmarkResult(
        task=self.task,
        config={},
        dataset="COCO minival",
        results=self.results,
        speed_mem_metrics=self.speed_mem_metrics,
        model=self.model_name,
        model_description=self.model_description,
        arxiv_id=self.paper_arxiv_id,
        pwc_id=self.paper_pwc_id,
        paper_results=self.paper_results,
        run_hash=self.batch_hash,
    )
def benchmark(cls, model, input_transform=None, target_transform=None,
              transforms=None, model_output_transform=None, collate_fn=None,
              send_data_to_device=None, device: str = 'cuda',
              data_root: str = './.data/vision/camvid', num_workers: int = 4,
              batch_size: int = 32, num_gpu: int = 1, paper_model_name: str = None,
              paper_arxiv_id: str = None, paper_pwc_id: str = None,
              paper_results: dict = None,
              pytorch_hub_url: str = None) -> BenchmarkResult:

    config = locals()
    model, device = send_model_to_device(model, device=device, num_gpu=num_gpu)
    model.eval()

    # Fall back to the class default joint transform when no transforms are supplied
    if not (input_transform or target_transform or transforms):
        transforms = cls.transforms

    if not model_output_transform:
        model_output_transform = cls.model_output_transform

    if not send_data_to_device:
        send_data_to_device = cls.send_data_to_device

    if not collate_fn:
        collate_fn = cls.collate_fn

    test_dataset = cls.dataset(root=data_root, split='val',
                               transform=input_transform,
                               target_transform=target_transform,
                               transforms=transforms)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                             num_workers=num_workers, pin_memory=True,
                             collate_fn=collate_fn)
    test_loader.no_classes = 12  # Number of classes for CamVid
    test_results = evaluate_segmentation(
        model=model,
        test_loader=test_loader,
        model_output_transform=model_output_transform,
        send_data_to_device=send_data_to_device,
        device=device)

    print(test_results)

    return BenchmarkResult(task=cls.task, config=config, dataset=cls.dataset.__name__,
                           results=test_results, pytorch_hub_id=pytorch_hub_url,
                           model=paper_model_name, arxiv_id=paper_arxiv_id,
                           pwc_id=paper_pwc_id, paper_results=paper_results)
def save(self):
    """
    Calculate results and then put into a BenchmarkResult object

    On the sotabench.com server, this will produce a JSON file serialisation and
    results will be recorded on the platform.

    :return: BenchmarkResult object with results and metadata
    """
    # recalculate to ensure no mistakes made during batch-by-batch metric calculation
    self.get_results()

    # If this is the first time the model is run, then we record evaluation time
    # information
    if not self.cached_results:
        self.speed_mem_metrics['Tasks / Evaluation Time'] = None
        self.speed_mem_metrics['Tasks'] = None
        self.speed_mem_metrics['Evaluation Time'] = time.time() - self.init_time
    else:
        self.speed_mem_metrics['Tasks / Evaluation Time'] = None
        self.speed_mem_metrics['Tasks'] = None
        self.speed_mem_metrics['Evaluation Time'] = None

    return BenchmarkResult(
        task=self.task,
        config={},
        dataset='ADE20K val',
        results=self.results,
        speed_mem_metrics=self.speed_mem_metrics,
        model=self.model_name,
        model_description=self.model_description,
        arxiv_id=self.paper_arxiv_id,
        pwc_id=self.paper_pwc_id,
        paper_results=self.paper_results,
        run_hash=self.batch_hash,
    )
def benchmark(cls, model, model_description=None, input_transform=None,
              target_transform=None, transforms=None, model_output_transform=None,
              collate_fn=None, send_data_to_device=None, device: str = "cuda",
              data_root: str = "./.data/vision/pascalcontext",
              num_workers: int = 4, batch_size: int = 32, num_gpu: int = 1,
              paper_model_name: str = None, paper_arxiv_id: str = None,
              paper_pwc_id: str = None, paper_results: dict = None,
              pytorch_hub_url: str = None,
              force: bool = False) -> BenchmarkResult:

    config = locals()
    model, device = send_model_to_device(model, device=device, num_gpu=num_gpu)
    model.eval()

    # Fall back to the class default joint transform when no transforms are supplied
    if not (input_transform or target_transform or transforms):
        transforms = cls.transforms

    if not model_output_transform:
        model_output_transform = cls.model_output_transform

    if not send_data_to_device:
        send_data_to_device = cls.send_data_to_device

    if not collate_fn:
        collate_fn = cls.collate_fn

    test_dataset = cls.dataset(
        root=data_root,
        split="val",
        transform=input_transform,
        target_transform=target_transform,
        transforms=transforms,
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
        collate_fn=collate_fn,
    )
    test_loader.no_classes = 59  # Number of classes for PASCAL Context
    test_results, speed_mem_metrics, run_hash = evaluate_segmentation(
        model=model,
        test_loader=test_loader,
        model_output_transform=model_output_transform,
        send_data_to_device=send_data_to_device,
        device=device,
        force=force)

    print(test_results)

    return BenchmarkResult(
        task=cls.task,
        config=config,
        dataset=cls.dataset.__name__,
        results=test_results,
        speed_mem_metrics=speed_mem_metrics,
        pytorch_hub_id=pytorch_hub_url,
        model=paper_model_name,
        model_description=model_description,
        arxiv_id=paper_arxiv_id,
        pwc_id=paper_pwc_id,
        paper_results=paper_results,
    )
def benchmark(cls, model, model_description=None, input_transform=None,
              target_transform=None, model_output_transform=None,
              send_data_to_device=None, device: str = "cuda",
              data_root: str = "./.data/vision/mnist", num_workers: int = 4,
              batch_size: int = 128, num_gpu: int = 1,
              paper_model_name: str = None, paper_arxiv_id: str = None,
              paper_pwc_id: str = None, paper_results: dict = None,
              pytorch_hub_url: str = None,
              force: bool = False) -> BenchmarkResult:

    config = locals()
    model, device = send_model_to_device(model, device=device, num_gpu=num_gpu)
    model.eval()

    if not input_transform:
        input_transform = cls.input_transform

    if not send_data_to_device:
        send_data_to_device = cls.send_data_to_device

    test_dataset = cls.dataset(
        data_root,
        train=False,
        transform=input_transform,
        target_transform=target_transform,
        download=True,
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
    )
    test_results, speed_mem_metrics, run_hash = evaluate_classification(
        model=model,
        test_loader=test_loader,
        model_output_transform=model_output_transform,
        send_data_to_device=send_data_to_device,
        device=device,
        force=force)

    print(" * Acc@1 {top1:.3f} Acc@5 {top5:.3f}".format(
        top1=test_results["Top 1 Accuracy"],
        top5=test_results["Top 5 Accuracy"],
    ))

    return BenchmarkResult(
        task=cls.task,
        config=config,
        dataset=cls.dataset.__name__,
        results=test_results,
        speed_mem_metrics=speed_mem_metrics,
        pytorch_hub_id=pytorch_hub_url,
        model=paper_model_name,
        model_description=model_description,
        arxiv_id=paper_arxiv_id,
        pwc_id=paper_pwc_id,
        paper_results=paper_results,
        run_hash=run_hash,
    )
def benchmark(
    cls,
    model,
    model_description=None,
    encoder=None,
    context_length: int = 1024,
    model_output_transform=None,
    device: str = "cuda",
    data_root: str = "./.data/nlp/wikitext-103",
    num_workers: int = 4,
    batch_size: int = 8,
    num_gpu: int = 1,
    paper_model_name: str = None,
    paper_arxiv_id: str = None,
    paper_pwc_id: str = None,
    paper_results: dict = None,
    pytorch_hub_url: str = None,
    force: bool = False,
) -> BenchmarkResult:

    config = locals()
    model, device = send_model_to_device(
        model, device=device, num_gpu=num_gpu
    )
    model.eval()

    if not encoder:
        raise ValueError(
            "Please provide an encoder to evaluate on this benchmark!"
        )

    # Test split
    test_dataset = cls.dataset(
        data_root,
        split="test",
        context_length=context_length,
        encoder=encoder,
        download=True,
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
    )
    test_results, run_hash = evaluate_language_model(
        model=model,
        model_output_transform=model_output_transform,
        send_data_to_device=cls.send_data_to_device,
        test_loader=test_loader,
        device=device,
        force=force,
    )

    # Valid split
    valid_dataset = cls.dataset(
        data_root,
        split="valid",
        context_length=context_length,
        encoder=encoder,
        download=True,
    )
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
    )
    valid_results, valid_run_hash = evaluate_language_model(
        model=model,
        model_output_transform=model_output_transform,
        send_data_to_device=cls.send_data_to_device,
        test_loader=valid_loader,
        device=device,
        force=force,
    )

    # Get final results
    if "Test perplexity" in test_results:  # hashed
        final_results = valid_results
    else:
        final_results = {
            "Test perplexity": test_results["Perplexity"],
            "Validation perplexity": valid_results["Perplexity"],
        }

    print(final_results)

    return BenchmarkResult(
        task=cls.task,
        config=config,
        dataset=cls.dataset.__name__,
        results=final_results,
        pytorch_hub_id=pytorch_hub_url,
        model=paper_model_name,
        model_description=model_description,
        arxiv_id=paper_arxiv_id,
        pwc_id=paper_pwc_id,
        paper_results=paper_results,
        run_hash=run_hash,
    )
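# Illustrative only: a sketch of how the language-modelling benchmark above
# might be invoked. The ``WikiText103`` entry point and its import path are
# assumptions for this example (they are not defined in this file); an
# ``encoder`` is required, as the ValueError in the benchmark enforces.
def run_wikitext103_sketch(model, encoder):
    from torchbench.language_modelling import WikiText103  # assumed import path

    return WikiText103.benchmark(
        model=model,            # any PyTorch language model returning logits
        encoder=encoder,        # tokeniser/encoder used to build context windows
        context_length=1024,
        batch_size=8,
        paper_model_name=None,  # set these to link results to a paper
        paper_arxiv_id=None,
    )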
def benchmark(
    cls,
    model,
    model_description=None,
    dataset_year="2007",
    input_transform=None,
    target_transform=None,
    transforms=None,
    model_output_transform=None,
    collate_fn=None,
    send_data_to_device=None,
    device: str = "cuda",
    data_root: str = "./.data/vision/voc",
    num_workers: int = 4,
    batch_size: int = 32,
    num_gpu: int = 1,
    paper_model_name: str = None,
    paper_arxiv_id: str = None,
    paper_pwc_id: str = None,
    paper_results: dict = None,
    pytorch_hub_url: str = None,
) -> BenchmarkResult:

    config = locals()
    model, device = send_model_to_device(
        model, device=device, num_gpu=num_gpu
    )
    model.eval()

    # Fall back to the class default transforms when no transforms are supplied
    if not (input_transform or target_transform or transforms):
        input_transform = cls.input_transform
        target_transform = cls.target_transform

    if not model_output_transform:
        model_output_transform = cls.model_output_transform

    if not send_data_to_device:
        send_data_to_device = cls.send_data_to_device

    if not collate_fn:
        collate_fn = cls.collate_fn

    test_dataset = cls.dataset(
        root=data_root,
        image_set="val",
        year=dataset_year,
        transform=input_transform,
        target_transform=target_transform,
        transforms=transforms,
        download=True,
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
        collate_fn=collate_fn,
    )
    test_loader.no_classes = 21  # Number of classes for PASCAL VOC
    test_results = evaluate_detection_voc(
        model=model,
        test_loader=test_loader,
        model_output_transform=model_output_transform,
        send_data_to_device=send_data_to_device,
        device=device,
    )

    print(test_results)

    return BenchmarkResult(
        task=cls.task,
        config=config,
        dataset=cls.dataset.__name__,
        results=test_results,
        pytorch_hub_id=pytorch_hub_url,
        model=paper_model_name,
        model_description=model_description,
        arxiv_id=paper_arxiv_id,
        pwc_id=paper_pwc_id,
        paper_results=paper_results,
    )
def benchmark(cls, model, dataset_year='2017', input_transform=None,
              target_transform=None, transforms=None, model_output_transform=None,
              collate_fn=None, send_data_to_device=None, device: str = 'cuda',
              data_root: str = './.data/vision/coco', num_workers: int = 4,
              batch_size: int = 1, num_gpu: int = 1, paper_model_name: str = None,
              paper_arxiv_id: str = None, paper_pwc_id: str = None,
              paper_results: dict = None,
              pytorch_hub_url: str = None) -> BenchmarkResult:

    config = locals()
    model, device = send_model_to_device(model, device=device, num_gpu=num_gpu)
    model.eval()

    # Fall back to the class default joint transform when no transforms are supplied
    if not (input_transform or target_transform or transforms):
        transforms = cls.transforms

    if not model_output_transform:
        model_output_transform = cls.model_output_transform

    if not send_data_to_device:
        send_data_to_device = cls.send_data_to_device

    if not collate_fn:
        collate_fn = cls.collate_fn

    test_dataset = cls.dataset(
        root=os.path.join(data_root, 'val%s' % dataset_year),
        annFile=os.path.join(
            data_root, 'annotations/instances_val%s.json' % dataset_year),
        transform=input_transform,
        target_transform=target_transform,
        transforms=transforms,
        download=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                             num_workers=num_workers, pin_memory=True,
                             collate_fn=collate_fn)
    test_loader.no_classes = 91  # Number of classes for COCO Detection
    test_results = evaluate_detection_coco(
        model=model,
        test_loader=test_loader,
        model_output_transform=model_output_transform,
        send_data_to_device=send_data_to_device,
        device=device)

    print(test_results)

    return BenchmarkResult(task=cls.task, config=config, dataset=cls.dataset.__name__,
                           results=test_results, pytorch_hub_id=pytorch_hub_url,
                           model=paper_model_name, arxiv_id=paper_arxiv_id,
                           pwc_id=paper_pwc_id, paper_results=paper_results)
def benchmark(cls, model, model_description=None, input_transform=None,
              target_transform=None, transforms=None, model_output_transform=None,
              collate_fn=None, send_data_to_device=None, dataset_year="2017",
              device: str = "cuda", data_root: str = "./.data/vision/coco",
              num_workers: int = 4, batch_size: int = 1, pin_memory: bool = True,
              num_gpu: int = 1, paper_model_name: str = None,
              paper_arxiv_id: str = None, paper_pwc_id: str = None,
              paper_results: dict = None, pytorch_hub_url: str = None,
              force: bool = False) -> BenchmarkResult:
    """Benchmarking function.

    Args:
        model: a PyTorch module (e.g. a ``nn.Module`` object) that takes
            in COCO inputs and outputs COCO predictions.
        model_description (str, optional): Optional model description.
        input_transform (transforms.Compose, optional): Composing the
            transforms used to transform the dataset, e.g. applying
            resizing (e.g. ``transforms.Resize``), center cropping,
            to-tensor transformations and normalization.
        target_transform (torchvision.transforms.Compose, optional):
            Composing any transforms used to transform the target.
        transforms (torchbench.object_detection.transforms.Compose,
            optional): Does a joint transform on the input and the
            target - please see the torchbench.object_detection.transforms
            file for more information.
        model_output_transform (callable, optional): An optional function
            that takes in model output (after being passed through your
            ``model`` forward pass) and transforms it. Afterwards, the
            output will be passed into an evaluation function.
        collate_fn (callable, optional): How the dataset is collated - an
            optional callable passed into the DataLoader.
        send_data_to_device (callable, optional): An optional function
            specifying how the model is sent to a device; see
            ``torchbench.utils.send_model_to_device`` for the default
            treatment.
        dataset_year (str, optional): The dataset year for COCO to use;
            the default (2017) creates the 'minival' validation set.
        device (str): Default is 'cuda' - this is the device that the
            model is sent to in the default treatment.
        data_root (str): The location of the COCO dataset - change this
            parameter when evaluating locally if your COCO data is
            located in a different folder (or alternatively if you want
            to download to an alternative location).
        num_workers (int): The number of workers to use for the DataLoader.
        batch_size (int): The batch_size to use for evaluation; if you
            get memory errors, then reduce this (half each time) until
            your model fits onto the GPU.
        num_gpu (int): Number of GPUs - note that sotabench.com workers
            only support 1 GPU for now.
        paper_model_name (str, optional): The name of the model from the
            paper - if you want to link your build to a machine learning
            paper. See the COCO benchmark page for model names,
            https://www.sotabench.com/benchmark/coco-minival, e.g. on the
            paper leaderboard tab.
        paper_arxiv_id (str, optional): Optional linking to arXiv if you
            want to link to papers on the leaderboard; put in the
            corresponding paper's arXiv ID, e.g. '1611.05431'.
        paper_pwc_id (str, optional): Optional linking to Papers With
            Code; put in the corresponding Papers With Code URL slug,
            e.g. 'u-gat-it-unsupervised-generative-attentional'.
        paper_results (dict, optional): If the paper you are reproducing
            does not have model results on sotabench.com, you can specify
            the paper results yourself through this argument, where keys
            are metric names and values are metric values, e.g.::

                {'box AP': 0.349, 'AP50': 0.592, ...}

            Ensure that the metric names match those on the sotabench
            leaderboard - for COCO they should be 'box AP', 'AP50',
            'AP75', 'APS', 'APM', 'APL'.
        pytorch_hub_url (str, optional): Optional linking to a PyTorch
            Hub url if your model is linked there; e.g.
            'nvidia_deeplearningexamples_waveglow'.
    """
    config = locals()
    model, device = send_model_to_device(model, device=device, num_gpu=num_gpu)
    model.eval()

    # Fall back to the class default joint transform when no transforms are supplied
    if not (input_transform or target_transform or transforms):
        transforms = cls.transforms

    if not model_output_transform:
        model_output_transform = cls.model_output_transform

    if not send_data_to_device:
        send_data_to_device = cls.send_data_to_device

    if not collate_fn:
        collate_fn = cls.collate_fn

    test_dataset = cls.dataset(
        root=os.path.join(data_root, "val%s" % dataset_year),
        annFile=os.path.join(
            data_root, "annotations/instances_val%s.json" % dataset_year),
        transform=input_transform,
        target_transform=target_transform,
        transforms=transforms,
        download=True,
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=pin_memory,
        collate_fn=collate_fn,
    )
    test_loader.no_classes = 91  # Number of classes for COCO Detection
    test_results, speed_mem_metrics, run_hash = evaluate_detection_coco(
        model=model,
        test_loader=test_loader,
        model_output_transform=model_output_transform,
        send_data_to_device=send_data_to_device,
        device=device,
        force=force)

    print(test_results)

    return BenchmarkResult(
        task=cls.task,
        config=config,
        dataset="COCO minival",
        results=test_results,
        speed_mem_metrics=speed_mem_metrics,
        pytorch_hub_id=pytorch_hub_url,
        model=paper_model_name,
        model_description=model_description,
        arxiv_id=paper_arxiv_id,
        pwc_id=paper_pwc_id,
        paper_results=paper_results,
        run_hash=run_hash,
    )
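# A minimal usage sketch for the COCO benchmark above, assuming the
# ``torchbench.object_detection.COCO`` entry point and a pretrained torchvision
# detection model. The entry-point name, the leaderboard model name, and the
# adequacy of the class default transforms/collate function for this model are
# assumptions for illustration, not guarantees.
from torchbench.object_detection import COCO
from torchvision.models.detection import maskrcnn_resnet50_fpn

COCO.benchmark(
    model=maskrcnn_resnet50_fpn(pretrained=True),
    paper_model_name="Mask R-CNN (ResNet-50-FPN)",
    paper_arxiv_id="1703.06870",
    batch_size=1,   # detection models are memory-hungry; keep this small
    num_gpu=1,
)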
def benchmark(cls, model, model_description=None, input_transform=None,
              target_transform=None, model_output_transform=None,
              send_data_to_device=None, device: str = "cuda",
              data_root: str = "./.data/vision/imagenet", num_workers: int = 4,
              batch_size: int = 128, pin_memory: bool = False, num_gpu: int = 1,
              paper_model_name: str = None, paper_arxiv_id: str = None,
              paper_pwc_id: str = None, paper_results: dict = None,
              pytorch_hub_url: str = None,
              force: bool = False) -> BenchmarkResult:
    """Benchmarking function.

    Args:
        model: a PyTorch module (e.g. a ``nn.Module`` object) that takes
            in ImageNet inputs and outputs ImageNet predictions.
        model_description (str, optional): Optional model description.
        input_transform (transforms.Compose, optional): Composing the
            transforms used to transform the dataset, e.g. applying
            resizing (e.g. ``transforms.Resize``), center cropping,
            to-tensor transformations and normalization.
        target_transform (torchvision.transforms.Compose, optional):
            Composing any transforms used to transform the target. This
            is usually not used for ImageNet.
        model_output_transform (callable, optional): An optional function
            that takes in model output (after being passed through your
            ``model`` forward pass) and transforms it. Afterwards, the
            output will be passed into an evaluation function.
        send_data_to_device (callable, optional): An optional function
            specifying how the model is sent to a device; see
            ``torchbench.utils.send_model_to_device`` for the default
            treatment.
        device (str): Default is 'cuda' - this is the device that the
            model is sent to in the default treatment.
        data_root (str): The location of the ImageNet dataset - change
            this parameter when evaluating locally if your ImageNet data
            is located in a different folder (or alternatively if you
            want to download to an alternative location).
        num_workers (int): The number of workers to use for the DataLoader.
        batch_size (int): The batch_size to use for evaluation; if you
            get memory errors, then reduce this (half each time) until
            your model fits onto the GPU.
        num_gpu (int): Number of GPUs - note that sotabench.com workers
            only support 1 GPU for now.
        paper_model_name (str, optional): The name of the model from the
            paper - if you want to link your build to a machine learning
            paper. See the ImageNet benchmark page for model names,
            https://www.sotabench.com/benchmark/imagenet, e.g. on the
            paper leaderboard tab.
        paper_arxiv_id (str, optional): Optional linking to arXiv if you
            want to link to papers on the leaderboard; put in the
            corresponding paper's arXiv ID, e.g. '1611.05431'.
        paper_pwc_id (str, optional): Optional linking to Papers With
            Code; put in the corresponding Papers With Code URL slug,
            e.g. 'u-gat-it-unsupervised-generative-attentional'.
        paper_results (dict, optional): If the paper you are reproducing
            does not have model results on sotabench.com, you can specify
            the paper results yourself through this argument, where keys
            are metric names and values are metric values, e.g.::

                {'Top 1 Accuracy': 0.543, 'Top 5 Accuracy': 0.654}

            Ensure that the metric names match those on the sotabench
            leaderboard - for ImageNet they should be 'Top 1 Accuracy'
            and 'Top 5 Accuracy'.
        pytorch_hub_url (str, optional): Optional linking to a PyTorch
            Hub url if your model is linked there; e.g.
            'nvidia_deeplearningexamples_waveglow'.
    """
    print("Benchmarking on ImageNet...")

    config = locals()
    model, device = send_model_to_device(model, device=device, num_gpu=num_gpu)
    model.eval()

    if not input_transform:
        input_transform = cls.input_transform

    if not send_data_to_device:
        send_data_to_device = cls.send_data_to_device

    try:
        test_dataset = cls.dataset(
            data_root,
            split="val",
            transform=input_transform,
            target_transform=target_transform,
            download=True,
        )
    except Exception:
        # If the dataset cannot be downloaded automatically, fall back to an
        # existing local copy under data_root
        test_dataset = cls.dataset(
            data_root,
            split="val",
            transform=input_transform,
            target_transform=target_transform,
            download=False,
        )

    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )
    test_results, speed_mem_metrics, run_hash = evaluate_classification(
        model=model,
        test_loader=test_loader,
        model_output_transform=model_output_transform,
        send_data_to_device=send_data_to_device,
        device=device,
        force=force)

    print(" * Acc@1 {top1:.3f} Acc@5 {top5:.3f}".format(
        top1=test_results["Top 1 Accuracy"],
        top5=test_results["Top 5 Accuracy"],
    ))

    return BenchmarkResult(
        task=cls.task,
        config=config,
        dataset=cls.dataset.__name__,
        results=test_results,
        speed_mem_metrics=speed_mem_metrics,
        pytorch_hub_id=pytorch_hub_url,
        model=paper_model_name,
        model_description=model_description,
        arxiv_id=paper_arxiv_id,
        pwc_id=paper_pwc_id,
        paper_results=paper_results,
        run_hash=run_hash,
    )