def repo_get(config: Config, repository: str):
    """Get repository.

    Repository name must be in ``owner/project`` format.
    """
    repository = check_repo(repository)
    client = Client(config)
    table(client.repository_get(repository=repository))

def login(config: Config):
    """Obtain authentication token."""
    username = click.prompt("Username")
    password = click.prompt("Password", hide_input=True)
    client = Client(config)
    config.token = client.login(username=username, password=password)
    config.save()

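# Hedged usage sketch (an assumption, not part of the CLI): the same login flow
# can be driven programmatically, reusing only calls that appear in this
# section (Config, Client.login, config.save). The credentials are placeholders.
def example_programmatic_login():
    config_path = os.path.expanduser(consts.DEFAULT_CONFIG_PATH)
    config = Config(config_path)
    client = Client(config)
    # client.login returns an API token, which Config persists to disk
    config.token = client.login(username="example-user", password="example-pass")
    config.save()
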
def test_run_hash(run_hash):
    config_path = os.path.expanduser(consts.DEFAULT_CONFIG_PATH)
    config = Config(config_path)
    client = Client(config)
    res = client.get_results_by_run_hash(run_hash=run_hash)
    assert isinstance(res, dict)
    assert res["Top 5 Accuracy"] == 0.9795

def test_check_results(run_hash):
    client = Client.public()
    r = [{
        "model": "FixResNeXt-101 32x48d",
        "task": "Image Classification",
        "dataset_name": "ImageNet",
        "results": {
            "Top 1 Accuracy": 0.8636199999999999,
            "Top 5 Accuracy": 0.9795,
        },
        "arxiv_id": "1906.06423",
        "pwc_id": None,
        "pytorch_hub_id": None,
        "paper_results": None,
        "run_hash": run_hash,
    }]
    res = client.check_results(r)
    assert len(res["response"]["errors"]) == 0

    r[0]["task"] = "Make a cup of tea"
    res = client.check_results(r)
    e = res["response"]["errors"][0]
    assert "error" in e

def cache_exists(self):
    """
    Checks whether the cache exists in the sotabench.com database - if so,
    sets self.results to the cached results and returns True.

    You can use this property for control flow to break a for loop over a
    dataset after the first iteration. This prevents rerunning the same
    calculation for the same model twice.

    Q: Why should the user use this?
    A: If you want fast "continuous evaluation" and want to avoid rerunning
    the same model over and over each time you commit something new to your
    repository.

    Examples:
        Breaking a for loop for a PyTorch evaluation

        .. code-block:: python

            ...

            with torch.no_grad():
                for i, (input, target) in enumerate(test_loader):
                    input = input.to(device=device, non_blocking=True)
                    target = target.to(device=device, non_blocking=True)
                    output = model(input)

                    image_ids = [
                        img[0].split('/')[-1].replace('.JPEG', '')
                        for img in test_loader.dataset.imgs[
                            i * test_loader.batch_size:(i + 1) * test_loader.batch_size
                        ]
                    ]

                    evaluator.add(dict(zip(image_ids, list(output.cpu().numpy()))))

                    if evaluator.cache_exists:
                        break

            evaluator.save()  # uses the cached results

    This logic is for the server; it will not break the loop if you evaluate
    locally.

    :return: bool, or None if not run on the server
    """
    if not self.first_batch_processed:
        raise ValueError(
            "No batches of data have been processed so no batch_hash exists"
        )

    if not is_server():  # we only check the cache on the server
        return None

    client = Client.public()
    cached_res = client.get_results_by_run_hash(self.batch_hash)

    if cached_res:
        self.results = cached_res
        self.cached_results = True
        print("No model change detected (using the first batch run "
              "hash). Will use cached results.")
        return True

    return False

def cache_exists(self):
    """
    Checks whether the cache exists in the sotabench.com database - if so,
    sets self.results to the cached results and returns True.

    You can use this property for control flow to break a for loop over a
    dataset after the first iteration. This prevents rerunning the same
    calculation for the same model twice.

    Q: Why should the user use this?
    A: If you want fast "continuous evaluation" and want to avoid rerunning
    the same model over and over each time you commit something new to your
    repository.

    Examples:
        Breaking a for loop if the model is the same as last time we ran

        .. code-block:: python

            ...

            with torch.no_grad():
                for i, (input, target) in enumerate(iterator):
                    ...
                    output = model(input)
                    # optional formatting of output here to be a list of detection dicts
                    evaluator.add(output)

                    if evaluator.cache_exists:
                        break

            evaluator.save()

    This logic is for the server; it will not break the loop if you evaluate
    locally.

    :return: bool, or None if not on the server
    """
    if not is_server():  # we only check the cache on the server
        return None

    if not self.first_batch_processed:
        return False

    if self._cache_exists is not None:
        return self._cache_exists

    client = Client.public()
    cached_res = client.get_results_by_run_hash(self.batch_hash)

    if cached_res:
        self.results = cached_res
        self.cached_results = True
        print("No model change detected (using the first batch run "
              f"hash {self.batch_hash}). Will use cached results.")
        self._cache_exists = True
    else:
        self._cache_exists = False

    return self._cache_exists

def evaluate_language_model(
    model,
    test_loader,
    model_output_transform,
    send_data_to_device,
    device="cuda",
):
    n_steps, eval_loss = 0, 0
    iterator = tqdm.tqdm(test_loader, desc="Evaluation")

    with torch.no_grad():
        for i, labels in enumerate(iterator):
            labels, _ = send_data_to_device(labels, None, device=device)
            output = model(labels)

            if model_output_transform is not None:
                output = model_output_transform(output, None, model=model)

            shift_logits = output[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()

            objective = CrossEntropyLoss(ignore_index=-1)
            loss = objective(
                shift_logits.view(-1, shift_logits.size(-1)),
                shift_labels.view(-1),
            )

            eval_loss += loss.item()
            n_steps += 1
            iterator.desc = (
                f"Eval loss: {eval_loss / n_steps} "
                f"ppl: {np.exp(eval_loss / n_steps)}"
            )

            if i == 0:  # for sotabench.com caching of evaluation
                run_hash = calculate_run_hash([eval_loss], output)
                # if we are in check mode we don't need to go beyond the
                # first batch
                if in_check_mode():
                    iterator.close()
                    break

                # get the cached values from sotabench.com if available
                client = Client.public()
                cached_res = client.get_results_by_run_hash(run_hash)
                if cached_res:
                    iterator.close()
                    print(
                        "No model change detected (using the first batch "
                        "run_hash). Returning cached results."
                    )
                    return cached_res, run_hash

    return {"Perplexity": np.exp(eval_loss / n_steps)}, run_hash

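# Hedged usage sketch: how evaluate_language_model might be called. The model,
# loader, and device-mover arguments are placeholders (assumptions), not names
# defined in this module; any language model that maps token-id inputs to
# logits of shape [batch, seq_len, vocab] fits the loop above.
def example_language_model_evaluation(model, wikitext_loader, to_device):
    results, run_hash = evaluate_language_model(
        model=model,
        test_loader=wikitext_loader,
        model_output_transform=None,    # or a callable mapping raw output -> logits
        send_data_to_device=to_device,  # assumed (input, target, device) mover
        device="cuda",
    )
    # results -> {"Perplexity": ...} when not served from the cache
    return results, run_hash
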
def cache_exists(self):
    """
    Checks whether the cache exists in the sotabench.com database - if so,
    sets self.results to the cached results and returns True.

    You can use this property for control flow to break a for loop over a
    dataset after the first iteration. This prevents rerunning the same
    calculation for the same model twice.

    Examples:
        Breaking a for loop

        .. code-block:: python

            ...

            with torch.no_grad():
                for i, (input, target) in enumerate(iterator):
                    ...
                    output = model(input)
                    # optional formatting of output here to be a list of detection dicts
                    evaluator.add(output)

                    if evaluator.cache_exists:
                        break

            evaluator.save()

    :return: bool, or None if not run on the server
    """
    if not is_server():  # we only check the cache on the server
        return None

    if not self.first_batch_processed:
        return False

    if self._cache_exists is not None:
        return self._cache_exists

    client = Client.public()
    cached_res = client.get_results_by_run_hash(self.batch_hash)

    if cached_res:
        self.results = cached_res
        self.cached_results = True
        print("No model change detected (using the first batch run "
              "hash). Will use cached results.")
        self._cache_exists = True
    else:
        self._cache_exists = False

    return self._cache_exists

def evaluate_classification(model, test_loader, model_output_transform,
                            send_data_to_device, device='cuda'):
    batch_time = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()
    iterator = tqdm.tqdm(test_loader, file=sys.stdout)

    with torch.no_grad():
        for i, (input, target) in enumerate(iterator):
            input, target = send_data_to_device(input, target, device=device)
            output = model(input)

            if model_output_transform is not None:
                output = model_output_transform(output, target)

            check_metric_inputs(output, target, test_loader.dataset, i)
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            batch_time.update(time.time() - end)
            end = time.time()

            if i == 0:  # for sotabench.com caching of evaluation
                run_hash = calculate_run_hash([prec1, prec5], output)
                # if we are in check mode we don't need to go beyond the first batch
                if in_check_mode():
                    iterator.close()
                    break

                # get the cached values from sotabench.com if available
                client = Client.public()
                cached_res = client.get_results_by_run_hash(run_hash)
                if cached_res:
                    iterator.close()
                    print(
                        "No model change detected (using the first batch run_hash). "
                        "Returning cached results."
                    )
                    return cached_res, run_hash

    return {
        'Top 1 Accuracy': top1.avg / 100,
        'Top 5 Accuracy': top5.avg / 100
    }, run_hash

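# Hedged usage sketch: this classification evaluator returns a metrics dict
# plus the first-batch run_hash used for sotabench.com caching. The loader and
# device-mover below are placeholders (e.g. an ImageNet-style validation
# loader), not names defined in this module.
def example_classification_evaluation(model, imagenet_val_loader, to_device):
    results, run_hash = evaluate_classification(
        model=model,
        test_loader=imagenet_val_loader,
        model_output_transform=None,
        send_data_to_device=to_device,
        device="cuda",
    )
    # results -> {'Top 1 Accuracy': ..., 'Top 5 Accuracy': ...}
    return results, run_hash
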
def cache_exists(self):
    """
    Checks whether the cache exists in the sotabench.com database - if so,
    sets self.results to the cached results and returns True.

    You can use this property for control flow to break a for loop over a
    dataset after the first iteration. This prevents rerunning the same
    calculation for the same model twice.

    Examples:
        Breaking a for loop

        .. code-block:: python

            ...

            with torch.no_grad():
                for i, (input, target) in enumerate(iterator):
                    ...
                    output = model(input)
                    # output and target should then be flattened into 1D
                    # np.ndarrays and passed in below
                    evaluator.update(output=output, target=target)

                    if evaluator.cache_exists:
                        break

            evaluator.save()

    :return: bool, or None if not run on the server
    """
    if not self.first_batch_processed:
        raise ValueError(
            "No batches of data have been processed so no batch_hash exists"
        )

    if not is_server():  # we only check the cache on the server
        return None

    client = Client.public()
    cached_res = client.get_results_by_run_hash(self.batch_hash)

    if cached_res:
        self.results = cached_res
        self.cached_results = True
        print("No model change detected (using the first batch run "
              "hash). Will use cached results.")
        return True

    return False

def evaluate_detection_coco(model, test_loader, model_output_transform,
                            send_data_to_device, device="cuda", force=False):
    coco = get_coco_api_from_dataset(test_loader.dataset)
    iou_types = ['bbox']
    coco_evaluator = CocoEvaluator(coco, iou_types)

    iterator = tqdm.tqdm(test_loader, desc="Evaluation", mininterval=5)

    init_time = time.time()

    with torch.no_grad():
        for i, (input, target) in enumerate(iterator):
            input, target = send_data_to_device(input, target, device=device)
            original_output = model(input)
            output, target = model_output_transform(original_output, target)
            result = {
                tar["image_id"].item(): out
                for tar, out in zip(target, output)
            }
            coco_evaluator.update(result)

            if i == 0:  # for sotabench.com caching of evaluation
                run_hash = calculate_run_hash([], original_output)
                # if we are in check mode we don't need to go beyond the first
                # batch
                if in_check_mode():
                    iterator.close()
                    break

                if not force:
                    # get the cached values from sotabench.com if available
                    client = Client.public()
                    cached_res = client.get_results_by_run_hash(run_hash)
                    if cached_res:
                        iterator.close()
                        print(
                            "No model change detected (using the first batch run "
                            "hash). Returning cached results.")
                        speed_mem_metrics = {
                            'Tasks / Evaluation Time': None,
                            'Evaluation Time': None,
                            'Tasks': None,
                            'Max Memory Allocated (Total)': None,
                        }
                        return cached_res, speed_mem_metrics, run_hash

    exec_time = (time.time() - init_time)

    coco_evaluator.synchronize_between_processes()
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    memory_allocated = torch.cuda.max_memory_allocated(device=device)
    torch.cuda.reset_max_memory_allocated(device=device)

    speed_mem_metrics = {
        'Tasks / Evaluation Time': len(test_loader.dataset) / exec_time,
        'Tasks': len(test_loader.dataset),
        'Evaluation Time': (time.time() - init_time),
        'Max Memory Allocated (Total)': memory_allocated,
    }

    return (get_coco_metrics(coco_evaluator), speed_mem_metrics, run_hash)

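# Hedged usage sketch: the COCO evaluator relies on model_output_transform to
# turn the raw model output into (output, target) pairs where each target
# carries an "image_id" tensor and each output is a per-image detection dict.
# The loader, transform, and device-mover below are placeholders, not names
# defined in this module.
def example_detection_evaluation(model, coco_val_loader, transform, to_device):
    metrics, speed_mem_metrics, run_hash = evaluate_detection_coco(
        model=model,
        test_loader=coco_val_loader,
        model_output_transform=transform,
        send_data_to_device=to_device,
        device="cuda",
        force=False,  # set True to skip the cached-results lookup
    )
    return metrics, speed_mem_metrics, run_hash
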
def repo_list(config: Config, owner):
    """List repositories."""
    client = Client(config)
    table(client.repository_list(username=owner))

def build_get(config: Config, repository: str, run_number: int):
    """Get build details."""
    repository = check_repo(repository)
    client = Client(config)
    table(client.build_get(repository=repository, run_number=run_number))

def build_list(config: Config, repository: str):
    """List builds for a given repository."""
    repository = check_repo(repository)
    client = Client(config)
    table(client.build_list(repository=repository))

def build_start(config: Config, repository):
    """Start build."""
    repository = check_repo(repository)
    client = Client(config)
    table(client.build_start(repository=repository))

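# Hedged sketch: the build commands above are thin wrappers around Client, so
# the same workflow can be scripted directly. The repository name and run
# number below are placeholders in the required ``owner/project`` format.
def example_build_workflow(config: Config):
    client = Client(config)
    client.build_start(repository="example-owner/example-project")
    builds = client.build_list(repository="example-owner/example-project")
    # fetch details for a single build by its run number (placeholder value)
    detail = client.build_get(
        repository="example-owner/example-project", run_number=1
    )
    return builds, detail
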
def evaluate_segmentation(
    model,
    test_loader,
    model_output_transform,
    send_data_to_device,
    num_classes,
    device="cuda",
):
    confmat = ConfusionMatrix(num_classes)

    iterator = tqdm.tqdm(test_loader, desc="Evaluation", mininterval=5)

    init_time = time.time()

    with torch.no_grad():
        for i, (input, target) in enumerate(iterator):
            input, target = send_data_to_device(input, target, device=device)
            output = model(input)
            output, target = model_output_transform(output, target)
            confmat.update(target, output)

            if i == 0:  # for sotabench.com caching of evaluation
                run_hash = calculate_run_hash([], output)
                # if we are in check mode we don't need to go beyond the first
                # batch
                if in_check_mode():
                    iterator.close()
                    break

                # get the cached values from sotabench.com if available
                client = Client.public()
                cached_res = client.get_results_by_run_hash(run_hash)
                if cached_res:
                    iterator.close()
                    print(
                        "No model change detected (using the first batch run "
                        "hash). Returning cached results.")
                    speed_mem_metrics = {
                        "Tasks / Evaluation Time": None,
                        "Evaluation Time": None,
                        "Tasks": None,
                        "Max Memory Allocated (Total)": None,
                    }
                    return cached_res, speed_mem_metrics, run_hash

    exec_time = time.time() - init_time

    acc_global, acc, iu = confmat.compute()

    memory_allocated = torch.cuda.max_memory_allocated(device=device)
    torch.cuda.reset_max_memory_allocated(device=device)

    speed_mem_metrics = {
        "Tasks / Evaluation Time": len(test_loader.dataset) / exec_time,
        "Tasks": len(test_loader.dataset),
        "Evaluation Time": (time.time() - init_time),
        "Max Memory Allocated (Total)": memory_allocated,
    }

    return (
        {
            "Accuracy": acc_global.item(),
            "Mean IOU": iu.mean().item(),
        },
        speed_mem_metrics,
        run_hash,
    )

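# Hedged usage sketch: segmentation evaluation additionally needs num_classes
# for the confusion matrix, and a model_output_transform that returns
# (output, target) in the form ConfusionMatrix.update expects. All argument
# names and the class count below are placeholders.
def example_segmentation_evaluation(model, seg_val_loader, transform, to_device):
    metrics, speed_mem_metrics, run_hash = evaluate_segmentation(
        model=model,
        test_loader=seg_val_loader,
        model_output_transform=transform,
        send_data_to_device=to_device,
        num_classes=21,  # e.g. a PASCAL VOC-style label set; adjust per dataset
        device="cuda",
    )
    # metrics -> {'Accuracy': ..., 'Mean IOU': ...}
    return metrics
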
def evaluate_classification(
    model,
    test_loader,
    model_output_transform,
    send_data_to_device,
    device="cuda",
    force=False,
):
    top1 = AverageMeter()
    top5 = AverageMeter()

    iterator = tqdm.tqdm(test_loader, desc="Evaluation", mininterval=5)

    init_time = time.time()

    with torch.no_grad():
        for i, (input, target) in enumerate(iterator):
            input, target = send_data_to_device(input, target, device=device)
            output = model(input)

            if model_output_transform is not None:
                output = model_output_transform(output, target, model=model)

            check_metric_inputs(output, target, test_loader.dataset, i)
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            if i == 0:  # for sotabench.com caching of evaluation
                run_hash = calculate_run_hash([prec1, prec5], output)
                # if we are in check mode we don't need to go beyond the first
                # batch
                if in_check_mode():
                    iterator.close()
                    break

                if not force:
                    # get the cached values from sotabench.com if available
                    client = Client.public()
                    cached_res = client.get_results_by_run_hash(run_hash)
                    if cached_res:
                        iterator.close()
                        print(
                            "No model change detected (using the first batch run "
                            "hash). Returning cached results."
                        )
                        speed_mem_metrics = {
                            'Tasks / Evaluation Time': None,
                            'Evaluation Time': None,
                            'Tasks': None,
                            'Max Memory Allocated (Total)': None,
                        }
                        return cached_res, speed_mem_metrics, run_hash

    exec_time = (time.time() - init_time)

    memory_allocated = torch.cuda.max_memory_allocated(device=device)
    torch.cuda.reset_max_memory_allocated(device=device)

    speed_mem_metrics = {
        'Tasks / Evaluation Time': len(test_loader.dataset) / exec_time,
        'Tasks': len(test_loader.dataset),
        'Evaluation Time': (time.time() - init_time),
        'Max Memory Allocated (Total)': memory_allocated,
    }

    return (
        {"Top 1 Accuracy": top1.avg / 100, "Top 5 Accuracy": top5.avg / 100},
        speed_mem_metrics,
        run_hash,
    )

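# Hedged sketch: passing force=True skips the cached-results lookup, so the
# full evaluation always runs (and speed/memory metrics are recomputed) even
# when the first-batch run_hash matches a previous run. Arguments below are
# placeholders, not names defined in this module.
def example_forced_classification_evaluation(model, val_loader, to_device):
    results, speed_mem_metrics, run_hash = evaluate_classification(
        model=model,
        test_loader=val_loader,
        model_output_transform=None,
        send_data_to_device=to_device,
        device="cuda",
        force=True,  # bypass the run_hash cache check
    )
    return results, speed_mem_metrics, run_hash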