def test(cfg, local_rank, distributed, logger=None):
    device = torch.device('cuda')
    cpu_device = torch.device('cpu')

    # create model
    logger.info("Creating model \"{}\"".format(cfg.MODEL.ARCHITECTURE))
    model = build_model(cfg).to(device)
    criterion = torch.nn.CrossEntropyLoss(ignore_index=255).to(device)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            broadcast_buffers=True,
        )

    # checkpoint
    checkpointer = Checkpointer(model, save_dir=cfg.LOGS.DIR, logger=logger)
    _ = checkpointer.load(f=cfg.MODEL.WEIGHT)

    # data_loader
    logger.info('Loading dataset "{}"'.format(cfg.DATASETS.TEST))
    stage = cfg.DATASETS.TEST.split('_')[-1]
    data_loader = make_data_loader(cfg, stage, distributed)
    dataset_name = cfg.DATASETS.TEST

    metrics = inference(model, criterion, data_loader, dataset_name,
                        save_result=True)
    if is_main_process():
        logger.info("Metrics:")
        for k, v in metrics.items():
            logger.info("{}: {}".format(k, v))
def inference(
    model,
    data_loader,
    dataset_name,
    iou_types=("bbox",),
    box_only=False,
    device="cuda",
    expected_results=(),
    expected_results_sigma_tol=4,
    output_folder=None,
):
    # convert to a torch.device for efficiency
    device = torch.device(device)
    num_devices = (torch.distributed.get_world_size()
                   if torch.distributed.is_initialized() else 1)
    logger = logging.getLogger("maskrcnn_benchmark.inference")
    dataset = data_loader.dataset
    logger.info("Start evaluation on {} dataset ({} images).".format(
        dataset_name, len(dataset)))
    start_time = time.time()
    predictions = compute_on_dataset(model, data_loader, device)
    # wait for all processes to complete before measuring the time
    synchronize()
    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=total_time))
    logger.info(
        "Total inference time: {} ({} s / img per device, on {} devices)".format(
            total_time_str, total_time * num_devices / len(dataset),
            num_devices))

    predictions = _accumulate_predictions_from_multiple_gpus(predictions)
    if not is_main_process():
        return

    if output_folder:
        torch.save(predictions, os.path.join(output_folder, "predictions.pth"))

    extra_args = dict(
        box_only=box_only,
        iou_types=iou_types,
        expected_results=expected_results,
        expected_results_sigma_tol=expected_results_sigma_tol,
    )
    return evaluate(dataset=dataset,
                    predictions=predictions,
                    output_folder=output_folder,
                    **extra_args)
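# `compute_on_dataset` is referenced above but not defined in this section.
# A minimal sketch, assuming the loader yields (images, targets, image_ids)
# batches and the model returns one prediction per image; it builds the
# image-id-keyed dict that `_accumulate_predictions_from_multiple_gpus`
# expects downstream.
def compute_on_dataset(model, data_loader, device):
    model.eval()
    results_dict = {}
    cpu_device = torch.device("cpu")
    with torch.no_grad():
        for batch in tqdm(data_loader):
            images, targets, image_ids = batch
            images = images.to(device)
            output = model(images)
            # move predictions off the GPU so gathering them later is cheap
            output = [o.to(cpu_device) for o in output]
            results_dict.update(
                {img_id: result for img_id, result in zip(image_ids, output)})
    return results_dict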
def init_pretrained_weights(key):
    """Initializes model with pretrained weights.

    Layers that don't match with pretrained layers in name or size are
    kept unchanged.
    """
    import os
    import errno
    import gdown

    def _get_torch_home():
        ENV_TORCH_HOME = 'TORCH_HOME'
        ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
        DEFAULT_CACHE_DIR = '~/.cache'
        torch_home = os.path.expanduser(
            os.getenv(
                ENV_TORCH_HOME,
                os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR),
                             'torch')))
        return torch_home

    torch_home = _get_torch_home()
    model_dir = os.path.join(torch_home, 'checkpoints')
    try:
        os.makedirs(model_dir)
    except OSError as e:
        if e.errno == errno.EEXIST:
            # Directory already exists, ignore.
            pass
        else:
            # Unexpected OSError, re-raise.
            raise

    filename = model_urls[key].split('/')[-1]
    cached_file = os.path.join(model_dir, filename)
    if not os.path.exists(cached_file):
        if comm.is_main_process():
            gdown.download(model_urls[key], cached_file, quiet=False)

    comm.synchronize()

    logger.info(f"Loading pretrained model from {cached_file}")
    state_dict = torch.load(cached_file, map_location=torch.device('cpu'))
    return state_dict
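# Hypothetical usage of init_pretrained_weights. The `model_urls` entry and
# the model construction are assumptions (both live elsewhere in the real
# file); the filtering step illustrates the docstring's promise that layers
# whose name or size do not match are kept unchanged.
model_urls = {
    'osnet_x1_0': 'https://drive.google.com/uc?id=...',  # placeholder entry
}

model = build_model(cfg)  # any torch.nn.Module with a compatible architecture
state_dict = init_pretrained_weights('osnet_x1_0')
model_dict = model.state_dict()
# keep only entries that exist in the model with an identical shape
matched = {k: v for k, v in state_dict.items()
           if k in model_dict and v.size() == model_dict[k].size()}
model_dict.update(matched)
model.load_state_dict(model_dict)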
def cache_url(url, model_dir=None, progress=True):
    r"""Loads the Torch serialized object at the given URL.

    If the object is already present in `model_dir`, it's deserialized and
    returned. The filename part of the URL should follow the naming
    convention ``filename-<sha256>.ext`` where ``<sha256>`` is the first
    eight or more digits of the SHA256 hash of the contents of the file.
    The hash is used to ensure unique names and to verify the contents of
    the file.

    The default value of `model_dir` is ``$TORCH_HOME/models`` where
    ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be
    overridden with the ``$TORCH_MODEL_ZOO`` environment variable.

    Args:
        url (string): URL of the object to download
        model_dir (string, optional): directory in which to save the object
        progress (bool, optional): whether or not to display a progress bar
            to stderr

    Example:
        >>> cached_file = utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth')
    """
    if model_dir is None:
        torch_home = os.path.expanduser(os.getenv("TORCH_HOME", "~/.torch"))
        model_dir = os.getenv("TORCH_MODEL_ZOO",
                              os.path.join(torch_home, "models"))
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    parts = urlparse(url)
    filename = os.path.basename(parts.path)
    if filename == "model_final.pkl":
        # workaround: pre-trained Caffe2 models from Detectron all share this
        # filename, so make the full path the filename by replacing / with _
        filename = parts.path.replace("/", "_")
    cached_file = os.path.join(model_dir, filename)
    if not os.path.exists(cached_file) and is_main_process():
        sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
        hash_prefix = HASH_REGEX.search(filename)
        if hash_prefix is not None:
            hash_prefix = hash_prefix.group(1)
            # workaround: Caffe2 models don't have a hash, but follow the
            # R-50 convention, which matches the hash PyTorch uses. So we
            # skip the hash matching if hash_prefix is less than 6 characters
            if len(hash_prefix) < 6:
                hash_prefix = None
        _download_url_to_file(url, cached_file, hash_prefix, progress=progress)
    synchronize()
    return cached_file
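# For reference: `HASH_REGEX` and `_download_url_to_file` mirror helpers from
# torch.utils.model_zoo. The regex below matches the definition used by older
# PyTorch releases; it captures the hash in names like "resnet18-5c106cde.pth".
import re

HASH_REGEX = re.compile(r'-([a-f0-9]*)\.')

# e.g. HASH_REGEX.search("resnet18-5c106cde.pth").group(1) == "5c106cde"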
def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu):
    all_predictions = all_gather(predictions_per_gpu)
    if not is_main_process():
        return
    # merge the list of dicts
    predictions = {}
    for p in all_predictions:
        predictions.update(p)
    # convert a dict where the key is the index in a list
    video_ids = list(sorted(predictions.keys()))
    if len(video_ids) != video_ids[-1] + 1:
        logger = logging.getLogger("AlphAction.inference")
        logger.warning(
            "Number of videos that were gathered from multiple processes is "
            "not a contiguous set. Some videos might be missing from the "
            "evaluation")

    # convert to a list
    predictions = [predictions[i] for i in video_ids]
    return predictions
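# `all_gather` is not shown in this section. A minimal sketch using
# torch.distributed.all_gather_object (available since PyTorch 1.8); the
# repos above implement the same contract by pickling objects into byte
# tensors, but either way every rank receives the list of per-rank objects.
import torch.distributed as dist

def all_gather(data):
    if not dist.is_available() or not dist.is_initialized():
        return [data]
    world_size = dist.get_world_size()
    if world_size == 1:
        return [data]
    gathered = [None for _ in range(world_size)]
    dist.all_gather_object(gathered, data)
    return gathered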
def val_in_train(model, criterion, dataset_name_val, data_loader_val,
                 tblogger, iteration, checkpointer, distributed):
    logger = logging.getLogger('eve.' + __name__)
    if distributed:
        model_val = model.module
    else:
        model_val = model
    # only the main process will return metrics
    metrics = inference(model_val, criterion, data_loader_val,
                        dataset_name_val)
    synchronize()
    if is_main_process():
        if tblogger is not None:
            for k, v in metrics.items():
                tblogger.add_scalar('val/' + k, v, iteration)
                logger.info("{}: {}".format(k, v))
        return metrics
    else:
        return None
def inference(
    model,
    data_loader,
    dataset_name,
    mem_active=False,
    output_folder=None,
):
    # convert to a torch.device for efficiency
    device = torch.device("cuda")
    num_devices = get_world_size()
    logger = logging.getLogger("AlphAction.inference")
    dataset = data_loader.dataset
    logger.info("Start evaluation on {} dataset ({} videos).".format(
        dataset_name, len(dataset)))
    start_time = time.time()
    predictions = compute_on_dataset(model, data_loader, device, logger,
                                     mem_active)
    # wait for all processes to complete before measuring the time
    synchronize()
    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=total_time))
    logger.info(
        "Total inference time: {} ({} s / video per device, on {} devices)".format(
            total_time_str, total_time * num_devices / len(dataset),
            num_devices))

    predictions = _accumulate_predictions_from_multiple_gpus(predictions)
    if not is_main_process():
        return

    if output_folder:
        torch.save(predictions, os.path.join(output_folder, "predictions.pth"))

    return evaluate(
        dataset=dataset,
        predictions=predictions,
        output_folder=output_folder,
    )
def inference(model, criterion, data_loader, dataset_name, save_result=False):
    logger = logging.getLogger('eve.' + __name__)
    device = torch.device('cuda')
    dataset = data_loader.dataset
    logger.info("Start evaluation on {} dataset ({} point clouds).".format(
        dataset_name, len(dataset)))

    if get_world_size() == 1:
        extra_args = {}
    else:
        rank = get_rank()
        extra_args = dict(desc="rank {}".format(rank))

    model.eval()
    outputs_per_gpu = {}
    targets_per_gpu = {}
    file_path_per_gpu = {}
    times = []
    with torch.no_grad():
        for batch in tqdm(data_loader, **extra_args):
            locs, feats, targets, metadata = batch
            inputs = ME.SparseTensor(feats, coords=locs).to(device)
            targets = targets.to(device, non_blocking=True).long()

            torch.cuda.synchronize()
            start_time = time.time()
            outputs = model(inputs, y=targets)
            torch.cuda.synchronize()
            end_time = time.time()
            times.append(end_time - start_time)

            arch = cfg.MODEL.ARCHITECTURE
            if arch == 'minkunet4d' or arch == 'minkunet_eve':
                for batch_idx in range(len(metadata)):
                    for time_idx in range(cfg.INPUT.VIDEO.NUM_FRAMES):
                        inv_map = metadata[batch_idx][time_idx]['inverse_map']
                        file_path = metadata[batch_idx][time_idx]['file_path']
                        locs_frame = (locs[:, -1] == batch_idx) & \
                                     (locs[:, -2] == time_idx)
                        one_output, one_target = compute_one_frame(
                            outputs, targets, locs_frame, inv_map)
                        outputs_per_gpu[file_path] = one_output
                        targets_per_gpu[file_path] = one_target
                        file_path_per_gpu[file_path] = file_path
            else:
                # other minknet
                for batch_idx in range(len(metadata)):
                    inv_map = metadata[batch_idx]['inverse_map']
                    file_path = metadata[batch_idx]['file_path']
                    # From MinkowskiEngine v0.3, batch index is on the first column
                    locs_frame = locs[:, -1] == batch_idx
                    one_output, one_target = compute_one_frame(
                        outputs, targets, locs_frame, inv_map)
                    outputs_per_gpu[file_path] = one_output
                    targets_per_gpu[file_path] = one_target
                    file_path_per_gpu[file_path] = file_path

    synchronize()
    logger.info("Total inference time: {}".format(np.sum(times)))

    # NOTE: `all_gather` will lead to CUDA out of memory, so we use
    # `scatter_gather` instead: it saves each process's result under
    # LOGS.DIR/tmp, and the files are cleared after gathering.
    outputs = scatter_gather(outputs_per_gpu)
    targets = scatter_gather(targets_per_gpu)
    file_paths = scatter_gather(file_path_per_gpu)
    if not is_main_process():
        return None

    all_outputs = {k: v.numpy() for o in outputs for k, v in o.items()}
    all_targets = {k: v.numpy() for t in targets for k, v in t.items()}
    all_file_paths = {k: v for f in file_paths for k, v in f.items()}

    assert len(all_outputs) == len(dataset.all_files), \
        '%d vs %d' % (len(all_outputs), len(dataset.all_files))

    if cfg.LOGS.SAVE_RESULT is False:
        all_file_paths = None
    metrics = evaluate(dataset, all_outputs, all_targets, all_file_paths)
    return metrics
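# `compute_one_frame` is not shown in this section. A minimal sketch under
# the assumptions visible at its call sites: `outputs` holds per-voxel
# logits aligned with `locs`, `locs_frame` is a boolean mask selecting one
# frame's voxels, and `inverse_map` maps original points back to voxel
# indices (the usual MinkowskiEngine quantization convention). This is a
# hypothetical helper, not the repo's actual implementation.
def compute_one_frame(outputs, targets, locs_frame, inv_map):
    # per-voxel logits and labels for this frame, moved to the CPU
    logits = outputs[locs_frame].cpu()
    labels = targets[locs_frame].cpu()
    pred = logits.max(dim=1)[1]
    # de-voxelize: index with the inverse map to recover one prediction
    # per original input point
    one_output = pred[inv_map]
    one_target = labels[inv_map]
    return one_output, one_target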
def validate(val_loader, model, criterion, cfg, meters, global_step=0,
             device='cuda', local_rank=-1):
    # compute the target -> model-output map from the target file
    target_map = None
    if cfg.DATA.TARGETMAP:
        target_file = os.path.join(cfg.DATA.PATH, cfg.DATA.TARGETMAP)
        if os.path.isfile(target_file):
            target_file = json.load(open(target_file))
            target_file = {
                key: val[:val.rfind('(')]
                for key, val in target_file.items()
            }
            if hasattr(val_loader.dataset, 'labelmap') and \
                    val_loader.dataset.labelmap is not None:
                labelmap = val_loader.dataset.labelmap
                target_map = {}
                for objectname, objectid in labelmap.items():
                    target_map[objectid] = []
                    for imagenetname, objectalias in target_file.items():
                        if objectname == objectalias or \
                                objectname.startswith(objectalias + '('):
                            target_map[objectid].append(int(imagenetname))
            else:
                logging.warning(
                    "Given validation dataset does not have labelmap!")
        else:
            logging.warning("Given target map file {} does not exist!".format(
                target_file))

    # switch to evaluate mode
    model.eval()

    results_dict = {}
    total_loss = 0
    total_cnt = 0
    total_top1 = 0
    total_top5 = 0
    dataset_len = len(val_loader.dataset)
    with torch.no_grad():
        start = time.time()
        for i, batch in enumerate(val_loader):
            image, target, img_id = batch[0], batch[1], batch[2:]
            if len(img_id) == 0:
                img_id = range(
                    dataset_len * local_rank + total_cnt,
                    dataset_len * local_rank + total_cnt + image.size(0))
            else:
                img_id = img_id[0].tolist()
            image = image.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)

            with autocast(enabled=cfg.AMP.ENABLED):
                if cfg.AMP.ENABLED and cfg.AMP.MEMORY_FORMAT == 'nwhc':
                    image = image.contiguous(memory_format=torch.channels_last)
                    target = target.contiguous(
                        memory_format=torch.channels_last)
                # compute output and record loss
                output = model(image)
                loss = criterion(output, target)
            total_loss += loss.item()
            total_cnt += image.size(0)

            # measure and record accuracy
            if cfg.LOSS.LOSS == "xentropy":
                precision = compute_accuracy(output, target, topk=(1, 5),
                                             target_map=target_map)  # B*2
                score = precision.sum(0, keepdim=False)
                total_top1 += score[0].item()
                total_top5 += score[1].item()
                if cfg.EVALUATE:
                    results_dict.update({
                        im_id: (prec, label)
                        for im_id, prec, label in zip(
                            img_id, precision.to(torch.device("cpu")),
                            target.to(torch.device("cpu")))
                    })
            else:
                raise ValueError("Only xentropy loss is supported!")

    # measure elapsed time
    total_time = time.time() - start

    # measure epoch metrics
    test_metrics = torch.Tensor(
        [total_loss, total_time, total_cnt, total_top1, total_top5]).to(device)
    if cfg.distributed:
        torch.distributed.all_reduce(test_metrics)
    if cfg.EVALUATE:
        results_dict = _accumulate_predictions_from_multiple_gpus(
            results_dict, cfg.GATHER_ON_CPU,
        )

    test_loss_gathered = test_metrics[0] / test_metrics[2]
    test_time_gathered = test_metrics[1] / test_metrics[2]
    metrics = {
        'top1': 100 * test_metrics[3] / test_metrics[2],
        'top5': 100 * test_metrics[4] / test_metrics[2]
    }
    output = metrics['top1'].item()

    if not is_main_process():
        # let the main process do the final computing
        return output

    if cfg.EVALUATE:
        assert len(results_dict) == len(val_loader.dataset), \
            "Number of gathered items {} does not match the dataset size {}!".format(
                len(results_dict), len(val_loader.dataset))
        scores = torch.stack([val[0] for key, val in results_dict.items()])
        targets = torch.stack([val[1] for key, val in results_dict.items()])
        metrics = output_metrics(
            scores, targets=targets if cfg.OUTPUT_PERCLASS_ACC else None)

    logging.info("ACCURACY: {}%".format(metrics['top1']))
    meters.update_metrics({
        'epoch_metrics': {
            'total_cnt': float(test_metrics[2]),
            'loss': test_loss_gathered,
            'time': test_time_gathered
        },
        'accuracy_metrics': metrics
    })
    logging.info(
        meters.delimiter.join([
            "iter: {iter}",
            "max mem: {memory:.0f}",
        ]).format(
            iter=global_step,
            memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
        ) + "\n " + meters.get_logs(int(global_step)))

    # save per-image results
    if cfg.EVALUATE and hasattr(val_loader.dataset, 'get_img_key'):
        results_dict = {
            val_loader.dataset.get_img_key(key): val
            for key, val in results_dict.items()
        }
        torch.save(results_dict,
                   os.path.join(meters.tb_logger.logdir, 'results.pth'))

    return output
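# `compute_accuracy` is not shown in this section. A minimal sketch matching
# how it is used above: it must return a B x 2 tensor whose columns are
# per-sample top-1 and top-5 correctness (0/1), so summing over dim 0 yields
# batch-level counts. The `target_map` handling is an assumption: a
# prediction counts as correct if it lands in the set of ImageNet ids
# aliased to the target class. Hypothetical, not the repo's implementation.
def compute_accuracy(output, target, topk=(1, 5), target_map=None):
    maxk = max(topk)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)  # B x maxk
    if target_map is not None:
        # correct[b, k] = 1 iff pred[b, k] is one of the ids mapped to target[b]
        correct = torch.zeros_like(pred, dtype=torch.bool)
        for b in range(pred.size(0)):
            valid = set(target_map.get(int(target[b]), []))
            for k in range(maxk):
                correct[b, k] = int(pred[b, k]) in valid
    else:
        correct = pred.eq(target.view(-1, 1).expand_as(pred))
    res = [correct[:, :k].any(dim=1).float() for k in topk]
    return torch.stack(res, dim=1)  # B x len(topk)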
def do_infer(
    model,
    data_loader,
    dataset_name,
    device="cuda",
    output_folder=None,
):
    # convert to a torch.device for efficiency
    device = torch.device(device)
    num_devices = get_world_size()
    logger = logging.getLogger("EfficientDet.inference")
    dataset = data_loader.dataset
    logger.info("Start evaluation on {} dataset ({} images).".format(
        dataset_name, len(dataset)))
    total_timer = Timer()
    inference_timer = Timer()
    total_timer.tic()
    predictions = compute_on_dataset(model, data_loader, device,
                                     inference_timer)
    # wait for all processes to complete before measuring the time
    synchronize()
    total_time = total_timer.toc()
    total_time_str = get_time_str(total_time)
    logger.info(
        "Total run time: {} ({} s / img per device, on {} devices)".format(
            total_time_str, total_time * num_devices / len(dataset),
            num_devices))
    total_infer_time = get_time_str(inference_timer.total_time)
    logger.info(
        "Model inference time: {} ({} s / img per device, on {} devices)".format(
            total_infer_time,
            inference_timer.total_time * num_devices / len(dataset),
            num_devices))

    predictions = _accumulate_predictions_from_multiple_gpus(predictions)
    if not is_main_process():
        return

    coco_results = []
    image_ids = []
    for image_id, prediction in enumerate(predictions):
        original_id = dataset.image_ids[image_id]
        image_ids.append(original_id)
        coco_results.extend([{
            "image_id": original_id,
            "category_id": dataset.return_coco_label(e['class']),
            "bbox": e['bbox'],
            "score": e['score'],
        } for e in prediction])

    map_05_09 = 0
    with tempfile.NamedTemporaryFile() as f:
        file_path = f.name
        if output_folder:
            file_path = os.path.join(output_folder, 'bbox_results.json')
        with open(file_path, "w") as w_obj:
            json.dump(coco_results, w_obj)

        # load results in the COCO evaluation tool
        coco_true = dataset.coco
        coco_pred = coco_true.loadRes(file_path)

        # run COCO evaluation
        coco_eval = COCOeval(coco_true, coco_pred, 'bbox')
        coco_eval.params.imgIds = image_ids
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
        # stats[0] is AP averaged over IoU thresholds 0.50:0.95,
        # the primary COCO metric
        map_05_09 = coco_eval.stats[0]

    return map_05_09
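# Hypothetical call site for do_infer (model, loader, and folder names are
# assumptions): evaluate a detector on a COCO-style validation set and log
# the primary COCO metric returned above.
mAP = do_infer(model, val_loader, "coco_val", output_folder="./eval")
logger.info("AP@[0.50:0.95] = {:.4f}".format(mAP))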