Example #1
def test(cfg, local_rank, distributed, logger=None):
    device = torch.device('cuda')
    cpu_device = torch.device('cpu')

    # create model
    logger.info("Creating model \"{}\"".format(cfg.MODEL.ARCHITECTURE))
    model = build_model(cfg).to(device)
    criterion = torch.nn.CrossEntropyLoss(ignore_index=255).to(device)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            broadcast_buffers=True,
        )

    # checkpoint
    checkpointer = Checkpointer(model, save_dir=cfg.LOGS.DIR, logger=logger)
    _ = checkpointer.load(f=cfg.MODEL.WEIGHT)

    # data_loader
    logger.info('Loading dataset "{}"'.format(cfg.DATASETS.TEST))
    stage = cfg.DATASETS.TEST.split('_')[-1]
    data_loader = make_data_loader(cfg, stage, distributed)
    dataset_name = cfg.DATASETS.TEST

    metrics = inference(model, criterion, data_loader, dataset_name, True)

    if is_main_process():
        logger.info("Metrics:")
        for k, v in metrics.items():
            logger.info("{}: {}".format(k, v))
Example #2
def inference(
        model,
        data_loader,
        dataset_name,
        iou_types=("bbox", ),
        box_only=False,
        device="cuda",
        expected_results=(),
        expected_results_sigma_tol=4,
        output_folder=None,
):
    # convert to a torch.device for efficiency
    device = torch.device(device)
    num_devices = (torch.distributed.get_world_size()
                   if torch.distributed.is_initialized() else 1)
    logger = logging.getLogger("maskrcnn_benchmark.inference")
    dataset = data_loader.dataset
    logger.info("Start evaluation on {} dataset({} images).".format(
        dataset_name, len(dataset)))
    start_time = time.time()
    predictions = compute_on_dataset(model, data_loader, device)
    # wait for all processes to complete before measuring the time
    synchronize()
    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=total_time))
    logger.info(
        "Total inference time: {} ({} s / img per device, on {} devices)".
        format(total_time_str, total_time * num_devices / len(dataset),
               num_devices))

    predictions = _accumulate_predictions_from_multiple_gpus(predictions)
    if not is_main_process():
        return

    if output_folder:
        torch.save(predictions, os.path.join(output_folder, "predictions.pth"))

    extra_args = dict(
        box_only=box_only,
        iou_types=iou_types,
        expected_results=expected_results,
        expected_results_sigma_tol=expected_results_sigma_tol,
    )

    return evaluate(dataset=dataset,
                    predictions=predictions,
                    output_folder=output_folder,
                    **extra_args)
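
compute_on_dataset is not shown in this example. A hedged sketch of the usual pattern, assuming the loader yields (images, targets, image_ids) batches in the maskrcnn_benchmark style (the signature and batch format are assumptions):

import torch

def compute_on_dataset(model, data_loader, device):
    model.eval()
    results_dict = {}
    cpu_device = torch.device("cpu")
    with torch.no_grad():
        for images, targets, image_ids in data_loader:
            output = model(images.to(device))
            # move predictions off the GPU so the accumulated dict stays small
            output = [o.to(cpu_device) for o in output]
            results_dict.update(
                {img_id: result for img_id, result in zip(image_ids, output)})
    return results_dict
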
Example #3
def init_pretrained_weights(key):
    """Initializes model with pretrained weights.

    Layers that don't match with pretrained layers in name or size are kept unchanged.
    """
    import os
    import errno
    import gdown

    def _get_torch_home():
        ENV_TORCH_HOME = 'TORCH_HOME'
        ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
        DEFAULT_CACHE_DIR = '~/.cache'
        torch_home = os.path.expanduser(
            os.getenv(
                ENV_TORCH_HOME,
                os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR),
                             'torch')))
        return torch_home

    torch_home = _get_torch_home()
    model_dir = os.path.join(torch_home, 'checkpoints')
    try:
        os.makedirs(model_dir)
    except OSError as e:
        if e.errno == errno.EEXIST:
            # Directory already exists, ignore.
            pass
        else:
            # Unexpected OSError, re-raise.
            raise

    filename = model_urls[key].split('/')[-1]

    cached_file = os.path.join(model_dir, filename)

    if not os.path.exists(cached_file):
        if comm.is_main_process():
            gdown.download(model_urls[key], cached_file, quiet=False)

    comm.synchronize()

    logger.info(f"Loading pretrained model from {cached_file}")
    state_dict = torch.load(cached_file, map_location=torch.device('cpu'))

    return state_dict
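
The docstring says layers that don't match in name or size are left unchanged, but init_pretrained_weights only returns the state_dict, so that filtering presumably happens in the caller. A hedged sketch of such a filtering step (load_matched_weights is an illustrative name, not this project's API):

def load_matched_weights(model, state_dict):
    model_dict = model.state_dict()
    # keep only entries whose name and tensor shape match the current model
    matched = {
        k: v for k, v in state_dict.items()
        if k in model_dict and model_dict[k].size() == v.size()
    }
    model_dict.update(matched)  # unmatched layers keep their existing weights
    model.load_state_dict(model_dict)
    return matched
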
Example #4
def cache_url(url, model_dir=None, progress=True):
    r"""Loads the Torch serialized object at the given URL.
    If the object is already present in `model_dir`, it's deserialized and
    returned. The filename part of the URL should follow the naming convention
    ``filename-<sha256>.ext`` where ``<sha256>`` is the first eight or more
    digits of the SHA256 hash of the contents of the file. The hash is used to
    ensure unique names and to verify the contents of the file.
    The default value of `model_dir` is ``$TORCH_HOME/models`` where
    ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be
    overridden with the ``$TORCH_MODEL_ZOO`` environment variable.
    Args:
        url (string): URL of the object to download
        model_dir (string, optional): directory in which to save the object
        progress (bool, optional): whether or not to display a progress bar to stderr
    Example:
        >>> cached_file = utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth')
    """
    if model_dir is None:
        torch_home = os.path.expanduser(os.getenv("TORCH_HOME", "~/.torch"))
        model_dir = os.getenv("TORCH_MODEL_ZOO",
                              os.path.join(torch_home, "models"))
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    parts = urlparse(url)
    filename = os.path.basename(parts.path)
    if filename == "model_final.pkl":
        # workaround as pre-trained Caffe2 models from Detectron have all the same filename
        # so make the full path the filename by replacing / with _
        filename = parts.path.replace("/", "_")
    cached_file = os.path.join(model_dir, filename)
    if not os.path.exists(cached_file) and is_main_process():
        sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
        hash_prefix = HASH_REGEX.search(filename)
        if hash_prefix is not None:
            hash_prefix = hash_prefix.group(1)
            # workaround: Caffe2 models don't have a hash, but follow the R-50 convention,
            # which matches the hash PyTorch uses. So we skip the hash matching
            # if the hash_prefix is less than 6 characters
            if len(hash_prefix) < 6:
                hash_prefix = None
        _download_url_to_file(url, cached_file, hash_prefix, progress=progress)
    synchronize()
    return cached_file
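
cache_url relies on HASH_REGEX and _download_url_to_file from torch's model-zoo utilities. For context, a sketch of the hash pattern implied by the ``filename-<sha256>.ext`` convention in the docstring (the exact definition may differ):

import re

# matches e.g. "resnet18-5c106cde.pth" and captures the hex hash prefix "5c106cde"
HASH_REGEX = re.compile(r'-([a-f0-9]*)\.')
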
Example #5
def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu):
    all_predictions = all_gather(predictions_per_gpu)
    if not is_main_process():
        return
    # merge the list of dicts
    predictions = {}
    for p in all_predictions:
        predictions.update(p)
    # the dict keys are list indices; check that they form a contiguous range
    video_ids = list(sorted(predictions.keys()))
    if len(video_ids) != video_ids[-1] + 1:
        logger = logging.getLogger("AlphAction.inference")
        logger.warning(
            "Number of videos gathered from multiple processes is not "
            "a contiguous set. Some videos might be missing from the evaluation."
        )

    # convert to a list
    predictions = [predictions[i] for i in video_ids]
    return predictions
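
A small illustration of the warning path above: if the gathered keys are 0, 1 and 3, the sorted id list has length 3 but its last id is 3, so the check fires and video 2 is silently absent from the returned list.

predictions = {0: "a", 1: "b", 3: "c"}        # toy gathered dict
video_ids = list(sorted(predictions.keys()))  # [0, 1, 3]
print(len(video_ids) != video_ids[-1] + 1)    # True -> warning is logged
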
Example #6
File: trainer.py  Project: ecr23xx/eve
def val_in_train(model, criterion, dataset_name_val, data_loader_val,
                 tblogger, iteration, checkpointer, distributed):
    logger = logging.getLogger('eve.' + __name__)

    if distributed:
        model_val = model.module
    else:
        model_val = model

    # only the main process returns the metrics
    metrics = inference(model_val, criterion,
                        data_loader_val, dataset_name_val)

    synchronize()

    if is_main_process():
        if tblogger is not None:
            for k, v in metrics.items():
                tblogger.add_scalar('val/' + k, v, iteration)
                logger.info("{}: {}".format(k, v))
        return metrics
    else:
        return None
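
Note that model.module unwraps the DistributedDataParallel wrapper so evaluation runs on the bare model, and the explicit synchronize() keeps all ranks aligned even though only the main process receives metrics from inference.
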
Example #7
def inference(
        model,
        data_loader,
        dataset_name,
        mem_active=False,
        output_folder=None,
):
    # convert to a torch.device for efficiency
    device = torch.device("cuda")
    num_devices = get_world_size()
    logger = logging.getLogger("AlphAction.inference")
    dataset = data_loader.dataset
    logger.info("Start evaluation on {} dataset({} videos).".format(dataset_name, len(dataset)))
    start_time = time.time()
    predictions = compute_on_dataset(model, data_loader, device, logger, mem_active)
    # wait for all processes to complete before measuring the time
    synchronize()
    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=total_time))
    logger.info(
        "Total inference time: {} ({} s / video per device, on {} devices)".format(
            total_time_str, total_time * num_devices / len(dataset), num_devices
        )
    )

    predictions = _accumulate_predictions_from_multiple_gpus(predictions)
    if not is_main_process():
        return

    if output_folder:
        torch.save(predictions, os.path.join(output_folder, "predictions.pth"))

    return evaluate(
        dataset=dataset,
        predictions=predictions,
        output_folder=output_folder,
    )
Example #8
File: inference.py  Project: ecr23xx/eve
def inference(model, criterion, data_loader, dataset_name, save_result=False):
    logger = logging.getLogger('eve.' + __name__)

    device = torch.device('cuda')
    dataset = data_loader.dataset
    logger.info("Start evaluation on {} dataset ({} point clouds).".format(
        dataset_name, len(dataset)))

    if get_world_size() == 1:
        extra_args = {}
    else:
        rank = get_rank()
        extra_args = dict(desc="rank {}".format(rank))

    start_time = time.time()

    model.eval()
    outputs_per_gpu = {}
    targets_per_gpu = {}
    file_path_per_gpu = {}

    times = []

    with torch.no_grad():
        for batch in tqdm(data_loader, **extra_args):
            locs, feats, targets, metadata = batch
            inputs = ME.SparseTensor(feats, coords=locs).to(device)
            targets = targets.to(device, non_blocking=True).long()

            torch.cuda.synchronize()
            start_time = time.time()
            outputs = model(inputs, y=targets)
            torch.cuda.synchronize()
            end_time = time.time()
            times.append(end_time - start_time)

            arch = cfg.MODEL.ARCHITECTURE
            if arch == 'minkunet4d' or arch == 'minkunet_eve':
                for batch_idx in range(len(metadata)):
                    for time_idx in range(cfg.INPUT.VIDEO.NUM_FRAMES):
                        inv_map = metadata[batch_idx][time_idx]['inverse_map']
                        file_path = metadata[batch_idx][time_idx]['file_path']

                        locs_frame = (locs[:, -1] == batch_idx) & \
                            (locs[:, -2] == time_idx)
                        one_output, one_target = compute_one_frame(
                            outputs, targets, locs_frame, inv_map)

                        outputs_per_gpu[file_path] = one_output
                        targets_per_gpu[file_path] = one_target
                        file_path_per_gpu[file_path] = file_path
            else:  # other minknet
                for batch_idx in range(len(metadata)):
                    inv_map = metadata[batch_idx]['inverse_map']
                    file_path = metadata[batch_idx]['file_path']

                    # From MinkowskiEngine v0.3, batch index is on the first column
                    locs_frame = locs[:, -1] == batch_idx
                    one_output, one_target = compute_one_frame(
                        outputs, targets, locs_frame, inv_map)

                    outputs_per_gpu[file_path] = one_output
                    targets_per_gpu[file_path] = one_target
                    file_path_per_gpu[file_path] = file_path

    synchronize()

    logger.info("Total inference time: {}".format(np.sum(times)))

    # NOTE: `all_gather` can lead to CUDA out of memory here, so we use
    # `scatter_gather` instead: each process saves its results under
    # LOGS.DIR/tmp, and the files are cleared after gathering.
    outputs = scatter_gather(outputs_per_gpu)
    targets = scatter_gather(targets_per_gpu)
    file_paths = scatter_gather(file_path_per_gpu)
    if not is_main_process():
        return None

    all_outputs = {k: v.numpy() for o in outputs for k, v in o.items()}
    all_targets = {k: v.numpy() for t in targets for k, v in t.items()}
    all_file_paths = {k: v for f in file_paths for k, v in f.items()}

    assert len(all_outputs) == len(dataset.all_files), \
        '%d vs %d' % (len(all_outputs), len(dataset.all_files))

    if cfg.LOGS.SAVE_RESULT is False:
        all_file_paths = None
    metrics = evaluate(dataset, all_outputs, all_targets, all_file_paths)

    return metrics
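
scatter_gather is not shown here; per the NOTE above it gathers through the filesystem rather than with collective ops. A minimal sketch under the assumption of a shared filesystem and the distributed helpers used in the earlier examples (the directory layout and function body are illustrative, not the project's actual implementation):

import os
import torch

def scatter_gather(data, tmp_dir="tmp"):
    os.makedirs(tmp_dir, exist_ok=True)
    rank, world_size = get_rank(), get_world_size()
    torch.save(data, os.path.join(tmp_dir, "part_{}.pth".format(rank)))
    synchronize()                        # wait until every rank has written its part
    gathered = []
    if is_main_process():
        for r in range(world_size):
            path = os.path.join(tmp_dir, "part_{}.pth".format(r))
            gathered.append(torch.load(path, map_location="cpu"))
            os.remove(path)              # clear the temporary files after gathering
    synchronize()                        # keep ranks from overwriting files too early
    return gathered
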
Example #9
def validate(val_loader,
             model,
             criterion,
             cfg,
             meters,
             global_step=0,
             device='cuda',
             local_rank=-1):
    # compute target to model output map from target file
    target_map = None
    if cfg.DATA.TARGETMAP:
        target_file = os.path.join(cfg.DATA.PATH, cfg.DATA.TARGETMAP)
        if os.path.isfile(target_file):
            target_file = json.load(open(target_file))
            target_file = {
                key: val[:val.rfind('(')]
                for key, val in target_file.items()
            }
            if hasattr(val_loader.dataset,
                       'labelmap') and val_loader.dataset.labelmap is not None:
                labelmap = val_loader.dataset.labelmap
                target_map = {}
                for objectname, objectid in labelmap.items():
                    target_map[objectid] = []
                    for imagenetname, objectalias in target_file.items():
                        if objectname == objectalias or objectname.startswith(
                                objectalias + '('):
                            target_map[objectid].append(int(imagenetname))
            else:
                logging.warning(
                    "The given validation dataset does not have a labelmap!")
        else:
            logging.warning("Given target map file {} does not exists!".format(
                target_file))
    # switch to evaluate mode
    model.eval()
    results_dict = {}
    total_loss = 0
    total_cnt = 0
    total_top1 = 0
    total_top5 = 0
    dataset_len = len(val_loader.dataset)

    with torch.no_grad():
        start = time.time()
        for i, batch in enumerate(val_loader):
            image, target, img_id = batch[0], batch[1], batch[2:]
            if len(img_id) == 0:
                img_id = range(
                    dataset_len * local_rank + total_cnt,
                    dataset_len * local_rank + total_cnt + image.size(0))
            else:
                img_id = img_id[0].tolist()
            image, target = image.to(device, non_blocking=True), target.to(
                device, non_blocking=True)

            with autocast(enabled=cfg.AMP.ENABLED):
                # 'nwhc' is this config's key for channels-last (NHWC) memory format
                if cfg.AMP.ENABLED and cfg.AMP.MEMORY_FORMAT == 'nwhc':
                    image = image.contiguous(memory_format=torch.channels_last)
                    target = target.contiguous(
                        memory_format=torch.channels_last)
                # compute output and record loss
                output = model(image)
                loss = criterion(output, target)

            total_loss += loss.item()
            total_cnt += image.size(0)

            # measure and record accuracy
            if cfg.LOSS.LOSS == "xentropy":
                precision = compute_accuracy(output,
                                             target,
                                             topk=(1, 5),
                                             target_map=target_map)  # B*2
                score = precision.sum(0, keepdim=False)
                total_top1 += score[0].item()
                total_top5 += score[1].item()
                if cfg.EVALUATE:
                    results_dict.update({
                        im_id: (prec, label)
                        for im_id, prec, label in zip(
                            img_id, precision.to(torch.device("cpu")),
                            target.to(torch.device("cpu")))
                    })
            else:
                raise ValueError("Only xentropy loss is supported!")

        # measure elapsed time
        total_time = time.time() - start

        # measure epoch metrics
        test_metrics = torch.Tensor(
            [total_loss, total_time, total_cnt, total_top1,
             total_top5]).to(device)
        if cfg.distributed:
            torch.distributed.all_reduce(test_metrics)
            if cfg.EVALUATE:
                results_dict = _accumulate_predictions_from_multiple_gpus(
                    results_dict,
                    cfg.GATHER_ON_CPU,
                )

        test_loss_gathered = test_metrics[0] / test_metrics[2]
        test_time_gathered = test_metrics[1] / test_metrics[2]
        metrics = {
            'top1': 100 * test_metrics[3] / test_metrics[2],
            'top5': 100 * test_metrics[4] / test_metrics[2]
        }

        output = metrics['top1'].item()
        if not is_main_process():
            # let the main process do the final computing
            return output

        if cfg.EVALUATE:
            assert len(results_dict) == len(val_loader.dataset), \
                "Number of gathered items {} does not match the dataset size {}!".format(
                    len(results_dict), len(val_loader.dataset))
            scores = torch.stack([val[0] for key, val in results_dict.items()])
            targets = torch.stack(
                [val[1] for key, val in results_dict.items()])
            metrics = output_metrics(
                scores, targets=targets if cfg.OUTPUT_PERCLASS_ACC else None)
        logging.info("ACCURACY: {}%".format(metrics['top1']))
        meters.update_metrics({
            'epoch_metrics': {
                'total_cnt': float(test_metrics[2]),
                'loss': test_loss_gathered,
                'time': test_time_gathered
            },
            'accuracy_metrics': metrics
        })
        logging.info(
            meters.delimiter.join([
                "iter: {iter}",
                "max mem: {memory:.0f}",
            ]).format(
                iter=global_step,
                memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
            ) + "\n    " + meters.get_logs(int(global_step)))

        # save per image result
        if cfg.EVALUATE and hasattr(val_loader.dataset, 'get_img_key'):
            results_dict = {
                val_loader.dataset.get_img_key(key): val
                for key, val in results_dict.items()
            }
            torch.save(results_dict,
                       os.path.join(meters.tb_logger.logdir, 'results.pth'))

    return output
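
Packing the five scalar statistics into a single tensor lets one torch.distributed.all_reduce call sum the loss, wall-clock time, sample count, and top-1/top-5 hits across all ranks at once; the non-main ranks then return early and leave the per-class accuracy computation and logging to the main process.
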
def do_infer(
    model,
    data_loader,
    dataset_name,
    device="cuda",
    output_folder=None,
):
    # convert to a torch.device for efficiency
    device = torch.device(device)
    num_devices = get_world_size()
    logger = logging.getLogger("EfficientDet.inference")
    dataset = data_loader.dataset
    logger.info("Start evaluation on {} dataset({} images).".format(
        dataset_name, len(dataset)))
    total_timer = Timer()
    inference_timer = Timer()
    total_timer.tic()
    predictions = compute_on_dataset(model, data_loader, device,
                                     inference_timer)
    # wait for all processes to complete before measuring the time
    synchronize()
    total_time = total_timer.toc()
    total_time_str = get_time_str(total_time)
    logger.info(
        "Total run time: {} ({} s / img per device, on {} devices)".format(
            total_time_str, total_time * num_devices / len(dataset),
            num_devices))
    total_infer_time = get_time_str(inference_timer.total_time)
    logger.info(
        "Model inference time: {} ({} s / img per device, on {} devices)".
        format(
            total_infer_time,
            inference_timer.total_time * num_devices / len(dataset),
            num_devices,
        ))

    predictions = _accumulate_predictions_from_multiple_gpus(predictions)
    if not is_main_process():
        return

    coco_results = []
    image_ids = []
    for image_id, prediction in enumerate(predictions):
        original_id = dataset.image_ids[image_id]
        image_ids.append(original_id)
        coco_results.extend([{
            "image_id": original_id,
            "category_id": dataset.return_coco_label(e['class']),
            "bbox": e['bbox'],
            "score": e['score'],
        } for e in prediction])

    map_05_09 = 0
    with tempfile.NamedTemporaryFile() as f:
        file_path = f.name
        output_folder = './'
        if output_folder:
            file_path = os.path.join(output_folder, 'bbox_results.json')
        with open(file_path, "w") as w_obj:
            json.dump(coco_results, w_obj)

        # load results in COCO evaluation tool
        coco_true = dataset.coco
        coco_pred = coco_true.loadRes(file_path)

        # run COCO evaluation
        coco_eval = COCOeval(coco_true, coco_pred, 'bbox')
        coco_eval.params.imgIds = image_ids
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        map_05_09 = coco_eval.stats[0]
    return map_05_09
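
For reference, coco_eval.stats[0] from COCOeval is AP averaged over IoU thresholds 0.50:0.95, so the value returned as map_05_09 is the standard COCO mAP@[.50:.95].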