Exemplo n.º 1
0
def validate(args, val_loader, model, criterion, epoch):
    """
    Evaluate ``model`` on ``val_loader`` and report top-1 precision and FLOPS.

    The model is wrapped with FLOPS-counting methods, switched to eval mode
    and run under ``torch.no_grad()`` on every batch of the loader. When
    ``args.plot_ponder`` is truthy, the input, ponder cost and masks of each
    batch are plotted and shown.

    ``criterion`` is accepted for interface compatibility but is not used.
    ``device`` is read from the enclosing module scope.

    Returns the running average of the top-1 precision (``top1.avg``).
    """
    prec_meter = utils.AverageMeter()

    # attach the FLOPS counter, switch to evaluate mode and reset the counters
    model = flopscounter.add_flops_counting_methods(model)
    model.eval().start_flops_count()
    model.reset_flops_count()

    total_batches = len(val_loader)
    with torch.no_grad():
        progress = tqdm.tqdm(val_loader,
                             total=total_batches,
                             ascii=True,
                             mininterval=5)
        for images, labels in progress:
            images = images.to(device=device, non_blocking=True)
            labels = labels.to(device=device, non_blocking=True)

            # forward pass; a fresh meta dict is handed to the model per batch
            batch_meta = dict(masks=[],
                              device=device,
                              gumbel_temp=1.0,
                              gumbel_noise=False,
                              epoch=epoch)
            logits, batch_meta = model(images, batch_meta)
            logits = logits.float()

            # accumulate top-1 precision, weighted by the batch size
            batch_prec = utils.accuracy(logits.data, labels)[0]
            prec_meter.update(batch_prec.item(), images.size(0))

            if not args.plot_ponder:
                continue
            viz.plot_image(images)
            viz.plot_ponder_cost(batch_meta['masks'])
            viz.plot_masks(batch_meta['masks'])
            plt.show()

    print(f'* Epoch {epoch} - Prec@1 {prec_meter.avg:.3f}')
    mmacs_per_image = model.compute_average_flops_cost()[0] / 1e6
    print(
        f'* average FLOPS (multiply-accumulates, MACs) per image:  {mmacs_per_image:.6f} MMac'
    )
    model.stop_flops_count()
    return prec_meter.avg
Exemplo n.º 2
0
def speedtest(config,
              val_loader,
              val_dataset,
              model,
              criterion,
              output_dir,
              tb_log_dir,
              epoch,
              writer_dict=None):
    '''
    Measure inference throughput of ``model`` on ``val_loader``.

    Speedtest mode first warms up on half the test size (especially Pytorch
    CUDA benchmark mode needs warmup to optimize operations),
    and then performs the speedtest on the other half. FLOPS are counted
    during the warmup half only and logged together with the parameter
    count right before the timed half starts.

    ``criterion``, ``output_dir``, ``tb_log_dir`` and ``writer_dict`` are
    accepted for interface compatibility but are not used.

    Returns:
        Samples per second measured over the timed half, or ``0.0`` when
        the loader yields no batches (nothing could be timed).
    '''

    # switch to evaluate mode
    model.eval()

    logger.info(f'# SPEEDTEST: EPOCH {epoch}')

    logger.info('\n\n>> WARMUP')
    model = add_flops_counting_methods(model)
    model.start_flops_count()

    idx = 0  # number of images processed in the current phase
    start_time = None  # set when the timed half begins
    with torch.no_grad():
        # An explicit iterator is kept (instead of enumerate) so that the
        # timer starts *before* the first timed batch is fetched.
        val_iter = iter(val_loader)
        num_step = len(val_iter)
        warmup_steps = num_step // 2
        for i in range(num_step):

            if i == warmup_steps:
                # end of warmup: report model size and FLOPS, then start timing
                _, total_flops, batch_count = model.compute_average_flops_cost(
                )
                logger.info(
                    f'# PARAMS {get_model_parameters_number(model, as_string=False)/1e6}M'
                )
                # With a single batch the warmup half is empty (idx == 0);
                # avoid dividing by zero in that case.
                gmacs_per_image = (total_flops / idx) / 1e9 if idx else 0.0
                logger.info(
                    f'# FLOPS (multiply-accumulates, MACs): {gmacs_per_image} G on {idx} images (batch_count={batch_count})'
                )
                model.stop_flops_count()
                idx = 0
                logger.info('\n\n>> SPEEDTEST')
                torch.cuda.synchronize()
                start_time = time.perf_counter()

            images, _, _, _ = next(val_iter)
            images = images.cuda(non_blocking=True)
            dynconv_meta = make_dynconv_meta(config, epoch, i)
            outputs, dynconv_meta = model(images, dynconv_meta)

            output = outputs[-1] if isinstance(outputs, list) else outputs
            if config.TEST.FLIP_TEST:
                images_flipped = np.flip(images.cpu().numpy(), 3).copy()
                images_flipped = torch.from_numpy(images_flipped).cuda()
                # The model takes (input, meta) and returns (outputs, meta),
                # exactly as in the regular forward pass above, so the
                # flipped pass needs its own meta and must unpack the tuple.
                flipped_meta = make_dynconv_meta(config, epoch, i)
                outputs_flipped, _ = model(images_flipped, flipped_meta)

                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped

                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]

                # result is unused; computed so the timing covers the full
                # flip-test workload
                output = (output + output_flipped) * 0.5

            idx += images.size(0)

    if start_time is None:
        # The loader yielded no batches, so the timed phase never started.
        model.stop_flops_count()
        logger.warning('SPEEDTEST: no batches available, nothing was timed')
        return 0.0

    # wait for all queued CUDA work before reading the clock
    torch.cuda.synchronize()
    stop_time = time.perf_counter()
    elapsed = stop_time - start_time
    samples_per_second = idx / elapsed
    logger.info(
        f'ELAPSED TIME: {elapsed}s, SAMPLES PER SECOND: {samples_per_second} ON {idx} SAMPLES'
    )

    return samples_per_second
Exemplo n.º 3
0
def validate(config,
             val_loader,
             val_dataset,
             model,
             criterion,
             output_dir,
             tb_log_dir,
             epoch,
             writer_dict=None):
    """
    Evaluate ``model`` on the validation set and log loss, accuracy and FLOPS.

    Every batch of ``val_loader`` is run through the model in eval mode under
    ``torch.no_grad()``. Predictions and box information are collected into
    arrays sized by ``len(val_dataset)`` and handed to
    ``val_dataset.evaluate``. FLOPS are counted for the whole pass, and when
    the model's meta dict contains ``'masks'`` the per-layer conditional
    execution cost is logged as well.

    Flip testing (``config.TEST.FLIP_TEST``) is intentionally not applied
    here: it is not supported for dynconv.

    ``tb_log_dir`` is accepted for interface compatibility but is not used.
    If ``writer_dict`` is given, scalars are written to
    ``writer_dict['writer']`` and ``writer_dict['valid_global_steps']`` is
    incremented.

    Returns:
        The performance indicator returned by ``val_dataset.evaluate``.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    num_samples = len(val_dataset)
    all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3),
                         dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    # passed through to val_dataset.evaluate; never filled in this function
    filenames = []
    imgnums = []
    idx = 0  # number of images processed so far

    logger.info(f'# VALIDATE: EPOCH {epoch}')

    # attach the FLOPS counter and switch to evaluate mode
    model = add_flops_counting_methods(model)
    model.start_flops_count()
    model.eval()

    # per-batch conditional-execution costs, filled only when masks are present
    flops_per_layer = []
    total_per_layer = []

    with torch.no_grad():
        end = time.time()
        for i, (images, target, target_weight,
                meta) in enumerate(val_loader):
            images = images.to('cuda', non_blocking=True)

            dynconv_meta = make_dynconv_meta(config, epoch, i)
            outputs, dynconv_meta = model(images, dynconv_meta)

            if 'masks' in dynconv_meta:
                _, cost, total = dynconv.cost_per_layer(dynconv_meta)
                flops_per_layer.append(cost)
                total_per_layer.append(total)

            output = outputs[-1] if isinstance(outputs, list) else outputs

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = images.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            center = meta['center'].numpy()
            scale = meta['scale'].numpy()
            score = meta['score'].numpy()

            output_np = output.clone().cpu().numpy()
            preds, maxvals = get_final_preds(config, output_np, center, scale)

            batch_slice = slice(idx, idx + num_images)
            all_preds[batch_slice, :, 0:2] = preds[:, :, 0:2]
            all_preds[batch_slice, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[batch_slice, 0:2] = center[:, 0:2]
            all_boxes[batch_slice, 2:4] = scale[:, 0:2]
            all_boxes[batch_slice, 4] = np.prod(scale * 200, 1)
            all_boxes[batch_slice, 5] = score
            image_path.extend(meta['image'])

            idx += num_images

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, acc=acc)
                logger.info(msg)

                prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)

                save_debug_images(config, images, meta, target, pred * 4,
                                  output, prefix)

            if config.DEBUG.PONDER:
                # heatmap-space predictions are only needed for this
                # visualisation, so they are computed here
                preds_rel, maxvals_rel = get_max_preds(output_np)
                img = viz.frame2mpl(images[0], denormalize=True)
                img = viz.add_skeleton(img,
                                       preds_rel[0] * 4,
                                       maxvals_rel[0],
                                       thres=0.2)

                plt.figure()
                plt.title('input')
                plt.imshow(img)
                ponder_cost = dynconv.ponder_cost_map(dynconv_meta['masks'])
                if ponder_cost is not None:
                    plt.figure()
                    plt.title('ponder cost map')
                    plt.imshow(ponder_cost,
                               vmin=2,
                               vmax=len(dynconv_meta['masks']) - 2)
                    plt.colorbar()
                else:
                    logger.info('Not a sparse model - no ponder cost')
                viz.showKey()

        name_values, perf_indicator = val_dataset.evaluate(
            config, all_preds, output_dir, all_boxes, image_path, filenames,
            imgnums)

        model_name = config.MODEL.NAME
        if isinstance(name_values, list):
            for name_value in name_values:
                _print_name_value(name_value, model_name)
        else:
            _print_name_value(name_values, model_name)

        if writer_dict:
            writer = writer_dict['writer']
            global_steps = writer_dict['valid_global_steps']
            writer.add_scalar('valid_loss', losses.avg, global_steps)
            writer.add_scalar('valid_acc', acc.avg, global_steps)
            if isinstance(name_values, list):
                for name_value in name_values:
                    writer.add_scalars('valid', dict(name_value), global_steps)
            else:
                writer.add_scalars('valid', dict(name_values), global_steps)
            writer_dict['valid_global_steps'] = global_steps + 1

    _, total_flops, _ = model.compute_average_flops_cost()
    logger.info(
        f'# PARAMS: {get_model_parameters_number(model, as_string=False)/1e6} M'
    )
    # guard against an empty validation set (idx == 0)
    gmacs_per_image = (total_flops / idx) / 1e9 if idx else 0.0
    logger.info(
        f'# FLOPS (multiply-accumulates, MACs): {gmacs_per_image} GMacs on {idx} images'
    )
    # Stop counting once the numbers are reported, as the other evaluation
    # routines in this file do, so the counter does not stay active on the
    # model after validation.
    model.stop_flops_count()

    # some conditional execution statistics
    if flops_per_layer:
        _log_conditional_flops(flops_per_layer, total_per_layer, idx)

    return perf_indicator


def _log_conditional_flops(flops_per_layer, total_per_layer, num_images):
    """Log per-layer and overall conditional-execution MAC statistics.

    ``flops_per_layer`` and ``total_per_layer`` are non-empty lists of
    tensors (one entry per batch) that are concatenated along dim 0;
    ``num_images`` is the number of images they were collected over.
    """
    flops_per_layer = torch.cat(flops_per_layer, dim=0)
    total_per_layer = torch.cat(total_per_layer, dim=0)

    perc_per_layer = flops_per_layer / total_per_layer

    # each value is followed by ', ' (including the last one)
    avg_str = ''.join(f'{round(float(perc), 2)}, '
                      for perc in perc_per_layer.mean(dim=0))
    logger.info(
        f'# FLOPS (multiply-accumulates MACs) used percentage per layer (average): {avg_str}'
    )

    std_str = ''.join(f'{round(float(std), 2)}, '
                      for std in perc_per_layer.std(dim=0))
    logger.info(
        f'# FLOPS (multiply-accumulates MACs) used percentage per layer (standard deviation): {std_str}'
    )

    exec_cond_flops = int(torch.sum(flops_per_layer)) / num_images
    total_cond_flops = int(torch.sum(total_per_layer)) / num_images
    logger.info(
        f'# Conditional FLOPS (multiply-accumulates MACs) over all layers (average per image): {exec_cond_flops/1e9} GMac out of {total_cond_flops/1e9} GMac ({round(100*exec_cond_flops/total_cond_flops,1)}%)'
    )