def validate(args, val_loader, model, criterion, epoch):
    """ Run evaluation """
    top1 = utils.AverageMeter()

    # switch to evaluate mode
    model = flopscounter.add_flops_counting_methods(model)
    model.eval().start_flops_count()
    model.reset_flops_count()

    num_step = len(val_loader)
    with torch.no_grad():
        for input, target in tqdm.tqdm(val_loader, total=num_step,
                                       ascii=True, mininterval=5):
            input = input.to(device=device, non_blocking=True)
            target = target.to(device=device, non_blocking=True)

            # compute output
            meta = {'masks': [], 'device': device, 'gumbel_temp': 1.0,
                    'gumbel_noise': False, 'epoch': epoch}
            output, meta = model(input, meta)
            output = output.float()

            # measure accuracy and record loss
            prec1 = utils.accuracy(output.data, target)[0]
            top1.update(prec1.item(), input.size(0))

            if args.plot_ponder:
                viz.plot_image(input)
                viz.plot_ponder_cost(meta['masks'])
                viz.plot_masks(meta['masks'])
                plt.show()

    print(f'* Epoch {epoch} - Prec@1 {top1.avg:.3f}')
    print(f'* average FLOPS (multiply-accumulates, MACs) per image: '
          f'{model.compute_average_flops_cost()[0]/1e6:.6f} MMac')
    model.stop_flops_count()
    return top1.avg
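
# The functions in this file lean on utils.AverageMeter for running statistics
# (top-1 accuracy above, loss/accuracy/batch time below). Its implementation is
# not shown here; the class below is only an illustrative sketch of the
# interface assumed by the calls in this file (update(val, n), plus .val and
# .avg attributes). It is a hypothetical stand-in, not the actual utils helper.
class _AverageMeterSketch:
    """Minimal running-average tracker (illustrative sketch only)."""

    def __init__(self):
        self.val = 0.0   # most recently recorded value
        self.sum = 0.0   # weighted sum of all recorded values
        self.count = 0   # total weight (e.g. number of images)
        self.avg = 0.0   # running average, sum / count

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count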
def speedtest(config, val_loader, val_dataset, model, criterion, output_dir,
              tb_log_dir, epoch, writer_dict=None):
    '''
    Speedtest mode first warms up on half of the test set (especially PyTorch's
    CUDA benchmark mode needs warmup to optimize its operations), and then runs
    the actual speed measurement on the other half.
    '''
    # switch to evaluate mode
    model.eval()

    idx = 0
    logger.info(f'# SPEEDTEST: EPOCH {epoch}')
    logger.info('\n\n>> WARMUP')
    model = add_flops_counting_methods(model)
    model.start_flops_count()

    with torch.no_grad():
        val_iter = val_loader.__iter__()
        num_step = len(val_iter)
        for i in range(num_step):
            if i == num_step // 2:
                # warmup finished: report FLOPS/params, reset the image counter and start the clock
                avg_flops, total_flops, batch_count = model.compute_average_flops_cost()
                logger.info(f'# PARAMS {get_model_parameters_number(model, as_string=False)/1e6}M')
                logger.info(f'# FLOPS (multiply-accumulates, MACs): {(total_flops/idx)/1e9} G '
                            f'on {idx} images (batch_count={batch_count})')
                model.stop_flops_count()

                idx = 0
                logger.info('\n\n>> SPEEDTEST')
                torch.cuda.synchronize()
                START = time.perf_counter()

            input, _, _, _ = next(val_iter)
            input = input.cuda(non_blocking=True)

            dynconv_meta = make_dynconv_meta(config, epoch, i)
            outputs, dynconv_meta = model(input, dynconv_meta)
            output = outputs[-1] if isinstance(outputs, list) else outputs

            if config.TEST.FLIP_TEST:
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                outputs_flipped = model(input_flipped)

                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped

                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]

                output = (output + output_flipped) * 0.5

            num_images = input.size(0)
            idx += num_images

    torch.cuda.synchronize()
    STOP = time.perf_counter()
    samples_per_second = idx / (STOP - START)
    logger.info(f'ELAPSED TIME: {(STOP-START)}s, '
                f'SAMPLES PER SECOND: {samples_per_second} ON {idx} SAMPLES')
    return samples_per_second
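
# The warmup/measure split in speedtest() follows the usual recipe for timing
# CUDA models: run a number of batches first so benchmark-mode kernel selection
# and caches settle, then bracket the timed region with torch.cuda.synchronize()
# so asynchronous GPU work is actually finished before reading the clock.
# The function below is only a minimal, standalone sketch of that pattern;
# its model/loader arguments are placeholders and it is not called above.
def _throughput_sketch(model, loader, warmup_batches=10):
    """Illustrative GPU throughput measurement (samples/second); sketch only."""
    model.eval()
    torch.cuda.synchronize()
    start = time.perf_counter()
    n_images = 0
    with torch.no_grad():
        for i, (batch, *rest) in enumerate(loader):
            batch = batch.cuda(non_blocking=True)
            if i == warmup_batches:
                # warmup done: wait for queued GPU work, then restart clock and counter
                torch.cuda.synchronize()
                start = time.perf_counter()
                n_images = 0
            model(batch)
            n_images += batch.size(0)
    # wait for outstanding kernels before stopping the clock
    torch.cuda.synchronize()
    elapsed = time.perf_counter() - start
    return n_images / elapsed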
def validate(config, val_loader, val_dataset, model, criterion, output_dir,
             tb_log_dir, epoch, writer_dict=None):
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)
    all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3),
                         dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0

    logger.info(f'# VALIDATE: EPOCH {epoch}')

    model = add_flops_counting_methods(model)
    model.start_flops_count()
    model.eval()

    flops_per_layer = []
    total_per_layer = []

    with torch.no_grad():
        end = time.time()
        val_iter = val_loader.__iter__()
        num_step = len(val_iter)
        for i in range(num_step):
            input, target, target_weight, meta = next(val_iter)
            input = input.to('cuda', non_blocking=True)

            dynconv_meta = make_dynconv_meta(config, epoch, i)
            outputs, dynconv_meta = model(input, dynconv_meta)

            if 'masks' in dynconv_meta:
                percs, cost, total = dynconv.cost_per_layer(dynconv_meta)
                flops_per_layer.append(cost)
                total_per_layer.append(total)

            output = outputs[-1] if isinstance(outputs, list) else outputs

            # if config.TEST.FLIP_TEST:  # flip not supported for dynconv
            #     # this part is ugly, because pytorch has not supported negative index
            #     # input_flipped = model(input[:, :, :, ::-1])
            #     input_flipped = np.flip(input.cpu().numpy(), 3).copy()
            #     input_flipped = torch.from_numpy(input_flipped).cuda()
            #     outputs_flipped = model(input_flipped)
            #
            #     if isinstance(outputs_flipped, list):
            #         output_flipped = outputs_flipped[-1]
            #     else:
            #         output_flipped = outputs_flipped
            #
            #     output_flipped = flip_back(output_flipped.cpu().numpy(),
            #                                val_dataset.flip_pairs)
            #     output_flipped = torch.from_numpy(output_flipped.copy()).cuda()
            #
            #     # feature is not aligned, shift flipped heatmap for higher accuracy
            #     if config.TEST.SHIFT_HEATMAP:
            #         output_flipped[:, :, :, 1:] = \
            #             output_flipped.clone()[:, :, :, 0:-1]
            #     output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()

            output_np = output.clone().cpu().numpy()
            preds_rel, maxvals_rel = get_max_preds(output_np)
            preds, maxvals = get_final_preds(config, output_np, c, s)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double-check these all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_path.extend(meta['image'])

            idx += num_images

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, acc=acc)
                logger.info(msg)

                prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)
                save_debug_images(config, input, meta, target, pred * 4,
                                  output, prefix)

            if config.DEBUG.PONDER:
                img = viz.frame2mpl(input[0], denormalize=True)
                img = viz.add_skeleton(img, preds_rel[0] * 4, maxvals_rel[0],
                                       thres=0.2)
                plt.figure()
                plt.title('input')
                plt.imshow(img)

                ponder_cost = dynconv.ponder_cost_map(dynconv_meta['masks'])
                if ponder_cost is not None:
                    plt.figure()
                    plt.title('ponder cost map')
                    plt.imshow(ponder_cost, vmin=2,
                               vmax=len(dynconv_meta['masks']) - 2)
                    plt.colorbar()
                else:
                    logger.info('Not a sparse model - no ponder cost')
                viz.showKey()

        name_values, perf_indicator = val_dataset.evaluate(
            config, all_preds, output_dir, all_boxes, image_path, filenames,
            imgnums)

        model_name = config.MODEL.NAME
        if isinstance(name_values, list):
            for name_value in name_values:
                _print_name_value(name_value, model_name)
        else:
            _print_name_value(name_values, model_name)

        if writer_dict:
            writer = writer_dict['writer']
            global_steps = writer_dict['valid_global_steps']
            writer.add_scalar('valid_loss', losses.avg, global_steps)
            writer.add_scalar('valid_acc', acc.avg, global_steps)
            if isinstance(name_values, list):
                for name_value in name_values:
                    writer.add_scalars('valid', dict(name_value), global_steps)
            else:
                writer.add_scalars('valid', dict(name_values), global_steps)
            writer_dict['valid_global_steps'] = global_steps + 1

    avg_flops, total_flops, batch_count = model.compute_average_flops_cost()
    logger.info(f'# PARAMS: {get_model_parameters_number(model, as_string=False)/1e6} M')
    logger.info(f'# FLOPS (multiply-accumulates, MACs): {(total_flops/idx)/1e9} GMacs on {idx} images')

    # some conditional execution statistics
    if len(flops_per_layer) > 0:
        flops_per_layer = torch.cat(flops_per_layer, dim=0)
        total_per_layer = torch.cat(total_per_layer, dim=0)

        perc_per_layer = flops_per_layer / total_per_layer
        perc_per_layer_avg = perc_per_layer.mean(dim=0)
        perc_per_layer_std = perc_per_layer.std(dim=0)

        s = ''
        for perc in perc_per_layer_avg:
            s += f'{round(float(perc), 2)}, '
        logger.info(f'# FLOPS (multiply-accumulates, MACs) used percentage per layer (average): {s}')

        s = ''
        for std in perc_per_layer_std:
            s += f'{round(float(std), 2)}, '
        logger.info(f'# FLOPS (multiply-accumulates, MACs) used percentage per layer (standard deviation): {s}')

        exec_cond_flops = int(torch.sum(flops_per_layer)) / idx
        total_cond_flops = int(torch.sum(total_per_layer)) / idx
        logger.info(f'# Conditional FLOPS (multiply-accumulates, MACs) over all layers (average per image): '
                    f'{exec_cond_flops/1e9} GMac out of {total_cond_flops/1e9} GMac '
                    f'({round(100*exec_cond_flops/total_cond_flops, 1)}%)')

    return perf_indicator
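
# dynconv.ponder_cost_map (used in the DEBUG.PONDER branch above) comes from the
# dynconv package and its implementation is not shown in this file. Conceptually,
# a ponder cost map counts, per spatial position, how many layers were actually
# executed there. The function below is only a rough illustrative sketch under
# the assumption that each entry of dynconv_meta['masks'] exposes a hard 0/1
# mask tensor of shape [N, 1, H, W]; it is hypothetical and not called above.
def _ponder_cost_map_sketch(hard_masks, out_size):
    """Sum per-layer execution masks into a per-pixel 'depth' map (sketch only)."""
    total = torch.zeros(out_size)
    for m in hard_masks:
        # bring each layer's binary mask to a common resolution and accumulate it
        m_up = torch.nn.functional.interpolate(m.float(), size=out_size,
                                               mode='nearest')
        total += m_up[0, 0].cpu()
    return total  # higher value = more layers executed at that spatial position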