def model_input(input):
    # compute output
    outputs = model(input)
    if isinstance(outputs, list):
        output = outputs[-1]
    else:
        output = outputs

    if config.TEST.FLIP_TEST:
        input_flipped = np.flip(input.cpu().numpy(), 3).copy()
        input_flipped = torch.from_numpy(input_flipped).cuda()
        outputs_flipped = model(input_flipped)

        if isinstance(outputs_flipped, list):
            output_flipped = outputs_flipped[-1]
        else:
            output_flipped = outputs_flipped

        output_flipped = flip_back(output_flipped.cpu().numpy(),
                                   val_dataset.flip_pairs)
        output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

        if config.TEST.SHIFT_HEATMAP:
            output_flipped[:, :, :, 1:] = \
                output_flipped.clone()[:, :, :, 0:-1]

        output = (output + output_flipped) * 0.5

    return output
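# `flip_back` is used by nearly every function in this file but is not defined
# here. A minimal sketch, assuming heatmaps of shape (N, K, H, W) as a numpy
# array and `matched_parts` as a list of mirrored joint-index pairs (the
# `flip_pairs` attribute referenced above):
def flip_back(output_flipped, matched_parts):
    # un-mirror the heatmaps along the width axis
    output_flipped = output_flipped[:, :, :, ::-1]
    # swap the channels of each left/right joint pair back into place
    for pair in matched_parts:
        tmp = output_flipped[:, pair[0], :, :].copy()
        output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
        output_flipped[:, pair[1], :, :] = tmp
    return output_flipped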
def estimate_heatmaps(self, images, flip=False):
    is_batched = _check_batched(images)
    raw_images = images if is_batched else images.unsqueeze(0)
    input_tensor = torch.empty((len(raw_images), 3, *self.input_shape),
                               device=self.device, dtype=torch.float32)
    for i, raw_image in enumerate(raw_images):
        input_tensor[i] = self.prepare_image(raw_image)
    heatmaps = self.do_forward(input_tensor)[-1].cpu()
    if flip:
        flip_input = fliplr(input_tensor)
        flip_heatmaps = self.do_forward(flip_input)[-1].cpu()
        heatmaps += flip_back(flip_heatmaps, self.data_info.hflip_indices)
        heatmaps /= 2
    if is_batched:
        return heatmaps
    else:
        return heatmaps[0]
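# A minimal usage sketch for the method above. The estimator class name and
# image sizes are hypothetical; the single-image vs. batched contract follows
# the `_check_batched` branch in the method:
# estimator = HumanPoseEstimator(...)   # hypothetical constructor
# single = torch.rand(3, 384, 288)      # one CHW image  -> (K, H, W) heatmaps
# batch = torch.rand(4, 3, 384, 288)    # an NCHW batch  -> (N, K, H, W) heatmaps
# hm_single = estimator.estimate_heatmaps(single, flip=True)
# hm_batch = estimator.estimate_heatmaps(batch, flip=True)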
def get_multi_scale_outputs(config, model, image, with_flip=False,
                            project2image=False, size_projected=None,
                            val_dataset=None):
    # compute output
    _, outputs = model(image)
    if isinstance(outputs, list):
        output = outputs[-1]
    else:
        output = outputs

    if config.TEST.FLIP_TEST:
        # this part is ugly, because pytorch has not supported negative index
        # input_flipped = model(input[:, :, :, ::-1])
        input_flipped = np.flip(image.cpu().numpy(), 3).copy()
        input_flipped = torch.from_numpy(input_flipped).cuda()
        _, outputs_flipped = model(input_flipped)

        if isinstance(outputs_flipped, list):
            output_flipped = outputs_flipped[-1]
        else:
            output_flipped = outputs_flipped

        output_flipped = flip_back(output_flipped.cpu().numpy(),
                                   val_dataset.flip_pairs)
        output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

        # feature is not aligned, shift flipped heatmap for higher accuracy
        if config.TEST.SHIFT_HEATMAP:
            output_flipped[:, :, :, 1:] = \
                output_flipped.clone()[:, :, :, 0:-1]

        output = (output + output_flipped) * 0.5

    # without this fallback, `heatmap` would be undefined when FLIP_TEST is off
    heatmap = output

    if project2image and size_projected:
        heatmap = torch.nn.functional.interpolate(
            heatmap,
            size=(size_projected[1], size_projected[0]),
            mode='bilinear',
            align_corners=False)

    return heatmap
def validate(config, val_loader, val_dataset, model, criterion, output_dir,
             tb_log_dir, writer_dict=None):
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)
    all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3),
                         dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input_, target, target_weight, meta) in enumerate(val_loader):
            # debug: only process a single hard-coded image id
            if meta['image_id'] != 10003420000:
                continue

            root = config.DATASET.ROOT
            file_name = index_to_path(root, meta['image_id'][0].item())
            data_numpy = cv2.imread(
                file_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

            c_dt = meta['center'][0].numpy()
            s_dt = meta['scale'][0].numpy()
            r = 0

            trans = get_affine_transform(c_dt, s_dt, r,
                                         config.MODEL.IMAGE_SIZE)
            input = cv2.warpAffine(data_numpy, trans,
                                   (int(config.MODEL.IMAGE_SIZE[0]),
                                    int(config.MODEL.IMAGE_SIZE[1])),
                                   flags=cv2.INTER_LINEAR)

            normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
            transform = transforms.Compose([
                transforms.ToTensor(),
                normalize,
            ])
            input = transform(input)
            # print(type(input))
            # print(input.shape)

            new_input = np.zeros(
                [1, 3, config.MODEL.IMAGE_SIZE[1], config.MODEL.IMAGE_SIZE[0]])
            new_input[0, :, :, :] = input[:, :, :]
            input = torch.from_numpy(new_input).float()

            # compute output
            output = model(input)
            if config.TEST.FLIP_TEST:
                # this part is ugly, because pytorch has not supported negative index
                # input_flipped = model(input[:, :, :, ::-1])
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                output_flipped = model(input_flipped)
                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]
                    # output_flipped[:, :, :, 0] = 0

                output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()
            c_d = meta['center'].numpy()
            s_d = meta['scale'].numpy()

            preds, maxvals = get_final_preds(config,
                                             output.clone().cpu().numpy(),
                                             c_d, s_d)
            print('id--{},\nkpts:\n{}'.format(meta['image_id'], preds[0]))
            # time.sleep(10)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_path.extend(meta['image'])
            # if config.DATASET.DATASET == 'posetrack':
            #     filenames.extend(meta['filename'])
            #     imgnums.extend(meta['imgnum'].numpy())

            idx += num_images

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, acc=acc)
                logger.info(msg)

                prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)
                save_debug_images(config, input, meta, target, pred * 4,
                                  output, prefix)

        name_values, perf_indicator = val_dataset.evaluate(
            config, all_preds, output_dir, all_boxes, image_path, filenames,
            imgnums)

        _, full_arch_name = get_model_name(config)
        if isinstance(name_values, list):
            for name_value in name_values:
                _print_name_value(name_value, full_arch_name)
        else:
            _print_name_value(name_values, full_arch_name)

        if writer_dict:
            writer = writer_dict['writer']
            global_steps = writer_dict['valid_global_steps']
            writer.add_scalar('valid_loss', losses.avg, global_steps)
            writer.add_scalar('valid_acc', acc.avg, global_steps)
            if isinstance(name_values, list):
                for name_value in name_values:
                    writer.add_scalars('valid', dict(name_value),
                                       global_steps)
            else:
                writer.add_scalars('valid', dict(name_values), global_steps)
            writer_dict['valid_global_steps'] = global_steps + 1

    return perf_indicator
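# `AverageMeter` is the running-average helper all of these loops rely on.
# A minimal sketch, assuming the `val`/`avg` attributes referenced by the
# logging format strings and the `update(val, n)` calls above:
class AverageMeter(object):
    """Computes and stores the current value and the running average."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        # `n` is the number of samples the value was averaged over
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count if self.count != 0 else 0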
def validate(dict_out, config, val_loader, val_dataset, model, criterion,
             output_dir, tb_log_dir, writer_dict=None):
    # dict_out: output dictionary that collected annotations are appended to
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)
    all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3),
                         dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            # compute output
            output = model(input)
            if config.TEST.FLIP_TEST:
                # this part is ugly, because pytorch has not supported negative index
                # input_flipped = model(input[:, :, :, ::-1])
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                output_flipped = model(input_flipped)
                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]
                    # output_flipped[:, :, :, 0] = 0

                output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()

            preds, maxvals = get_final_preds(
                config, output.clone().cpu().numpy(), c, s)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_path.extend(meta['image'])
            if config.DATASET.DATASET == 'posetrack':
                filenames.extend(meta['filename'])
                imgnums.extend(meta['imgnum'].numpy())

            idx += num_images

            # if i % config.PRINT_FREQ == 0:
            msg = 'Test: [{0}/{1}]\t' \
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                  'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                      i, len(val_loader), batch_time=batch_time,
                      loss=losses, acc=acc)
            logger.info(msg)

            # prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)
            # save_debug_images(config, input, meta, target, pred*4, output,
            #                   prefix)

            # build one COCO-style annotation dict per person in the batch
            for b in range(len(meta['joints'])):
                reshape_pred = np.concatenate(
                    (pred[b] * 4, np.ones((17, 1))), axis=1).reshape(-1)
                reshape_pred = reshape_pred.tolist()
                anno_dict = {
                    'area': float((meta['bbox'][2][b] *
                                   meta['bbox'][3][b]).data.numpy()),
                    'bbox': [float(meta['bbox'][0][b].data.numpy()),
                             float(meta['bbox'][1][b].data.numpy()),
                             float(meta['bbox'][2][b].data.numpy()),
                             float(meta['bbox'][3][b].data.numpy())],
                    'category_id': 1,
                    'id': int(meta['id'][b].data.numpy()),
                    'image_id': int(meta['image_id'][b].data.numpy()),
                    'iscrowd': 0,
                    'keypoints': reshape_pred,
                    'num_keypoints': len(meta['joints'][b].data.numpy())
                }
                dict_out['annotations'].append(anno_dict)

            # save json
            # with open('all_posetrack_train_pred.json', 'w') as fp:
            #     json.dump(dict_out, fp)

        name_values, perf_indicator = val_dataset.evaluate(
            config, all_preds, output_dir, all_boxes, image_path, filenames,
            imgnums)

        _, full_arch_name = get_model_name(config)
        if isinstance(name_values, list):
            for name_value in name_values:
                _print_name_value(name_value, full_arch_name)
        else:
            _print_name_value(name_values, full_arch_name)

        if writer_dict:
            writer = writer_dict['writer']
            global_steps = writer_dict['valid_global_steps']
            writer.add_scalar('valid_loss', losses.avg, global_steps)
            writer.add_scalar('valid_acc', acc.avg, global_steps)
            if isinstance(name_values, list):
                for name_value in name_values:
                    writer.add_scalars('valid', dict(name_value),
                                       global_steps)
            else:
                writer.add_scalars('valid', dict(name_values), global_steps)
            writer_dict['valid_global_steps'] = global_steps + 1

    return perf_indicator
def validate(config, device, val_loader, val_dataset, model, criterion,
             output_dir, tb_log_dir, writer_dict=None):
    """
    Evaluates the model on the validation data.

    Parameters
    ----------
    config : yacs.config.CfgNode
        The config object.
    device : torch.device
        Device the data is moved to when a GPU is available.
    val_loader : torch.utils.data.dataloader.DataLoader
        Validation data loader.
    val_dataset : dataset.dataset
        Validation dataset.
    model : model
        The model object being evaluated.
    criterion : torch.nn.modules.loss
        A torch loss object.
    output_dir : str
        Path where results are saved.
    tb_log_dir : str
        Location of the log files.
    writer_dict : dict, optional
        Dict that records the experiment. The default is None.

    Returns
    -------
    losses.avg : float
        Average loss of the predicted heatmaps.
    f_losses.avg : float
        Average loss of the predicted keypoints.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    f_losses = AverageMeter()

    # switch to evaluate mode
    model.eval()

    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            # move the input and target tensors onto the device
            input = input.to(device)
            input = input.float()
            target = target.to(device)
            target = target.float()

            outputs = model(input)
            if isinstance(outputs, list):
                output = outputs[-1]
            else:
                output = outputs

            # optional flip-test at evaluation time;
            # this is False by default, so the block is normally skipped
            if config.TEST.FLIP_TEST:
                input_flipped = input.flip(3)
                outputs_flipped = model(input_flipped)

                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped

                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]

                output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # center and scale from the meta data are needed to map the
            # heatmaps back to keypoints in the original image space
            c = meta['center'].numpy()
            s = meta['scale'].numpy()

            # decode the predicted heatmaps into keypoint coordinates
            preds, maxvals = get_final_preds(
                config, output.clone().cpu().numpy(), c, s)

            criterion2 = torch.nn.MSELoss()
            trues = meta['origin'][:, :, :2]
            trues = trues.reshape(trues.shape[0], -1)

            # compare the predicted keypoints against the ground truth
            f_loss = criterion2(
                torch.from_numpy(preds.reshape(preds.shape[0], -1)), trues)
            f_losses.update(f_loss.item(), num_images)

            idx += num_images

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, acc=acc)
                logger.info(msg)

                prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)
                save_debug_images(config, input, meta, target, pred * 4,
                                  output, prefix)

        if writer_dict:
            writer = writer_dict['writer']
            global_steps = writer_dict['valid_global_steps']
            writer.add_scalar('valid_loss', losses.avg, global_steps)
            writer_dict['valid_global_steps'] = global_steps + 1

    # return the average heatmap loss and the average keypoint loss
    return losses.avg, f_losses.avg
def test(args):
    if args.dataset == 'coco':
        import lib.coco_reader as reader
        IMAGE_SIZE = [288, 384]
        FLIP_PAIRS = [[1, 2], [3, 4], [5, 6], [7, 8],
                      [9, 10], [11, 12], [13, 14], [15, 16]]
        args.kp_dim = 17
    elif args.dataset == 'mpii':
        import lib.mpii_reader as reader
        IMAGE_SIZE = [384, 384]
        FLIP_PAIRS = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]]
        args.kp_dim = 16
    else:
        raise ValueError('The dataset {} is not supported yet.'.format(
            args.dataset))

    print_arguments(args)

    # Image and target
    image = layers.data(name='image',
                        shape=[3, IMAGE_SIZE[1], IMAGE_SIZE[0]],
                        dtype='float32')
    file_id = layers.data(name='file_id', shape=[1, ], dtype='int')

    # Build model
    model = pose_resnet.ResNet(layers=50, kps_num=args.kp_dim, test_mode=True)

    # Output
    output = model.net(input=image, target=None, target_weight=None)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.checkpoint is not None:
        fluid.io.load_persistables(exe, args.checkpoint)

    # Dataloader
    test_reader = paddle.batch(reader.test(), batch_size=args.batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, file_id])

    test_exe = fluid.ParallelExecutor(
        use_cuda=True if args.use_gpu else False,
        main_program=fluid.default_main_program().clone(for_test=True),
        loss_name=None)

    fetch_list = [image.name, output.name]

    for batch_id, data in enumerate(test_reader()):
        print_immediately("Processing batch #%d" % batch_id)
        num_images = len(data)

        file_ids = []
        for i in range(num_images):
            file_ids.append(data[i][1])

        input_image, out_heatmaps = test_exe.run(fetch_list=fetch_list,
                                                 feed=feeder.feed(data))

        if args.flip_test:
            # Flip all the images in the same batch
            data_fliped = []
            for i in range(num_images):
                data_fliped.append((data[i][0][:, :, ::-1], data[i][1]))

            # Inference again
            _, output_flipped = test_exe.run(fetch_list=fetch_list,
                                             feed=feeder.feed(data_fliped))

            # Flip back
            output_flipped = flip_back(output_flipped, FLIP_PAIRS)

            # Feature is not aligned, shift flipped heatmap for higher accuracy
            if args.shift_heatmap:
                output_flipped[:, :, :, 1:] = \
                    output_flipped.copy()[:, :, :, 0:-1]

            # Aggregate
            out_heatmaps = (out_heatmaps + output_flipped) * 0.5

        save_predict_results(input_image, out_heatmaps, file_ids,
                             fold_name='results')
def validate(config, val_loader, val_dataset, model, criterion, output_dir,
             tb_log_dir, writer_dict=None):
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)  # e.g. 2958 for the MPII validation set
    all_preds = np.zeros(
        (num_samples, config.MODEL.NUM_JOINTS, 3),
        dtype=np.float32
    )
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            # compute output: <class 'torch.Tensor'>, torch.Size([64, 16, 64, 64])
            outputs = model(input)
            if isinstance(outputs, list):
                # output = outputs[-1]  # use only the last branch's output
                # weighted combination of the branch outputs instead:
                l = 0
                c = 1
                j = 0
                k = 0
                # output = (l*outputs[0] + c*outputs[1] + j*outputs[2] + k*outputs[3])
                output = (c * outputs[0] + j * outputs[1] + k * outputs[2])
            else:
                output = outputs

            if config.TEST.FLIP_TEST:
                # this part is ugly, because pytorch has not supported negative index
                # input_flipped = model(input[:, :, :, ::-1])
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()  # torch.Size([64, 3, 256, 256])
                outputs_flipped = model(input_flipped)  # torch.Size([64, 16, 64, 64])

                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped

                # map the flipped input's output back to the normal joint layout
                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    # shift every column after 0 one pixel to the right
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]

                # average the original and flipped predictions
                output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            # target_weight encodes joint visibility
            target_weight = target_weight.cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = input.size(0)  # used for averaging
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            c = meta['center'].numpy()
            s = meta['scale'].numpy()  # meta covers only the current batch
            score = meta['score'].numpy()

            preds, maxvals = get_final_preds(
                config, output.clone().cpu().numpy(), c, s)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals  # (2958, 16, 3)
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            # box area: scale x 200 per axis, multiplied along axis=1
            all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_path.extend(meta['image'])

            idx += num_images

            if i % config.PRINT_FREQ == 0:
                # len(val_loader) is the total number of iterations
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, acc=acc)
                logger.info(msg)

                # e.g. 'output/mpii/pose_hrnet/w32_256x256_adam_lr1e-3/val_0'
                prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)
                save_debug_images(config, input, meta, target, pred * 4,
                                  output, prefix)

        name_values, perf_indicator = val_dataset.evaluate(
            config, all_preds, output_dir, all_boxes, image_path, filenames,
            imgnums
        )

        model_name = config.MODEL.NAME  # 'pose_hrnet'
        if isinstance(name_values, list):
            for name_value in name_values:
                # print the accuracy metrics to the terminal
                _print_name_value(name_value, model_name)
        else:
            _print_name_value(name_values, model_name)

        if writer_dict:  # used for TensorBoard logging
            writer = writer_dict['writer']
            global_steps = writer_dict['valid_global_steps']
            writer.add_scalar(
                'valid_loss',
                losses.avg,
                global_steps
            )
            writer.add_scalar(
                'valid_acc',
                acc.avg,
                global_steps
            )
            if isinstance(name_values, list):
                for name_value in name_values:
                    writer.add_scalars(
                        'valid',
                        dict(name_value),
                        global_steps
                    )
            else:
                writer.add_scalars(
                    'valid',
                    dict(name_values),
                    global_steps
                )
            writer_dict['valid_global_steps'] = global_steps + 1

    return perf_indicator
def output_preds(config, val_loader, val_dataset, model, criterion,
                 output_dir):
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # read the name of each image
    gt_file = os.path.join(config.DATASET.ROOT, 'annot',
                           'label_{}.csv'.format(config.DATASET.TEST_SET))
    image_names = []
    with open(gt_file) as annot_file:
        reader = csv.reader(annot_file, delimiter=',')
        for row in reader:
            image_names.append(row[0])

    # create folder for output heatmaps
    output_heatmap_dir = os.path.join(
        output_dir, 'heatmap_{}'.format(config.DATASET.TEST_SET))
    if not os.path.exists(output_heatmap_dir):
        os.mkdir(output_heatmap_dir)

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)
    all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3),
                         dtype=np.float32)
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            # compute output
            outputs = model(input)
            if isinstance(outputs, list):
                output = outputs[-1]
            else:
                output = outputs

            if config.TEST.FLIP_TEST:
                # this part is ugly, because pytorch has not supported negative index
                # input_flipped = model(input[:, :, :, ::-1])
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                outputs_flipped = model(input_flipped)

                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped

                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]

                output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            c = meta['center'].numpy()
            s = meta['scale'].numpy()

            preds, maxvals = get_final_preds(config,
                                             output.clone().cpu().numpy(),
                                             c, s)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals

            batch_image_names = image_names[idx:idx + num_images]
            save_heatmaps(output, batch_image_names, output_heatmap_dir)

            idx += num_images

    # output pose in CSV format
    output_pose_path = os.path.join(
        output_dir, 'pose_{}.csv'.format(config.DATASET.TEST_SET))
    output_pose = open(output_pose_path, 'w')
    for p in range(len(all_preds)):
        output_pose.write("%s," % (image_names[p]))
        for k in range(len(all_preds[p]) - 1):
            output_pose.write("%.3f,%.3f,%.3f," %
                              (all_preds[p][k][0], all_preds[p][k][1],
                               all_preds[p][k][2]))
        output_pose.write("%.3f,%.3f,%.3f\n" %
                          (all_preds[p][len(all_preds[p]) - 1][0],
                           all_preds[p][len(all_preds[p]) - 1][1],
                           all_preds[p][len(all_preds[p]) - 1][2]))
    output_pose.close()

    # output segments
    img_seg_size = (64, 64)
    segs = [(5, 15, 16, 17), (5, 6, 12, 15), (6, 10, 11, 12),
            (23, 33, 34, 35), (23, 24, 30, 33), (24, 28, 29, 30),
            (10, 11, 29, 28), (11, 12, 30, 29), (12, 13, 31, 30),
            (13, 14, 32, 31), (14, 15, 33, 32), (15, 16, 34, 33),
            (16, 17, 35, 34)]
    output_segment_dir = os.path.join(
        output_dir, 'segment_{}'.format(config.DATASET.TEST_SET))
    if not os.path.exists(output_segment_dir):
        os.mkdir(output_segment_dir)

    with open(output_pose_path) as input_pose:
        reader = csv.reader(input_pose, delimiter=',')
        for row in reader:
            img_path = os.path.join(config.DATASET.ROOT, 'images',
                                    'image_' + config.DATASET.TEST_SET,
                                    row[0])
            img = cv2.imread(img_path)
            height, width, channels = img.shape
            kpts = []
            for k in range(36):
                kpt = (int(round(float(row[k * 3 + 1]))),
                       int(round(float(row[k * 3 + 2]))))
                kpts.append(kpt)
            output_subdir = os.path.join(output_segment_dir, row[0][:-4])
            if not os.path.exists(output_subdir):
                os.mkdir(output_subdir)
            for s in range(len(segs)):
                img_seg = np.zeros([height, width], dtype=np.uint8)
                kpts_seg = []
                for i in segs[s]:
                    kpts_seg.append([kpts[i][0], kpts[i][1]])
                if is_convex(kpts_seg):
                    kpts_seg = np.array([kpts_seg], dtype=np.int32)
                    cv2.fillPoly(img_seg, kpts_seg, 255)
                    img_seg = cv2.resize(img_seg, img_seg_size)
                else:
                    img_seg = np.zeros(img_seg_size, dtype=np.uint8)
                cv2.imwrite(os.path.join(output_subdir, "%02d.jpg" % s),
                            img_seg)
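# `is_convex` is referenced above but not defined in this file. A minimal
# sketch, assuming it should report whether the four segment corners form a
# convex polygon; the consecutive-edge cross-product sign test below is one
# standard way to check that:
def is_convex(points):
    """Return True if the polygon given by `points` ([[x, y], ...]) is convex."""
    n = len(points)
    if n < 3:
        return False
    sign = 0
    for i in range(n):
        x1, y1 = points[i]
        x2, y2 = points[(i + 1) % n]
        x3, y3 = points[(i + 2) % n]
        # z-component of the cross product of two consecutive edges
        cross = (x2 - x1) * (y3 - y2) - (y2 - y1) * (x3 - x2)
        if cross != 0:
            if sign == 0:
                sign = 1 if cross > 0 else -1
            elif (cross > 0) != (sign > 0):
                # turning direction changed, so the polygon is not convex
                return False
    return True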
def test(config, val_loader, val_dataset, model, criterion, output_dir):
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    acc_mse = AverageMeter()

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            # compute output
            heatmap = model(input)
            if isinstance(heatmap, list):
                output = heatmap[-1]
            else:
                output = heatmap

            if config.TEST.FLIP_TEST:
                input_flipped = input.flip(3)
                outputs_flipped = model(input_flipped)

                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped

                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]

                output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)
            target_class = meta["visible"].type(
                torch.FloatTensor).cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            idx += num_images

            if i % 1 == 0:
                prefix = os.path.join(output_dir, 'result')
                save_result_images(config, input, meta, target, pred * 4,
                                   output, prefix, i)

                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} {acc_mse.val:.3f} ' \
                      '({acc.avg:.3f} {acc_mse.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, acc=acc, acc_mse=acc_mse)
                logger.info(msg)

    return 0
def validate(config, val_loader, val_dataset, model, criterion, pointcri,
             anglecri, output_dir, tb_log_dir, writer_dict=None):
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    # lossAngle = AverageMeter()
    lossPoint = AverageMeter()
    lossScore = AverageMeter()
    accPearson = AverageMeter()
    accMAE = AverageMeter()

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)
    # all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 2),
    #                      dtype=np.float32)  # original landmark model
    all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3),
                         dtype=np.float32)
    all_preds_point = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3),
                               dtype=np.float32)
    # all_boxes = np.zeros((num_samples, 6))
    all_boxes = np.zeros((num_samples, 22))
    all_boxes_point = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta,
                points) in enumerate(val_loader):
            # compute output
            outputs = model(input)
            if isinstance(outputs, list):
                output = outputs
                # output = outputs[-1]
            else:
                output = outputs
                # output = output[0]

            if config.TEST.FLIP_TEST:
                # this part is ugly, because pytorch has not supported negative index
                # input_flipped = model(input[:, :, :, ::-1])
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                outputs_flipped = model(input_flipped)

                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[0]
                else:
                    output_flipped = outputs_flipped

                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]

                output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)
            points = points.cuda(non_blocking=True)

            input_w = config.MODEL.IMAGE_SIZE[0]
            input_h = config.MODEL.IMAGE_SIZE[1]

            scoreloss = criterion(output, target, target_weight)
            # pointloss = pointcri(output, points, input_w, input_h)
            aa = 1
            # loss = 1*angleloss + 0.1*pointloss + 0*scoreloss
            # loss = (1-aa)*pointloss + aa*scoreloss
            loss = scoreloss

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            # lossPoint.update(pointloss.item(), input.size(0))
            lossScore.update(scoreloss.item(), input.size(0))

            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            r = meta['rotation'].numpy()
            score = meta['score'].numpy()
            w_rate = meta['w_rate']
            h_rate = meta['h_rate']
            box_list = meta['box_list'].numpy()
            id = meta['id'].numpy()
            joints_vis = meta['joints_vis'][:, :, 0].numpy()  # shape = [num_joints]
            scoremap_height = output.shape[2]
            scoremap_width = output.shape[3]

            preds, maxvals = get_final_preds(config,
                                             output.clone().cpu().numpy(),
                                             c, s)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            all_boxes[idx:idx + num_images, 6:9] = box_list[:, 0:3]
            all_boxes[idx:idx + num_images, 9] = id
            all_boxes[idx:idx + num_images, 10:22] = joints_vis[:, 0:12]
            image_path.extend(meta['image'])

            idx += num_images

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'lossScore {scoreloss.val:.5f} ({scoreloss.avg:.5f})\t' \
                      'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          scoreloss=lossScore, loss=losses, acc=acc)
                logger.info(msg)

                prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)
                save_debug_images(config, input, meta, target, pred * 4,
                                  output, prefix)

        name_values, perf_indicator = val_dataset.evaluate(
            config, all_preds, output_dir, all_boxes, image_path, filenames,
            imgnums)

        _, full_arch_name = get_model_name(config)
        if isinstance(name_values, list):
            for name_value in name_values:
                _print_name_value(name_value, full_arch_name)
        else:
            _print_name_value(name_values, full_arch_name)

        if writer_dict:
            writer = writer_dict['writer']
            global_steps = writer_dict['valid_global_steps']
            writer.add_scalar('valid_loss', losses.avg, global_steps)
            writer.add_scalar('valid_acc', acc.avg, global_steps)
            if isinstance(name_values, list):
                for name_value in name_values:
                    writer.add_scalars('valid', dict(name_value),
                                       global_steps)
            else:
                writer.add_scalars('valid', dict(name_values), global_steps)
            writer_dict['valid_global_steps'] = global_steps + 1

    return perf_indicator
def evaluate(config, val_loader, val_dataset, model, criterion, output_dir,
             tb_log_dir, args, writer_dict=None):
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)
    all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3),
                         dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            # compute output
            outputs = model(input)
            if isinstance(outputs, list):
                output = outputs[-1]
            else:
                output = outputs

            if config.TEST.FLIP_TEST:
                print('flippin')
                # this part is ugly, because pytorch has not supported negative index
                # input_flipped = model(input[:, :, :, ::-1])
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                outputs_flipped = model(input_flipped)

                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped

                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]

                output = (output + output_flipped) * 0.5

            pred, _ = get_max_preds(output.cpu().numpy())

            image_path.extend(meta['image'])
            num_images = input.size(0)
            idx += num_images
            print(output_dir)

            prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)
            # save_debug_images(config, input, meta, target, pred*4, output, prefix)
            save_output_images(config, input, meta, pred * 4, output, prefix,
                               args)

    return
def validate(config, val_loader, val_dataset, model, frame_num):
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)
    all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3),
                         dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            # compute output
            output = model(input)
            if config.TEST.FLIP_TEST:
                # this part is ugly, because pytorch has not supported negative index
                # input_flipped = model(input[:, :, :, ::-1])
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                output_flipped = model(input_flipped)
                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]
                    # output_flipped[:, :, :, 0] = 0

                output = (output + output_flipped) * 0.5

            num_images = input.size(0)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())

            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()

            preds, maxvals = get_final_preds(config,
                                             output.clone().cpu().numpy(),
                                             c, s)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_path.extend(meta['image'])

            idx += num_images

            queue_len_path = os.path.join(
                config.TRACKING.SAVE_IMAGE_PATH,
                "Qlen" + str(config.TRACKING.QUEUE_LEN))
            if not os.path.exists(queue_len_path):
                os.mkdir(queue_len_path)
            dir_name = os.path.join(queue_len_path,
                                    config.TRACKING.VIDEO_FILE_NAME)
            all_bb_images = os.path.join(dir_name, "all_bb")
            prefix = '{}{}_{}'.format(all_bb_images + '/',
                                      'frame' + str(frame_num + 1),
                                      'bb' + str(i))
            if not os.path.exists(dir_name):
                os.mkdir(dir_name)
            if not os.path.exists(all_bb_images):
                os.mkdir(all_bb_images)
            save_debug_images(config, input, meta, pred * 4, prefix)

        oks_nmsed_results = val_dataset.evaluate(all_preds, all_boxes,
                                                 image_path)

    return oks_nmsed_results
def test(args):
    if args.dataset == 'coco':
        import lib.coco_reader as reader
        IMAGE_SIZE = [288, 384]
        # HEATMAP_SIZE = [72, 96]
        FLIP_PAIRS = [[1, 2], [3, 4], [5, 6], [7, 8],
                      [9, 10], [11, 12], [13, 14], [15, 16]]
        args.kp_dim = 17
        args.total_images = 144406  # 149813
    elif args.dataset == 'mpii':
        import lib.mpii_reader as reader
        IMAGE_SIZE = [384, 384]
        # HEATMAP_SIZE = [96, 96]
        FLIP_PAIRS = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]]
        args.kp_dim = 16
        args.total_images = 2958  # validation
    else:
        raise ValueError('The dataset {} is not supported yet.'.format(
            args.dataset))

    print_arguments(args)

    # Image and target
    image = layers.data(name='image',
                        shape=[3, IMAGE_SIZE[1], IMAGE_SIZE[0]],
                        dtype='float32')
    file_id = layers.data(name='file_id', shape=[1, ], dtype='int')

    # Build model
    model = pose_resnet.ResNet(layers=50, kps_num=args.kp_dim, test_mode=True)

    # Output
    output = model.net(input=image, target=None, target_weight=None)

    # Parameters from model and arguments
    params = {}
    params["total_images"] = args.total_images
    params["lr"] = args.lr
    params["num_epochs"] = args.num_epochs
    params["learning_strategy"] = {}
    params["learning_strategy"]["batch_size"] = args.batch_size
    params["learning_strategy"]["name"] = args.lr_strategy

    if args.with_mem_opt:
        fluid.memory_optimize(fluid.default_main_program(),
                              skip_opt_set=[output.name])

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    args.pretrained_model = './pretrained/resnet_50/115'
    if args.pretrained_model:

        def if_exist(var):
            exist_flag = os.path.exists(
                os.path.join(args.pretrained_model, var.name))
            return exist_flag

        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    if args.checkpoint is not None:
        fluid.io.load_persistables(exe, args.checkpoint)

    # Dataloader
    test_reader = paddle.batch(reader.test(), batch_size=args.batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, file_id])

    test_exe = fluid.ParallelExecutor(
        use_cuda=True if args.use_gpu else False,
        main_program=fluid.default_main_program().clone(for_test=False),
        loss_name=None)

    fetch_list = [image.name, output.name]

    for batch_id, data in tqdm(enumerate(test_reader())):
        num_images = len(data)

        file_ids = []
        for i in range(num_images):
            file_ids.append(data[i][1])

        input_image, out_heatmaps = test_exe.run(fetch_list=fetch_list,
                                                 feed=feeder.feed(data))

        if args.flip_test:
            # Flip all the images in the same batch
            data_fliped = []
            for i in range(num_images):
                data_fliped.append((data[i][0][:, :, ::-1], data[i][1]))

            # Inference again
            _, output_flipped = test_exe.run(fetch_list=fetch_list,
                                             feed=feeder.feed(data_fliped))

            # Flip back
            output_flipped = flip_back(output_flipped, FLIP_PAIRS)

            # Feature is not aligned, shift flipped heatmap for higher accuracy
            if args.shift_heatmap:
                output_flipped[:, :, :, 1:] = \
                    output_flipped.copy()[:, :, :, 0:-1]

            # Aggregate
            out_heatmaps = (out_heatmaps + output_flipped) * 0.5

        save_predict_results(input_image, out_heatmaps, file_ids,
                             fold_name='results')
def validate(config, val_loader, val_dataset, model, criterion, output_dir,
             tb_log_dir, lossweight, writer_dict=None):
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    # the loader is still used below; this just counts the total samples
    num_samples = len(val_dataset)
    all_preds = np.zeros(
        (num_samples, config.MODEL.NUM_JOINTS, 3),
        dtype=np.float32
    )
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            # compute output
            outputs = model(input)
            if isinstance(outputs, list):
                output = outputs[-1]  # -1: take only the last branch's output
            else:
                output = outputs

            if config.TEST.FLIP_TEST:
                # this part is ugly, because pytorch has not supported negative index
                # input_flipped = model(input[:, :, :, ::-1])
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                outputs_flipped = model(input_flipped)

                if isinstance(outputs_flipped, list):
                    # again only the last branch's output (averaging would also be possible)
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped

                # map the flipped input's output back to the normal joint
                # layout (left hand <-> right hand)
                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    # shift the heatmap one pixel to the right to compensate
                    # for the flip misalignment
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]

                # average the original and flipped predictions
                output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            # the criterion handles single- and multi-branch outputs alike
            loss = criterion(output, target, target_weight)
            loss = torch.stack(loss)
            # weight each branch loss, then reduce to a scalar so that
            # loss.item() below works
            loss = torch.mul(lossweight, loss).sum()

            num_images = input.size(0)
            # measure accuracy and record loss (per iteration)
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # meta comes from the val loader (built from the dataset's gt_db)
            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()

            # predicted coordinates and confidence values
            preds, maxvals = get_final_preds(
                config, output.clone().cpu().numpy(), c, s)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            # box area: scale x 200 per axis, multiplied along axis=1
            all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
            # detection box score
            all_boxes[idx:idx + num_images, 5] = score
            image_path.extend(meta['image'])

            idx += num_images  # number of images processed so far

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, acc=acc)
                logger.info(msg)

                prefix = '{}_{}'.format(
                    os.path.join(output_dir, 'val'), i
                )
                save_debug_images(config, input, meta, target, pred * 4,
                                  output, prefix)

        # the loop over the whole epoch is done;
        # evaluate() is defined on the dataset class
        name_values, perf_indicator = val_dataset.evaluate(
            config, all_preds, output_dir, all_boxes, image_path, filenames,
            imgnums
        )

        model_name = config.MODEL.NAME
        if isinstance(name_values, list):
            for name_value in name_values:
                # print the accuracy metrics to the terminal
                _print_name_value(name_value, model_name)
        else:
            _print_name_value(name_values, model_name)

        if writer_dict:
            writer = writer_dict['writer']
            global_steps = writer_dict['valid_global_steps']
            writer.add_scalar(
                'valid_loss',
                losses.avg,
                global_steps
            )
            writer.add_scalar(
                'valid_acc',
                acc.avg,
                global_steps
            )
            if isinstance(name_values, list):
                for name_value in name_values:
                    writer.add_scalars(
                        'valid',
                        dict(name_value),
                        global_steps
                    )
            else:
                writer.add_scalars(
                    'valid',
                    dict(name_values),
                    global_steps
                )
            writer_dict['valid_global_steps'] = global_steps + 1

    return perf_indicator  # overall performance indicator
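# The SHIFT_HEATMAP question that recurs in the comments above: the assignment
# `out[:, :, :, 1:] = out.clone()[:, :, :, 0:-1]` moves every heatmap column
# one pixel to the right along the width axis, compensating for the slight
# misalignment between flipped and unflipped features. A tiny self-contained demo:
import torch

hm = torch.arange(4.0).reshape(1, 1, 1, 4)   # width columns hold 0, 1, 2, 3
shifted = hm.clone()
shifted[:, :, :, 1:] = shifted.clone()[:, :, :, 0:-1]
print(hm)       # tensor([[[[0., 1., 2., 3.]]]])
print(shifted)  # tensor([[[[0., 0., 1., 2.]]]]) -- column 0 kept, rest shifted right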
def validate_with_opticalflow(config, val_loader, val_dataset, model,
                              criterion, output_dir, tb_log_dir,
                              writer_dict=None):
    root = config.DATASET.ROOT
    batch_time = AverageMeter()
    # losses = AverageMeter()
    # acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    ### load det bboxs ###
    # note: the save below must use this exact path, otherwise the cache is
    # never found on the next run
    bbox_file = os.path.join(
        root, 'new_full_det_bboxs_' + str(config.TEST.IMAGE_THRE) + '.npy')
    if os.path.exists(bbox_file):
        print('loading new_full_det_bboxs.npy from {}...'.format(bbox_file))
        full_bboxs = np.load(bbox_file).item()
        print('detection bboxes loaded')
        ids = sorted(full_bboxs.keys())
    else:
        print('creating new_full_det_bboxs.npy...')
        full_bboxs = {}
        ids = []
        for _, meta in val_loader:
            # print(type(input))
            # print(input.shape)
            image_id = meta['image_id'][0].item()
            if image_id not in ids:
                ids.append(int(image_id))

        # generate ids
        ids = sorted(ids)

        # fill in the missing ids
        pre_im_id = ids[0]
        for im_id in ids:
            if (im_id - pre_im_id) > 1 and (im_id - pre_im_id) < 60:
                for i in range(im_id - pre_im_id - 1):
                    pre_im_id = pre_im_id + 1
                    if pre_im_id not in ids:
                        ids.append(int(pre_im_id))
                        logger.info(
                            'adding missing image_id--{}'.format(pre_im_id))
            pre_im_id = im_id
        ids = sorted(ids)

        temp_key = {}
        temp_key['ids'] = ids
        # with open(os.path.join(root, 'temp_id_vis.json'), 'w') as f:
        #     json.dump(temp_key, f, indent=4)
        # print('finish writing temp_id_vis.json')

        for im_id in ids:
            full_bboxs[im_id] = []

        for _, meta in val_loader:
            image_id = meta['image_id'][0].item()
            center = meta['center'].numpy()
            scale = meta['scale'].numpy()
            score = meta['score'].numpy()
            box_sc = np.array(meta['box_sc'])  # [[x, y, w, h, score]]
            box = (center, scale, score, box_sc)
            # e.g. {1003420000: [(c1, s1, score1, [x1, y1, w1, h1, score1]), ...]}
            full_bboxs[int(image_id)].append(box)

        np.save(bbox_file, full_bboxs)
        print('detection bboxes loaded')

    with torch.no_grad():
        end = time.time()
        batch_time.update(time.time() - end)

        image_path = []
        frames = []
        num_box = 0
        pres, vals, c, s, sc, track_IDs = [], [], [], [], [], []
        Q = deque(maxlen=config.TEST.TRACK_FRAME_LEN)  # tracked instances queue
        next_track_id = FIRST_TRACK_ID

        for i, im_id in enumerate(ids):
            file_name = index_to_path(root, im_id)
            data_numpy = cv2.imread(
                file_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

            # get all boxes information in this frame
            frame_boxs = full_bboxs[im_id]
            boxs = np.array([item[-1] for item in frame_boxs])
            keep = bbox_nms(boxs, 0.5)  # do the nms for each frame
            if len(keep) == 0:
                nmsed_boxs = frame_boxs
            else:
                nmsed_boxs = [frame_boxs[_keep] for _keep in keep]

            print('current im_id_{}'.format(im_id))
            next_id = im_id + 1
            if next_id in ids:
                image_id = str(im_id)
                root_flow = os.path.join(root, config.TEST.FLOW_PATH)
                folder_name = image_id[1:7] + '_mpii_test'
                flow_name = '00' + image_id[-4:] + '.flo'
                flow_path = os.path.join(root_flow, folder_name, flow_name)
                flow = mmcv.flowread(flow_path)  # [h, w, 2]

            instance = []
            which_box = 0
            # compute each box
            for box in nmsed_boxs:
                person = {}
                person_flow = {}
                num_box += 1
                c_d = box[0]
                s_d = box[1]
                c_dt = box[0][0]
                s_dt = box[1][0]
                # print('type:{}, value:{}'.format(type(s_d), s_d))
                score = box[2]
                c.append(c_dt)
                s.append(s_dt)
                sc.append(score)
                r = 0
                image_path.append(file_name)
                h, w = data_numpy.shape[0], data_numpy.shape[1]

                trans = get_affine_transform(c_dt, s_dt, r,
                                             config.MODEL.IMAGE_SIZE)
                input = cv2.warpAffine(data_numpy, trans,
                                       (int(config.MODEL.IMAGE_SIZE[0]),
                                        int(config.MODEL.IMAGE_SIZE[1])),
                                       flags=cv2.INTER_LINEAR)

                normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                 std=[0.229, 0.224, 0.225])
                transform = transforms.Compose([
                    transforms.ToTensor(),
                    normalize,
                ])
                input = transform(input)

                new_input = np.zeros([
                    1, 3, config.MODEL.IMAGE_SIZE[1], config.MODEL.IMAGE_SIZE[0]
                ])
                new_input[0, :, :, :] = input[:, :, :]
                input = torch.from_numpy(new_input).float()

                output = model(input)
                if config.TEST.FLIP_TEST:
                    # this part is ugly, because pytorch has not supported negative index
                    # input_flipped = model(input[:, :, :, ::-1])
                    input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                    input_flipped = torch.from_numpy(input_flipped).cuda()
                    output_flipped = model(input_flipped)
                    output_flipped = flip_back(output_flipped.cpu().numpy(),
                                               val_dataset.flip_pairs)
                    output_flipped = torch.from_numpy(
                        output_flipped.copy()).cuda()

                    if config.TEST.SHIFT_HEATMAP:
                        output_flipped[:, :, :, 1:] = \
                            output_flipped.clone()[:, :, :, 0:-1]
                        # output_flipped[:, :, :, 0] = 0

                    output = (output + output_flipped) * 0.5

                batch_time.update(time.time() - end)
                end = time.time()

                # preds -- (1, 17, 2), maxvals -- (1, 17, 1)
                preds, maxvals = get_final_preds(config,
                                                 output.clone().cpu().numpy(),
                                                 c_d, s_d)

                # if the value of a prediction is < 0.4, set the joint to invisible
                preds_set_invisible, maxvals_set_invisible = get_visible_joints(
                    preds.copy(), maxvals.copy(), config.TEST.IN_VIS_THRE)

                # array with shape (num_keypoints x 3,)
                individual = np.concatenate(
                    (preds_set_invisible.squeeze(),
                     maxvals_set_invisible.reshape(-1, 1)),
                    axis=1).flatten()

                person['image'] = input
                person['keypoints'] = individual
                # person['bbox'] = bbox[:-1]
                person['score'] = score[0]
                person['track_id'] = None
                person['bbox'], person['area'] = get_bbox_sc_from_cs(
                    c_dt, s_dt, score)
                instance.append(person)

                pres.append(preds_set_invisible)
                vals.append(maxvals_set_invisible)  # [1, 17, 1]

                # get the avg_joint_score for each box
                joint_score = 0
                for joint_i in range(maxvals.shape[1]):
                    joint_score += maxvals[0][joint_i]
                avg_joint_score = joint_score / maxvals.shape[1]

                # get center and scale from flow
                c_flow, s_flow, box_sc_flow, is_ignore, scale_flow, flow_kps = \
                    get_cs_from_flow(flow, preds_set_invisible, h, w,
                                     avg_joint_score, config.TEST.FLOW_THRE)
                # TODO

                ### save debug bboxes ###
                if (i % config.PRINT_FREQ) == 0 and config.DEBUG.SAVE_ALL_BOXES:
                    file = data_numpy.copy()
                    save_all_boxes_with_joints(file, im_id, which_box, c_dt,
                                               s_dt, c_flow[0], s_flow[0],
                                               preds_set_invisible, score,
                                               avg_joint_score, output_dir)
                which_box += 1

                if is_ignore or next_id not in ids:
                    continue

                box_flow = (c_flow, s_flow, avg_joint_score, box_sc_flow)
                full_bboxs[next_id].append(box_flow)

                individual_flow = np.concatenate(
                    (flow_kps, maxvals.reshape(-1, 1)), axis=1).flatten()
                person_flow['keypoints'] = individual_flow
                person_flow['bbox'] = box_sc_flow[:-1]
                person_flow['area'] = scale_flow
                person_flow['track_id'] = None
                person_flow['image'] = input
                person_flow['score'] = score[0]
                instance = instance[:-1]
                instance.append(person_flow)

            ### Assign the Track ID for all instances in one frame ###
            # the image id is in ids, but neither the detector nor the flow
            # found anybody in this frame
            if len(instance) == 0:
                continue

            # the last frame of a video: about to go into another video
            if next_id not in ids:
                if config.DEBUG.SAVE_VIDEO_TRACKING:
                    frames = save_image_with_skeleton(data_numpy, im_id,
                                                      instance, output_dir,
                                                      frames, next_id, ids)
                IDs, next_track_id = assignID(
                    Q, instance, next_track_id,
                    similarity_thresh=config.TEST.TRACK_SIMILARITY_THRE)
                for i_person, each_person in enumerate(instance):
                    each_person['track_id'] = IDs[i_person]
                track_IDs.extend(IDs)
                # re-init the deque for the next video
                Q = deque(maxlen=config.TEST.TRACK_FRAME_LEN)
                next_track_id = FIRST_TRACK_ID
                logger.info(
                    'current_im_id{}--go in to next video--next_track_id{}'.
                    format(im_id, next_track_id))
                continue

            IDs, next_track_id = assignID(
                Q, instance, next_track_id,
                similarity_thresh=config.TEST.TRACK_SIMILARITY_THRE)
            print('IDs--{}'.format(IDs))
            for i_person, each_person in enumerate(instance):
                each_person['track_id'] = IDs[i_person]

            ### save image with joints and skeletons ###
            if config.DEBUG.SAVE_VIDEO_TRACKING:
                frames = save_image_with_skeleton(data_numpy, im_id, instance,
                                                  output_dir, frames, next_id,
                                                  ids)

            track_IDs.extend(IDs)
            Q.append(instance)  # Q: [[{}, {}, {}], [{}, {}], ...]

            # print progress
            if i % (config.PRINT_FREQ) == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format(
                          i, len(ids), batch_time=batch_time)
                logger.info(msg)

        logger.info('boxes number:{}\t'.format(num_box))

        pres = np.array(pres)
        vals = np.array(vals)
        c, s, sc = np.array(c), np.array(s), np.array(sc)
        np.save(os.path.join(root, 'full_pose_results.npy'), pres)
        np.save(os.path.join(root, 'full_pose_scores.npy'), vals)

        total_bboxes = np.zeros((num_box, 6))
        total_preds = np.zeros((num_box, config.MODEL.NUM_JOINTS, 3),
                               dtype=np.float32)  # num_box x 17 x 3
        total_track_IDs = np.zeros((num_box))

        for i in range(num_box):
            total_preds[i:i + 1, :, 0:2] = pres[i, :, :, 0:2]
            total_preds[i:i + 1, :, 2:3] = vals[i]
            total_bboxes[i:i + 1, 0:2] = c[i][0:2]
            total_bboxes[i:i + 1, 2:4] = s[i][0:2]
            total_bboxes[i:i + 1, 4] = np.prod(s * 200, 1)[i]
            total_bboxes[i:i + 1, 5] = sc[i]
            total_track_IDs[i] = track_IDs[i]

        name_values, perf_indicator = val_dataset.evaluate(
            config, total_preds, output_dir, total_bboxes, image_path,
            total_track_IDs)

    return perf_indicator
def main(): args = parse_args() update_config(cfg, args) if args.prevModelDir and args.modelDir: # copy pre models for philly copy_prev_models(args.prevModelDir, args.modelDir) logger, final_output_dir, tb_log_dir = create_logger( cfg, args.cfg, 'train') logger.info(pprint.pformat(args)) logger.info(cfg) # cudnn related setting cudnn.benchmark = cfg.CUDNN.BENCHMARK torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=True) # copy model file this_dir = os.path.dirname(__file__) shutil.copy2( os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'), final_output_dir) # logger.info(pprint.pformat(model)) writer_dict = { 'writer': SummaryWriter(log_dir=tb_log_dir), 'train_global_steps': 0, 'valid_global_steps': 0, } dump_input = torch.rand( (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0])) writer_dict['writer'].add_graph(model, (dump_input, )) logger.info(get_model_summary(model, dump_input)) model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda() # define loss function (criterion) and optimizer criterion = JointsMSELoss( use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT).cuda() # Data loading code normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) train_dataset = eval('dataset.' + cfg.DATASET.DATASET)( cfg, cfg.DATASET.ROOT, cfg.DATASET.TRAIN_SET, True, transforms.Compose([ transforms.ToTensor(), normalize, ])) valid_dataset = eval('dataset.' + cfg.DATASET.DATASET)( cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False, transforms.Compose([ transforms.ToTensor(), normalize, ])) train_loader = torch.utils.data.DataLoader( train_dataset, batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * len(cfg.GPUS), shuffle=cfg.TRAIN.SHUFFLE, num_workers=cfg.WORKERS, pin_memory=cfg.PIN_MEMORY) valid_loader = torch.utils.data.DataLoader( valid_dataset, batch_size=cfg.TEST.BATCH_SIZE_PER_GPU * len(cfg.GPUS), shuffle=False, num_workers=cfg.WORKERS, pin_memory=cfg.PIN_MEMORY) best_perf = 0.0 best_model = False last_epoch = -1 optimizer = get_optimizer(cfg, model) begin_epoch = cfg.TRAIN.BEGIN_EPOCH checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth') # if cfg.AUTO_RESUME and os.path.exists(checkpoint_file): # logger.info("=> loading checkpoint '{}'".format(checkpoint_file)) # checkpoint = torch.load(checkpoint_file) # begin_epoch = checkpoint['epoch'] # best_perf = checkpoint['perf'] # last_epoch = checkpoint['epoch'] # model.load_state_dict(checkpoint['state_dict']) # # optimizer.load_state_dict(checkpoint['optimizer']) # logger.info("=> loaded checkpoint '{}' (epoch {})".format( # checkpoint_file, checkpoint['epoch'])) # checkpoint = torch.load('output/jd/pose_hrnet/crop_face/checkpoint.pth') # model.load_state_dict(checkpoint['state_dict']) lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR, last_epoch=last_epoch) for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH): lr_scheduler.step() # train for one epoch train(cfg, train_loader, model, criterion, optimizer, epoch, final_output_dir, tb_log_dir, writer_dict) # evaluate on validation set # perf_indicator = validate( # cfg, valid_loader, valid_dataset, model, criterion, # final_output_dir, tb_log_dir, writer_dict # ) # # if perf_indicator >= best_perf: # best_perf = perf_indicator # best_model = True # else: # best_model = False # import tqdm # import cv2 # import numpy as np # from lib.utils.imutils import im_to_numpy, 
im_to_torch # flip = True # full_result = [] # for i, (inputs,target, target_weight, meta) in enumerate(valid_loader): # with torch.no_grad(): # input_var = torch.autograd.Variable(inputs.cuda()) # if flip == True: # flip_inputs = inputs.clone() # for i, finp in enumerate(flip_inputs): # finp = im_to_numpy(finp) # finp = cv2.flip(finp, 1) # flip_inputs[i] = im_to_torch(finp) # flip_input_var = torch.autograd.Variable(flip_inputs.cuda()) # # # compute output # refine_output = model(input_var) # score_map = refine_output.data.cpu() # score_map = score_map.numpy() # # if flip == True: # flip_output = model(flip_input_var) # flip_score_map = flip_output.data.cpu() # flip_score_map = flip_score_map.numpy() # # for i, fscore in enumerate(flip_score_map): # fscore = fscore.transpose((1, 2, 0)) # fscore = cv2.flip(fscore, 1) # fscore = list(fscore.transpose((2, 0, 1))) # for (q, w) in train_dataset.flip_pairs: # fscore[q], fscore[w] = fscore[w], fscore[q] # fscore = np.array(fscore) # score_map[i] += fscore # score_map[i] /= 2 # # # ids = meta['imgID'].numpy() # # det_scores = meta['det_scores'] # for b in range(inputs.size(0)): # # details = meta['augmentation_details'] # # imgid = meta['imgid'][b] # # print(imgid) # # category = meta['category'][b] # # print(category) # single_result_dict = {} # single_result = [] # # single_map = score_map[b] # r0 = single_map.copy() # r0 /= 255 # r0 += 0.5 # v_score = np.zeros(106) # for p in range(106): # single_map[p] /= np.amax(single_map[p]) # border = 10 # dr = np.zeros((112 + 2 * border, 112 + 2 * border)) # dr[border:-border, border:-border] = single_map[p].copy() # dr = cv2.GaussianBlur(dr, (7, 7), 0) # lb = dr.argmax() # y, x = np.unravel_index(lb, dr.shape) # dr[y, x] = 0 # lb = dr.argmax() # py, px = np.unravel_index(lb, dr.shape) # y -= border # x -= border # py -= border + y # px -= border + x # ln = (px ** 2 + py ** 2) ** 0.5 # delta = 0.25 # if ln > 1e-3: # x += delta * px / ln # y += delta * py / ln # x = max(0, min(x, 112 - 1)) # y = max(0, min(y, 112 - 1)) # resy = float((4 * y + 2) / 112 * (450)) # resx = float((4 * x + 2) / 112 * (450)) # # resy = float((4 * y + 2) / cfg.data_shape[0] * (450)) # # resx = float((4 * x + 2) / cfg.data_shape[1] * (450)) # v_score[p] = float(r0[p, int(round(y) + 1e-10), int(round(x) + 1e-10)]) # single_result.append(resx) # single_result.append(resy) # if len(single_result) != 0: # result = [] # # result.append(imgid) # j = 0 # while j < len(single_result): # result.append(float(single_result[j])) # result.append(float(single_result[j + 1])) # j += 2 # full_result.append(result) model.eval() import numpy as np from core.inference import get_final_preds from utils.transforms import flip_back import csv num_samples = len(valid_dataset) all_preds = np.zeros((num_samples, 106, 3), dtype=np.float32) all_boxes = np.zeros((num_samples, 6)) image_path = [] filenames = [] imgnums = [] idx = 0 full_result = [] with torch.no_grad(): for i, (input, target, target_weight, meta) in enumerate(valid_loader): # compute output outputs = model(input) if isinstance(outputs, list): output = outputs[-1] else: output = outputs if cfg.TEST.FLIP_TEST: # this part is ugly, because pytorch has not supported negative index # input_flipped = model(input[:, :, :, ::-1]) input_flipped = np.flip(input.cpu().numpy(), 3).copy() input_flipped = torch.from_numpy(input_flipped).cuda() outputs_flipped = model(input_flipped) if isinstance(outputs_flipped, list): output_flipped = outputs_flipped[-1] else: output_flipped = outputs_flipped 
output_flipped = flip_back(output_flipped.cpu().numpy(), valid_dataset.flip_pairs) output_flipped = torch.from_numpy( output_flipped.copy()).cuda() # feature is not aligned, shift flipped heatmap for higher accuracy if cfg.TEST.SHIFT_HEATMAP: output_flipped[:, :, :, 1:] = \ output_flipped.clone()[:, :, :, 0:-1] output = (output + output_flipped) * 0.5 target = target.cuda(non_blocking=True) target_weight = target_weight.cuda(non_blocking=True) loss = criterion(output, target, target_weight) num_images = input.size(0) # measure accuracy and record loss c = meta['center'].numpy() s = meta['scale'].numpy() # print(c.shape) # print(s.shape) # print(c[:3, :]) # print(s[:3, :]) score = meta['score'].numpy() preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(), c, s) # print(preds.shape) for b in range(input.size(0)): result = [] # pic_name=meta['image'][b].split('/')[-1] # result.append(pic_name) for points in range(106): # result.append(str(int(preds[b][points][0])) + ' ' + str(int(preds[b][points][1]))) result.append(float(preds[b][points][0])) result.append(float(preds[b][points][1])) full_result.append(result) all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2] all_preds[idx:idx + num_images, :, 2:3] = maxvals # double check this all_boxes parts all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2] all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2] all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1) all_boxes[idx:idx + num_images, 5] = score image_path.extend(meta['image']) idx += num_images # with open('res.csv', 'w', newline='') as f: # writer = csv.writer(f) # writer.writerows(full_result) gt = [] with open("/home/sk49/workspace/cy/jd/val.txt") as f: for line in f.readlines(): rows = list(map(float, line.strip().split(' ')[1:])) gt.append(rows) error = 0 for i in range(len(gt)): error = NME(full_result[i], gt[i]) + error print(error) log_file = [] log_file.append( [epoch, optimizer.state_dict()['param_groups'][0]['lr'], error]) with open('log_file.csv', 'a', newline='') as f: writer1 = csv.writer(f) writer1.writerows(log_file) # logger.close() logger.info('=> saving checkpoint to {}'.format(final_output_dir)) save_checkpoint( { 'epoch': epoch + 1, 'model': cfg.MODEL.NAME, 'state_dict': model.state_dict(), 'best_state_dict': model.module.state_dict(), # 'perf': perf_indicator, 'optimizer': optimizer.state_dict(), }, best_model, final_output_dir) final_model_state_file = os.path.join(final_output_dir, 'final_state.pth') logger.info( '=> saving final model state to {}'.format(final_model_state_file)) torch.save(model.module.state_dict(), final_model_state_file) writer_dict['writer'].close()
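The NME function used in the epoch-end evaluation above is not shown. For facial landmarks it is conventionally the mean point-to-point error divided by a reference length; since the eye-corner indices of this 106-point layout are not given here, the sketch below normalises by the diagonal of the tight ground-truth box. Treat both the flat (x, y, x, y, ...) input layout and the normaliser as assumptions.

# Hypothetical NME sketch for flat coordinate lists as built in the loop above.
import numpy as np

def nme(pred_flat, gt_flat):
    pred = np.asarray(pred_flat, dtype=np.float32).reshape(-1, 2)
    gt = np.asarray(gt_flat, dtype=np.float32).reshape(-1, 2)
    # reference length: diagonal of the tight ground-truth bounding box
    wh = gt.max(axis=0) - gt.min(axis=0)
    norm = float(np.sqrt((wh ** 2).sum()))
    return float(np.linalg.norm(pred - gt, axis=1).mean() / norm)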
def validate(config, val_loader, val_dataset, model, criterion, output_dir, tb_log_dir, writer_dict=None): batch_time = AverageMeter() losses = AverageMeter() acc = AverageMeter() # switch to evaluate mode model.eval() num_samples = len(val_dataset) all_preds = np.zeros( (num_samples, config.MODEL.NUM_JOINTS, 3), dtype=np.float32 ) all_boxes = np.zeros((num_samples, 6)) image_path = [] filenames = [] imgnums = [] idx = 0 export_annots = [] target_weights = [] pred_max_vals_valid = [] with torch.no_grad(): end = time.time() for i, (input, target, target_weight, meta) in enumerate(val_loader): # compute output outputs = model(input) if isinstance(outputs, list): output = outputs[-1] else: output = outputs target_weights.append(target_weight) if config.TEST.FLIP_TEST: # this part is ugly, because pytorch has not supported negative index # input_flipped = model(input[:, :, :, ::-1]) input_flipped = np.flip(input.cpu().numpy(), 3).copy() input_flipped = torch.from_numpy(input_flipped) if config.USE_GPU: input_flipped = input_flipped.cuda() outputs_flipped = model(input_flipped) if isinstance(outputs_flipped, list): output_flipped = outputs_flipped[-1] else: output_flipped = outputs_flipped output_flipped = flip_back(output_flipped.cpu().numpy(), val_dataset.flip_pairs) output_flipped = torch.from_numpy(output_flipped.copy()) if config.USE_GPU: output_flipped = output_flipped.cuda() # feature is not aligned, shift flipped heatmap for higher accuracy if config.TEST.SHIFT_HEATMAP: output_flipped[:, :, :, 1:] = \ output_flipped.clone()[:, :, :, 0:-1] output = (output + output_flipped) * 0.5 if config.USE_GPU: target = target.cuda(non_blocking=True) target_weight = target_weight.cuda(non_blocking=True) loss = criterion(output, target, target_weight) num_images = input.size(0) # measure accuracy and record loss losses.update(loss.item(), num_images) _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(), target.cpu().numpy()) pred_maxvals = get_max_preds(output.cpu().numpy())[1] acc.update(avg_acc, cnt) pred_max_vals_valid.append(pred_maxvals) # measure elapsed time batch_time.update(time.time() - end) end = time.time() # c = meta['center'].numpy() # s = meta['scale'].numpy() # score = meta['score'].numpy() preds, maxvals = get_final_preds( config, output.clone().cpu().numpy(), None, None) all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2] * 4 # to go from hm size 64 to image size 256 all_preds[idx:idx + num_images, :, 2:3] = maxvals # double check this all_boxes parts # all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2] # all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2] # all_boxes[idx:idx + num_images, 4] = np.prod(s*200, 1) # all_boxes[idx:idx + num_images, 5] = score image_path.extend(meta['image']) #Export annotations for j in range(num_images): annot = {"joints_vis": maxvals[j].squeeze().tolist(), "joints": (pred[j]*4).tolist(), "image": meta['image'][j] } export_annots.append(annot) idx += num_images if i % config.PRINT_FREQ == 0: msg = 'Test: [{0}/{1}]\t' \ 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \ 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \ 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( i, len(val_loader), batch_time=batch_time, loss=losses, acc=acc) logger.info(msg) prefix = '{}_{}'.format( os.path.join(output_dir, 'val'), i ) save_debug_images(config, input, meta, target, pred*4, output, prefix) if config.LOCAL and i>10: break name_values, perf_indicator = val_dataset.evaluate( config, all_preds, output_dir, all_boxes, image_path, filenames, imgnums ) model_name = 
config.MODEL.NAME if isinstance(name_values, list): for name_value in name_values: _print_name_value_column(name_value, model_name) else: _print_name_value_column(name_values, model_name) #Compute and display accuracy, precision and recall target_weights = torch.cat(target_weights, dim=0).squeeze() gt_vis = ~target_weights.cpu().numpy().astype(bool) pred_max_vals_valid = np.concatenate(pred_max_vals_valid, axis=0) msg_notvis = metrics_notvisible(pred_max_vals_valid, gt_vis) logger.info(msg_notvis) if writer_dict: writer = writer_dict['writer'] global_steps = writer_dict['valid_global_steps'] writer.add_scalar( 'valid_loss', losses.avg, global_steps ) writer.add_scalar( 'valid_acc', acc.avg, global_steps ) if isinstance(name_values, list): for name_value in name_values: writer.add_scalars( 'valid', dict(name_value), global_steps ) else: writer.add_scalars( 'valid', dict(name_values), global_steps ) writer_dict['valid_global_steps'] = global_steps + 1 with open(os.path.join(output_dir, '{}_pred_annots_{}.json'.format(config.DATASET.TEST_SET, time.strftime('%Y-%m-%d-%H-%M'))), 'w', encoding='utf-8') as f: json.dump(export_annots, f, ensure_ascii=False, indent=4) return perf_indicator
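metrics_notvisible, called just above on the stacked heatmap maxima and the inverted target weights, is also undefined here. A plausible reading is that joints whose heatmap peak falls below a confidence threshold are predicted "not visible" and scored against the ground-truth visibility mask; the 0.5 threshold in this sketch is an assumption.

# Hypothetical sketch of a metrics_notvisible-style report.
import numpy as np

def metrics_notvisible(pred_maxvals, gt_not_visible, thresh=0.5):
    pred_nv = np.asarray(pred_maxvals).squeeze() < thresh   # low peak -> "not visible"
    gt_nv = np.asarray(gt_not_visible).reshape(pred_nv.shape)
    tp = int(np.sum(pred_nv & gt_nv))
    fp = int(np.sum(pred_nv & ~gt_nv))
    fn = int(np.sum(~pred_nv & gt_nv))
    precision = tp / max(tp + fp, 1)
    recall = tp / max(tp + fn, 1)
    acc = float(np.mean(pred_nv == gt_nv))
    return ('not-visible joints -> accuracy: {:.3f}, precision: {:.3f}, '
            'recall: {:.3f}'.format(acc, precision, recall))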
def valid(args): if args.dataset == 'coco': import lib.coco_reader as reader IMAGE_SIZE = [288, 384] HEATMAP_SIZE = [72, 96] FLIP_PAIRS = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]] args.kp_dim = 17 args.total_images = 144406 # 149813 elif args.dataset == 'mpii': import lib.mpii_reader as reader IMAGE_SIZE = [384, 384] HEATMAP_SIZE = [96, 96] FLIP_PAIRS = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]] args.kp_dim = 16 args.total_images = 2958 # validation else: raise ValueError('The dataset {} is not supported yet.'.format( args.dataset)) print_arguments(args) # Image and target image = layers.data(name='image', shape=[3, IMAGE_SIZE[1], IMAGE_SIZE[0]], dtype='float32') target = layers.data(name='target', shape=[args.kp_dim, HEATMAP_SIZE[1], HEATMAP_SIZE[0]], dtype='float32') target_weight = layers.data(name='target_weight', shape=[args.kp_dim, 1], dtype='float32') center = layers.data(name='center', shape=[ 2, ], dtype='float32') scale = layers.data(name='scale', shape=[ 2, ], dtype='float32') score = layers.data(name='score', shape=[ 1, ], dtype='float32') # Build model model = pose_resnet.ResNet(layers=50, kps_num=args.kp_dim) # Output loss, output = model.net(input=image, target=target, target_weight=target_weight) # Parameters from model and arguments params = {} params["total_images"] = args.total_images params["lr"] = args.lr params["num_epochs"] = args.num_epochs params["learning_strategy"] = {} params["learning_strategy"]["batch_size"] = args.batch_size params["learning_strategy"]["name"] = args.lr_strategy if args.with_mem_opt: fluid.memory_optimize( fluid.default_main_program(), skip_opt_set=[loss.name, output.name, target.name]) place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) args.pretrained_model = './pretrained/resnet_50/115' if args.pretrained_model: def if_exist(var): exist_flag = os.path.exists( os.path.join(args.pretrained_model, var.name)) return exist_flag fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist) if args.checkpoint is not None: fluid.io.load_persistables(exe, args.checkpoint) # Dataloader valid_reader = paddle.batch(reader.valid(), batch_size=args.batch_size) feeder = fluid.DataFeeder( place=place, feed_list=[image, target, target_weight, center, scale, score]) valid_exe = fluid.ParallelExecutor( use_cuda=True if args.use_gpu else False, main_program=fluid.default_main_program().clone(for_test=False), loss_name=loss.name) fetch_list = [image.name, loss.name, output.name, target.name] # For validation acc = AverageMeter() idx = 0 num_samples = args.total_images all_preds = np.zeros((num_samples, args.kp_dim, 3), dtype=np.float32) all_boxes = np.zeros((num_samples, 6)) for batch_id, data in enumerate(valid_reader()): num_images = len(data) centers = [] scales = [] scores = [] for i in range(num_images): centers.append(data[i][3]) scales.append(data[i][4]) scores.append(data[i][5]) input_image, loss, out_heatmaps, target_heatmaps = valid_exe.run( fetch_list=fetch_list, feed=feeder.feed(data)) if args.flip_test: # Flip all the images in a same batch data_fliped = [] for i in range(num_images): # Input, target, target_weight, c, s, score data_fliped.append(( # np.flip(input_image, 3)[i], data[i][0][:, :, ::-1], data[i][1], data[i][2], data[i][3], data[i][4], data[i][5])) # Inference again _, _, output_flipped, _ = valid_exe.run( fetch_list=fetch_list, feed=feeder.feed(data_fliped)) # Flip back output_flipped = 
flip_back(output_flipped, FLIP_PAIRS) # Feature is not aligned, shift flipped heatmap for higher accuracy if args.shift_heatmap: output_flipped[:, :, :, 1:] = \ output_flipped.copy()[:, :, :, 0:-1] # Aggregate # out_heatmaps.shape: size[b, args.kp_dim, 96, 96] out_heatmaps = (out_heatmaps + output_flipped) * 0.5 loss = np.mean(np.array(loss)) # Accuracy _, avg_acc, cnt, pred = accuracy(out_heatmaps, target_heatmaps) acc.update(avg_acc, cnt) # Current center, scale, score centers = np.array(centers) scales = np.array(scales) scores = np.array(scores) preds, maxvals = get_final_preds(args, out_heatmaps, centers, scales) all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2] all_preds[idx:idx + num_images, :, 2:3] = maxvals # Double check this all_boxes parts all_boxes[idx:idx + num_images, 0:2] = centers[:, 0:2] all_boxes[idx:idx + num_images, 2:4] = scales[:, 0:2] all_boxes[idx:idx + num_images, 4] = np.prod(scales * 200, 1) all_boxes[idx:idx + num_images, 5] = scores # image_path.extend(meta['image']) idx += num_images print('Epoch [{:4d}] ' 'Loss = {:.5f} ' 'Acc = {:.5f}'.format(batch_id, loss, acc.avg)) if batch_id % 10 == 0: save_batch_heatmaps(input_image, out_heatmaps, file_name='*****@*****.**', normalize=True) # Evaluate args.DATAROOT = 'data/mpii' args.TEST_SET = 'valid' output_dir = '' filenames = [] imgnums = [] image_path = [] name_values, perf_indicator = mpii_evaluate(args, all_preds, output_dir, all_boxes, image_path, filenames, imgnums) print_name_value(name_values, perf_indicator)
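Every flip-test branch in these snippets ends with flip_back: the heatmaps predicted on the horizontally flipped input are mirrored back along the width axis, and the left/right channel pairs are swapped so they line up with the original joint order before averaging. A minimal NumPy sketch under that reading (the imported helper may differ in detail):

import numpy as np

def flip_back(output_flipped, matched_pairs):
    """Undo a horizontal flip on N x K x H x W heatmaps."""
    assert output_flipped.ndim == 4, 'expected batch x joints x height x width'
    out = output_flipped[:, :, :, ::-1].copy()   # mirror along the width axis
    for left, right in matched_pairs:            # swap e.g. L/R shoulder maps
        out[:, [left, right]] = out[:, [right, left]]
    return out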
def validate(config, val_loader, val_dataset, model, criterion, output_dir,
             tb_log_dir, writer_dict=None):
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    model.eval()
    total_error = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            # compute output
            output = model(input)
            output_256 = model(meta['img_resize256_BN'])
            if config.TEST.FLIP_TEST:
                # this part is ugly, because pytorch has not supported negative index
                # input_flipped = model(input[:, :, :, ::-1])
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                # run the model on the flipped image as well
                input_flipped = torch.from_numpy(input_flipped).cuda()
                output_flipped = model(input_flipped)
                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    # shift the flipped result one pixel to the right
                    output_flipped[:, :, :, 1:] = output_flipped.clone(
                    )[:, :, :, 0:-1]
                    # output_flipped[:, :, :, 0] = 0

                output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            # turn heatmap peaks back into landmark coordinates for the loss
            loss = criterion(output, target, target_weight)

            num_images = input.size(0)
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())
            acc.update(avg_acc, cnt)

            batch_time.update(time.time() - end)
            end = time.time()

            # the Dataset applies a random offset when cropping, so undo it
            # at test time via the stored center and scale
            c = meta['center'].numpy()
            s = meta['scale'].numpy()

            # compare predicted and ground-truth heatmap peak locations
            pred_heatmap, _ = get_max_preds(
                output.clone().cpu().numpy())  # predictions
            target_heatmap, _ = get_max_preds(target.clone().cpu().numpy())
            pred_heatmap_256, _ = get_max_preds(
                output_256.clone().cpu().numpy())
            pred_heatmap_256 *= 4
            target_256_64, _ = get_max_preds(
                meta['target_256_64'].clone().cpu().numpy())
            target_256_64 *= 4

            # map back to the original 250-pixel scale
            preds, maxvals = get_final_preds(config,
                                             output.clone().cpu().numpy(), c,
                                             s)
            gt_landmark = meta['joints'].numpy()
            imgs_256 = meta["img_256"]
            # for img_idx in range(imgs_256.shape[0]):
            #     vis_face(imgs_256[img_idx], gt_landmark[img_idx], str(img_idx) + ".jpg")
            #     vis_face(imgs_256[img_idx], preds[img_idx], str(img_idx) + ".jpg")
            #     vis_face(meta['img_resize256'][img_idx], meta['joints_256'][img_idx], str(img_idx) + ".jpg")
            #     vis_face(meta['img_resize256'][img_idx], target_256_64[img_idx], "target_256_64" + str(img_idx) + ".jpg", show=False)
            #     vis_face(meta['img_resize256'][img_idx], pred_heatmap_256[img_idx], "pred_heatmap_256" + str(img_idx) + ".jpg", show=False)

            # batch_error_mean = normalisedError(gt_landmark, preds)
            batch_error_mean = normalisedError(target_256_64, pred_heatmap_256)
            total_error += batch_error_mean
            total_mean_error = total_error / (i + 1)
            print(
                "batch id:{0}, current batch mean error is:{1}, total mean error is:{2}"
                .format(i, batch_error_mean, total_mean_error))
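The SHIFT_HEATMAP trick that follows every flip_back call above exists because the flipped feature map is misaligned by one pixel relative to the original, so the flipped heatmaps are shifted one column to the right before the two predictions are averaged. The same slicing, factored into a small helper for clarity (the helper name is mine, not from the original code):

import torch

def shift_heatmap_right(heatmaps: torch.Tensor) -> torch.Tensor:
    """Shift N x K x H x W heatmaps one pixel to the right; column 0 is kept."""
    shifted = heatmaps.clone()
    shifted[:, :, :, 1:] = heatmaps[:, :, :, :-1]
    return shifted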
def inference(config, image_loader, image_dataset, model, output_dir): batch_time = AverageMeter() losses = AverageMeter() acc = AverageMeter() # switch to evaluate mode model.eval() num_samples = len(image_dataset) all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3), dtype=np.float32) all_boxes = np.zeros((num_samples, 5)) all_image_pathes = [] all_image_ids = [] idx = 0 with torch.no_grad(): end = time.time() for i, (input, target, target_weight, meta) in enumerate(image_loader): num_images = input.size(0) # compute output outputs = model(input) if isinstance(outputs, list): output = outputs[-1] else: output = outputs if config.TEST.FLIP_TEST: # this part is ugly, because pytorch has not supported negative index # input_flipped = model(input[:, :, :, ::-1]) input_flipped = np.flip(input.cpu().numpy(), 3).copy() input_flipped = torch.from_numpy(input_flipped).cuda() outputs_flipped = model(input_flipped) if isinstance(outputs_flipped, list): output_flipped = outputs_flipped[-1] else: output_flipped = outputs_flipped output_flipped = flip_back(output_flipped.cpu().numpy(), image_dataset.flip_pairs) output_flipped = torch.from_numpy(output_flipped.copy()).cuda() # feature is not aligned, shift flipped heatmap for higher accuracy if config.TEST.SHIFT_HEATMAP: output_flipped[:, :, :, 1:] = \ output_flipped.clone()[:, :, :, 0:-1] # output_flipped[:, :, :, 0] = 0 output = (output + output_flipped) * 0.5 # measure elapsed time batch_time.update(time.time() - end) end = time.time() c = meta['center'].numpy() s = meta['scale'].numpy() score = meta['score'].numpy() tlwhs = meta['bbox_tlwh'].numpy() output = output.data.cpu() preds, maxvals = get_final_preds(config, output.numpy(), c, s) all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2] all_preds[idx:idx + num_images, :, 2:3] = maxvals # double check this all_boxes parts all_boxes[idx:idx + num_images, 0:4] = tlwhs all_boxes[idx:idx + num_images, 4] = score all_image_pathes.extend(meta['image']) if config.DATASET.DATASET == 'mot': seq_names, frame_ids = meta['image_id'] frame_ids = frame_ids.numpy().astype(int) all_image_ids.extend(list(zip(seq_names, frame_ids))) elif config.DATASET.DATASET == 'aifi': all_image_ids.extend(meta['image_id']) idx += num_images if i % config.PRINT_FREQ == 0: msg = 'Test: [{0}/{1}]\t' \ 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format( i, len(image_loader), batch_time=batch_time) logger.info(msg) prefix = '{}_{}'.format(os.path.join(output_dir, 'inference'), i) pred, _ = get_max_preds(output.numpy()) save_debug_images(config, input, meta, target, pred * 4, output, prefix) # write output frame_results = defaultdict(list) for image_id, pred, box in zip(all_image_ids, all_preds, all_boxes): frame_results[image_id].append( (pred.astype(float).tolist(), box.astype(float).tolist())) final_results = {} for image_id, results in frame_results.items(): keypoints, boxes = zip(*results) final_results[image_id] = {'keypoints': keypoints, 'boxes': boxes} if not os.path.isdir(output_dir): os.makedirs(output_dir) with open(os.path.join(output_dir, 'box_keypoints.json'), 'w') as f: json.dump(final_results, f) logger.info('Save results to {}'.format( os.path.join(output_dir, 'box_keypoints.json')))
def test(config, test_loader, test_dataset, model, output_dir,
         writer_dict=None):
    # switch to evaluate mode
    model.eval()

    num_samples = len(test_dataset)
    all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3),
                         dtype=np.float32)
    image_path = []
    all_boxes = np.zeros((num_samples, 6))
    filenames = []
    imgnums = []
    idx = 0
    with torch.no_grad():
        for i, (impath, input, center, scale, score) in enumerate(test_loader):
            # compute output
            outputs = model(input)
            if isinstance(outputs, list):
                output = outputs[-1]
            else:
                output = outputs

            if config.TEST.FLIP_TEST:
                # this part is ugly, because pytorch has not supported negative index
                # input_flipped = model(input[:, :, :, ::-1])
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                outputs_flipped = model(input_flipped)
                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped
                # bug fix: this function has no val_dataset in scope; the flip
                # pairs must come from test_dataset
                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           test_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]

                output = (output + output_flipped) * 0.5

            num_images = input.size(0)

            c = center.numpy()
            s = scale.numpy()
            score = score.numpy()

            preds, maxvals = get_final_preds(config,
                                             output.clone().cpu().numpy(), c,
                                             s)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_path.extend(impath)
            idx += num_images

            prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i) + '.jpg'
            save_batch_heatmaps(input, output, prefix)

    name_values, perf_indicator = test_dataset.evaluate(
        config, all_preds, output_dir, all_boxes, image_path, filenames,
        imgnums)
def speedtest(config, val_loader, val_dataset, model, criterion, output_dir, tb_log_dir, epoch, writer_dict=None): ''' Speedtest mode first warms up on half the test size (especially Pytorch CUDA benchmark mode needs warmup to optimize operations), and then performs the speedtest on the other half ''' # switch to evaluate mode model.eval() idx = 0 logger.info(f'# SPEEDTEST: EPOCH {epoch}') logger.info('\n\n>> WARMUP') model = add_flops_counting_methods(model) model.start_flops_count() with torch.no_grad(): val_iter = val_loader.__iter__() num_step = len(val_iter) for i in range(num_step): if i == num_step // 2: avg_flops, total_flops, batch_count = model.compute_average_flops_cost( ) logger.info( f'# PARAMS {get_model_parameters_number(model, as_string=False)/1e6}M' ) logger.info( f'# FLOPS (multiply-accumulates, MACs): {(total_flops/idx)/1e9} G on {idx} images (batch_count={batch_count})' ) model.stop_flops_count() idx = 0 logger.info('\n\n>> SPEEDTEST') torch.cuda.synchronize() START = time.perf_counter() input, _, _, _ = next(val_iter) input = input.cuda(non_blocking=True) dynconv_meta = make_dynconv_meta(config, epoch, i) outputs, dynconv_meta = model(input, dynconv_meta) output = outputs[-1] if isinstance(outputs, list) else outputs if config.TEST.FLIP_TEST: input_flipped = np.flip(input.cpu().numpy(), 3).copy() input_flipped = torch.from_numpy(input_flipped).cuda() outputs_flipped = model(input_flipped) if isinstance(outputs_flipped, list): output_flipped = outputs_flipped[-1] else: output_flipped = outputs_flipped output_flipped = flip_back(output_flipped.cpu().numpy(), val_dataset.flip_pairs) output_flipped = torch.from_numpy(output_flipped.copy()).cuda() # feature is not aligned, shift flipped heatmap for higher accuracy if config.TEST.SHIFT_HEATMAP: output_flipped[:, :, :, 1:] = \ output_flipped.clone()[:, :, :, 0:-1] output = (output + output_flipped) * 0.5 num_images = input.size(0) idx += num_images torch.cuda.synchronize() STOP = time.perf_counter() samples_per_second = idx / (STOP - START) logger.info( f'ELAPSED TIME: {(STOP-START)}s, SAMPLES PER SECOND: {samples_per_second} ON {idx} SAMPLES' ) return idx / (STOP - START)
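The speedtest above brackets the timed region with torch.cuda.synchronize(), which matters because CUDA kernels launch asynchronously and time.perf_counter() would otherwise stop the clock before the GPU finishes. The same measurement pattern, distilled into a reusable helper (the helper name and the plain-tensor batch format are my assumptions, not part of the original):

import time
import torch

def measure_samples_per_second(model, batches):
    """Time forward passes over an iterable of input tensors."""
    model.eval()
    n = 0
    torch.cuda.synchronize()               # drain pending kernels first
    start = time.perf_counter()
    with torch.no_grad():
        for batch in batches:
            batch = batch.cuda(non_blocking=True)
            model(batch)
            n += batch.size(0)
    torch.cuda.synchronize()               # wait for the last batch to finish
    return n / (time.perf_counter() - start)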
def validate(config, val_loader, val_dataset, model, criterion, output_dir, tb_log_dir, writer_dict=None): batch_time = AverageMeter() losses = AverageMeter() acc = AverageMeter() # switch to evaluate mode model.eval() num_samples = len(val_dataset) all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3), dtype=np.float32) all_boxes = np.zeros((num_samples, 7)) image_path = [] filenames = [] imgnums = [] idx = 0 with torch.no_grad(): end = time.time() for i, (input, target, target_weight, meta) in enumerate(val_loader): target = target.cuda(non_blocking=True).float() target_weight = target_weight.cuda(non_blocking=True).float() cat_ids = meta['category_id'] c = meta['center'].numpy() s = meta['scale'].numpy() score = meta['score'].numpy() channel_mask = torch.zeros_like(target_weight).float() # print(channel_mask.shape) # print(type(channel_mask)) for j, cat_id in enumerate(cat_ids): rg = val_dataset.gt_class_keypoints_dict[int(cat_id)] index = torch.tensor([list(range(rg[0], rg[1]))], device=channel_mask.device, dtype=channel_mask.dtype).transpose( 1, 0).long() channel_mask[j].scatter_(0, index, 1) # compute output # print(input[:, :, 100:110, 100:110]) interval = val_dataset.gt_class_keypoints_dict[1] output = model(input) # print('output_shape: ', output.shape) # print(output[0, interval[0]:interval[1], :, :]) if config.MODEL.TARGET_TYPE == 'gaussian': if config.TEST.FLIP_TEST: # this part is ugly, because pytorch has not supported negative index # input_flipped = model(input[:, :, :, ::-1]) input_flipped = np.flip(input.cpu().numpy(), 3).copy() input_flipped = torch.from_numpy(input_flipped).cuda() outputs_flipped = model(input_flipped) if isinstance(outputs_flipped, list): output_flipped = outputs_flipped[-1] else: output_flipped = outputs_flipped output_flipped = output_flipped.cpu().numpy() category_id_list = meta['category_id'].cpu().numpy().copy() for j, category_id in enumerate(category_id_list): output_flipped[j, :, :, :] = flip_back( output_flipped[j, None], val_dataset.flip_pairs[category_id - 1], config.MODEL.HEATMAP_SIZE[0]) output_flipped = torch.from_numpy( output_flipped.copy()).cuda() # feature is not aligned, shift flipped heatmap for higher accuracy if config.TEST.SHIFT_HEATMAP: output_flipped[:, :, :, 1:] = \ output_flipped.clone()[:, :, :, 0:-1] output = (output + output_flipped) * 0.5 # print('aaaaaaa', output[0, interval[0]:interval[1], :, :]) # block irrelevant channels in output interval = val_dataset.gt_class_keypoints_dict[1] # print(output[0, interval[0]:interval[1], :, :]) output = output * channel_mask.unsqueeze(3) preds, maxvals = get_final_preds(config, output.detach().cpu().numpy(), c, s) elif config.MODEL.TARGET_TYPE == 'coordinate': heatmap, output = output if config.TEST.FLIP_TEST: # this part is ugly, because pytorch has not supported negative index # input_flipped = model(input[:, :, :, ::-1]) input_flipped = np.flip(input.cpu().numpy(), 3).copy() input_flipped = torch.from_numpy(input_flipped).cuda() output_flipped = model(input_flipped) heatmap_flipped, output_flipped = output_flipped output_flipped = output_flipped.cpu().numpy() category_id_list = meta['category_id'].cpu().numpy().copy() for j, category_id in enumerate(category_id_list): output_flipped[j, :, :] = flip_back( output_flipped[j, None], val_dataset.flip_pairs[category_id - 1], config.MODEL.HEATMAP_SIZE[0]) output_flipped = torch.from_numpy( output_flipped.copy()).cuda() output = (output + output_flipped) * 0.5 preds, maxvals = get_final_preds( config, 
output.detach().cpu().numpy(), c, s, heatmap.detach().cpu().numpy()) # block irrelevant channels in output output = output * channel_mask else: raise NotImplementedError('{} is not implemented'.format( config.MODEL.TARGET_TYPE)) loss = criterion(output, target, target_weight) num_images = input.size(0) # measure accuracy and record loss losses.update(loss.item(), num_images) _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(), target.detach().cpu().numpy(), val_dataset.target_type) acc.update(avg_acc, cnt) # measure elapsed time batch_time.update(time.time() - end) end = time.time() rg = val_dataset.gt_class_keypoints_dict[1] all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2] all_preds[idx:idx + num_images, :, 2:3] = maxvals # double check this all_boxes parts all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2] all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2] all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1) all_boxes[idx:idx + num_images, 5] = score all_boxes[idx:idx + num_images, 6] = meta['category_id'].cpu().numpy().astype(int) image_path.extend(meta['image']) idx += num_images if i % config.PRINT_FREQ == 0: msg = 'Test: [{0}/{1}]\t' \ 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \ 'Loss {loss.val:.5f} ({loss.avg:.5f})\t' \ 'acc {acc.val:.3f} ({acc.avg:.3f})'.format( i, len(val_loader), batch_time=batch_time, loss=losses, acc=acc) logger.info(msg) prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i) coeff = config.MODEL.IMAGE_SIZE[0] / config.MODEL.HEATMAP_SIZE[ 0] save_debug_images(config, input, meta, target, preds * coeff, output, prefix) name_values, perf_indicator = val_dataset.evaluate( config, all_preds, output_dir, all_boxes, image_path, filenames, imgnums) model_name = config.MODEL.NAME if isinstance(name_values, list): for name_value in name_values: _print_name_value(name_value, model_name) else: _print_name_value(name_values, model_name) if writer_dict: writer = writer_dict['writer'] global_steps = writer_dict['valid_global_steps'] writer.add_scalar('valid_loss', losses.avg, global_steps) writer.add_scalar('valid_acc', acc.avg, global_steps) # if isinstance(name_values, list): # for name_value in name_values: # writer.add_scalars( # 'valid', # dict(name_value), # global_steps # ) # else: # writer.add_scalars( # 'valid', # dict(name_values), # global_steps # ) writer.add_scalar('valid_AP', perf_indicator, global_steps) writer_dict['valid_global_steps'] = global_steps + 1 return perf_indicator
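The per-category channel mask built with scatter_ near the top of the function above can be hard to read; it simply zeroes every keypoint channel that does not belong to a sample's category before the loss and decoding steps. A more direct, equivalent sketch, assuming as the code above does that gt_class_keypoints_dict maps a category id to a (start, end) channel range:

import torch

def make_channel_mask(cat_ids, num_joints, class_ranges, device):
    """Return a len(cat_ids) x num_joints x 1 mask of relevant channels."""
    mask = torch.zeros(len(cat_ids), num_joints, 1, device=device)
    for j, cat_id in enumerate(cat_ids):
        start, end = class_ranges[int(cat_id)]
        mask[j, start:end] = 1.0           # keep only this category's joints
    return mask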
def predict(config, val_loader, val_dataset, model): batch_time = AverageMeter() # switch to evaluate mode model.eval() num_samples = len(val_dataset) all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3), dtype=np.float32) all_boxes = np.zeros((num_samples, 6)) image_names = [] orig_boxes = [] idx = 0 with torch.no_grad(): end = time.time() for i, (input, meta) in enumerate(val_loader): # compute output outputs = model(input) if isinstance(outputs, list): output = outputs[-1] else: output = outputs if config.TEST.FLIP_TEST: # this part is ugly, because pytorch has not supported negative index # input_flipped = model(input[:, :, :, ::-1]) input_flipped = np.flip(input.cpu().numpy(), 3).copy() input_flipped = torch.from_numpy(input_flipped).cuda() outputs_flipped = model(input_flipped) if isinstance(outputs_flipped, list): output_flipped = outputs_flipped[-1] else: output_flipped = outputs_flipped output_flipped = flip_back(output_flipped.cpu().numpy(), val_dataset.flip_pairs) output_flipped = torch.from_numpy(output_flipped.copy()).cuda() # feature is not aligned, shift flipped heatmap for higher accuracy if config.TEST.SHIFT_HEATMAP: output_flipped[:, :, :, 1:] = \ output_flipped.clone()[:, :, :, 0:-1] output = (output + output_flipped) * 0.5 num_images = input.size(0) # measure elapsed time batch_time.update(time.time() - end) end = time.time() c = meta['center'].numpy() s = meta['scale'].numpy() score = meta['score'].numpy() preds, maxvals = get_final_preds(config, output.clone().cpu().numpy(), c, s) all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2] all_preds[idx:idx + num_images, :, 2:3] = maxvals # double check this all_boxes parts all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2] all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2] all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1) all_boxes[idx:idx + num_images, 5] = score names = meta['image'] image_names.extend(names) orig_boxes.extend(meta['origbox']) idx += num_images if i % config.PRINT_FREQ == 0: msg = 'Test: [{0}/{1}]\t' \ 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format( i, len(val_loader), batch_time=batch_time) print(msg) return all_preds, all_boxes, image_names, orig_boxes
def evaluate(config, val_loader, val_dataset, model, output_dir, tb_log_dir, writer_dict=None): batch_time = AverageMeter() # losses = AverageMeter() # acc = AverageMeter() # switch to evaluate mode model.eval() num_samples = len(val_dataset) all_preds = np.zeros( (num_samples, config.MODEL.NUM_JOINTS, 3), dtype=np.float32 ) all_boxes = np.zeros((num_samples, 6)) image_path = [] # filenames = [] # imgnums = [] idx = 0 with torch.no_grad(): end = time.time() for i, (input, meta) in enumerate(val_loader): # compute output outputs = model(input) if isinstance(outputs, list): output = outputs[-1] else: output = outputs if config.TEST.FLIP_TEST: # this part is ugly, because pytorch has not supported negative index # input_flipped = model(input[:, :, :, ::-1]) input_flipped = np.flip(input.cpu().numpy(), 3).copy() input_flipped = torch.from_numpy(input_flipped).cuda() outputs_flipped = model(input_flipped) if isinstance(outputs_flipped, list): output_flipped = outputs_flipped[-1] else: output_flipped = outputs_flipped output_flipped = flip_back(output_flipped.cpu().numpy(), val_dataset.flip_pairs) output_flipped = torch.from_numpy(output_flipped.copy()).cuda() # feature is not aligned, shift flipped heatmap for higher accuracy if config.TEST.SHIFT_HEATMAP: output_flipped[:, :, :, 1:] = \ output_flipped.clone()[:, :, :, 0:-1] output = (output + output_flipped) * 0.5 num_images = input.size(0) # measure elapsed time batch_time.update(time.time() - end) end = time.time() c = meta['center'].numpy() s = meta['scale'].numpy() score = meta['score'].numpy() preds, maxvals = get_final_preds( config, output.clone().cpu().numpy(), c, s) all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2] all_preds[idx:idx + num_images, :, 2:3] = maxvals # double check this all_boxes parts all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2] all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2] all_boxes[idx:idx + num_images, 4] = np.prod(s*200, 1) all_boxes[idx:idx + num_images, 5] = score image_path.extend(meta['image']) idx += num_images # if i % config.PRINT_FREQ == 0: # msg = 'Test: [{0}/{1}]\t' \ # 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \ # 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \ # 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( # i, len(val_loader), batch_time=batch_time, # loss=losses, acc=acc) # logger.info(msg) # # prefix = '{}_{}'.format( # os.path.join(output_dir, 'val'), i # ) # save_debug_images(config, input, meta, target, pred*4, output, # prefix) val_dataset.save_results(all_preds)
def validate(config, val_loader, val_dataset, model, criterion, output_dir, tb_log_dir, writer_dict=None): batch_time = AverageMeter() losses = AverageMeter() acc = AverageMeter() # switch to evaluate mode model.eval() num_samples = len(val_dataset) all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3), dtype=np.float32) all_boxes = np.zeros((num_samples, 6)) image_path = [] filenames = [] imgnums = [] idx = 0 with torch.no_grad(): end = time.time() for i, (input, target, target_weight, meta) in enumerate(val_loader): # compute output outputs = model(input) if isinstance(outputs, list): output = outputs[-1] else: output = outputs if config.TEST.FLIP_TEST: # this part is ugly, because pytorch has not supported negative index # input_flipped = model(input[:, :, :, ::-1]) input_flipped = np.flip(input.cpu().numpy(), 3).copy() input_flipped = torch.from_numpy(input_flipped).cuda() outputs_flipped = model(input_flipped) if isinstance(outputs_flipped, list): output_flipped = outputs_flipped[-1] else: output_flipped = outputs_flipped output_flipped = flip_back(output_flipped.cpu().numpy(), val_dataset.flip_pairs) output_flipped = torch.from_numpy(output_flipped.copy()).cuda() # feature is not aligned, shift flipped heatmap for higher accuracy if config.TEST.SHIFT_HEATMAP: output_flipped[:, :, :, 1:] = \ output_flipped.clone()[:, :, :, 0:-1] output = (output + output_flipped) * 0.5 target = target.cuda(non_blocking=True) target_weight = target_weight.cuda(non_blocking=True) loss = criterion(output, target, target_weight) num_images = input.size(0) # measure accuracy and record loss losses.update(loss.item(), num_images) _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(), target.cpu().numpy()) acc.update(avg_acc, cnt) # measure elapsed time batch_time.update(time.time() - end) end = time.time() c = meta['center'].numpy() s = meta['scale'].numpy() score = meta['score'].numpy() preds, maxvals = get_final_preds(config, output.clone().cpu().numpy(), c, s) all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2] all_preds[idx:idx + num_images, :, 2:3] = maxvals # double check this all_boxes parts all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2] all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2] all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1) all_boxes[idx:idx + num_images, 5] = score image_path.extend(meta['image']) idx += num_images if i % config.PRINT_FREQ == 0: msg = 'Test: [{0}/{1}]\t' \ 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \ 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \ 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( i, len(val_loader), batch_time=batch_time, loss=losses, acc=acc) logger.info(msg) prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i) save_debug_images(config, input, meta, target, pred * 4, output, prefix) name_values, perf_indicator = val_dataset.evaluate( config, all_preds, output_dir, all_boxes, image_path, filenames, imgnums) model_name = config.MODEL.NAME if isinstance(name_values, list): for name_value in name_values: _print_name_value(name_value, model_name) else: _print_name_value(name_values, model_name) if writer_dict: writer = writer_dict['writer'] global_steps = writer_dict['valid_global_steps'] writer.add_scalar('valid_loss', losses.avg, global_steps) writer.add_scalar('valid_acc', acc.avg, global_steps) if isinstance(name_values, list): for name_value in name_values: writer.add_scalars('valid', dict(name_value), global_steps) else: writer.add_scalars('valid', dict(name_values), global_steps) writer_dict['valid_global_steps'] = 
global_steps + 1 return perf_indicator
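AverageMeter is instantiated by nearly every loop in this file but never defined here. The conventional implementation, as popularised by the PyTorch ImageNet example, tracks the latest value alongside a running average; the helper these functions actually import may differ in minor details.

class AverageMeter(object):
    """Tracks the current value, running sum, count and average."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count if self.count else 0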
def main():
    yolact_result = get_yolactjit_result()
    args = argparse.Namespace()
    args.cfg = 'experiments/coco/lpn/lpn50_256x192_gd256x2_gc.yaml'
    args.modelDir = ''
    args.logDir = ''
    update_config(cfg, args)
    model = get_pose_net(cfg, is_train=False)
    # model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=False)
    model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False)
    model = model.cuda()
    # model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    fast_transforms = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])
    model.eval()
    for image_index, it in enumerate(yolact_result):
        transforms_image = []
        for crop_image in it['crop_image']:
            crop_image = fast_transforms(crop_image)
            transforms_image.append(crop_image)
        # NOTE: half-precision inputs assume the model weights were also
        # converted with model.half(), which is not shown in this snippet
        transforms_image = torch.stack(transforms_image).cuda().half()
        # temp = np.stack(it['crop_image']).transpose(0, 3, 1, 2)
        # transforms_image = torch.cat((transforms_image, transforms_image, transforms_image, transforms_image, transforms_image), 0)[:50]

        # first forward pass warms up CUDA; only the second one is timed
        outputs = model(transforms_image)
        torch.cuda.synchronize()
        t = time.time()
        outputs = model(transforms_image)
        torch.cuda.synchronize()
        print(time.time() - t)

        if isinstance(outputs, list):
            output = outputs[-1]
        else:
            output = outputs

        if cfg.TEST.FLIP_TEST:
            input_flipped = transforms_image.flip(3)
            outputs_flipped = model(input_flipped)
            if isinstance(outputs_flipped, list):
                output_flipped = outputs_flipped[-1]
            else:
                output_flipped = outputs_flipped
            flip_pairs = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12],
                          [13, 14], [15, 16]]
            output_flipped = flip_back(output_flipped.cpu().detach().numpy(),
                                       flip_pairs)
            output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

            # feature is not aligned, shift flipped heatmap for higher accuracy
            if cfg.TEST.SHIFT_HEATMAP:
                output_flipped[:, :, :, 1:] = \
                    output_flipped.clone()[:, :, :, 0:-1]

            output = (output + output_flipped) * 0.5

        preds, maxvals, preds_ori = get_final_preds_using_softargmax(
            cfg, output.clone(),
            np.array(it['c']).copy(),
            np.array(it['s']).copy())

        temp_image = it['full_image'].copy()
        for point_for_image in preds:
            for index, point in enumerate(point_for_image):
                cv2.circle(temp_image, (int(point[0]), int(point[1])), 1,
                           point_color2[index], 3)
        cv2.imwrite('result_{}.png'.format(image_index), temp_image)
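get_final_preds_using_softargmax is not shown in these snippets. Soft-argmax generally takes a spatial softmax over each heatmap and returns the expected coordinate, which, unlike a hard argmax, is differentiable and gives sub-pixel peaks. A minimal sketch under that reading; the temperature beta and the omission of the affine transform back to the source image are assumptions:

import torch

def soft_argmax(heatmaps: torch.Tensor, beta: float = 100.0) -> torch.Tensor:
    """heatmaps: N x K x H x W -> pixel coordinates N x K x 2 as (x, y)."""
    n, k, h, w = heatmaps.shape
    probs = torch.softmax(heatmaps.reshape(n, k, -1) * beta, dim=-1)
    probs = probs.reshape(n, k, h, w)
    xs = torch.arange(w, dtype=heatmaps.dtype, device=heatmaps.device)
    ys = torch.arange(h, dtype=heatmaps.dtype, device=heatmaps.device)
    x = (probs.sum(dim=2) * xs).sum(dim=-1)   # expectation over columns
    y = (probs.sum(dim=3) * ys).sum(dim=-1)   # expectation over rows
    return torch.stack([x, y], dim=-1)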