def _val(self):
    num_samples = len(self.ds_val)

    all_preds = np.zeros((num_samples, self.model_nof_joints, 3), dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6), dtype=np.float32)
    image_paths = []
    idx = 0

    self.model.eval()
    with torch.no_grad():
        for step, (image, target, target_weight, joints_data) in enumerate(
                tqdm(self.dl_val, desc='Validating')):
            image = image.to(self.device)
            target = target.to(self.device)
            target_weight = target_weight.to(self.device)

            output = self.model(image)

            # flip test: average the predictions on the original and the mirrored image
            if self.flip_test_images:
                image_flipped = flip_tensor(image, dim=-1)
                output_flipped = self.model(image_flipped)
                output_flipped = flip_back(output_flipped, self.ds_val.flip_pairs)
                output = (output + output_flipped) * 0.5

            loss = self.loss_fn(output, target, target_weight)

            # Evaluate accuracy
            # Get predictions on the resized images (given as input)
            accs, avg_acc, cnt, joints_preds, joints_target = \
                self.ds_val.evaluate_accuracy(output, target)

            num_images = image.shape[0]

            # Transform the predictions back to the original image space
            c = joints_data['center'].numpy()
            s = joints_data['scale'].numpy()
            score = joints_data['score'].numpy()
            pixel_std = 200  # ToDo Parametrize this
            preds, maxvals = get_final_preds(
                True, output, c, s, pixel_std)  # ToDo check what post_processing exactly does

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2].detach().cpu().numpy()
            all_preds[idx:idx + num_images, :, 2:3] = maxvals.detach().cpu().numpy()
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s * pixel_std, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_paths.extend(joints_data['imgPath'])

            idx += num_images

            self.mean_loss_val += loss.item()
            self.mean_acc_val += avg_acc.item()
            if self.use_tensorboard:
                self.summary_writer.add_scalar('val_loss', loss.item(),
                                               global_step=step + self.epoch * self.len_dl_val)
                self.summary_writer.add_scalar('val_acc', avg_acc.item(),
                                               global_step=step + self.epoch * self.len_dl_val)
                if step == 0:
                    save_images(image, target, joints_target, output, joints_preds,
                                joints_data['joints_visibility'], self.summary_writer,
                                step=step + self.epoch * self.len_dl_val, prefix='test_')

    self.mean_loss_val /= len(self.dl_val)
    self.mean_acc_val /= len(self.dl_val)

    # COCO evaluation
    print('\nVal AP/AR')
    self.val_accs, self.mean_mAP_val = self.ds_val.evaluate_overall_accuracy(
        all_preds, all_boxes, image_paths, output_dir=self.log_path)
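# `flip_tensor` and `flip_back`, used in the flip test above, are not defined
# in this section. A minimal sketch of what they are assumed to do, where
# `flip_pairs` is a list of (left, right) joint-channel index pairs; the
# actual helpers in the codebase may differ:
def flip_tensor(tensor, dim=-1):
    # Mirror a batch of images or heatmaps along the given (spatial) dimension.
    return torch.flip(tensor, dims=(dim,))


def flip_back(output_flipped, flip_pairs):
    # Un-mirror the flipped heatmaps and swap the left/right joint channels,
    # so they can be averaged with the unflipped output.
    output_flipped = torch.flip(output_flipped, dims=(-1,))
    for left, right in flip_pairs:
        tmp = output_flipped[:, left].clone()
        output_flipped[:, left] = output_flipped[:, right]
        output_flipped[:, right] = tmp
    return output_flipped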
def _train(self):
    num_samples = self.len_dl_train * self.batch_size

    all_preds = np.zeros((num_samples, self.model_nof_joints, 3), dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6), dtype=np.float32)
    image_paths = []
    idx = 0

    self.model.train()
    for step, (image, target, target_weight, joints_data) in enumerate(
            tqdm(self.dl_train, desc='Training')):
        image = image.to(self.device)
        target = target.to(self.device)
        target_weight = target_weight.to(self.device)

        self.optim.zero_grad()

        output = self.model(image)

        loss = self.loss_fn(output, target, target_weight)

        loss.backward()
        self.optim.step()

        # Evaluate accuracy
        # Get predictions on the resized images (given as input)
        accs, avg_acc, cnt, joints_preds, joints_target = \
            self.ds_train.evaluate_accuracy(output, target)

        num_images = image.shape[0]

        c = joints_data['center'].numpy()
        s = joints_data['scale'].numpy()
        score = joints_data['score'].numpy()
        pixel_std = 200  # ToDo Parametrize this

        # Get predictions on the original images
        preds, maxvals = get_final_preds(
            True, output.detach(), c, s, pixel_std)  # ToDo check what post_processing exactly does

        all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2].detach().cpu().numpy()
        all_preds[idx:idx + num_images, :, 2:3] = maxvals.detach().cpu().numpy()
        all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
        all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
        all_boxes[idx:idx + num_images, 4] = np.prod(s * pixel_std, 1)
        all_boxes[idx:idx + num_images, 5] = score
        image_paths.extend(joints_data['imgPath'])

        idx += num_images

        self.mean_loss_train += loss.item()
        if self.use_tensorboard:
            self.summary_writer.add_scalar('train_loss', loss.item(),
                                           global_step=step + self.epoch * self.len_dl_train)
            self.summary_writer.add_scalar('train_acc', avg_acc.item(),
                                           global_step=step + self.epoch * self.len_dl_train)
            if step == 0:
                save_images(image, target, joints_target, output, joints_preds,
                            joints_data['joints_visibility'], self.summary_writer,
                            step=step + self.epoch * self.len_dl_train, prefix='train_')

    self.mean_loss_train /= len(self.dl_train)

    # COCO evaluation
    print('\nTrain AP/AR')
    self.train_accs, self.mean_mAP_train = self.ds_train.evaluate_overall_accuracy(
        all_preds, all_boxes, image_paths, output_dir=self.log_path)
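# Hypothetical per-epoch driver, not part of the original source: a sketch of
# how _train() and _val() above are presumably alternated, with the running
# means reset before each epoch. `self.epochs` is an assumed attribute.
def run(self):
    for self.epoch in range(self.epochs):
        self.mean_loss_train = 0.
        self.mean_loss_val = 0.
        self.mean_acc_val = 0.
        self._train()
        self._val()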
def _predict_batch(self, image):
    with torch.no_grad():
        heatmaps_list = None
        tags_list = []

        # scales and base (size, center, scale)
        scales = (1,)  # ToDo add support to multiple scales
        scales = sorted(scales, reverse=True)
        base_size, base_center, base_scale = get_multi_scale_size(
            image[0], self.resolution, 1, 1)

        # for each scale (at the moment, just one scale)
        for idx, scale in enumerate(scales):
            # rescale each image, convert to tensor, move to device
            images = list()
            for img in image:
                image_resized, size_resized, _, _ = resize_align_multi_scale(
                    img, self.resolution, scale, min(scales), interpolation=self.interpolation)
                image_resized = self.transform(
                    cv2.cvtColor(image_resized, cv2.COLOR_BGR2RGB)).unsqueeze(dim=0)
                images.append(image_resized.to(self.device))
            images = torch.cat(images)

            # inference
            # outputs: list of HigherHRNet outputs (heatmaps)
            # heatmaps: averaged heatmaps
            # tags: per-pixel identity ids.
            # See Newell et al., "Associative Embedding: End-to-End Learning for Joint Detection and
            # Grouping", NIPS 2017. https://arxiv.org/abs/1611.05424 or
            # http://papers.nips.cc/paper/6822-associative-embedding-end-to-end-learning-for-joint-detection-and-grouping
            outputs, heatmaps, tags = get_multi_stage_outputs(
                self.model, images, with_flip=False, project2image=True,
                size_projected=size_resized, nof_joints=self.nof_joints,
                max_batch_size=self.max_batch_size)

            # aggregate the multiple heatmaps and tags
            heatmaps_list, tags_list = aggregate_results(
                scale, heatmaps_list, tags_list, heatmaps, tags,
                with_flip=False, project2image=True)

        heatmaps = heatmaps_list.float() / len(scales)
        tags = torch.cat(tags_list, dim=4)

        # refine prediction
        # grouped has the shape (people, joints, 4) -> 4: (x, y, confidence, tag)
        # scores has the shape (people, ) and corresponds to the person confidence before refinement
        grouped, scores = self.output_parser.parse(
            heatmaps, tags, adjust=True, refine=True)  # ToDo parametrize these two parameters

        # get final predictions
        final_results = get_final_preds(
            grouped, base_center, base_scale, [heatmaps.shape[3], heatmaps.shape[2]])

        if self.filter_redundant_poses:
            # filter redundant poses - this step filters out poses whose joints have, on average,
            # a difference lower than 3 pixels
            # this is useful when refine=True in self.output_parser.parse, because that step joins
            # together skeleton parts belonging to the same person (but it does not remove
            # redundant skeletons)
            final_pts = []
            # for each image
            for i in range(len(final_results)):
                final_pts.insert(i, list())
                # for each person
                for pts in final_results[i]:
                    if len(final_pts[i]) > 0:
                        diff = np.mean(
                            np.abs(np.array(final_pts[i])[..., :2] - pts[..., :2]), axis=(1, 2))
                        if np.any(diff < 3):
                            # the average diff between this pose and a previous one is < 3 pixels
                            continue
                    final_pts[i].append(pts)
            final_results = final_pts

        pts = []
        boxes = []
        for i in range(len(final_results)):
            pts.insert(i, np.asarray(final_results[i]))

            if len(pts[i]) > 0:
                pts[i][..., [0, 1]] = pts[i][..., [1, 0]]  # restoring (y, x) order as in SimpleHRNet
                pts[i] = pts[i][..., :3]

                if self.return_bounding_boxes:
                    left_top = np.min(pts[i][..., 0:2], axis=1)
                    right_bottom = np.max(pts[i][..., 0:2], axis=1)
                    # [x1, y1, x2, y2]
                    boxes.insert(i, np.stack(
                        [left_top[:, 1], left_top[:, 0], right_bottom[:, 1], right_bottom[:, 0]],
                        axis=-1))
            else:
                boxes.insert(i, [])

    res = list()
    if self.return_heatmaps:
        res.append(heatmaps)
    if self.return_bounding_boxes:
        res.append(boxes)
    res.append(pts)

    if len(res) > 1:
        return res
    else:
        return res[0]
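# Hypothetical usage sketch for _predict_batch (not from the original source):
# the input is a list of BGR images (e.g. loaded with cv2.imread) and, with
# return_heatmaps=False and return_bounding_boxes=False, the output is a list
# with one array of shape (people, nof_joints, 3) per image, each joint given
# as (y, x, confidence).
#
#   frames = [cv2.imread('frame_000.jpg'), cv2.imread('frame_001.jpg')]
#   pts = model._predict_batch(frames)
#   for image_pts in pts:
#       for person in image_pts:  # (nof_joints, 3) -> (y, x, confidence) rows
#           ...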
def _val(self):
    num_samples = len(self.ds_val)

    all_preds = np.zeros((num_samples, self.model_nof_joints, 3), dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6), dtype=np.float32)
    image_paths = []
    idx = 0

    self.model.eval()

    losses = AverageMeter()
    avg_accs = AverageMeter()
    pbar = tqdm(self.dl_val, ncols=170)
    with torch.no_grad():  # gradients are not needed during validation
        for step, (image, target, target_weight, joints_data) in enumerate(self.dl_val):
            image = image.cuda()
            target = target.cuda()
            target_weight = target_weight.cuda()

            output = self.model(image)

            # flip test: average the predictions on the original and the mirrored image
            if self.flip_test_images:
                image_flipped = flip_tensor(image, dim=-1)
                output_flipped = self.model(image_flipped)
                output_flipped = flip_back(output_flipped, self.ds_val.flip_pairs)
                output = (output + output_flipped) * 0.5

            loss = self.loss_fn(output, target, target_weight)

            # Evaluate accuracy
            # Get predictions on the resized images (given as input)
            accs, avg_acc, cnt, joints_preds, joints_target = \
                self.ds_val.evaluate_accuracy(output, target)

            losses.update(loss.item())
            avg_accs.update(avg_acc.item())

            num_images = image.shape[0]

            log = f'[Epoch {self.epoch}] '
            log += f'Valid loss : {loss.item():.4f}({losses.avg:.4f}) '
            log += f'Valid acc : {avg_acc.item():.4f}({avg_accs.avg:.4f}) '
            pbar.set_description(log)
            pbar.update()

            # Transform the predictions back to the original image space
            c = joints_data['center'].numpy()
            s = joints_data['scale'].numpy()
            score = joints_data['score'].numpy()
            pixel_std = 200  # ToDo Parametrize this
            preds, maxvals = get_final_preds(
                True, output, c, s, pixel_std)  # ToDo check what post_processing exactly does

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2].detach().cpu().numpy()
            all_preds[idx:idx + num_images, :, 2:3] = maxvals.detach().cpu().numpy()
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s * pixel_std, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_paths.extend(joints_data['imgPath'])

            idx += num_images

            self.mean_loss_val += loss.item()
            self.mean_acc_val += avg_acc.item()
            if self.use_tensorboard:
                self.summary_writer.add_scalar('Valid/Loss', loss.item(),
                                               global_step=step + self.epoch * self.len_dl_val)
                self.summary_writer.add_scalar('Valid/Accuracy', avg_acc.item(),
                                               global_step=step + self.epoch * self.len_dl_val)
                if step == 0:
                    save_images(image, target, joints_target, output, joints_preds,
                                joints_data['joints_visibility'], self.summary_writer,
                                step=step + self.epoch * self.len_dl_val, prefix='test_')

    self.mean_loss_val /= len(self.dl_val)
    self.mean_acc_val /= len(self.dl_val)

    # COCO evaluation
    # print('\nVal AP/AR')
    self.val_accs, self.mean_mAP_val = self.ds_val.evaluate_overall_accuracy(
        all_preds, all_boxes, image_paths, output_dir=self.log_path)

    mean_mAP = self.val_accs['AP']    # Average Precision (AP) @[ IoU=0.50:0.95 | area=all | maxDets=20 ]
    AP_5 = self.val_accs['Ap .5']     # Average Precision (AP) @[ IoU=0.50      | area=all | maxDets=20 ]
    AP_75 = self.val_accs['AP .75']   # Average Precision (AP) @[ IoU=0.75      | area=all | maxDets=20 ]
    mean_mAR = self.val_accs['AR']    # Average Recall (AR)    @[ IoU=0.50:0.95 | area=all | maxDets=20 ]
    AR_5 = self.val_accs['AR .5']     # Average Recall (AR)    @[ IoU=0.50      | area=all | maxDets=20 ]
    AR_75 = self.val_accs['AR .75']   # Average Recall (AR)    @[ IoU=0.75      | area=all | maxDets=20 ]

    log = f'[EPOCH {self.epoch}] Valid Loss : {losses.avg:.4f}, '
    log += f'Valid acc : {avg_accs.avg:.4f}, '
    log += f'AP : {mean_mAP:.4f}, '
    log += f'AP.5 : {AP_5:.4f}, '
    log += f'AP.75 : {AP_75:.4f}, '
    log += f'AR : {mean_mAR:.4f}, '
    pbar.set_description(log)
    pbar.close()

    if self.use_tensorboard:
        self.summary_writer.add_scalar('Valid/mean_mAP', mean_mAP,
                                       global_step=step + self.epoch * self.len_dl_val)
        self.summary_writer.add_scalar('Valid/AP.5', AP_5,
                                       global_step=step + self.epoch * self.len_dl_val)
        self.summary_writer.add_scalar('Valid/AP.75', AP_75,
                                       global_step=step + self.epoch * self.len_dl_val)
        self.summary_writer.add_scalar('Valid/mean_mAR', mean_mAR,
                                       global_step=step + self.epoch * self.len_dl_val)
        self.summary_writer.add_scalar('Valid/AR.5', AR_5,
                                       global_step=step + self.epoch * self.len_dl_val)
        self.summary_writer.add_scalar('Valid/AR.75', AR_75,
                                       global_step=step + self.epoch * self.len_dl_val)
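# `AverageMeter` is used above but not defined in this section. A minimal
# sketch consistent with its usage (update with a scalar, read `.avg`); the
# actual class may differ, e.g. by supporting weighted updates:
class AverageMeter:
    def __init__(self):
        self.sum = 0.
        self.count = 0
        self.avg = 0.

    def update(self, val, n=1):
        # Accumulate a running mean over the values seen so far.
        self.sum += float(val) * n
        self.count += n
        self.avg = self.sum / self.count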