def test_corners_3d_coder():
    coder_config = {'type': constants.KEY_CORNERS_3D}
    bbox_coder = bbox_coders.build(coder_config)
    dataset = build_dataset()
    sample = dataset[0]

    label_boxes_3d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_3D])
    label_boxes_2d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
    p2 = torch.from_numpy(sample[constants.KEY_STEREO_CALIB_P2])
    proposals = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
    num_instances = torch.from_numpy(sample[constants.KEY_NUM_INSTANCES])

    # ry = compute_ray_angle(label_boxes_3d[:, :3])
    # label_boxes_3d[:, -1] += ry

    # keep only the valid instances and add a batch dimension of 1
    label_boxes_3d = torch.stack(1 * [label_boxes_3d[:num_instances]], dim=0)
    label_boxes_2d = torch.stack(1 * [label_boxes_2d[:num_instances]], dim=0)
    proposals = torch.stack(1 * [proposals[:num_instances]], dim=0)
    p2 = torch.stack(1 * [p2], dim=0)

    # label_boxes_3d[:, :, -1] = 0
    encoded_corners_3d = bbox_coder.encode_batch(label_boxes_3d,
                                                 label_boxes_2d, p2)
    num_boxes = encoded_corners_3d.shape[1]
    batch_size = encoded_corners_3d.shape[0]
    decoded_corners_3d = bbox_coder.decode_batch(
        encoded_corners_3d.view(batch_size, num_boxes, -1), proposals, p2)

    # project the decoded 3D corners back into the image for visual inspection
    decoded_corners_2d = geometry_utils.torch_points_3d_to_points_2d(
        decoded_corners_3d[0].view(-1, 3), p2[0]).view(-1, 8, 2)
    decoded_corners_2d = decoded_corners_2d.cpu().detach().numpy()

    image_path = sample[constants.KEY_IMAGE_PATH]
    image_dir = '/data/object/training/image_2'
    result_dir = './results/data'
    save_dir = 'results/images'
    calib_dir = '/data/object/training/calib'
    label_dir = None
    calib_file = None
    visualizer = ImageVisualizer(image_dir,
                                 result_dir,
                                 label_dir=label_dir,
                                 calib_dir=calib_dir,
                                 calib_file=calib_file,
                                 online=False,
                                 save_dir=save_dir)
    visualizer.render_image_corners_2d(image_path, decoded_corners_2d)
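
# The test above only checks the encode/decode round trip visually. A minimal
# numeric check is sketched below; it assumes the coder's corner ordering
# matches geometry_utils.torch_boxes_3d_to_corners_3d, which this file does
# not guarantee, and the helper name is illustrative, not part of the repo.
def check_corners_3d_round_trip(bbox_coder, label_boxes_3d, label_boxes_2d,
                                proposals, p2, atol=1e-3):
    encoded = bbox_coder.encode_batch(label_boxes_3d, label_boxes_2d, p2)
    batch_size, num_boxes = encoded.shape[0], encoded.shape[1]
    decoded = bbox_coder.decode_batch(
        encoded.view(batch_size, num_boxes, -1), proposals, p2)
    # corners computed directly from the ground-truth 3D boxes
    expected = geometry_utils.torch_boxes_3d_to_corners_3d(label_boxes_3d[0])
    return torch.allclose(decoded[0].view(-1, 8, 3), expected, atol=atol)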
def main():
    # ImageNet channel mean/std, used to undo the dataset normalization
    normal_mean = np.asarray([0.485, 0.456, 0.406])
    normal_van = np.asarray([0.229, 0.224, 0.225])

    dataset = build_dataset()
    image_dir = '/data/object/training/image_2'
    result_dir = './results/data'
    save_dir = 'results/images'
    calib_dir = '/data/object/training/calib'
    label_dir = None
    calib_file = None
    visualizer = ImageVisualizer(image_dir,
                                 result_dir,
                                 label_dir=label_dir,
                                 calib_dir=calib_dir,
                                 calib_file=calib_file,
                                 online=True,
                                 save_dir=save_dir)

    for sample in dataset:
        label_boxes_3d = sample['gt_boxes_3d']
        label_boxes_2d = sample['gt_boxes']
        label_classes = sample['gt_labels']
        p2 = torch.from_numpy(sample['p2'])
        image_path = sample['img_name']

        # move the location triple in front of the dimensions:
        # (h, w, l, x, y, z, ry) -> (x, y, z, h, w, l, ry)
        label_boxes_3d = torch.cat([
            label_boxes_3d[:, 3:6], label_boxes_3d[:, :3],
            label_boxes_3d[:, 6:]
        ], dim=-1)
        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(
            label_boxes_3d)

        # de-normalize the image tensor for rendering
        image = sample['img'].permute(1, 2, 0).cpu().detach().numpy()
        image = image.copy()
        image = image * normal_van + normal_mean

        corners_3d = corners_3d.cpu().detach().numpy()
        visualizer.render_image_corners_2d(
            image_path, image, corners_3d=corners_3d, p2=p2)
def generate_visualizer():
    image_dir = '/data/object/training/image_2'
    result_dir = './results/data'
    save_dir = 'results/optimized'
    calib_dir = '/data/object/training/calib'
    label_dir = None
    # label_dir = '/data/object/training/label_2'
    calib_file = None
    visualizer = ImageVisualizer(image_dir,
                                 result_dir,
                                 label_dir=label_dir,
                                 calib_dir=calib_dir,
                                 calib_file=calib_file,
                                 online=False,
                                 save_dir=save_dir)
    return visualizer
def build_visualizer():
    image_dir = '/data/object/training/image_2'
    result_dir = './results/data'
    save_dir = 'results/images'
    calib_dir = '/data/object/training/calib'
    label_dir = None
    calib_file = None
    visualizer = ImageVisualizer(image_dir,
                                 result_dir,
                                 label_dir=label_dir,
                                 calib_dir=calib_dir,
                                 calib_file=calib_file,
                                 online=True,
                                 save_dir=save_dir)
    return visualizer
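
# generate_visualizer and build_visualizer differ only in save_dir and the
# online flag, and the same KITTI paths are hard-coded in every constructor
# in this file. A consolidated factory could look like the sketch below
# (not part of the original code):
def build_kitti_visualizer(save_dir='results/images', online=False):
    return ImageVisualizer('/data/object/training/image_2',
                           './results/data',
                           label_dir=None,
                           calib_dir='/data/object/training/calib',
                           calib_file=None,
                           online=online,
                           save_dir=save_dir)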
def test_mobileye_coder():
    coder_config = {'type': constants.KEY_MOBILEYE}
    bbox_coder = bbox_coders.build(coder_config)
    dataset = build_dataset('kitti')

    image_dir = '/data/object/training/image_2'
    result_dir = './results/data'
    save_dir = 'results/images'
    calib_dir = '/data/object/training/calib'
    label_dir = None
    calib_file = None
    visualizer = ImageVisualizer(image_dir,
                                 result_dir,
                                 label_dir=label_dir,
                                 calib_dir=calib_dir,
                                 calib_file=calib_file,
                                 online=False,
                                 save_dir=save_dir)

    for sample in dataset:
        label_boxes_3d = torch.from_numpy(
            sample[constants.KEY_LABEL_BOXES_3D])
        label_boxes_2d = torch.from_numpy(
            sample[constants.KEY_LABEL_BOXES_2D])
        p2 = torch.from_numpy(sample[constants.KEY_STEREO_CALIB_P2])
        proposals = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
        num_instances = torch.from_numpy(sample[constants.KEY_NUM_INSTANCES])
        image_info = torch.from_numpy(sample[constants.KEY_IMAGE_INFO])

        # keep only the valid instances and add a batch dimension of 1
        label_boxes_3d = torch.stack(
            1 * [label_boxes_3d[:num_instances]], dim=0)
        label_boxes_2d = torch.stack(
            1 * [label_boxes_2d[:num_instances]], dim=0)
        proposals = torch.stack(1 * [proposals[:num_instances]], dim=0)
        image_info = torch.stack(1 * [image_info], dim=0)
        p2 = torch.stack(1 * [p2], dim=0)

        encoded_corners_2d = bbox_coder.encode_batch(
            label_boxes_3d, label_boxes_2d, p2, image_info, label_boxes_2d)

        # alternative decoding path, kept for reference:
        # num_boxes = encoded_corners_2d.shape[1]
        # batch_size = encoded_corners_2d.shape[0]
        # center_depth = encoded_corners_2d[:, :, -1:]
        # encoded_corners_2d = encoded_corners_2d[:, :, :-1].view(
        #     batch_size, num_boxes, 8, 4)
        # encoded_visibility = torch.zeros_like(
        #     encoded_corners_2d[:, :, :, :2])
        # visibility = encoded_corners_2d[:, :, :, -1:].long()
        # row = torch.arange(0, visibility.numel()).type_as(visibility)
        # encoded_visibility.view(-1, 2)[row, visibility.view(-1)] = 1
        # encoded_corners_2d = torch.cat(
        #     [encoded_corners_2d[:, :, :, :3], encoded_visibility], dim=-1)
        # encoded_corners_2d = torch.cat(
        #     [encoded_corners_2d.view(batch_size, num_boxes, -1),
        #      center_depth], dim=-1)

        decoded_corners_2d = bbox_coder.decode_batch(encoded_corners_2d,
                                                     proposals)
        decoded_corners_2d = decoded_corners_2d.cpu().detach().numpy()

        image_path = sample[constants.KEY_IMAGE_PATH]
        visualizer.render_image_corners_2d(
            image_path, corners_2d=decoded_corners_2d[0], p2=p2[0])
def test_keypoint_hm_coder():
    coder_config = {'type': constants.KEY_KEYPOINTS_HEATMAP}
    bbox_coder = bbox_coders.build(coder_config)
    dataset = build_dataset(dataset_type='keypoint_kitti')
    sample = dataset[0]

    label_boxes_3d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_3D])
    label_boxes_2d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
    p2 = torch.from_numpy(sample[constants.KEY_STEREO_CALIB_P2])
    proposals = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
    num_instances = torch.from_numpy(sample[constants.KEY_NUM_INSTANCES])
    keypoints = sample[constants.KEY_KEYPOINTS]

    # ry = compute_ray_angle(label_boxes_3d[:, :3])
    # label_boxes_3d[:, -1] += ry

    # keep only the valid instances and add a batch dimension of 1
    label_boxes_3d = torch.stack(1 * [label_boxes_3d[:num_instances]], dim=0)
    label_boxes_2d = torch.stack(1 * [label_boxes_2d[:num_instances]], dim=0)
    proposals = torch.stack(1 * [proposals[:num_instances]], dim=0)
    keypoints = torch.stack(1 * [keypoints[:num_instances]], dim=0)
    p2 = torch.stack(1 * [p2], dim=0)

    encoded_corners_3d = bbox_coder.encode_batch(proposals, keypoints)
    num_boxes = encoded_corners_3d.shape[1]
    batch_size = encoded_corners_3d.shape[0]
    # view as (batch, boxes, 8 keypoints, channels) and drop the trailing
    # channel of each keypoint before decoding
    keypoint_heatmap = encoded_corners_3d.view(batch_size, num_boxes, 8,
                                               -1)[..., :-1]

    # alternative one-hot heatmap construction, kept for reference:
    # resolution = bbox_coder.resolution
    # keypoint_heatmap = torch.zeros(
    #     (batch_size * num_boxes * 8, resolution * resolution))
    # row = torch.arange(keypoint.numel()).type_as(keypoint)
    # keypoint_heatmap[row, keypoint.view(-1)] = 1
    # keypoint_heatmap = torch.stack([keypoint_heatmap] * 3, dim=1)

    # reshape before decode
    keypoint_heatmap = keypoint_heatmap.contiguous().view(
        batch_size, num_boxes, -1)

    decoded_corners_2d = bbox_coder.decode_batch(proposals, keypoint_heatmap)
    decoded_corners_2d = decoded_corners_2d.cpu().detach().numpy()

    image_path = sample[constants.KEY_IMAGE_PATH]
    image_dir = '/data/object/training/image_2'
    result_dir = './results/data'
    save_dir = 'results/images'
    calib_dir = '/data/object/training/calib'
    label_dir = None
    calib_file = None
    visualizer = ImageVisualizer(image_dir,
                                 result_dir,
                                 label_dir=label_dir,
                                 calib_dir=calib_dir,
                                 calib_file=calib_file,
                                 online=False,
                                 save_dir=save_dir)
    visualizer.render_image_corners_2d(
        image_path, corners_2d=decoded_corners_2d[0])
class Tester(object):
    def __init__(self, eval_config, logger=None):
        if logger is None:
            logger = logging.getLogger(__name__)
        self.logger = logger
        self.feat_vis = eval_config['feat_vis']
        self.thresh = eval_config['thresh']
        self.nms = eval_config['nms']
        self.class_agnostic = eval_config['class_agnostic']
        self.classes = ['bg'] + eval_config['classes']
        self.n_classes = len(self.classes)
        # self.batch_size = eval_config['batch_size']
        self.eval_out = eval_config['eval_out']
        self.test_type = eval_config['test_type']

        # image visualizer for any dataset
        image_dir = '/data/object/training/image_2'
        result_dir = './results/data'
        save_dir = 'results/images'
        calib_dir = '/data/object/training/calib'
        label_dir = None
        calib_file = None
        self.visualizer = ImageVisualizer(image_dir,
                                          result_dir,
                                          label_dir=label_dir,
                                          calib_dir=calib_dir,
                                          calib_file=calib_file,
                                          online=False,
                                          save_dir=save_dir)

    def _generate_label_path(self, image_path):
        image_name = os.path.basename(image_path)
        sample_name = os.path.splitext(image_name)[0]
        label_name = sample_name + '.txt'
        return os.path.join(self.eval_out, label_name)

    def save_mono_3d_dets(self, dets, label_path):
        res_str = []
        # KITTI label fields: type, truncated, occluded, alpha, bbox(4),
        # dimensions(h, w, l), location(x, y, z), ry, score
        kitti_template = ('{} -1 -1 -10 {:.3f} {:.3f} {:.3f} {:.3f} '
                          '{:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f} '
                          '{:.3f} {:.8f}')
        with open(label_path, 'w') as f:
            for cls_ind, dets_per_classes in enumerate(dets):
                if self.classes[cls_ind] == 'Tram':
                    continue
                for det in dets_per_classes:
                    xmin, ymin, xmax, ymax, cf, h, w, l, x, y, z, ry = det
                    res_str.append(
                        kitti_template.format(self.classes[cls_ind], xmin,
                                              ymin, xmax, ymax, h, w, l, x,
                                              y, z, ry, cf))
                    # variant writing alpha instead of ry:
                    # xmin, ymin, xmax, ymax, cf, h, w, l, x, y, z, alpha = det
            f.write('\n'.join(res_str))

    def save_dets(self, dets, label_path, image_path):
        res_str = []
        # 2D-only results: 3D fields are filled with KITTI dummy values
        kitti_template = ('{} -1 -1 -10 {:.3f} {:.3f} {:.3f} {:.3f} '
                          '-1 -1 -1 -1000 -1000 -1000 -10 {:.8f}')
        with open(label_path, 'w') as f:
            for cls_ind, dets_per_classes in enumerate(dets):
                for det in dets_per_classes:
                    xmin, ymin, xmax, ymax, cf = det
                    res_str.append(
                        kitti_template.format(self.classes[cls_ind], xmin,
                                              ymin, xmax, ymax, cf))
            f.write('\n'.join(res_str))

        # image = self.visualizer.parse_image(image_path)
        # self.visualizer.render_image_2d(image, boxes_2d, label_classes)

    def test_corners_3d(self, dataloader, model, logger):
        self.logger.info('Start testing')
        num_samples = len(dataloader)

        if self.feat_vis:
            # enable it before the forward pass
            model.enable_feat_vis()
        end_time = 0

        for step, data in enumerate(dataloader):
            data = common.to_cuda(data)
            image_path = data[constants.KEY_IMAGE_PATH]

            with torch.no_grad():
                prediction, _, _ = model(data)

            if self.feat_vis:
                featmaps_dict = model.get_feat()
                from utils.visualizer import FeatVisualizer
                feat_visualizer = FeatVisualizer()
                feat_visualizer.visualize_maps(featmaps_dict)

            scores = prediction[constants.KEY_CLASSES]
            boxes_2d = prediction[constants.KEY_BOXES_2D]
            corners_2d = prediction[constants.KEY_CORNERS_2D]
            p2 = data[constants.KEY_STEREO_CALIB_P2_ORIG]

            # keep only each row's argmax score, zero out the rest
            batch_size = scores.shape[0]
            scores = scores.view(-1, self.n_classes)
            new_scores = torch.zeros_like(scores)
            _, scores_argmax = scores.max(dim=-1)
            row = torch.arange(0,
                               scores_argmax.numel()).type_as(scores_argmax)
            new_scores[row, scores_argmax] = scores[row, scores_argmax]
            scores = new_scores.view(batch_size, -1, self.n_classes)

            for batch_ind in range(batch_size):
                boxes_2d_per_img = boxes_2d[batch_ind]
                scores_per_img = scores[batch_ind]
                corners_2d_per_img = corners_2d[batch_ind]
                p2_per_img = p2[batch_ind]
                num_cols = corners_2d.shape[-1]
                dets = [np.zeros((0, 8, num_cols), dtype=np.float32)]
                dets_2d = [np.zeros((0, 4), dtype=np.float32)]

                for class_ind in range(1, self.n_classes):
                    # class score threshold
                    inds = torch.nonzero(
                        scores_per_img[:, class_ind] > self.thresh).view(-1)
                    threshed_scores_per_img = scores_per_img[inds, class_ind]
                    if inds.numel() > 0:
                        threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                        threshed_corners_2d_per_img = corners_2d_per_img[inds]

                        # concat boxes and scores
                        threshed_dets_per_img = torch.cat([
                            threshed_boxes_2d_per_img,
                            threshed_scores_per_img.unsqueeze(-1),
                        ], dim=-1)

                        # sort by scores
                        _, order = torch.sort(threshed_scores_per_img, 0,
                                              True)
                        threshed_dets_per_img = threshed_dets_per_img[order]
                        threshed_corners_2d_per_img = \
                            threshed_corners_2d_per_img[order]

                        # nms
                        keep = nms(threshed_dets_per_img[:, :4],
                                   threshed_dets_per_img[:, 4],
                                   self.nms).view(-1).long()
                        nms_dets_per_img = threshed_dets_per_img[
                            keep].detach().cpu().numpy()
                        nms_corners_2d_per_img = threshed_corners_2d_per_img[
                            keep].detach().cpu().numpy()

                        dets.append(nms_corners_2d_per_img)
                        dets_2d.append(nms_dets_per_img[:, :4])
                    else:
                        dets.append(
                            np.zeros((0, 8, num_cols), dtype=np.float32))
                        dets_2d.append(np.zeros((0, 4)))

                corners = np.concatenate(dets, axis=0)
                dets_2d = np.concatenate(dets_2d, axis=0)
                # use a separate name so the prediction tensor `corners_2d`
                # is not clobbered for the next batch element
                corners_2d_out = None
                corners_3d = None
                if num_cols == 3:
                    corners_3d = corners
                else:
                    corners_2d_out = corners
                self.visualizer.render_image_corners_2d(
                    image_path[0],
                    boxes_2d=dets_2d,
                    corners_2d=corners_2d_out,
                    corners_3d=corners_3d,
                    p2=p2_per_img.cpu().numpy())

                duration_time = time.time() - end_time
                # label_path = self._generate_label_path(image_path[batch_ind])
                # self.save_mono_3d_dets(dets, label_path)
                sys.stdout.write('\r{}/{}, duration: {}'.format(
                    step + 1, num_samples, duration_time))
                sys.stdout.flush()
                end_time = time.time()

    def test_3d(self, dataloader, model, logger):
        self.logger.info('Start testing')
        num_samples = len(dataloader)

        if self.feat_vis:
            # enable it before the forward pass
            model.enable_feat_vis()
        end_time = 0

        for step, data in enumerate(dataloader):
            data = common.to_cuda(data)
            image_path = data[constants.KEY_IMAGE_PATH]

            with torch.no_grad():
                prediction, _, _ = model(data)

            if self.feat_vis:
                featmaps_dict = model.get_feat()
                from utils.visualizer import FeatVisualizer
                feat_visualizer = FeatVisualizer()
                feat_visualizer.visualize_maps(featmaps_dict)

            # initialize dets for each class (index 0 is background)
            dets = [[]]
            scores = prediction[constants.KEY_CLASSES]
            boxes_2d = prediction[constants.KEY_BOXES_2D]
            dims = prediction[constants.KEY_DIMS]
            orients = prediction[constants.KEY_ORIENTS_V2]
            p2 = data[constants.KEY_STEREO_CALIB_P2_ORIG]

            # keep only each row's argmax score, zero out the rest
            batch_size = scores.shape[0]
            scores = scores.view(-1, self.n_classes)
            new_scores = torch.zeros_like(scores)
            _, scores_argmax = scores.max(dim=-1)
            row = torch.arange(0,
                               scores_argmax.numel()).type_as(scores_argmax)
            new_scores[row, scores_argmax] = scores[row, scores_argmax]
            scores = new_scores.view(batch_size, -1, self.n_classes)

            for batch_ind in range(batch_size):
                boxes_2d_per_img = boxes_2d[batch_ind]
                scores_per_img = scores[batch_ind]
                dims_per_img = dims[batch_ind]
                orients_per_img = orients[batch_ind]
                p2_per_img = p2[batch_ind]

                for class_ind in range(1, self.n_classes):
                    # class score threshold
                    inds = torch.nonzero(
                        scores_per_img[:, class_ind] > self.thresh).view(-1)
                    threshed_scores_per_img = scores_per_img[inds, class_ind]
                    if inds.numel() > 0:
                        threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                        threshed_dims_per_img = dims_per_img[inds]
                        threshed_orients_per_img = orients_per_img[inds]

                        # concat boxes, scores, dims and orientation
                        threshed_dets_per_img = torch.cat([
                            threshed_boxes_2d_per_img,
                            threshed_scores_per_img.unsqueeze(-1),
                            threshed_dims_per_img,
                            threshed_orients_per_img.unsqueeze(-1)
                        ], dim=-1)

                        # sort by scores
                        _, order = torch.sort(threshed_scores_per_img, 0,
                                              True)
                        threshed_dets_per_img = threshed_dets_per_img[order]

                        # nms
                        keep = nms(threshed_dets_per_img[:, :4],
                                   threshed_dets_per_img[:, 4],
                                   self.nms).view(-1).long()
                        nms_dets_per_img = threshed_dets_per_img[
                            keep].detach().cpu().numpy()

                        # recover the 3D location from dims (cols 5:8),
                        # 2D box + score (cols :5) and orientation (col 8)
                        location = geometry_utils.calc_location(
                            nms_dets_per_img[:, 5:8],
                            nms_dets_per_img[:, :5], nms_dets_per_img[:, 8],
                            p2_per_img.cpu().numpy())

                        nms_dets_per_img = np.concatenate([
                            nms_dets_per_img[:, :5],
                            nms_dets_per_img[:, 5:8], location,
                            nms_dets_per_img[:, -1:]
                        ], axis=-1)

                        dets.append(nms_dets_per_img)
                    else:
                        dets.append([])

                duration_time = time.time() - end_time
                label_path = self._generate_label_path(image_path[batch_ind])
                self.save_mono_3d_dets(dets, label_path)
                sys.stdout.write('\r{}/{}, duration: {}'.format(
                    step + 1, num_samples, duration_time))
                sys.stdout.flush()
                end_time = time.time()

    def test_2d(self, dataloader, model, logger):
        self.logger.info('Start testing')
        num_samples = len(dataloader)

        if self.feat_vis:
            # enable it before the forward pass
            model.enable_feat_vis()
        end_time = 0

        for step, data in enumerate(dataloader):
            data = common.to_cuda(data)
            image_path = data[constants.KEY_IMAGE_PATH]

            with torch.no_grad():
                prediction, _, _ = model(data)

            if self.feat_vis:
                featmaps_dict = model.get_feat()
                from utils.visualizer import FeatVisualizer
                feat_visualizer = FeatVisualizer()
                feat_visualizer.visualize_maps(featmaps_dict)

            # initialize dets for each class (index 0 is background)
            dets = [[]]
            scores = prediction[constants.KEY_CLASSES]
            boxes_2d = prediction[constants.KEY_BOXES_2D]

            # keep only each row's argmax score, zero out the rest
            batch_size = scores.shape[0]
            scores = scores.view(-1, self.n_classes)
            new_scores = torch.zeros_like(scores)
            _, scores_argmax = scores.max(dim=-1)
            row = torch.arange(0,
                               scores_argmax.numel()).type_as(scores_argmax)
            new_scores[row, scores_argmax] = scores[row, scores_argmax]
            scores = new_scores.view(batch_size, -1, self.n_classes)

            for batch_ind in range(batch_size):
                boxes_2d_per_img = boxes_2d[batch_ind]
                scores_per_img = scores[batch_ind]

                for class_ind in range(1, self.n_classes):
                    # class score threshold
                    inds = torch.nonzero(
                        scores_per_img[:, class_ind] > self.thresh).view(-1)
                    threshed_scores_per_img = scores_per_img[inds, class_ind]
                    if inds.numel() > 0:
                        threshed_boxes_2d_per_img = boxes_2d_per_img[inds]

                        # concat boxes and scores
                        threshed_dets_per_img = torch.cat([
                            threshed_boxes_2d_per_img,
                            threshed_scores_per_img.unsqueeze(-1),
                        ], dim=-1)

                        # sort by scores
                        _, order = torch.sort(threshed_scores_per_img, 0,
                                              True)
                        threshed_dets_per_img = threshed_dets_per_img[order]

                        # nms
                        keep = nms(threshed_dets_per_img[:, :4],
                                   threshed_dets_per_img[:, 4],
                                   self.nms).view(-1).long()
                        nms_dets_per_img = threshed_dets_per_img[
                            keep].detach().cpu().numpy()
                        dets.append(nms_dets_per_img)
                    else:
                        dets.append([])

                duration_time = time.time() - end_time
                label_path = self._generate_label_path(image_path[batch_ind])
                self.save_dets(dets, label_path, image_path[batch_ind])
                sys.stdout.write('\r{}/{}, duration: {}'.format(
                    step + 1, num_samples, duration_time))
                sys.stdout.flush()
                end_time = time.time()

    def test_super_nms(self, dataloader, model, logger):
        self.logger.info('Start testing')
        num_samples = len(dataloader)

        if self.feat_vis:
            # enable it before the forward pass
            model.enable_feat_vis()
        end_time = 0

        for step, data in enumerate(dataloader):
            data = common.to_cuda(data)
            image_path = data[constants.KEY_IMAGE_PATH]

            with torch.no_grad():
                prediction = model(data)

            if self.feat_vis:
                featmaps_dict = model.get_feat()
                from utils.visualizer import FeatVisualizer
                feat_visualizer = FeatVisualizer()
                feat_visualizer.visualize_maps(featmaps_dict)

            # initialize dets for each class (index 0 is background)
            dets = [[]]
            scores = prediction[constants.KEY_CLASSES]
            boxes_2d = prediction[constants.KEY_BOXES_2D]
            batch_size = scores.shape[0]

            for batch_ind in range(batch_size):
                boxes_2d_per_img = boxes_2d[batch_ind]
                scores_per_img = scores[batch_ind]

                for class_ind in range(1, self.n_classes):
                    # deliberately loose threshold; super_nms does the
                    # heavy filtering below
                    inds = torch.nonzero(
                        scores_per_img[:, class_ind] > 0.01).view(-1)
                    threshed_scores_per_img = scores_per_img[inds, class_ind]
                    if inds.numel() > 0:
                        threshed_boxes_2d_per_img = boxes_2d_per_img[inds]

                        # concat boxes and scores
                        threshed_dets_per_img = torch.cat([
                            threshed_boxes_2d_per_img,
                            threshed_scores_per_img.unsqueeze(-1),
                        ], dim=-1)

                        # sort by scores
                        _, order = torch.sort(threshed_scores_per_img, 0,
                                              True)
                        threshed_dets_per_img = threshed_dets_per_img[order]

                        # super_nms in place of the plain nms call
                        keep = box_ops.super_nms(
                            threshed_dets_per_img[:, :4],
                            0.8,
                            nms_num=3,
                            loop_time=2)
                        nms_dets_per_img = threshed_dets_per_img[
                            keep].detach().cpu().numpy()
                        dets.append(nms_dets_per_img)
                    else:
                        dets.append([])

                duration_time = time.time() - end_time
                label_path = self._generate_label_path(image_path[batch_ind])
                self.save_dets(dets, label_path, image_path[batch_ind])
                sys.stdout.write('\r{}/{}, duration: {}'.format(
                    step + 1, num_samples, duration_time))
                sys.stdout.flush()
                end_time = time.time()

    def test(self, dataloader, model, logger):
        test_fn = getattr(self, self.test_type)
        test_fn(dataloader, model, logger)
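
# Hypothetical wiring of the Tester above. The config keys mirror what
# __init__ reads; the dataloader and model builders are assumed to exist
# elsewhere in the repo (dataloaders.make_data_loader and detectors.build
# appear in Mono3DInfer below), so treat this as a sketch:
#
#     eval_config = {
#         'feat_vis': False, 'thresh': 0.5, 'nms': 0.3,
#         'class_agnostic': True, 'classes': ['Car'],
#         'eval_out': './results/data', 'test_type': 'test_2d',
#     }
#     tester = Tester(eval_config)
#     tester.test(dataloader, model, tester.logger)  # dispatches via test_type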
def test_bbox_coder():
    bbox_coder = BBox3DCoder({})
    dataset = build_dataset()

    # ImageNet statistics for de-normalizing images (defined inside main()
    # in the original; repeated here so the function is self-contained)
    normal_mean = np.asarray([0.485, 0.456, 0.406])
    normal_van = np.asarray([0.229, 0.224, 0.225])

    image_dir = '/data/object/training/image_2'
    result_dir = './results/data'
    save_dir = 'results/images'
    calib_dir = '/data/object/training/calib'
    label_dir = None
    calib_file = None
    visualizer = ImageVisualizer(image_dir,
                                 result_dir,
                                 label_dir=label_dir,
                                 calib_dir=calib_dir,
                                 calib_file=calib_file,
                                 online=True,
                                 save_dir=save_dir)

    for sample in dataset:
        mean_dims = torch.from_numpy(sample['mean_dims'][None])
        label_boxes_3d = sample['gt_boxes_3d']
        label_boxes_2d = sample['gt_boxes']
        label_classes = sample['gt_labels']
        p2 = torch.from_numpy(sample['p2'])

        bbox_coder.mean_dims = mean_dims
        encoded_corners_2d = bbox_coder.encode_batch_bbox(
            label_boxes_3d, label_boxes_2d, label_classes, p2)
        decoded_corners_2d = bbox_coder.decode_batch_bbox(
            encoded_corners_2d, label_boxes_2d, p2)

        # reassemble (x, y, z, h, w, l, ry) from the decoded layout:
        # location (cols 6:9), dimensions (cols 3:6), yaw (last col)
        boxes_3d = torch.cat([
            decoded_corners_2d[:, 6:9], decoded_corners_2d[:, 3:6],
            decoded_corners_2d[:, -1:]
        ], dim=-1)
        corners_3d = geometry_utils.torch_boxes_3d_to_corners_3d(boxes_3d)
        corners_3d = corners_3d.cpu().detach().numpy()

        image_path = sample['img_name']
        # de-normalize the image tensor for rendering
        image = sample['img'].permute(1, 2, 0).cpu().detach().numpy()
        image = image.copy()
        image = image * normal_van + normal_mean

        visualizer.render_image_corners_2d(
            image_path, image, corners_3d=corners_3d, p2=p2)
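
# The `mean_dims` consumed above comes from the dataset sample; Mono3DInfer
# below builds the same tensor from its KITTI_MEAN_DIMS table in
# _generate_mean_dims. A standalone equivalent, assuming the table maps
# class name -> [l, w, h] and the coder consumes the reversed (h, w, l)
# order, would be (helper name is illustrative):
def kitti_mean_dims(classes, table):
    # reverse each entry to (h, w, l), mirroring _generate_mean_dims
    return np.stack([table[c][::-1] for c in classes],
                    axis=0).astype(np.float32)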
class Mono3DInfer(object):
    # per-class mean dimensions computed on the KITTI training split
    KITTI_MEAN_DIMS = {
        'Car': [3.88311640418, 1.62856739989, 1.52563191462],
        'Van': [5.06763659, 1.9007158, 2.20532825],
        'Truck': [10.13586957, 2.58549199, 3.2520595],
        'Pedestrian': [0.84422524, 0.66068622, 1.76255119],
        'Person_sitting': [0.80057803, 0.5983815, 1.27450867],
        'Cyclist': [1.76282397, 0.59706367, 1.73698127],
        'Tram': [16.17150617, 2.53246914, 3.53079012],
        'Misc': [3.64300781, 1.54298177, 1.92320313]
    }

    def get_random_color(self):
        color_code = []
        for _ in range(3):
            color_code.append(random.randint(0, 255))
        return color_code

    def __init__(self, args):
        # first set up the logger
        self.logger = setup_logger()

        self.args = args
        self.config = self.generate_config(args, self.logger)
        self.data_config = self.config['eval_data_config']
        self.dataset_config = self.data_config['dataset_config']
        self.classes = ['bg'] + self.dataset_config['classes']
        self.n_classes = len(self.classes)
        colors = []
        for i in range(self.n_classes):
            colors.append(self.get_random_color())

        self.eval_config = self.config['eval_config']
        self.thresh = self.eval_config['thresh']
        self.nms = self.eval_config['nms']

        image_dir = '/data/object/training/image_2'
        result_dir = './results/data'
        save_dir = 'results/images'
        calib_dir = '/data/object/training/calib'
        label_dir = None
        calib_file = None
        self.visualizer = ImageVisualizer(image_dir,
                                          result_dir,
                                          label_dir=label_dir,
                                          calib_dir=calib_dir,
                                          calib_file=calib_file,
                                          online=False,
                                          save_dir=save_dir)
        self.visualizer.colors = colors
        self.visualizer.classes = self.classes

    def preprocess(self, im, stereo_calib_p2):
        """Convert an image and its calibration into the model's data dict."""
        image_input = im
        image_shape = image_input.size[::-1]
        # no scaling for now
        image_scale = (1.0, 1.0)
        image_info = image_shape + image_scale

        # the calib can be read from a different file per sample or from a
        # single file shared by all samples
        transform_sample = {}
        transform_sample[constants.KEY_IMAGE] = image_input
        transform_sample[constants.KEY_STEREO_CALIB_P2] = \
            stereo_calib_p2.astype(np.float32)

        # (h, w, scale)
        transform_sample[constants.KEY_IMAGE_INFO] = np.asarray(
            image_info, dtype=np.float32)

        mean_dims = self._generate_mean_dims()
        transform_sample[constants.KEY_MEAN_DIMS] = mean_dims
        transform_sample[constants.KEY_STEREO_CALIB_P2_ORIG] = np.copy(
            transform_sample[constants.KEY_STEREO_CALIB_P2])

        # transform
        transform_config = self.data_config['transform_config']
        transform = transforms.build(transform_config)
        training_sample = transform(transform_sample)
        return training_sample

    def _generate_mean_dims(self):
        mean_dims = []
        for class_type in self.classes[1:]:
            mean_dims.append(self.KITTI_MEAN_DIMS[class_type][::-1])
        return np.stack(mean_dims, axis=0).astype(np.float32)

    def to_batch(self, data):
        # add a leading batch dimension of 1 to every field
        for key in data:
            data[key] = data[key][None, ...]
        return data

    def inference(self, im, p2):
        """
        Args:
            im: shape(N, 3, H, W)
        Returns:
            dets: shape(N, M, 8)
        """
        config = self.config
        args = self.args
        eval_config = config['eval_config']
        model_config = config['model_config']
        data_config = config['eval_data_config']

        np.random.seed(eval_config['rng_seed'])

        self.logger.info('Using config:')
        pprint.pprint({
            'model_config': model_config,
            'data_config': data_config,
            'eval_config': eval_config
        })

        eval_out = eval_config['eval_out']
        if not os.path.exists(eval_out):
            self.logger.info('create eval out directory {}'.format(eval_out))
            os.makedirs(eval_out)
        else:
            self.logger.warning('dir {} exists already!'.format(eval_out))

        # restore from a checkpoint or fall back to random init
        restore = True

        # two ways to load the model:
        # 1. load from any other dir, which only needs config and model path
        # 2. load from the training dir
        if args.model is not None:
            # it should be a path to the model checkpoint
            checkpoint_name = os.path.basename(args.model)
            input_dir = os.path.dirname(args.model)
        elif args.checkpoint is not None:
            checkpoint_name = 'detector_{}.pth'.format(args.checkpoint)
            assert args.load_dir is not None, \
                'please choose a directory to load checkpoint'
            eval_config['load_dir'] = args.load_dir
            input_dir = os.path.join(eval_config['load_dir'],
                                     model_config['type'],
                                     data_config['name'])
            if not os.path.exists(input_dir):
                raise Exception(
                    'There is no input directory for loading network from {}'.
                    format(input_dir))
        else:
            restore = False

        # log the restore decision
        if restore:
            self.logger.info("restore from checkpoint")
        else:
            self.logger.info("use pytorch default initialization")

        # model
        model = detectors.build(model_config)
        model.eval()

        if restore:
            saver = Saver(input_dir)
            saver.load({'model': model}, checkpoint_name)
        model = model.cuda()

        self.logger.info('Start testing')

        data = self.preprocess(im, p2)
        data = self.to_batch(data)
        data = common.to_cuda(data)

        with torch.no_grad():
            prediction = model(data)

        # initialize dets for each class (index 0 is background)
        dets = [[]]
        scores = prediction[constants.KEY_CLASSES]
        boxes_2d = prediction[constants.KEY_BOXES_2D]
        dims = prediction[constants.KEY_DIMS]
        orients = prediction[constants.KEY_ORIENTS_V2]
        p2 = data[constants.KEY_STEREO_CALIB_P2_ORIG]

        # keep only each row's argmax score, zero out the rest
        batch_size = scores.shape[0]
        scores = scores.view(-1, self.n_classes)
        new_scores = torch.zeros_like(scores)
        _, scores_argmax = scores.max(dim=-1)
        row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
        new_scores[row, scores_argmax] = scores[row, scores_argmax]
        scores = new_scores.view(batch_size, -1, self.n_classes)

        boxes_2d_per_img = boxes_2d[0]
        scores_per_img = scores[0]
        dims_per_img = dims[0]
        orients_per_img = orients[0]
        p2_per_img = p2[0]

        for class_ind in range(1, self.n_classes):
            # class score threshold
            inds = torch.nonzero(
                scores_per_img[:, class_ind] > self.thresh).view(-1)
            threshed_scores_per_img = scores_per_img[inds, class_ind]
            if inds.numel() > 0:
                threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                threshed_dims_per_img = dims_per_img[inds]
                threshed_orients_per_img = orients_per_img[inds]
                threshed_dets_per_img = torch.cat([
                    threshed_boxes_2d_per_img,
                    threshed_scores_per_img.unsqueeze(-1),
                    threshed_dims_per_img,
                    threshed_orients_per_img.unsqueeze(-1)
                ], dim=-1)

                # sort by scores
                _, order = torch.sort(threshed_scores_per_img, 0, True)
                threshed_dets_per_img = threshed_dets_per_img[order]

                # nms
                keep = nms(threshed_dets_per_img[:, :4],
                           threshed_dets_per_img[:, 4],
                           self.nms).view(-1).long()
                nms_dets_per_img = threshed_dets_per_img[keep].detach().cpu(
                ).numpy()

                # recover the 3D location from dims, 2D box and orientation
                location = geometry_utils.calc_location(
                    nms_dets_per_img[:, 5:8], nms_dets_per_img[:, :5],
                    nms_dets_per_img[:, 8], p2_per_img.cpu().numpy())

                nms_dets_per_img = np.concatenate([
                    nms_dets_per_img[:, :5], nms_dets_per_img[:, 5:8],
                    location, nms_dets_per_img[:, -1:]
                ], axis=-1)
                dets.append(nms_dets_per_img)
            else:
                dets.append([])

        # each row: xmin, ymin, xmax, ymax, cf, h, w, l, x, y, z, ry
        return dets

    def parse_kitti_format(self, dets, p2):
        """
        Args:
            dets: (N, 12)
        Returns:
            results: (boxes_3d, boxes_2d, label_classes, p2)
        """
        label_classes = []
        boxes_2d = []
        boxes_3d = []

        for cls_ind, det_per_cls in enumerate(dets):
            if len(det_per_cls) == 0:
                continue
            boxes_2d.append(det_per_cls[:, :5])
            boxes_3d.append(det_per_cls[:, [8, 9, 10, 5, 6, 7, 11, 4]])
            label_classes.extend([cls_ind] * det_per_cls.shape[0])

        boxes_2d = np.concatenate(boxes_2d, axis=0)
        boxes_3d = np.concatenate(boxes_3d, axis=0)
        label_classes = np.asarray(label_classes)
        return boxes_3d, boxes_2d, label_classes, p2

    def vis_result(self, im_to_show, dets, p2):
        """
        Args:
            im_to_show: shape(H, W, 3)
        """
        results = self.parse_kitti_format(dets, p2)
        image = self.visualizer.render_image(im_to_show, results)

        # online display; the offline branch would write to disk instead:
        # sample_name = self.get_sample_name_from_path(image_path)
        # saved_path = self.get_saved_path(sample_name)
        # cv2.imwrite(saved_path, image)
        cv2.imshow("test", image)
        cv2.waitKey(0)

    def generate_config(self, args, logger):
        # read config from file
        if args.config is None:
            output_dir = os.path.join(args.load_dir, args.net, args.dataset)
            config_path = Config.infer_fromdir(output_dir)
        else:
            config_path = args.config
        config = Config.fromjson(config_path)

        eval_config = config['eval_config']
        model_config = config['model_config']
        data_config = config['eval_data_config']

        np.random.seed(eval_config['rng_seed'])
        torch.backends.cudnn.benchmark = True

        model_config['pretrained'] = False
        eval_config['feat_vis'] = args.feat_vis

        assert args.net is not None, 'please select a base model'
        model_config['type'] = args.net

        # use multiple gpus in parallel
        eval_config['mGPUs'] = args.mGPUs
        eval_config['cuda'] = args.cuda

        # use a pretrained model for initialization
        eval_config['model'] = args.model
        eval_config['checkpoint'] = args.checkpoint

        if args.nms is not None:
            eval_config['nms'] = args.nms
        if args.thresh is not None:
            eval_config['thresh'] = args.thresh
            model_config['score_thresh'] = args.thresh

        if args.img_path:
            dataset_config = data_config['dataset_config']
            # disable the dataset file and use the image directly
            dataset_config['dataset_file'] = None
            dataset_config['demo_file'] = args.img_path
            dataset_config['calib_file'] = args.calib_file
        if args.img_dir:
            dataset_config = data_config['dataset_config']
            # disable the dataset file and use the image dir directly
            dataset_config['dataset_file'] = None
            dataset_config['img_dir'] = args.img_dir
        if args.calib_file:
            dataset_config = data_config['dataset_config']
            dataset_config['calib_file'] = args.calib_file
        if args.calib_dir:
            dataset_config = data_config['dataset_config']
            dataset_config['calib_dir'] = args.calib_dir
        return config
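
# End-to-end usage sketch for Mono3DInfer. The args object and file paths
# are illustrative; `inference` expects a PIL-style image (preprocess reads
# `im.size`) and a 3x4 P2 calibration matrix:
#
#     infer = Mono3DInfer(args)
#     im = Image.open('/data/object/training/image_2/000001.png')
#     p2 = calib['P2'].reshape(3, 4)        # however the calib is loaded
#     dets = infer.inference(im, p2)        # per-class KITTI-style rows
#     infer.vis_result(np.asarray(im), dets, p2)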
    image = np.asarray(image)
    visualize_bbox(image, label_boxes_2d[:num_instances], save=True)


if __name__ == '__main__':
    from utils.drawer import ImageVisualizer

    image_dir = '/data/object/training/image_2'
    result_dir = './results/data'
    save_dir = 'results/images'
    calib_dir = '/data/object/training/calib'
    label_dir = None
    calib_file = None
    visualizer = ImageVisualizer(image_dir,
                                 result_dir,
                                 label_dir=label_dir,
                                 calib_dir=calib_dir,
                                 calib_file=calib_file,
                                 online=False,
                                 save_dir=save_dir)

    dataset_config = {
        "classes": ["car"],
        "data_path": "leftImg8bit_trainvaltest/leftImg8bit",
        "dataset_file": "data/demo.txt",
        "label_path": "./gtFine",
        "root_path": "/data/Cityscape",
        "type": "cityscape"
    }

    transform = None
    dataset = CityScapeDataset(dataset_config, transform, training=True)