def _get_image_blob(roidb):
    """Assemble the input blob for all frames of the roidb entries in a batch.

    For every roidb entry one element of cfg.TRAIN.SCALES is picked at random;
    each frame read for that entry is optionally mirrored and then prepared
    with blob_utils.prep_im_for_blob at that target size.

    Args:
        roidb: Sequence of roidb entries; each must provide a 'flipped' key
            and be readable by image_utils.read_image_video.

    Returns:
        (blob, im_scales): the output of blob_utils.im_list_to_blob over every
        prepared frame, and a list holding one scale value per roidb entry.
    """
    batch_size = len(roidb)
    # Draw one random index into cfg.TRAIN.SCALES for each roidb entry.
    picked_scales = np.random.randint(
        0, high=len(cfg.TRAIN.SCALES), size=batch_size)
    all_frames = []
    im_scales = []
    for entry, scale_idx in zip(roidb, picked_scales):
        frames = image_utils.read_image_video(entry)
        # The target size is fixed per entry, so look it up once.
        target_size = cfg.TRAIN.SCALES[scale_idx]
        for frame_idx, frame in enumerate(frames):
            if entry['flipped']:
                # Mirror along the second axis.
                frame = frame[:, ::-1, :]
            prepared, frame_scales = blob_utils.prep_im_for_blob(
                frame, cfg.PIXEL_MEANS, [target_size], cfg.TRAIN.MAX_SIZE)
            # Only one target size was requested, so keep the first result.
            frames[frame_idx] = prepared[0]
        # The scale of the last processed frame stands for the whole entry
        # (every frame of an entry is prepared with the same target size).
        im_scales.append(frame_scales[0])
        all_frames.extend(frames)
    # Pack all prepared frames into a single blob.
    blob = blob_utils.im_list_to_blob(all_frames)
    return blob, im_scales
def _generate_visualizations(entry, ix, all_boxes, all_keyps, all_tracks,
                             thresh):
    """Render ground-truth and prediction overlays for one entry's key frame.

    Args:
        entry: roidb entry; its 'boxes', 'gt_keypoints' and 'tracks' fields
            supply the ground truth that is drawn.
        ix: Identifier handed to _id_or_index to pick this entry's results.
        all_boxes: Per-class collection of box results.
        all_keyps: Per-class collection of keypoint results, or None.
        all_tracks: Per-class collection of track results, or None.
        thresh: Threshold passed to _vis_single_frame for the predictions.

    Returns:
        (gt, pred): the ground-truth rendering and the prediction rendering,
        each drawn on its own copy of the key frame.
    """
    key_frame = image_utils.read_image_video(entry, key_frame_only=True)[0]

    def _pick_optional(per_class):
        # Optional inputs stay None; otherwise select this entry's results
        # class by class.
        if per_class is None:
            return None
        return [_id_or_index(ix, cls_results) for cls_results in per_class]

    cls_boxes_i = [_id_or_index(ix, cls_results) for cls_results in all_boxes]
    cls_keyps_i = _pick_optional(all_keyps)
    cls_tracks_i = _pick_optional(all_tracks)

    pred = _vis_single_frame(
        key_frame.copy(), cls_boxes_i, None, cls_keyps_i, cls_tracks_i, thresh)
    # Ground truth is passed as a two-element per-class list whose first slot
    # is empty; it is drawn with a fixed 0.1 threshold.
    gt = _vis_single_frame(
        key_frame.copy(),
        [[], _convert_roidb_to_pred_boxes(entry['boxes'])],
        None,
        [[], _convert_roidb_to_pred_keyps(entry['gt_keypoints'])],
        [[], _convert_roidb_to_pred_tracks(entry['tracks'])],
        0.1)
    return gt, pred
def compute_optical_flow(video_json_data):
    """Compute flow between consecutive key frames of a video.

    Args:
        video_json_data: Iterable of 2-item pairs; the first item of each
            pair is read with img_utils.read_image_video.

    Returns:
        None when there are no frames. Otherwise (flows, neg_flows), two lists
        with one element per frame: flows[i] is the run_farneback result for
        frames i and i + 1 (zeros for the last frame), and neg_flows[i] is
        _compute_neg_flow of flows[i - 1] (zeros for the first frame).
    """
    key_frames = [
        img_utils.read_image_video(item, key_frame_only=True)[0]
        for item, _ in video_json_data
    ]
    if not key_frames:
        return
    gray_frames = [
        cv2.cvtColor(frame.astype('uint8'), cv2.COLOR_BGR2GRAY)
        for frame in key_frames
    ]
    # Each frame paired with its successor.
    frame_pairs = list(zip(gray_frames[:-1], gray_frames[1:]))
    with closing(mp.Pool(32)) as pool:
        # https://stackoverflow.com/a/25968716/1492614
        pair_flows = list(
            tqdm(pool.imap(run_farneback, frame_pairs),
                 total=len(frame_pairs),
                 desc='Computing flow',
                 leave=False))
    pool.terminate()

    height, width = gray_frames[0].shape[0], gray_frames[0].shape[1]
    # There is no negative flow for the first frame, so it gets zeros.
    neg_flows = [np.zeros((height, width, 2))]
    neg_flows.extend(_compute_neg_flow(flow) for flow in pair_flows)
    # There is no forward flow for the last frame, so it gets zeros.
    flows = list(pair_flows)
    flows.append(np.zeros((height, width, 2)))
    return flows, neg_flows
def _get_image_blob(roidb):
    """Build the input blob from the frames of each roidb entry.

    A scale from cfg.TRAIN.SCALES is sampled at random for every roidb entry
    and applied to all frames read for that entry.

    Args:
        roidb: Sequence of roidb entries (each with a 'flipped' key).

    Returns:
        Tuple of the blob produced by blob_utils.im_list_to_blob and the list
        of scales, one per roidb entry.
    """
    n_entries = len(roidb)
    # Random scale index for each entry of the batch.
    scale_choice = np.random.randint(
        0, high=len(cfg.TRAIN.SCALES), size=n_entries)

    blob_inputs = []
    im_scales = []
    for entry_idx, entry in enumerate(roidb):
        clip = image_utils.read_image_video(entry)
        size_for_entry = cfg.TRAIN.SCALES[scale_choice[entry_idx]]
        for pos, raw in enumerate(clip):
            # Mirror along the second axis when the entry is marked flipped.
            oriented = raw[:, ::-1, :] if entry['flipped'] else raw
            resized, scales = blob_utils.prep_im_for_blob(
                oriented, cfg.PIXEL_MEANS, [size_for_entry],
                cfg.TRAIN.MAX_SIZE)
            # A single target size was passed in; take its result.
            clip[pos] = resized[0]
        # All frames of an entry share the target size, so the scale left
        # over from the last frame is recorded for the entry.
        im_scales.append(scales[0])
        blob_inputs += clip

    # Stack the processed frames into the network input blob.
    return blob_utils.im_list_to_blob(blob_inputs), im_scales
def compute_optical_flow(video_json_data):
    """Run run_farneback on every pair of neighbouring key frames.

    Args:
        video_json_data: Iterable of (element, other) pairs; only the first
            member of each pair is used, to read its key frame.

    Returns:
        None if no frames were read; otherwise a (flows, neg_flows) tuple of
        equally long lists. The last entry of flows and the first entry of
        neg_flows are zero arrays, since no flow is defined there.
    """
    frames = []
    for element, _ in video_json_data:
        frames.append(
            img_utils.read_image_video(element, key_frame_only=True)[0])
    if len(frames) == 0:
        return

    frames = [
        cv2.cvtColor(image.astype('uint8'), cv2.COLOR_BGR2GRAY)
        for image in frames
    ]

    def _zero_flow():
        # Two-channel zero array with the height and width of the first frame.
        return np.zeros((frames[0].shape[0], frames[0].shape[1], 2))

    neighbours = []
    for idx in range(len(frames) - 1):
        neighbours.append((frames[idx], frames[idx + 1]))

    with closing(mp.Pool(32)) as pool:
        # https://stackoverflow.com/a/25968716/1492614
        progress = tqdm(
            pool.imap(run_farneback, neighbours),
            total=len(neighbours),
            desc='Computing flow',
            leave=False)
        computed = list(progress)
    pool.terminate()

    flows = []
    # No negative flow is defined for the first frame.
    neg_flows = [_zero_flow()]
    for flow in computed:
        flows.append(flow)
        neg_flows.append(_compute_neg_flow(flow))
    # No flow is defined for the last frame.
    flows.append(_zero_flow())
    return flows, neg_flows
def vis_predictions(entry, cur_boxes, cur_poses):
    """Visualize tubes and the keypoint predictions on those tubes.

    Args:
        entry (dict or image/video): An entry from roidb (its frames are then
            read with image_utils.read_image_video), or the frames themselves
            (if overlaying).
        cur_boxes (np.ndarray): Shape Nx(4T+1), last dimension is score.
        cur_poses (list of np.ndarray): List has N elements, each of which is
            4x17T.

    Returns:
        The frames unchanged when there are no boxes or no poses to draw;
        otherwise a list with one vis_one_image_opencv result per time step.
    """
    if isinstance(entry, dict):
        frames = image_utils.read_image_video(entry)
    else:
        frames = entry
    # Nothing to overlay: hand the frames back untouched.
    if len(cur_boxes) == 0 or cur_boxes.size == 0 or len(cur_poses) == 0:
        return frames
    # Every time step contributes 4 box coordinates; one trailing score column.
    time_dim = (cur_boxes.shape[-1] - 1) // 4
    num_keypoints = cur_poses[0].shape[-1] // time_dim
    res = []
    for t in range(time_dim):
        # Columns of this time step's box plus the shared score column.
        # list() is required: on Python 3 a range cannot be concatenated
        # with a list.
        box_columns = list(range(4 * t, 4 * (t + 1))) + [-1]
        pred = vis_one_image_opencv(
            frames[t],
            cur_boxes[:, np.array(box_columns)],
            keypoints=[
                el[..., t * num_keypoints:(t + 1) * num_keypoints]
                for el in cur_poses
            ],
            show_box=True)
        res.append(pred)
    return res
def _generate_visualizations(entry, ix, all_boxes, all_keyps, all_tracks,
                             thresh):
    """Draw predictions and ground truth on the key frame of a roidb entry.

    Args:
        entry: roidb entry providing the frame and the 'boxes',
            'gt_keypoints' and 'tracks' ground-truth fields.
        ix: Value forwarded to _id_or_index for selecting per-entry results.
        all_boxes: Indexable per-class box results.
        all_keyps: Indexable per-class keypoint results, or None.
        all_tracks: Indexable per-class track results, or None.
        thresh: Threshold used when drawing the predictions.

    Returns:
        Tuple (gt, pred) of the two _vis_single_frame outputs.
    """
    frame = image_utils.read_image_video(entry, key_frame_only=True)[0]

    boxes_here = [
        _id_or_index(ix, all_boxes[cls]) for cls in range(len(all_boxes))]
    keyps_here = None if all_keyps is None else [
        _id_or_index(ix, all_keyps[cls]) for cls in range(len(all_keyps))]
    tracks_here = None if all_tracks is None else [
        _id_or_index(ix, all_tracks[cls]) for cls in range(len(all_tracks))]

    pred = _vis_single_frame(
        frame.copy(), boxes_here, None, keyps_here, tracks_here, thresh)

    # Ground truth goes in as two-slot per-class lists with an empty first
    # slot, and is rendered with a fixed threshold of 0.1.
    gt_canvas = frame.copy()
    gt_boxes = [[], _convert_roidb_to_pred_boxes(entry['boxes'])]
    gt_keyps = [[], _convert_roidb_to_pred_keyps(entry['gt_keypoints'])]
    gt_tracks = [[], _convert_roidb_to_pred_tracks(entry['tracks'])]
    gt = _vis_single_frame(
        gt_canvas, gt_boxes, None, gt_keyps, gt_tracks, 0.1)
    return gt, pred
def test_net(ind_range=None):
    """Run detection over the test roidb and pickle the collected results.

    Args:
        ind_range: Optional index range forwarded to get_roidb_and_dataset.
            When given, its two values are also embedded in the name of the
            output file.

    Returns:
        (all_boxes, all_segms, all_keyps) as filled in by extend_results.
    """
    assert cfg.TEST.WEIGHTS != '', \
        'TEST.WEIGHTS must be set to the model file to test'
    assert not cfg.MODEL.RPN_ONLY, \
        'Use rpn_generate to generate proposals from RPN-only models'
    assert cfg.TEST.DATASET != '', \
        'TEST.DATASET must be set to the dataset name to test'

    output_dir = get_output_dir(training=False)
    (roidb, dataset, start_ind, end_ind,
     total_num_images) = get_roidb_and_dataset(ind_range)
    model = initialize_model_from_cfg()
    num_images = len(roidb)
    all_boxes, all_segms, all_keyps = empty_results(
        cfg.MODEL.NUM_CLASSES, num_images)
    timers = defaultdict(Timer)
    gpu_dev = core.DeviceOption(caffe2_pb2.CUDA, cfg.ROOT_GPU_ID)
    name_scope = 'gpu_{}'.format(cfg.ROOT_GPU_ID)

    detect_timer_keys = (
        'im_detect_bbox', 'im_detect_mask', 'im_detect_keypoints')
    misc_timer_keys = ('misc_bbox', 'misc_mask', 'misc_keypoints')

    for i, entry in enumerate(roidb):
        if cfg.MODEL.FASTER_RCNN:
            box_proposals = None
        else:
            # The roidb may also hold ground-truth rois (e.g. when it comes
            # from the training or val split). Detection is evaluated on the
            # non-ground-truth rois only, i.e. those whose gt_classes value
            # is 0.
            box_proposals = entry['boxes'][entry['gt_classes'] == 0]
            if len(box_proposals) == 0:
                continue

        im = image_utils.read_image_video(entry)
        with core.NameScope(name_scope), core.DeviceScope(gpu_dev):
            cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(
                model, im, box_proposals, timers)

        extend_results(i, all_boxes, cls_boxes_i)
        # Segmentation and keypoint outputs are optional.
        for store, values in ((all_segms, cls_segms_i),
                              (all_keyps, cls_keyps_i)):
            if values is not None:
                extend_results(i, store, values)

        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum(
                [timer.average_time for timer in timers.values()])
            remaining = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(remaining)))
            det_time = sum(
                timers[key].average_time for key in detect_timer_keys)
            misc_time = sum(
                timers[key].average_time for key in misc_timer_keys)
            logger.info(
                ('im_detect: range [{:d}, {:d}] of {:d}: '
                 '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})').format(
                    start_ind + 1, end_ind, total_num_images,
                    start_ind + i + 1, start_ind + num_images,
                    det_time, misc_time, eta))

        if cfg.VIS:
            im_name = os.path.splitext(os.path.basename(entry['image']))[0]
            vis_utils.vis_one_image(
                im[:, :, ::-1],
                '{:d}_{:s}'.format(i, im_name),
                os.path.join(output_dir, 'vis'),
                cls_boxes_i,
                segms=cls_segms_i,
                keypoints=cls_keyps_i,
                thresh=cfg.VIS_THR,
                box_alpha=0.8,
                dataset=dataset,
                show_class=True)

    cfg_yaml = yaml.dump(cfg)
    det_name = ('detection_range_%s_%s.pkl' % tuple(ind_range)
                if ind_range is not None else 'detections.pkl')
    det_file = os.path.join(output_dir, det_name)
    robust_pickle_dump(
        dict(all_boxes=all_boxes,
             all_segms=all_segms,
             all_keyps=all_keyps,
             cfg=cfg_yaml),
        det_file)
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_boxes, all_segms, all_keyps
def test_net(ind_range=None):
    """Evaluate the configured model on the test roidb and save detections.

    Each roidb entry is read, passed through im_detect_all on the root GPU
    and its outputs are accumulated. Progress is logged every tenth entry and
    visualizations are written when cfg.VIS is set.

    Args:
        ind_range: Optional index range passed to get_roidb_and_dataset; it
            also selects the name of the output pickle.

    Returns:
        Tuple (all_boxes, all_segms, all_keyps) of accumulated results.
    """
    assert cfg.TEST.WEIGHTS != '', \
        'TEST.WEIGHTS must be set to the model file to test'
    assert not cfg.MODEL.RPN_ONLY, \
        'Use rpn_generate to generate proposals from RPN-only models'
    assert cfg.TEST.DATASET != '', \
        'TEST.DATASET must be set to the dataset name to test'

    output_dir = get_output_dir(training=False)
    roidb_info = get_roidb_and_dataset(ind_range)
    roidb, dataset, start_ind, end_ind, total_num_images = roidb_info
    model = initialize_model_from_cfg()
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)
    timers = defaultdict(Timer)
    gpu_dev = core.DeviceOption(caffe2_pb2.CUDA, cfg.ROOT_GPU_ID)
    name_scope = 'gpu_{}'.format(cfg.ROOT_GPU_ID)

    for i, entry in enumerate(roidb):
        # No precomputed proposals are passed for Faster R-CNN models.
        box_proposals = None
        if not cfg.MODEL.FASTER_RCNN:
            # Ground-truth rois may be present in the roidb (training or val
            # split); keep only rois whose gt_classes field is 0, meaning
            # there is no ground truth for them.
            not_gt = entry['gt_classes'] == 0
            box_proposals = entry['boxes'][not_gt]
            if len(box_proposals) == 0:
                continue

        im = image_utils.read_image_video(entry)
        with core.NameScope(name_scope):
            with core.DeviceScope(gpu_dev):
                detections = im_detect_all(model, im, box_proposals, timers)
        cls_boxes_i, cls_segms_i, cls_keyps_i = detections

        extend_results(i, all_boxes, cls_boxes_i)
        if cls_segms_i is not None:
            extend_results(i, all_segms, cls_segms_i)
        if cls_keyps_i is not None:
            extend_results(i, all_keyps, cls_keyps_i)

        # Log only every tenth entry to reduce log file size.
        if i % 10 == 0:
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = (timers['im_detect_bbox'].average_time +
                        timers['im_detect_mask'].average_time +
                        timers['im_detect_keypoints'].average_time)
            misc_time = (timers['misc_bbox'].average_time +
                         timers['misc_mask'].average_time +
                         timers['misc_keypoints'].average_time)
            template = ('im_detect: range [{:d}, {:d}] of {:d}: '
                        '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})')
            message = template.format(
                start_ind + 1, end_ind, total_num_images,
                start_ind + i + 1, start_ind + num_images,
                det_time, misc_time, eta)
            logger.info(message)

        if cfg.VIS:
            base_name = os.path.basename(entry['image'])
            im_name = os.path.splitext(base_name)[0]
            vis_dir = os.path.join(output_dir, 'vis')
            vis_utils.vis_one_image(
                im[:, :, ::-1],
                '{:d}_{:s}'.format(i, im_name),
                vis_dir,
                cls_boxes_i,
                segms=cls_segms_i,
                keypoints=cls_keyps_i,
                thresh=cfg.VIS_THR,
                box_alpha=0.8,
                dataset=dataset,
                show_class=True)

    cfg_yaml = yaml.dump(cfg)
    if ind_range is None:
        det_name = 'detections.pkl'
    else:
        det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range)
    det_file = os.path.join(output_dir, det_name)
    payload = {
        'all_boxes': all_boxes,
        'all_segms': all_segms,
        'all_keyps': all_keyps,
        'cfg': cfg_yaml,
    }
    robust_pickle_dump(payload, det_file)
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_boxes, all_segms, all_keyps