Example #1
def _get_image_blob(roidb):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    # Sample random scales to use for each image in this batch
    scale_inds = np.random.randint(
        0, high=len(cfg.TRAIN.SCALES), size=num_images)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        ims = image_utils.read_image_video(roidb[i])
        for im_id, im in enumerate(ims):
            if roidb[i]['flipped']:
                im = im[:, ::-1, :]
            target_size = cfg.TRAIN.SCALES[scale_inds[i]]
            im, im_scale = blob_utils.prep_im_for_blob(
                im, cfg.PIXEL_MEANS, [target_size], cfg.TRAIN.MAX_SIZE)
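            # prep_im_for_blob takes a list of target sizes and returns
            # parallel lists, so index 0 below picks out the single scale used.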
            ims[im_id] = im[0]
        # Taking the im_scale from the last im in ims is fine (all are the same)
        im_scales.append(im_scale[0])
        processed_ims += ims

    # Create a blob to hold the input images
    blob = blob_utils.im_list_to_blob(processed_ims)
    return blob, im_scales
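# NOTE: blob_utils.im_list_to_blob is not shown in this excerpt. The sketch
# below is a minimal, self-contained approximation of the Detectron-style
# packing it is assumed to perform (zero-pad each image to the batch max
# height/width, then stack into an NCHW tensor); the name is hypothetical.
import numpy as np

def im_list_to_blob_sketch(ims):
    """Pad HxWx3 float images to a common size and stack them to NCHW."""
    max_h = max(im.shape[0] for im in ims)
    max_w = max(im.shape[1] for im in ims)
    blob = np.zeros((len(ims), max_h, max_w, 3), dtype=np.float32)
    for i, im in enumerate(ims):
        # Top-left aligned copy; the rest of the slot stays zero padding
        blob[i, :im.shape[0], :im.shape[1], :] = im
    return blob.transpose((0, 3, 1, 2))  # NHWC -> NCHW for Caffe2 ops

# Two differently sized images pack into a single (2, 3, 480, 640) blob
print(im_list_to_blob_sketch([np.random.rand(480, 640, 3),
                              np.random.rand(360, 480, 3)]).shape)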
Example #2
def _generate_visualizations(entry, ix, all_boxes, all_keyps, all_tracks,
                             thresh):
    im = image_utils.read_image_video(entry, key_frame_only=True)[0]
    cls_boxes_i = [
        _id_or_index(ix, all_boxes[j]) for j in range(len(all_boxes))
    ]
    if all_keyps is not None:
        cls_keyps_i = [
            _id_or_index(ix, all_keyps[j]) for j in range(len(all_keyps))
        ]
    else:
        cls_keyps_i = None
    if all_tracks is not None:
        cls_tracks_i = [
            _id_or_index(ix, all_tracks[j]) for j in range(len(all_tracks))
        ]
    else:
        cls_tracks_i = None
    pred = _vis_single_frame(im.copy(), cls_boxes_i, None, cls_keyps_i,
                             cls_tracks_i, thresh)
    gt = _vis_single_frame(
        im.copy(), [[], _convert_roidb_to_pred_boxes(entry['boxes'])], None,
        [[], _convert_roidb_to_pred_keyps(entry['gt_keypoints'])],
        [[], _convert_roidb_to_pred_tracks(entry['tracks'])], 0.1)
    return gt, pred
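# NOTE: _id_or_index is referenced above but not defined in this excerpt. A
# plausible minimal version (an assumption, not the repo's actual code):
def _id_or_index(ix, val):
    # Empty per-class results pass through unchanged; otherwise select the
    # entry for image/video index ix.
    if len(val) == 0:
        return val
    return val[ix]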
Example #3
def compute_optical_flow(video_json_data):
    frames = [
        img_utils.read_image_video(el, key_frame_only=True)[0]
        for el, _ in video_json_data
    ]
    if len(frames) == 0:
        # Return empty lists so callers can still unpack (flows, neg_flows)
        return [], []
    frames = [
        cv2.cvtColor(el.astype('uint8'), cv2.COLOR_BGR2GRAY) for el in frames
    ]
    flows = []
    neg_flows = []
    all_pairs = [(frames[i], frames[i + 1]) for i in range(len(frames) - 1)]
    with closing(mp.Pool(32)) as pool:
        # https://stackoverflow.com/a/25968716/1492614
        all_pairs_flow = list(
            tqdm(pool.imap(run_farneback, all_pairs),
                 total=len(all_pairs),
                 desc='Computing flow',
                 leave=False))
        pool.terminate()
    # No negative flow is defined for the first frame
    neg_flows.append(np.zeros((frames[0].shape[0], frames[0].shape[1], 2)))
    for frame_id in range(len(all_pairs_flow)):
        flow = all_pairs_flow[frame_id]
        flows.append(flow)
        neg_flows.append(_compute_neg_flow(flow))
    # No flow is defined for the last frame
    flows.append(np.zeros((frames[0].shape[0], frames[0].shape[1], 2)))
    return flows, neg_flows
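# NOTE: run_farneback and _compute_neg_flow are assumed helpers that this
# excerpt does not define. The sketches below show one way they could look:
# run_farneback wrapping OpenCV's cv2.calcOpticalFlowFarneback, and the
# "negative" flow crudely approximated by negating the vectors (the actual
# code may instead re-estimate flow on the reversed frame pair).
import cv2

def run_farneback(pair):
    # Single-tuple argument so the function can be fed to pool.imap directly
    prev_gray, next_gray = pair
    return cv2.calcOpticalFlowFarneback(
        prev_gray, next_gray, None,  # None: no initial flow estimate
        0.5,  # pyr_scale
        3,    # levels
        15,   # winsize
        3,    # iterations
        5,    # poly_n
        1.2,  # poly_sigma
        0)    # flags

def _compute_neg_flow(flow):
    # Reversing the vectors is only an approximation of true backward flow
    return -flow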
Example #4
def vis_predictions(entry, cur_boxes, cur_poses):
    """Visualize the tube and keypoint predictions on the tubes.
    Args:
        entry (dict or image/video): An entry from the roidb, or the image
            itself (if overlaying).
        cur_boxes (np.ndarray): Shape Nx(4T+1); the last column is the score.
        cur_poses (list of np.ndarray): N elements, each of shape 4x(17T).
    """
    if isinstance(entry, dict):
        frames = image_utils.read_image_video(entry)
    else:
        frames = entry
    if len(cur_boxes) == 0 or cur_boxes.size == 0 or len(cur_poses) == 0:
        return frames
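    # Boxes are tubes spanning T frames: 4 coordinates per frame plus one
    # trailing score column, hence the Nx(4T+1) shape.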
    time_dim = (cur_boxes.shape[-1] - 1) // 4
    num_keypoints = cur_poses[0].shape[-1] // time_dim
    res = []
    for t in range(time_dim):
        pred = vis_one_image_opencv(
            frames[t],
            cur_boxes[:, np.array(list(range(4 * t, 4 * (t + 1))) + [-1])],
            keypoints=[
                el[..., t * num_keypoints:(t + 1) * num_keypoints]
                for el in cur_poses
            ],
            show_box=True)
        res.append(pred)
    return res
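# The column selection in vis_predictions picks the t-th frame's four box
# coordinates together with the shared trailing score column. A tiny
# standalone demonstration with made-up numbers:
import numpy as np

boxes = np.arange(18, dtype=np.float32).reshape(2, 9)  # N=2 tubes, T=2 frames
t = 1
cols = list(range(4 * t, 4 * (t + 1))) + [-1]  # frame-1 coords + score column
print(boxes[:, np.array(cols)])  # each row: [x1, y1, x2, y2, score]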
Example #5
def test_net(ind_range=None):
    assert cfg.TEST.WEIGHTS != '', \
        'TEST.WEIGHTS must be set to the model file to test'
    assert not cfg.MODEL.RPN_ONLY, \
        'Use rpn_generate to generate proposals from RPN-only models'
    assert cfg.TEST.DATASET != '', \
        'TEST.DATASET must be set to the dataset name to test'

    output_dir = get_output_dir(training=False)
    roidb, dataset, start_ind, end_ind, total_num_images = \
        get_roidb_and_dataset(ind_range)
    model = initialize_model_from_cfg()
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images)
    timers = defaultdict(Timer)
    gpu_dev = core.DeviceOption(caffe2_pb2.CUDA, cfg.ROOT_GPU_ID)
    name_scope = 'gpu_{}'.format(cfg.ROOT_GPU_ID)
    for i, entry in enumerate(roidb):
        if cfg.MODEL.FASTER_RCNN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select only the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = entry['boxes'][entry['gt_classes'] == 0]
            if len(box_proposals) == 0:
                continue

        im = image_utils.read_image_video(entry)
        with core.NameScope(name_scope):
            with core.DeviceScope(gpu_dev):
                cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all(
                    model, im, box_proposals, timers)

        extend_results(i, all_boxes, cls_boxes_i)
        if cls_segms_i is not None:
            extend_results(i, all_segms, cls_segms_i)
        if cls_keyps_i is not None:
            extend_results(i, all_keyps, cls_keyps_i)

        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = (timers['im_detect_bbox'].average_time +
                        timers['im_detect_mask'].average_time +
                        timers['im_detect_keypoints'].average_time)
            misc_time = (timers['misc_bbox'].average_time +
                         timers['misc_mask'].average_time +
                         timers['misc_keypoints'].average_time)
            logger.info(('im_detect: range [{:d}, {:d}] of {:d}: '
                         '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})').format(
                             start_ind + 1, end_ind, total_num_images,
                             start_ind + i + 1, start_ind + num_images,
                             det_time, misc_time, eta))

        if cfg.VIS:
            im_name = os.path.splitext(os.path.basename(entry['image']))[0]
            vis_utils.vis_one_image(im[:, :, ::-1],
                                    '{:d}_{:s}'.format(i, im_name),
                                    os.path.join(output_dir, 'vis'),
                                    cls_boxes_i,
                                    segms=cls_segms_i,
                                    keypoints=cls_keyps_i,
                                    thresh=cfg.VIS_THR,
                                    box_alpha=0.8,
                                    dataset=dataset,
                                    show_class=True)

    cfg_yaml = yaml.dump(cfg)
    if ind_range is not None:
        det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range)
    else:
        det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)
    robust_pickle_dump(
        dict(all_boxes=all_boxes,
             all_segms=all_segms,
             all_keyps=all_keyps,
             cfg=cfg_yaml), det_file)
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_boxes, all_segms, all_keyps
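# NOTE: empty_results and extend_results belong to the surrounding test
# engine and are not shown. The sketches below describe the per-class,
# per-image layout they are assumed to maintain (standard in Detectron-style
# code); names are hypothetical.
def empty_results_sketch(num_classes, num_images):
    # One slot per (class, image); each box slot later holds an Nx5 array of
    # [x1, y1, x2, y2, score] detections, with analogous segms/keyps entries.
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    all_segms = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    all_keyps = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    return all_boxes, all_segms, all_keyps

def extend_results_sketch(index, all_res, im_res):
    # Copy one image's per-class results into the global structure
    # (class 0 is background, so it is skipped).
    for j in range(1, len(im_res)):
        all_res[j][index] = im_res[j]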