Exemplo n.º 1
0
def roi_propagation(vid_proto,
                    box_proto,
                    net,
                    det_fun=im_detect,
                    scheme='max',
                    length=None,
                    sample_rate=1,
                    offset=0,
                    cls_indices=None,
                    keep_feat=False,
                    batch_size=1024):
    """Propagate box proposals through a video one frame at a time.

    Tracks are seeded from ``box_proto`` with ``_box_proto_to_track``. For
    every frame that has active ROIs, the ROIs are run through the detector,
    the per-class boxes are merged into one predicted box per ROI with
    ``score_guided_box_merge`` and the result is handed to ``_update_track``.

    Args:
        vid_proto: video protocol dict; its 'video' and 'frames' entries are
            read, and each frame must carry a 'frame' id.
        box_proto: box protocol used to seed the tracks.
        net: network passed through to the detection function.
        det_fun: detection callable forwarded to ``_batch_im_detect``.
        scheme: merge scheme forwarded to ``score_guided_box_merge``.
        length: track length; any falsy value (``None`` or ``0``) means
            "use the id of the last frame".
        sample_rate: forwarded to ``_box_proto_to_track``.
        offset: forwarded to ``_box_proto_to_track``.
        cls_indices: optional class indices; when given, boxes and scores are
            restricted to these classes and every score row is renormalized
            to sum to 1.
        keep_feat: when false, detector features are dropped (``None`` is
            passed to ``_update_track`` instead).
        batch_size: forwarded to ``_batch_im_detect``.

    Returns:
        dict with keys 'video', 'method' (``'roi_propagation'``) and 'tracks'.
    """
    track_proto = {}
    track_proto['video'] = vid_proto['video']
    track_proto['method'] = 'roi_propagation'
    max_frame = vid_proto['frames'][-1]['frame']
    if not length:
        length = max_frame
    tracks = _box_proto_to_track(box_proto, max_frame, length, sample_rate,
                                 offset)

    for frame in vid_proto['frames']:
        frame_id = frame['frame']

        # Load the frame image.
        image_name = frame_path_at(vid_proto, frame_id)
        im = imread(image_name)

        # Extract the ROIs of the tracks on the current frame; nothing to do
        # for frames without any.
        rois, track_index = _cur_rois(tracks, frame_id)
        if len(rois) == 0:
            continue

        timer = Timer()
        timer.tic()

        # Original note: scores: n x c, boxes: n x (c x 4).
        # NOTE(review): boxes is indexed with three axes below, so it is
        # presumably already shaped n x c x 4 here -- confirm.
        scores, boxes, features = _batch_im_detect(net, im, rois, det_fun,
                                                   batch_size)

        if not keep_feat:
            features = None
        if cls_indices is not None:
            boxes = boxes[:, cls_indices, :]
            scores = scores[:, cls_indices]
            # Renormalize so the kept class scores sum to 1 per ROI.
            scores = scores / np.sum(scores, axis=1, keepdims=True)

        # Merge per-class boxes into a single prediction per ROI.
        pred_boxes = score_guided_box_merge(scores, boxes, scheme)

        # Update track bounding boxes.
        _update_track(tracks, boxes, pred_boxes, scores, features, track_index,
                      frame_id)
        timer.toc()
        # .format() must be applied to the string, not to the result of
        # print(): the latter only works with the Python 2 print statement.
        print('Frame {}: Detection took {:.3f}s for '
              '{:d} object proposals'.format(frame_id, timer.total_time,
                                             len(rois)))
    track_proto['tracks'] = tracks
    return track_proto
Exemplo n.º 2
0
def show_tracks(vid_proto, track_proto):
    """Display every frame of the video with the boxes of all tracks drawn.

    Each frame is shown in the 'tracks' window and the viewer blocks on a key
    press before moving on. Pressing 'q' closes all windows and exits the
    process with status 0.
    """
    quit_key = ord('q')
    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        img = imread(frame_path_at(vid_proto, frame_id))

        # One box lookup per tracklet for this frame.
        boxes = []
        for tracklet in track_proto['tracks']:
            boxes.append(track_box_at_frame(tracklet, frame_id))

        tracked = add_bbox(img, boxes, None, None, 2)
        cv2.imshow('tracks', tracked)
        pressed = cv2.waitKey(0)
        if pressed == quit_key:
            cv2.destroyAllWindows()
            sys.exit(0)
    cv2.destroyAllWindows()
Exemplo n.º 3
0
def show_track_res(track_res, vid_proto):
    """Display per-frame tracking results, one frame per key press.

    Iteration stops at the first result whose 'frame' is -1. Pressing 'q'
    closes all windows and exits the process with status 0.
    """
    cv2.namedWindow('tracks')
    quit_key = ord('q')
    for frame_res in track_res:
        frame = frame_res['frame']
        # A frame id of -1 ends the display loop.
        if frame == -1:
            break
        img = imread(frame_path_at(vid_proto, frame))
        tracked = add_bbox(img, frame_res['roi'].tolist(), None, None, 2)
        cv2.imshow('tracks', tracked)
        pressed = cv2.waitKey(0)
        if pressed == quit_key:
            cv2.destroyAllWindows()
            sys.exit(0)
    cv2.destroyAllWindows()
Exemplo n.º 4
0
def track_propagation(vid_proto,
                      track_proto,
                      net,
                      det_fun=im_detect,
                      cls_indices=None,
                      keep_feat=False,
                      batch_size=1024):
    """Re-score existing tracks by running the detector on their boxes.

    For every frame that has active ROIs, the ROIs of the given tracks are
    run through the detector and the resulting scores, boxes and (optionally)
    features are handed to ``_update_track_scores_boxes``.

    Args:
        vid_proto: video protocol dict; its 'video' and 'frames' entries are
            read, and each frame must carry a 'frame' id.
        track_proto: track protocol dict; its 'tracks' entry is used.
        net: network passed through to the detection function.
        det_fun: detection callable forwarded to ``_batch_im_detect``.
        cls_indices: optional class indices; when given, scores are
            restricted to these classes and every score row is renormalized
            to sum to 1.
        keep_feat: when false, detector features are dropped (``None`` is
            passed on instead).
        batch_size: forwarded to ``_batch_im_detect``.

    Returns:
        dict with keys 'video', 'method' (``'track_propagation'``) and
        'tracks'.
    """
    new_track_proto = {}
    new_track_proto['video'] = vid_proto['video']
    new_track_proto['method'] = 'track_propagation'
    # NOTE(review): this is a shallow copy, so the individual track objects
    # are still shared with track_proto['tracks'] -- confirm that in-place
    # updates to the caller's tracks are acceptable.
    tracks = copy.copy(track_proto['tracks'])

    for frame in vid_proto['frames']:
        frame_id = frame['frame']

        # Load the frame image.
        image_name = frame_path_at(vid_proto, frame_id)
        im = imread(image_name)

        # Extract the ROIs of the tracks on the current frame; nothing to do
        # for frames without any.
        rois, track_index = _cur_rois(tracks, frame_id)
        if len(rois) == 0:
            continue

        timer = Timer()
        timer.tic()

        # Original note: scores: n x c, boxes: n x (c x 4).
        scores, boxes, features = _batch_im_detect(net, im, rois, det_fun,
                                                   batch_size)

        if not keep_feat:
            features = None
        if cls_indices is not None:
            # NOTE(review): only scores are restricted to cls_indices; boxes
            # are passed on unsliced -- confirm this is intended.
            scores = scores[:, cls_indices]
            # Renormalize so the kept class scores sum to 1 per ROI.
            scores = scores / np.sum(scores, axis=1, keepdims=True)

        # Update track scores and boxes.
        _update_track_scores_boxes(tracks, scores, boxes, features,
                                   track_index, frame_id)
        timer.toc()
        # .format() must be applied to the string, not to the result of
        # print(): the latter only works with the Python 2 print statement.
        print('Frame {}: Detection took {:.3f}s for '
              '{:d} object proposals'.format(frame_id, timer.total_time,
                                             len(rois)))
    new_track_proto['tracks'] = tracks
    return new_track_proto
Exemplo n.º 5
0
def naive_box_regression(net_rpn,
                         net_no_rpn,
                         vid_proto,
                         scheme='max',
                         class_idx=None):
    """Generate tubelet proposals from the region proposals of the first frame.

    The first frame is processed with ``net_rpn`` (no input boxes); every
    later frame is processed with ``net_no_rpn`` using the boxes predicted on
    the previous frame as input.

    Args:
        net_rpn: network used on the first frame.
        net_no_rpn: network used on all subsequent frames.
        vid_proto: video protocol dict; its 'video' and 'frames' entries are
            read, and each frame must carry a 'frame' id.
        scheme: how per-class regressions are reduced to one box per
            proposal: ``'mean'`` averages over classes, ``'max'`` takes the
            regression of the highest-scoring non-background class, any other
            value selects the regression of ``class_idx``. The first frame
            always uses the mean.
        class_idx: class index used when ``scheme`` is neither ``'mean'`` nor
            ``'max'``.

    Returns:
        dict with keys 'video', 'method' (``'naive_box_regression'``) and
        'tracks'.
    """
    track_proto = {}
    track_proto['video'] = vid_proto['video']
    track_proto['method'] = 'naive_box_regression'
    tracks = []
    pred_boxes = None

    for idx, frame in enumerate(vid_proto['frames'], start=1):
        # Load the frame image.
        image_name = frame_path_at(vid_proto, frame['frame'])
        im = imread(image_name)

        # Detect all object classes and regress object bounds.
        timer = Timer()
        timer.tic()
        if idx == 1:
            scores, boxes = im_detect(net_rpn, im, pred_boxes)
        else:
            scores, boxes = im_detect(net_no_rpn, im, pred_boxes)

        # One 4-vector per (proposal, class).
        boxes = boxes.reshape((boxes.shape[0], -1, 4))
        # Compare strings by value: identity ("is") only matches when the
        # caller's string happens to be the same interned object, and a miss
        # would silently fall through to the class-specific branch.
        if scheme == 'mean' or idx == 1:
            # Use mean regressions as predictions.
            pred_boxes = np.mean(boxes, axis=1)
        elif scheme == 'max':
            # Use the regressions of the class with the maximum probability,
            # excluding the __background__ class at index 0.
            max_cls = scores[:, 1:].argmax(axis=1) + 1
            pred_boxes = boxes[np.arange(len(boxes)), max_cls, :]
        else:
            # Use class-specific regression as predictions.
            pred_boxes = boxes[:, class_idx, :]
        _append_boxes(tracks, frame['frame'], pred_boxes, scores)
        timer.toc()
        # .format() must be applied to the string, not to the result of
        # print(): the latter only works with the Python 2 print statement.
        print('Detection took {:.3f}s for '
              '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))
    track_proto['tracks'] = tracks
    return track_proto
            vid_name, frame_idx = frame_name.split('/')
            frame_idx = int(frame_idx)  # 0-based
            need_propagate = (frame_idx % args.sample_rate == 0)

            if (global_idx + 1) % 1000 == 0:
                end_time = time.time()
                print "{} frames processed: {} s".format(
                    global_idx + 1, end_time - start_time)
                start_time = time.time()

            if not need_propagate or frame_det.shape[0] == 0: continue

            # read optical flows
            # rgb is reversed to bgr when using opencv
            flow_file = os.path.join(args.flow_dir, frame_name + '.png')
            optflow = imread(flow_file)[:, :, ::-1]
            x_map = optflow_transform(optflow[:, :, 0])
            y_map = optflow_transform(optflow[:, :, 1])
            n_row, n_col = x_map.shape

            # compute motion shift
            boxes = frame_det[:, :4]
            scores = frame_det[:, [4]]
            num_boxes = boxes.shape[0]
            box_avg_x = _boxes_average_sum(x_map, boxes)
            box_avg_x = box_avg_x.reshape((num_boxes, 1))
            box_avg_y = _boxes_average_sum(y_map, boxes)
            box_avg_y = box_avg_y.reshape((num_boxes, 1))
            motion_shift = np.concatenate(
                (box_avg_x, box_avg_y, box_avg_x, box_avg_y), axis=1)
Exemplo n.º 7
0
    parser.add_argument('--top_k', default=10, type=int)
    parser.add_argument('--no_nms', action='store_false', dest='nms')
    parser.set_defaults(nms=True)
    args = parser.parse_args()

    vid_proto = proto_load(args.vid_file)
    if args.save_dir and not os.path.isdir(args.save_dir):
        os.mkdir(args.save_dir)
    cls_index = imagenet_vdet_class_idx[args.cls]

    for frame in vid_proto['frames']:
        det_file = os.path.join(
            args.det_root, "{}.mat".format(os.path.splitext(frame['path'])[0]))
        det = sio.loadmat(det_file)
        frame_idx = frame['frame']
        img = imread(frame_path_at(vid_proto, frame_idx))
        boxes = det['boxes'][:, cls_index, :].astype('single')
        scores = det['zs'][:, cls_index].astype('single')
        if args.nms:
            keep = nms(np.hstack((boxes, scores[:, np.newaxis])), 0.3)
        else:
            keep = range(len(boxes))
        kept_boxes = [boxes[i, :] for i in keep]
        kept_scores = [scores[i] for i in keep]
        top_idx = np.argsort(np.asarray(kept_scores))[::-1]
        top_boxes = [kept_boxes[top_idx[i]] for i in \
            xrange(min(args.top_k, len(kept_boxes)))]
        top_scores = [kept_scores[top_idx[i]] for i in \
            xrange(min(args.top_k, len(kept_boxes)))]
        det_img = add_bbox(img, top_boxes, top_scores)
        cv2.imshow('detection', det_img)
Exemplo n.º 8
0
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, 0.3, force_cpu=True)
            for keep_id in keep:
                kept_track_ids.append(track_ids[inds[keep_id]])
                kept_class.append(j)

    colors = unique_colors(len(kept_track_ids))
    kept_tracks = [tracks[i] for i in kept_track_ids]
    idx = 0
    while True:
        frame = vid_proto['frames'][idx]
        frame_id = frame['frame']
        print "Frame id: {}".format(frame_id)
        img = imread(frame_path_at(vid_proto, frame['frame']))
        boxes = []
        scores = []
        show_track_ids = []
        cur_colors = []
        cur_classes = []
        for track_id, (class_id,
                       track) in enumerate(zip(kept_class, kept_tracks)):
            if frame_id in track['frame']:
                boxes.append(track[args.box_key][track['frame'] == frame_id][
                    0, class_id, :].tolist())
                scores.append(track[args.score_key][track['frame'] == frame_id]
                              [0, class_id].tolist())
                cur_colors.append(colors[track_id])
                cur_classes.append(imagenet_vdet_classes[class_id])
                show_track_ids.append(track_id)
Exemplo n.º 9
0
def sequence_roi_propagation(vid_proto,
                             box_proto,
                             net,
                             det_fun=sequence_im_detect,
                             window=2,
                             scheme='max',
                             length=None,
                             sample_rate=1,
                             offset=0,
                             keep_feat=False,
                             batch_size=1024):
    """Propagate box proposals through a video using multi-frame windows.

    Tracks are seeded from ``box_proto`` with ``_box_proto_to_track``. The
    video is split into frame sequences by ``_sequence_frames``; for each
    sequence the ROIs on its first frame are run through the sequence
    detector, and the returned boxes are written to the tracks for every
    following frame of the sequence.

    Args:
        vid_proto: video protocol dict; its 'video' and 'frames' entries are
            read, and each frame must carry a 'frame' id.
        box_proto: box protocol used to seed the tracks.
        net: network passed through to the detection function.
        det_fun: detection callable forwarded to
            ``_batch_sequence_im_detect``.
        window: forwarded to ``_sequence_frames``.
        scheme: accepted for interface compatibility; not used by this
            function.
        length: track length; any falsy value (``None`` or ``0``) means
            "use the id of the last frame".
        sample_rate: forwarded to ``_box_proto_to_track``.
        offset: forwarded to ``_box_proto_to_track``.
        keep_feat: when true, per-frame features are stored on the tracks
            under the 'feature' key.
        batch_size: forwarded to ``_batch_sequence_im_detect``.

    Returns:
        dict with keys 'video', 'method' (``'sequence_roi_propagation'``) and
        'tracks'.
    """
    track_proto = {}
    track_proto['video'] = vid_proto['video']
    track_proto['method'] = 'sequence_roi_propagation'
    max_frame = vid_proto['frames'][-1]['frame']
    if not length:
        length = max_frame
    tracks = _box_proto_to_track(box_proto, max_frame, length, sample_rate,
                                 offset)

    track_anchors = sorted(set([track[0]['frame'] for track in tracks]))
    sequence_frames = _sequence_frames(vid_proto, window, track_anchors,
                                       length)
    for frames in sequence_frames:
        # Load all images of the sequence. A real list is required because
        # len(images) is used below (map() is lazy on Python 3).
        images = [imread(frame_path_at(vid_proto, seq_frame['frame']))
                  for seq_frame in frames]

        # Extract the ROIs of the tracks on the first frame of the sequence;
        # nothing to do for sequences without any.
        rois, track_index = _cur_rois(tracks, frames[0]['frame'])
        if len(rois) == 0:
            continue

        timer = Timer()
        timer.tic()

        # Original note:
        # scores: n x 2, boxes: n x ((len-1) x 4), features: n x (len x f)
        scores, boxes, features = _batch_sequence_im_detect(
            net, images, rois, det_fun, batch_size)

        # One box per (roi, following frame); one feature row per
        # (roi, frame) when features are kept.
        boxes = boxes.reshape((len(rois), len(images) - 1, 4))
        if keep_feat:
            features = features.reshape((len(rois), len(images), -1))
        else:
            features = None

        frame_ids = [seq_frame['frame'] for seq_frame in frames]
        prev_id = -1
        for i, frame_id in enumerate(frame_ids):
            # Stop when encountering duplicate frames.
            if frame_id == prev_id:
                break
            prev_id = frame_id
            if i == 0:
                _update_track_by_key(tracks, 'bbox', rois, track_index,
                                     frame_id)
            else:
                # Minus 1 because boxes[:, 0] corresponds to the second
                # frame. tolist() is called separately for each key so the
                # stored 'bbox' and 'roi' lists do not alias each other.
                _update_track_by_key(tracks, 'bbox',
                                     boxes[:, i - 1, :].tolist(),
                                     track_index, frame_id)
                _update_track_by_key(tracks, 'roi',
                                     boxes[:, i - 1, :].tolist(),
                                     track_index, frame_id)
            if keep_feat:
                _update_track_by_key(tracks, 'feature',
                                     features[:, i, :].tolist(), track_index,
                                     frame_id)
        timer.toc()
        # .format() must be applied to the string, not to the result of
        # print(): the latter only works with the Python 2 print statement.
        print(
            'Frame {}-{}: Detection took {:.3f}s for '
            '{:d} object proposals'.format(frame_ids[0], frame_ids[-1],
                                           timer.total_time, len(rois)))
    track_proto['tracks'] = tracks
    return track_proto
Exemplo n.º 10
0
def roi_train_propagation(vid_proto,
                          box_proto,
                          net,
                          det_fun=im_detect,
                          cls_indices=None,
                          scheme='weighted',
                          num_tracks=16,
                          length=20,
                          fg_ratio=None,
                          batch_size=16):
    """Propagate a random sample of boxes from a random start frame.

    A start frame is drawn at random among the frames that have at least
    ``num_tracks`` boxes; ``num_tracks`` boxes are sampled on it with
    ``_sample_boxes`` and then propagated for up to ``length`` frames, each
    frame's merged predictions becoming the ROIs of the next one.

    Args:
        vid_proto: video protocol dict; its 'video' and 'frames' entries are
            read, and each frame must carry a 'frame' id.
        box_proto: box protocol dict; its 'video' must equal that of
            ``vid_proto``.
        net: network passed through to the detection function.
        det_fun: detection callable forwarded to ``_batch_im_detect``.
        cls_indices: optional class indices; when given, boxes and scores are
            restricted to these classes and every score row is renormalized
            to sum to 1.
        scheme: merge scheme forwarded to ``score_guided_box_merge``.
        num_tracks: number of boxes to sample and propagate.
        length: maximum number of frames to propagate over.
        fg_ratio: forwarded to ``_sample_boxes``.
        batch_size: forwarded to ``_batch_im_detect``.

    Returns:
        list of ``length`` dicts. Processed entries hold 'frame', 'roi',
        'bbox' and 'feat'; entries that were never reached keep
        ``'frame': -1``.

    Raises:
        ValueError: if no frame has at least ``num_tracks`` boxes.
    """
    assert vid_proto['video'] == box_proto['video']
    # Collect the boxes of every frame that has enough of them.
    all_boxes = {}
    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        boxes = boxes_at_frame(box_proto, frame_id)
        if len(boxes) >= num_tracks:
            all_boxes[frame_id] = boxes

    # Check for the empty case explicitly instead of wrapping random.choice
    # in a bare except, which would also hide unrelated errors.
    if not all_boxes:
        raise ValueError('{} has not valid frames for tracking.'.format(
            vid_proto['video']))
    # list() keeps this working where dict keys are a non-indexable view.
    st_frame = random.choice(list(all_boxes))
    st_boxes = _sample_boxes(all_boxes[st_frame], num_tracks, fg_ratio)

    results = [{'frame': -1} for _ in range(length)]
    anchor = 0
    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        # Skip everything before the start frame; stop once enough frames
        # have been processed.
        if frame_id < st_frame:
            continue
        if anchor >= length:
            break

        res = results[anchor]
        res['frame'] = frame_id
        if anchor == 0:
            # The first step starts from the sampled boxes; later steps use
            # the ROIs stored by the previous iteration.
            res['roi'] = np.asarray([st_box['bbox'] for st_box in st_boxes])

        # Load the frame image.
        image_name = frame_path_at(vid_proto, frame_id)
        im = imread(image_name)

        rois = res['roi']
        assert rois.shape[0] == num_tracks

        timer = Timer()
        timer.tic()

        # Original note: scores: n x c, boxes: n x (c x 4), features: n * c
        scores, boxes, features = _batch_im_detect(net, im, rois, det_fun,
                                                   batch_size)

        if cls_indices is not None:
            boxes = boxes[:, cls_indices, :]
            scores = scores[:, cls_indices]
            # Renormalize so the kept class scores sum to 1 per ROI.
            scores = scores / np.sum(scores, axis=1, keepdims=True)

        # Merge per-class boxes into a single prediction per ROI.
        pred_boxes = score_guided_box_merge(scores, boxes, scheme)

        res['bbox'] = boxes
        res['feat'] = features
        # Predictions on this frame are the ROIs of the next step.
        if anchor + 1 < length:
            results[anchor + 1]['roi'] = pred_boxes
        anchor += 1
    return results
Exemplo n.º 11
0
import cv2

if __name__ == '__main__':
    # Draw the annotation tracks of a video on each frame, then either save
    # the rendered frames to --save_dir or show them interactively.
    parser = argparse.ArgumentParser()
    parser.add_argument('vid_file')
    parser.add_argument('annot_file')
    parser.add_argument('--save_dir', default=None)
    args = parser.parse_args()

    vid_proto = proto_load(args.vid_file)
    annot_proto = proto_load(args.annot_file)

    # NOTE(review): colors is computed but never used below -- confirm
    # whether it was meant to be passed to add_bbox.
    colors = unique_colors(len(annot_proto['annotations']))

    # Create the output directory once instead of re-checking it per frame.
    if args.save_dir and not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)

    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        img = imread(frame_path_at(vid_proto, frame_id))
        boxes = [track_box_at_frame(annot['track'], frame_id)
                 for annot in annot_proto['annotations']]
        tracked = add_bbox(img, boxes, None, 10)
        if args.save_dir:
            imwrite(os.path.join(args.save_dir, "{:04d}.jpg".format(frame_id)),
                    tracked)
            continue

        # Interactive mode: wait for a key per frame; 'q' quits.
        cv2.imshow('tracks', tracked)
        if cv2.waitKey(0) == ord('q'):
            cv2.destroyAllWindows()
            sys.exit(0)

    # Tear down the display window once, after the last frame, rather than
    # destroying and recreating it for every frame.
    if not args.save_dir:
        cv2.destroyAllWindows()