def roi_propagation(vid_proto, box_proto, net, det_fun=im_detect,
                    scheme='max', length=None, sample_rate=1, offset=0,
                    cls_indices=None, keep_feat=False, batch_size=1024):
    """Propagate the boxes of `box_proto` through the frames of a video.

    Tracks are initialised from `box_proto` with `_box_proto_to_track`. For
    every frame that has ROIs (as reported by `_cur_rois`), detection is run
    on those ROIs, the per-class boxes are merged into one predicted box per
    ROI with `score_guided_box_merge`, and the tracks are updated in place
    with `_update_track`.

    Args:
        vid_proto: video protocol dict; reads `['video']` and `['frames']`
            (each frame is a dict with a `'frame'` id).
        box_proto: box protocol handed to `_box_proto_to_track`.
        net: network handed to `_batch_im_detect`.
        det_fun: detection function handed to `_batch_im_detect`.
        scheme: merge scheme forwarded to `score_guided_box_merge`.
        length: track length; falls back to the last frame id when falsy.
        sample_rate: forwarded to `_box_proto_to_track`.
        offset: forwarded to `_box_proto_to_track`.
        cls_indices: optional class indices; when given, scores and boxes are
            restricted to these classes and scores are re-normalised per row.
        keep_feat: when false, features are replaced by None before the
            tracks are updated.
        batch_size: forwarded to `_batch_im_detect`.

    Returns:
        dict with keys `'video'`, `'method'` (`'roi_propagation'`) and
        `'tracks'`.
    """
    track_proto = {}
    track_proto['video'] = vid_proto['video']
    track_proto['method'] = 'roi_propagation'
    max_frame = vid_proto['frames'][-1]['frame']
    if not length:
        length = max_frame
    tracks = _box_proto_to_track(box_proto, max_frame, length, sample_rate,
                                 offset)

    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        # Load the image of the current frame.
        im = imread(frame_path_at(vid_proto, frame_id))

        # Extract the ROIs of the tracks that are active on this frame.
        rois, track_index = _cur_rois(tracks, frame_id)
        # Explicit length test: rois may be an array, whose truth value would
        # be ambiguous.
        if len(rois) == 0:
            continue

        timer = Timer()
        timer.tic()

        # scores: n x c, boxes: n x (c x 4)
        scores, boxes, features = _batch_im_detect(net, im, rois, det_fun,
                                                   batch_size)
        if not keep_feat:
            features = None

        if cls_indices is not None:
            boxes = boxes[:, cls_indices, :]
            scores = scores[:, cls_indices]
            # Re-normalise so the selected class scores sum to one per ROI.
            scores = scores / np.sum(scores, axis=1, keepdims=True)

        # Merge per-class boxes into a single prediction per ROI.
        pred_boxes = score_guided_box_merge(scores, boxes, scheme)

        _update_track(tracks, boxes, pred_boxes, scores, features,
                      track_index, frame_id)
        timer.toc()
        # `.format` must be applied to the string, not to the result of
        # print(): the latter only works as a Python 2 print statement.
        print('Frame {}: Detection took {:.3f}s for '
              '{:d} object proposals'.format(frame_id, timer.total_time,
                                             len(rois)))

    track_proto['tracks'] = tracks
    return track_proto
def show_tracks(vid_proto, track_proto):
    """Display every frame of the video with its track boxes drawn on it.

    Each frame is shown in the 'tracks' window until a key is pressed.
    Pressing 'q' closes all windows and exits the process with status 0.
    """
    quit_key = ord('q')
    for frame_info in vid_proto['frames']:
        frame_id = frame_info['frame']
        image = imread(frame_path_at(vid_proto, frame_id))

        # One box per tracklet for the current frame.
        frame_boxes = []
        for tracklet in track_proto['tracks']:
            frame_boxes.append(track_box_at_frame(tracklet, frame_id))

        canvas = add_bbox(image, frame_boxes, None, None, 2)
        cv2.imshow('tracks', canvas)

        # Any key other than 'q' advances to the next frame.
        if cv2.waitKey(0) != quit_key:
            continue
        cv2.destroyAllWindows()
        sys.exit(0)
    cv2.destroyAllWindows()
def show_track_res(track_res, vid_proto):
    """Display per-frame tracking results one frame at a time.

    Iterates over `track_res` and stops at the first entry whose 'frame' is
    -1. For every other entry the boxes in its 'roi' field are drawn on the
    frame image. Pressing 'q' closes all windows and exits the process with
    status 0; any other key advances to the next entry.
    """
    window_name = 'tracks'
    cv2.namedWindow(window_name)
    for entry in track_res:
        frame_id = entry['frame']
        # A frame id of -1 ends the display loop.
        if frame_id == -1:
            break

        image = imread(frame_path_at(vid_proto, frame_id))
        roi_boxes = entry['roi'].tolist()
        canvas = add_bbox(image, roi_boxes, None, None, 2)
        cv2.imshow(window_name, canvas)

        pressed = cv2.waitKey(0)
        if pressed == ord('q'):
            cv2.destroyAllWindows()
            sys.exit(0)
    cv2.destroyAllWindows()
def track_propagation(vid_proto, track_proto, net, det_fun=im_detect,
                      cls_indices=None, keep_feat=False, batch_size=1024):
    """Re-score existing tracks by running detection on their boxes.

    For every frame that has ROIs (as reported by `_cur_rois`), detection is
    run on those ROIs and the results are written back to the tracks with
    `_update_track_scores_boxes`.

    Args:
        vid_proto: video protocol dict; reads `['video']` and `['frames']`
            (each frame is a dict with a `'frame'` id).
        track_proto: track protocol dict; reads `['tracks']`.
        net: network handed to `_batch_im_detect`.
        det_fun: detection function handed to `_batch_im_detect`.
        cls_indices: optional class indices; when given, scores are
            restricted to these classes and re-normalised per row.
        keep_feat: when false, features are replaced by None before the
            tracks are updated.
        batch_size: forwarded to `_batch_im_detect`.

    Returns:
        dict with keys `'video'`, `'method'` (`'track_propagation'`) and
        `'tracks'`.
    """
    new_track_proto = {}
    new_track_proto['video'] = vid_proto['video']
    new_track_proto['method'] = 'track_propagation'
    # NOTE(review): this is a shallow copy, so if the update helper mutates
    # the individual tracks, the tracks held by `track_proto` change too --
    # confirm that this is intended.
    tracks = copy.copy(track_proto['tracks'])

    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        # Load the image of the current frame.
        im = imread(frame_path_at(vid_proto, frame_id))

        # Extract the ROIs of the tracks that are active on this frame.
        rois, track_index = _cur_rois(tracks, frame_id)
        # Explicit length test: rois may be an array, whose truth value would
        # be ambiguous.
        if len(rois) == 0:
            continue

        timer = Timer()
        timer.tic()

        # scores: n x c, boxes: n x (c x 4)
        scores, boxes, features = _batch_im_detect(net, im, rois, det_fun,
                                                   batch_size)
        if not keep_feat:
            features = None

        if cls_indices is not None:
            # NOTE(review): only scores are restricted to cls_indices here;
            # boxes keep all classes -- confirm the update helper expects
            # that.
            scores = scores[:, cls_indices]
            # Re-normalise so the selected class scores sum to one per ROI.
            scores = scores / np.sum(scores, axis=1, keepdims=True)

        _update_track_scores_boxes(tracks, scores, boxes, features,
                                   track_index, frame_id)
        timer.toc()
        # `.format` must be applied to the string, not to the result of
        # print(): the latter only works as a Python 2 print statement.
        print('Frame {}: Detection took {:.3f}s for '
              '{:d} object proposals'.format(frame_id, timer.total_time,
                                             len(rois)))

    new_track_proto['tracks'] = tracks
    return new_track_proto
def naive_box_regression(net_rpn, net_no_rpn, vid_proto, scheme='max',
                         class_idx=None):
    """Generate tubelet proposals from the region proposals of the first frame.

    The first frame is processed with `net_rpn` (called with no input boxes);
    every later frame is processed with `net_no_rpn`, using the boxes
    predicted on the previous frame as input.

    Args:
        net_rpn: network used on the first frame.
        net_no_rpn: network used on all subsequent frames.
        vid_proto: video protocol dict; reads `['video']` and `['frames']`
            (each frame is a dict with a `'frame'` id).
        scheme: how per-class regressions are reduced to one box per
            proposal: `'mean'` averages over classes, `'max'` takes the class
            with the highest non-background score, anything else selects
            `class_idx`. The first frame always uses the mean.
        class_idx: class index used when `scheme` is neither `'mean'` nor
            `'max'`.

    Returns:
        dict with keys `'video'`, `'method'` (`'naive_box_regression'`) and
        `'tracks'`.
    """
    track_proto = {}
    track_proto['video'] = vid_proto['video']
    track_proto['method'] = 'naive_box_regression'
    tracks = []
    pred_boxes = None

    for idx, frame in enumerate(vid_proto['frames'], start=1):
        # Load the image of the current frame.
        im = imread(frame_path_at(vid_proto, frame['frame']))

        # Detect all object classes and regress object bounds.
        timer = Timer()
        timer.tic()
        # Only the first frame goes through the proposal-generating network.
        detector = net_rpn if idx == 1 else net_no_rpn
        scores, boxes = im_detect(detector, im, pred_boxes)

        # n x (c * 4) -> n x c x 4
        boxes = boxes.reshape((boxes.shape[0], -1, 4))

        # Compare strings by value: identity (`is`) only happens to work for
        # interned literals and fails for strings built at runtime.
        if scheme == 'mean' or idx == 1:
            # Use the mean regression over classes as the prediction.
            pred_boxes = np.mean(boxes, axis=1)
        elif scheme == 'max':
            # Use the regression of the class with the maximum probability,
            # excluding the __background__ class at index 0.
            max_cls = scores[:, 1:].argmax(axis=1) + 1
            pred_boxes = boxes[np.arange(len(boxes)), max_cls, :]
        else:
            # Use the class-specific regression as the prediction.
            pred_boxes = boxes[:, class_idx, :]

        _append_boxes(tracks, frame['frame'], pred_boxes, scores)
        timer.toc()
        # `.format` must be applied to the string, not to the result of
        # print(): the latter only works as a Python 2 print statement.
        print('Detection took {:.3f}s for '
              '{:d} object proposals'.format(timer.total_time,
                                             boxes.shape[0]))

    track_proto['tracks'] = tracks
    return track_proto
vid_name, frame_idx = frame_name.split('/') frame_idx = int(frame_idx) # 0-based need_propagate = (frame_idx % args.sample_rate == 0) if (global_idx + 1) % 1000 == 0: end_time = time.time() print "{} frames processed: {} s".format( global_idx + 1, end_time - start_time) start_time = time.time() if not need_propagate or frame_det.shape[0] == 0: continue # read optical flows # rgb is reversed to bgr when using opencv flow_file = os.path.join(args.flow_dir, frame_name + '.png') optflow = imread(flow_file)[:, :, ::-1] x_map = optflow_transform(optflow[:, :, 0]) y_map = optflow_transform(optflow[:, :, 1]) n_row, n_col = x_map.shape # compute motion shift boxes = frame_det[:, :4] scores = frame_det[:, [4]] num_boxes = boxes.shape[0] box_avg_x = _boxes_average_sum(x_map, boxes) box_avg_x = box_avg_x.reshape((num_boxes, 1)) box_avg_y = _boxes_average_sum(y_map, boxes) box_avg_y = box_avg_y.reshape((num_boxes, 1)) motion_shift = np.concatenate( (box_avg_x, box_avg_y, box_avg_x, box_avg_y), axis=1)
parser.add_argument('--top_k', default=10, type=int) parser.add_argument('--no_nms', action='store_false', dest='nms') parser.set_defaults(nms=True) args = parser.parse_args() vid_proto = proto_load(args.vid_file) if args.save_dir and not os.path.isdir(args.save_dir): os.mkdir(args.save_dir) cls_index = imagenet_vdet_class_idx[args.cls] for frame in vid_proto['frames']: det_file = os.path.join( args.det_root, "{}.mat".format(os.path.splitext(frame['path'])[0])) det = sio.loadmat(det_file) frame_idx = frame['frame'] img = imread(frame_path_at(vid_proto, frame_idx)) boxes = det['boxes'][:, cls_index, :].astype('single') scores = det['zs'][:, cls_index].astype('single') if args.nms: keep = nms(np.hstack((boxes, scores[:, np.newaxis])), 0.3) else: keep = range(len(boxes)) kept_boxes = [boxes[i, :] for i in keep] kept_scores = [scores[i] for i in keep] top_idx = np.argsort(np.asarray(kept_scores))[::-1] top_boxes = [kept_boxes[top_idx[i]] for i in \ xrange(min(args.top_k, len(kept_boxes)))] top_scores = [kept_scores[top_idx[i]] for i in \ xrange(min(args.top_k, len(kept_boxes)))] det_img = add_bbox(img, top_boxes, top_scores) cv2.imshow('detection', det_img)
cls_boxes = boxes[inds, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, 0.3, force_cpu=True) for keep_id in keep: kept_track_ids.append(track_ids[inds[keep_id]]) kept_class.append(j) colors = unique_colors(len(kept_track_ids)) kept_tracks = [tracks[i] for i in kept_track_ids] idx = 0 while True: frame = vid_proto['frames'][idx] frame_id = frame['frame'] print "Frame id: {}".format(frame_id) img = imread(frame_path_at(vid_proto, frame['frame'])) boxes = [] scores = [] show_track_ids = [] cur_colors = [] cur_classes = [] for track_id, (class_id, track) in enumerate(zip(kept_class, kept_tracks)): if frame_id in track['frame']: boxes.append(track[args.box_key][track['frame'] == frame_id][ 0, class_id, :].tolist()) scores.append(track[args.score_key][track['frame'] == frame_id] [0, class_id].tolist()) cur_colors.append(colors[track_id]) cur_classes.append(imagenet_vdet_classes[class_id]) show_track_ids.append(track_id)
def sequence_roi_propagation(vid_proto, box_proto, net,
                             det_fun=sequence_im_detect, window=2,
                             scheme='max', length=None, sample_rate=1,
                             offset=0, keep_feat=False, batch_size=1024):
    """Propagate boxes through a video one frame sequence at a time.

    Tracks are initialised from `box_proto` with `_box_proto_to_track`. The
    video is split into frame sequences by `_sequence_frames`; for each
    sequence the ROIs of its first frame are fed to the sequence detector,
    and the returned boxes are written to the tracks for the following
    frames of that sequence.

    Args:
        vid_proto: video protocol dict; reads `['video']` and `['frames']`
            (each frame is a dict with a `'frame'` id).
        box_proto: box protocol handed to `_box_proto_to_track`.
        net: network handed to `_batch_sequence_im_detect`.
        det_fun: detection function handed to `_batch_sequence_im_detect`.
        window: forwarded to `_sequence_frames`.
        scheme: accepted for interface compatibility; not used here.
        length: track length; falls back to the last frame id when falsy.
        sample_rate: forwarded to `_box_proto_to_track`.
        offset: forwarded to `_box_proto_to_track`.
        keep_feat: when true, per-frame features are stored on the tracks
            under the `'feature'` key.
        batch_size: forwarded to `_batch_sequence_im_detect`.

    Returns:
        dict with keys `'video'`, `'method'` (`'sequence_roi_propagation'`)
        and `'tracks'`.
    """
    track_proto = {}
    track_proto['video'] = vid_proto['video']
    track_proto['method'] = 'sequence_roi_propagation'
    max_frame = vid_proto['frames'][-1]['frame']
    if not length:
        length = max_frame
    tracks = _box_proto_to_track(box_proto, max_frame, length, sample_rate,
                                 offset)

    track_anchors = sorted(set([track[0]['frame'] for track in tracks]))
    sequence_frames = _sequence_frames(vid_proto, window, track_anchors,
                                       length)

    for frames in sequence_frames:
        # Load every image of the sequence. A real list is required because
        # len(images) is used below; map() may return a lazy iterator.
        images = [imread(frame_path_at(vid_proto, x['frame']))
                  for x in frames]

        # Extract the ROIs on the first frame of the sequence.
        rois, track_index = _cur_rois(tracks, frames[0]['frame'])
        # Explicit length test: rois may be an array, whose truth value would
        # be ambiguous.
        if len(rois) == 0:
            continue

        timer = Timer()
        timer.tic()

        # scores: n x 2, boxes: n x ((len-1) x 4), features: n x (len x f)
        scores, boxes, features = _batch_sequence_im_detect(
            net, images, rois, det_fun, batch_size)

        boxes = boxes.reshape((len(rois), len(images) - 1, 4))
        if keep_feat:
            features = features.reshape((len(rois), len(images), -1))
        else:
            features = None

        frame_ids = [frame['frame'] for frame in frames]
        prev_id = -1
        for i, frame_id in enumerate(frame_ids):
            # Stop when encountering duplicate frames.
            if frame_id == prev_id:
                break
            prev_id = frame_id

            if i == 0:
                _update_track_by_key(tracks, 'bbox', rois, track_index,
                                     frame_id)
            else:
                # Minus 1 because boxes[:, 0] corresponds to the second frame.
                step_boxes = boxes[:, i - 1, :].tolist()
                _update_track_by_key(tracks, 'bbox', step_boxes, track_index,
                                     frame_id)
                _update_track_by_key(tracks, 'roi',
                                     boxes[:, i - 1, :].tolist(),
                                     track_index, frame_id)
            if keep_feat:
                _update_track_by_key(tracks, 'feature',
                                     features[:, i, :].tolist(),
                                     track_index, frame_id)
        timer.toc()
        # `.format` must be applied to the string, not to the result of
        # print(): the latter only works as a Python 2 print statement.
        print('Frame {}-{}: Detection took {:.3f}s for '
              '{:d} object proposals'.format(frame_ids[0], frame_ids[-1],
                                             timer.total_time, len(rois)))

    track_proto['tracks'] = tracks
    return track_proto
def roi_train_propagation(vid_proto, box_proto, net, det_fun=im_detect,
                          cls_indices=None, scheme='weighted', num_tracks=16,
                          length=20, fg_ratio=None, batch_size=16):
    """Propagate a random sample of boxes from a random start frame.

    A start frame is drawn at random among the frames that have at least
    `num_tracks` boxes. `num_tracks` boxes are sampled on it with
    `_sample_boxes` and then propagated over up to `length` frames: on each
    frame detection is run on the current ROIs and the merged predictions
    become the ROIs of the next frame.

    Args:
        vid_proto: video protocol dict; reads `['video']` and `['frames']`
            (each frame is a dict with a `'frame'` id).
        box_proto: box protocol for the same video as `vid_proto`.
        net: network handed to `_batch_im_detect`.
        det_fun: detection function handed to `_batch_im_detect`.
        cls_indices: optional class indices; when given, scores and boxes are
            restricted to these classes and scores are re-normalised per row.
        scheme: merge scheme forwarded to `score_guided_box_merge`.
        num_tracks: number of boxes sampled and propagated.
        length: number of result slots, i.e. the maximum number of frames
            processed.
        fg_ratio: forwarded to `_sample_boxes`.
        batch_size: forwarded to `_batch_im_detect`.

    Returns:
        list of `length` dicts. Processed slots hold `'frame'`, `'roi'`,
        `'bbox'` and `'feat'`; slots that were never reached keep
        `'frame' == -1`.

    Raises:
        ValueError: if no frame has at least `num_tracks` boxes.
    """
    assert vid_proto['video'] == box_proto['video']

    # Collect the frames that have enough boxes to start tracking from.
    all_boxes = {}
    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        boxes = boxes_at_frame(box_proto, frame_id)
        if len(boxes) >= num_tracks:
            all_boxes[frame_id] = boxes

    # random.choice needs an indexable sequence, so materialise the keys, and
    # test for emptiness explicitly instead of hiding it behind a bare except.
    candidate_frames = list(all_boxes)
    if not candidate_frames:
        raise ValueError('{} has not valid frames for tracking.'.format(
            vid_proto['video']))
    st_frame = random.choice(candidate_frames)
    st_boxes = _sample_boxes(all_boxes[st_frame], num_tracks, fg_ratio)

    results = [{'frame': -1} for _ in range(length)]
    anchor = 0
    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        if frame_id < st_frame:
            continue
        if anchor >= length:
            break

        res = results[anchor]
        res['frame'] = frame_id
        if anchor == 0:
            # The first slot starts from the sampled boxes; later slots get
            # their ROIs from the previous frame's predictions.
            res['roi'] = np.asarray([st_box['bbox'] for st_box in st_boxes])

        # Load the image of the current frame.
        im = imread(frame_path_at(vid_proto, frame_id))

        rois = res['roi']
        assert rois.shape[0] == num_tracks

        # scores: n x c, boxes: n x (c x 4), features: n * c
        scores, boxes, features = _batch_im_detect(net, im, rois, det_fun,
                                                   batch_size)

        if cls_indices is not None:
            boxes = boxes[:, cls_indices, :]
            scores = scores[:, cls_indices]
            # Re-normalise so the selected class scores sum to one per ROI.
            scores = scores / np.sum(scores, axis=1, keepdims=True)

        # Merge per-class boxes into a single prediction per ROI.
        pred_boxes = score_guided_box_merge(scores, boxes, scheme)

        res['bbox'] = boxes
        res['feat'] = features
        if anchor + 1 < length:
            results[anchor + 1]['roi'] = pred_boxes
        anchor += 1

    return results
import cv2

if __name__ == '__main__':
    # Visualise annotated tracks of a video: either save each rendered frame
    # to --save_dir or show it interactively ('q' quits).
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('vid_file')
    arg_parser.add_argument('annot_file')
    arg_parser.add_argument('--save_dir', default=None)
    args = arg_parser.parse_args()

    vid_proto = proto_load(args.vid_file)
    annot_proto = proto_load(args.annot_file)

    colors = unique_colors(len(annot_proto['annotations']))

    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        img = imread(frame_path_at(vid_proto, frame_id))

        # One box per annotated track for the current frame.
        tracklets = [annot['track'] for annot in annot_proto['annotations']]
        boxes = []
        for tracklet in tracklets:
            boxes.append(track_box_at_frame(tracklet, frame_id))
        tracked = add_bbox(img, boxes, None, 10)

        if not args.save_dir:
            # Interactive mode: wait for a key, quit on 'q'.
            cv2.imshow('tracks', tracked)
            if cv2.waitKey(0) == ord('q'):
                cv2.destroyAllWindows()
                sys.exit(0)
            continue

        # Save mode: create the output directory on demand.
        if not os.path.isdir(args.save_dir):
            os.makedirs(args.save_dir)
        out_name = "{:04d}.jpg".format(frame_id)
        imwrite(os.path.join(args.save_dir, out_name), tracked)
    cv2.destroyAllWindows()