def roi_propagation(vid_proto, box_proto, net, det_fun=im_detect,
                    scheme='max', length=None, sample_rate=1, offset=0,
                    cls_indices=None, keep_feat=False, batch_size=1024):
    track_proto = {}
    track_proto['video'] = vid_proto['video']
    track_proto['method'] = 'roi_propagation'
    max_frame = vid_proto['frames'][-1]['frame']
    if not length:
        length = max_frame
    tracks = _box_proto_to_track(box_proto, max_frame, length,
                                 sample_rate, offset)
    for idx, frame in enumerate(vid_proto['frames'], start=1):
        # load the current frame
        image_name = frame_path_at(vid_proto, frame['frame'])
        im = imread(image_name)
        # extract the rois alive on the current frame
        rois, track_index = _cur_rois(tracks, frame['frame'])
        if len(rois) == 0:
            continue
        timer = Timer()
        timer.tic()
        # detect all object classes and regress object bounds
        # scores: n x c, boxes: n x (c x 4)
        scores, boxes, features = _batch_im_detect(net, im, rois,
                                                   det_fun, batch_size)
        if not keep_feat:
            features = None
        if cls_indices is not None:
            boxes = boxes[:, cls_indices, :]
            scores = scores[:, cls_indices]
        # score normalization
        scores = scores / np.sum(scores, axis=1, keepdims=True)
        # propagation schemes
        pred_boxes = score_guided_box_merge(scores, boxes, scheme)
        # update track bbox
        _update_track(tracks, boxes, pred_boxes, scores, features,
                      track_index, frame['frame'])
        timer.toc()
        print('Frame {}: Detection took {:.3f}s for '
              '{:d} object proposals'.format(
                  frame['frame'], timer.total_time, len(rois)))
    track_proto['tracks'] = tracks
    return track_proto
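# score_guided_box_merge is not defined in this file; the sketch below is
# an assumed implementation, not the repo's actual one. It illustrates the
# three scheme names used in this module ('max', 'mean', 'weighted'), with
# scores of shape (n, c) and boxes of shape (n, c, 4), reusing the
# module-level numpy import (np) like the rest of this file.
def _score_guided_box_merge_sketch(scores, boxes, scheme='max'):
    if scheme == 'max':
        # take each roi's regression from its highest-scoring class
        max_cls = scores.argmax(axis=1)
        return boxes[np.arange(len(boxes)), max_cls, :]
    elif scheme == 'mean':
        # average the class-wise regressions
        return boxes.mean(axis=1)
    elif scheme == 'weighted':
        # score-weighted mean of the class-wise regressions
        weights = scores / scores.sum(axis=1, keepdims=True)
        return (boxes * weights[:, :, np.newaxis]).sum(axis=1)
    raise ValueError('Unknown scheme: {}'.format(scheme))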
def show_tracks(vid_proto, track_proto):
    for frame in vid_proto['frames']:
        img = imread(frame_path_at(vid_proto, frame['frame']))
        boxes = [track_box_at_frame(tracklet, frame['frame'])
                 for tracklet in track_proto['tracks']]
        tracked = add_bbox(img, boxes, None, None, 2)
        cv2.imshow('tracks', tracked)
        if cv2.waitKey(0) == ord('q'):
            cv2.destroyAllWindows()
            sys.exit(0)
    cv2.destroyAllWindows()
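# track_box_at_frame comes from vdetlib.utils.protocol and is not defined
# here; a minimal sketch of the assumed behavior, given that a tracklet is
# a list of per-frame dicts with 'frame' and 'bbox' keys:
def _track_box_at_frame_sketch(tracklet, frame_id):
    for box in tracklet:
        if box['frame'] == frame_id:
            return box['bbox']
    # the tracklet does not cover this frame
    return None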
def show_track_res(track_res, vid_proto):
    cv2.namedWindow('tracks')
    for frame_res in track_res:
        if frame_res['frame'] == -1:
            break
        frame = frame_res['frame']
        img = imread(frame_path_at(vid_proto, frame))
        boxes = frame_res['roi'].tolist()
        tracked = add_bbox(img, boxes, None, None, 2)
        cv2.imshow('tracks', tracked)
        if cv2.waitKey(0) == ord('q'):
            cv2.destroyAllWindows()
            sys.exit(0)
    cv2.destroyAllWindows()
def track_propagation(vid_proto, track_proto, net, det_fun=im_detect,
                      cls_indices=None, keep_feat=False, batch_size=1024):
    new_track_proto = {}
    new_track_proto['video'] = vid_proto['video']
    new_track_proto['method'] = 'track_propagation'
    # shallow copy: the track dicts are shared and updated in place
    tracks = copy.copy(track_proto['tracks'])
    for idx, frame in enumerate(vid_proto['frames'], start=1):
        # load the current frame
        image_name = frame_path_at(vid_proto, frame['frame'])
        im = imread(image_name)
        # extract the rois alive on the current frame
        rois, track_index = _cur_rois(tracks, frame['frame'])
        if len(rois) == 0:
            continue
        timer = Timer()
        timer.tic()
        # detect all object classes and regress object bounds
        # scores: n x c, boxes: n x (c x 4)
        scores, boxes, features = _batch_im_detect(net, im, rois,
                                                   det_fun, batch_size)
        if not keep_feat:
            features = None
        if cls_indices is not None:
            scores = scores[:, cls_indices]
        # score normalization
        scores = scores / np.sum(scores, axis=1, keepdims=True)
        # update track scores and boxes
        _update_track_scores_boxes(tracks, scores, boxes, features,
                                   track_index, frame['frame'])
        timer.toc()
        print('Frame {}: Detection took {:.3f}s for '
              '{:d} object proposals'.format(
                  frame['frame'], timer.total_time, len(rois)))
    new_track_proto['tracks'] = tracks
    return new_track_proto
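# _cur_rois is a private helper not shown here; this sketch assumes a
# track is a list of per-frame dicts carrying a propagated 'roi'. It
# returns the rois alive at frame_id plus the index of the owning track,
# so _update_track* can write detections back to the right tracks.
def _cur_rois_sketch(tracks, frame_id):
    rois = []
    track_index = []
    for index, track in enumerate(tracks):
        for box in track:
            if box['frame'] == frame_id and 'roi' in box:
                rois.append(box['roi'])
                track_index.append(index)
    return rois, track_index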
def naive_box_regression(net_rpn, net_no_rpn, vid_proto,
                         scheme='max', class_idx=None):
    """Generate tubelet proposals from the region proposals of the first frame."""
    track_proto = {}
    track_proto['video'] = vid_proto['video']
    track_proto['method'] = 'naive_box_regression'
    tracks = []
    pred_boxes = None
    for idx, frame in enumerate(vid_proto['frames'], start=1):
        # load the current frame
        image_name = frame_path_at(vid_proto, frame['frame'])
        im = imread(image_name)
        # detect all object classes and regress object bounds
        timer = Timer()
        timer.tic()
        if idx == 1:
            # first frame: use the RPN to generate proposals
            scores, boxes = im_detect(net_rpn, im, pred_boxes)
        else:
            # later frames: feed the previous predictions as rois
            scores, boxes = im_detect(net_no_rpn, im, pred_boxes)
        boxes = boxes.reshape((boxes.shape[0], -1, 4))
        if scheme == 'mean' or idx == 1:
            # use mean regressions as predictions
            pred_boxes = np.mean(boxes, axis=1)
        elif scheme == 'max':
            # use the regressions of the class with the maximum
            # probability, excluding the __background__ class
            max_cls = scores[:, 1:].argmax(axis=1) + 1
            pred_boxes = boxes[np.arange(len(boxes)), max_cls, :]
        else:
            # use class-specific regressions as predictions
            pred_boxes = boxes[:, class_idx, :]
        _append_boxes(tracks, frame['frame'], pred_boxes, scores)
        timer.toc()
        print('Detection took {:.3f}s for '
              '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))
    track_proto['tracks'] = tracks
    return track_proto
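# _append_boxes is not shown here; a sketch of the assumed behavior:
# naive_box_regression keeps one track per first-frame proposal, so on
# the first call a track is created per box, and later calls append the
# regressed box and raw class scores for the current frame.
def _append_boxes_sketch(tracks, frame_id, pred_boxes, scores):
    if not tracks:
        tracks.extend([[] for _ in pred_boxes])
    for track, box, score in zip(tracks, pred_boxes, scores):
        track.append({'frame': frame_id,
                      'bbox': box.tolist(),
                      'scores': score.tolist()})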
parser.add_argument('--top_k', default=10, type=int)
parser.add_argument('--no_nms', action='store_false', dest='nms')
parser.set_defaults(nms=True)
args = parser.parse_args()

vid_proto = proto_load(args.vid_file)
if args.save_dir and not os.path.isdir(args.save_dir):
    os.mkdir(args.save_dir)
cls_index = imagenet_vdet_class_idx[args.cls]
for frame in vid_proto['frames']:
    det_file = os.path.join(
        args.det_root,
        "{}.mat".format(os.path.splitext(frame['path'])[0]))
    det = sio.loadmat(det_file)
    frame_idx = frame['frame']
    img = imread(frame_path_at(vid_proto, frame_idx))
    boxes = det['boxes'][:, cls_index, :].astype('single')
    scores = det['zs'][:, cls_index].astype('single')
    if args.nms:
        keep = nms(np.hstack((boxes, scores[:, np.newaxis])), 0.3)
    else:
        keep = range(len(boxes))
    kept_boxes = [boxes[i, :] for i in keep]
    kept_scores = [scores[i] for i in keep]
    top_idx = np.argsort(np.asarray(kept_scores))[::-1]
    top_boxes = [kept_boxes[top_idx[i]]
                 for i in xrange(min(args.top_k, len(kept_boxes)))]
    top_scores = [kept_scores[top_idx[i]]
                  for i in xrange(min(args.top_k, len(kept_boxes)))]
    det_img = add_bbox(img, top_boxes, top_scores)
    cv2.imshow('detection', det_img)
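# The nms() used above is the Fast R-CNN utility; for reference, a minimal
# pure-NumPy equivalent with the same assumed interface (dets is (n, 5)
# rows of [x1, y1, x2, y2, score]; returns the kept indices):
def _py_nms_sketch(dets, thresh):
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the top-scoring box with the remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0., xx2 - xx1 + 1) * np.maximum(0., yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # drop boxes overlapping the kept box above the threshold
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep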
# fids.remove(int(line[0]))
# if len(fids) == 0:
#     print("get all fids")
#     break
print(len(ret))

## match the gt with detection for each video
## show the gt
# remove the current images
os.system("rm saveImgs/*")
for frame_index, frame in enumerate(vid['frames']):
    # print(frame)
    # print(vid['root_path'])
    imgpath = frame_path_at(vid, frame['frame'])
    imgbasename = os.path.basename(imgpath)
    imgsavepath = os.path.join('saveImgs/', imgbasename)
    img = cv2.imread(imgpath)
    # load gt bounding boxes and annotations
    # print(annot['annotations'])
    boxes = [track_box_at_frame(tracklet, frame['frame'])
             for tracklet in [anno['track'] for anno in annot['annotations']]]
    classes = [track_class_at_frame(tracklet, frame['frame'])
               for tracklet in [anno['track'] for anno in annot['annotations']]]
    # print(boxes)
    cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
    cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
        .astype(np.float32, copy=False)
    keep = nms(cls_dets, 0.3, force_cpu=True)
    for keep_id in keep:
        kept_track_ids.append(track_ids[inds[keep_id]])
        kept_class.append(j)

colors = unique_colors(len(kept_track_ids))
kept_tracks = [tracks[i] for i in kept_track_ids]
idx = 0
while True:
    frame = vid_proto['frames'][idx]
    frame_id = frame['frame']
    print("Frame id: {}".format(frame_id))
    img = imread(frame_path_at(vid_proto, frame['frame']))
    boxes = []
    scores = []
    show_track_ids = []
    cur_colors = []
    cur_classes = []
    for track_id, (class_id, track) in enumerate(
            zip(kept_class, kept_tracks)):
        if frame_id in track['frame']:
            boxes.append(track[args.box_key][track['frame'] == frame_id]
                         [0, class_id, :].tolist())
            scores.append(track[args.score_key][track['frame'] == frame_id]
                          [0, class_id].tolist())
            cur_colors.append(colors[track_id])
            cur_classes.append(imagenet_vdet_classes[class_id])
            show_track_ids.append(track_id)
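# unique_colors is a vdetlib helper not shown here; a plausible sketch
# (an assumption, not the actual implementation): n visually distinct
# BGR tuples obtained by sweeping hue in HSV space.
import colorsys
def _unique_colors_sketch(n):
    colors = []
    for i in range(n):
        r, g, b = colorsys.hsv_to_rgb(i / float(max(n, 1)), 1., 1.)
        colors.append((int(b * 255), int(g * 255), int(r * 255)))
    return colors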
vid_proto = proto_load(args.vid_file)
score_proto = proto_load(args.score_file)
with open(args.image_set_file) as f:
    image_set = dict([line.strip().split() for line in f.readlines()])
vid_name = vid_proto['video']
assert vid_name == score_proto['video']

# build frame-id -> image-name dict
frame_to_image_name = {}
for frame in vid_proto['frames']:
    frame_id = frame['frame']
    frame_to_image_name[frame_id] = os.path.join(
        vid_name, os.path.splitext(frame['path'])[0])

# get image shape
height, width = imread(frame_path_at(vid_proto, 1)).shape[:2]

dets = []
for tubelet in score_proto['tubelets']:
    if tubelet['gt'] == 1:
        raise ValueError('Dangerous: Score file contains gt tracks!')
    class_index = tubelet['class_index']
    for box in tubelet['boxes']:
        frame_idx = box['frame']
        image_name = frame_to_image_name[frame_idx]
        # map the frame to its image-set index
        frame_idx = image_set[image_name]
        # clip the box to the image boundary
        bbox = map(lambda x: max(x, 0), box['bbox'])
        bbox[0] = min(width - 1, bbox[0])
        bbox[2] = min(width - 1, bbox[2])
        bbox[1] = min(height - 1, bbox[1])
        bbox[3] = min(height - 1, bbox[3])
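# The four min/max lines above clip a box to the image; an equivalent
# helper (a sketch, assuming zero-based, inclusive pixel coordinates):
def _clip_bbox_sketch(bbox, width, height):
    x1, y1, x2, y2 = [max(v, 0) for v in bbox]
    return [min(x1, width - 1), min(y1, height - 1),
            min(x2, width - 1), min(y2, height - 1)]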
def sequence_roi_propagation(vid_proto, box_proto, net,
                             det_fun=sequence_im_detect, window=2,
                             scheme='max', length=None, sample_rate=1,
                             offset=0, keep_feat=False, batch_size=1024):
    track_proto = {}
    track_proto['video'] = vid_proto['video']
    track_proto['method'] = 'sequence_roi_propagation'
    max_frame = vid_proto['frames'][-1]['frame']
    if not length:
        length = max_frame
    tracks = _box_proto_to_track(box_proto, max_frame, length,
                                 sample_rate, offset)
    track_anchors = sorted(set([track[0]['frame'] for track in tracks]))
    sequence_frames = _sequence_frames(vid_proto, window, track_anchors, length)
    for idx, frames in enumerate(sequence_frames, start=1):
        # load the images of the current window
        images = map(lambda x: imread(frame_path_at(vid_proto, x['frame'])),
                     frames)
        # extract rois on the first frame of the window
        rois, track_index = _cur_rois(tracks, frames[0]['frame'])
        if len(rois) == 0:
            continue
        timer = Timer()
        timer.tic()
        # scores: n x 2, boxes: n x ((len-1) x 4), features: n x (len x f)
        scores, boxes, features = _batch_sequence_im_detect(
            net, images, rois, det_fun, batch_size)
        if not keep_feat:
            features = None
        # update track bbox
        boxes = boxes.reshape((len(rois), len(images) - 1, 4))
        if keep_feat:
            features = features.reshape((len(rois), len(images), -1))
        frame_ids = [frame['frame'] for frame in frames]
        prev_id = -1
        for i in xrange(len(images)):
            frame_id = frames[i]['frame']
            # stop when encountering duplicate (padded) frames
            if frame_id == prev_id:
                break
            prev_id = frame_id
            if i == 0:
                _update_track_by_key(tracks, 'bbox', rois,
                                     track_index, frame_id)
            else:
                # minus 1 because boxes[0] corresponds to the second frame
                _update_track_by_key(tracks, 'bbox',
                                     boxes[:, i - 1, :].tolist(),
                                     track_index, frame_id)
                _update_track_by_key(tracks, 'roi',
                                     boxes[:, i - 1, :].tolist(),
                                     track_index, frame_id)
            if keep_feat:
                _update_track_by_key(tracks, 'feature',
                                     features[:, i, :].tolist(),
                                     track_index, frame_id)
        timer.toc()
        print('Frame {}-{}: Detection took {:.3f}s for '
              '{:d} object proposals'.format(
                  frame_ids[0], frame_ids[-1], timer.total_time, len(rois)))
    track_proto['tracks'] = tracks
    return track_proto
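# _sequence_frames is not shown here; this sketch assumes it yields, for
# each track anchor, `window` consecutive frame dicts, repeating the last
# frame when the video ends early (the duplicate-frame break in the loop
# above suggests exactly this padding behavior). `length` is ignored in
# this simplified sketch.
def _sequence_frames_sketch(vid_proto, window, track_anchors, length):
    frames = vid_proto['frames']
    frame_ids = [f['frame'] for f in frames]
    sequences = []
    for anchor in track_anchors:
        start = frame_ids.index(anchor)
        sequences.append([frames[min(start + i, len(frames) - 1)]
                          for i in range(window)])
    return sequences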
def roi_train_propagation(vid_proto, box_proto, net, det_fun=im_detect,
                          cls_indices=None, scheme='weighted',
                          num_tracks=16, length=20, fg_ratio=None,
                          batch_size=16):
    assert vid_proto['video'] == box_proto['video']
    # collect the frames that have enough boxes to start tracks on
    all_boxes = {}
    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        boxes = boxes_at_frame(box_proto, frame_id)
        if len(boxes) >= num_tracks:
            all_boxes[frame_id] = boxes
    try:
        st_frame = random.choice(all_boxes.keys())
    except IndexError:
        raise ValueError('{} has no valid frames for tracking.'.format(
            vid_proto['video']))
    st_boxes = _sample_boxes(all_boxes[st_frame], num_tracks, fg_ratio)
    results = [{'frame': -1} for i in xrange(length)]
    anchor = 0
    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        if frame_id < st_frame:
            continue
        if anchor >= length:
            break
        res = results[anchor]
        res['frame'] = frame_id
        if anchor == 0:
            res['roi'] = np.asarray([st_box['bbox'] for st_box in st_boxes])
        # load the current frame
        image_name = frame_path_at(vid_proto, frame_id)
        im = imread(image_name)
        # extract rois on the current frame
        rois = res['roi']
        assert rois.shape[0] == num_tracks
        timer = Timer()
        timer.tic()
        # scores: n x c, boxes: n x (c x 4), features: n x c
        scores, boxes, features = _batch_im_detect(net, im, rois,
                                                   det_fun, batch_size)
        if cls_indices is not None:
            boxes = boxes[:, cls_indices, :]
            scores = scores[:, cls_indices]
        # score normalization
        scores = scores / np.sum(scores, axis=1, keepdims=True)
        # propagation schemes
        pred_boxes = score_guided_box_merge(scores, boxes, scheme)
        results[anchor]['bbox'] = boxes
        results[anchor]['feat'] = features
        if anchor + 1 < length:
            results[anchor + 1]['roi'] = pred_boxes
        anchor += 1
    return results
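# _sample_boxes is not shown here; a hedged sketch: draw num_tracks
# starting boxes, optionally biasing toward foreground proposals when
# fg_ratio is given. The 'positive' key is an assumption for illustration,
# not a guarantee of the repo's box_proto format.
def _sample_boxes_sketch(boxes, num_tracks, fg_ratio=None):
    if fg_ratio is None:
        return random.sample(boxes, num_tracks)
    fg = [b for b in boxes if b.get('positive')]
    bg = [b for b in boxes if not b.get('positive')]
    num_fg = min(int(round(num_tracks * fg_ratio)), len(fg))
    sampled = random.sample(fg, num_fg)
    sampled += random.sample(bg, min(num_tracks - num_fg, len(bg)))
    while len(sampled) < num_tracks:
        # pad by resampling when either pool runs short
        sampled.append(random.choice(boxes))
    return sampled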
import cv2

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('vid_file')
    parser.add_argument('annot_file')
    parser.add_argument('--save_dir', default=None)
    args = parser.parse_args()

    vid_proto = proto_load(args.vid_file)
    annot_proto = proto_load(args.annot_file)

    colors = unique_colors(len(annot_proto['annotations']))
    for frame in vid_proto['frames']:
        img = imread(frame_path_at(vid_proto, frame['frame']))
        boxes = [track_box_at_frame(tracklet, frame['frame'])
                 for tracklet in [annot['track']
                                  for annot in annot_proto['annotations']]]
        tracked = add_bbox(img, boxes, None, 10)
        if args.save_dir:
            if not os.path.isdir(args.save_dir):
                os.makedirs(args.save_dir)
            imwrite(os.path.join(args.save_dir,
                                 "{:04d}.jpg".format(frame['frame'])),
                    tracked)
        else:
            cv2.imshow('tracks', tracked)
            if cv2.waitKey(0) == ord('q'):
                cv2.destroyAllWindows()
                sys.exit(0)
    cv2.destroyAllWindows()
parser.add_argument('--merge', dest='merge', action='store_true')
parser.add_argument('--debug', dest='visual_debug', action='store_true')
parser.set_defaults(merge=False, visual_debug=False)
args = parser.parse_args()

norm_width = 500.
bound = args.bound
vid_proto = proto_load(args.vid_file)
print("Processing {}: {} files... ".format(
    args.vid_file, len(vid_proto['frames']))),
sys.stdout.flush()
tic = time.time()
for frame1, frame2 in zip(vid_proto['frames'][:-1],
                          vid_proto['frames'][1:]):
    img_path = frame_path_at(vid_proto, frame1['frame'])
    img1 = cvReadGrayImg(img_path)
    img2 = cvReadGrayImg(frame_path_at(vid_proto, frame2['frame']))
    h, w = img1.shape
    # normalize image size
    fxy = norm_width / w
    flow = cv2.calcOpticalFlowFarneback(
        cv2.resize(img1, None, fx=fxy, fy=fxy),
        cv2.resize(img2, None, fx=fxy, fy=fxy),
        0.5, 3, 15, 3, 7, 1.5, 0)
    # map optical flow back
    flow = flow / fxy
    # normalization: quantize [-bound, bound] to [0, 255]
    flow = np.round((flow + bound) / (2. * bound) * 255.)
    flow[flow < 0] = 0
    flow[flow > 255] = 255
    flow = cv2.resize(flow, (w, h))
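# The rounding above maps flow values in [-bound, bound] linearly onto
# [0, 255]; a consumer of the saved flow images can invert the
# quantization like this (a sketch, assuming the same bound value):
def _dequantize_flow_sketch(quantized, bound):
    return quantized.astype(np.float32) / 255. * 2. * bound - bound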
sys.path.insert(0, osp.join(this_dir, '../../external/'))
from vdetlib.utils.protocol import proto_load, frame_path_at, annots_at_frame
import shutil

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('vid_proto')
    parser.add_argument('annot_proto')
    parser.add_argument('save_dir')
    args = parser.parse_args()

    vid_proto = proto_load(args.vid_proto)
    annot_proto = proto_load(args.annot_proto)
    if not osp.isdir(args.save_dir):
        os.makedirs(args.save_dir)

    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        image_path = frame_path_at(vid_proto, frame_id)
        annots = annots_at_frame(annot_proto, frame_id)
        cls_idx = [annot['class_index'] for annot in annots]
        uniq_cls = set(cls_idx)
        for cls in uniq_cls:
            save_dir = osp.join(args.save_dir, "{:02d}".format(cls))
            if not osp.isdir(save_dir):
                os.makedirs(save_dir)
            save_path = osp.join(save_dir,
                                 '_'.join(image_path.split('/')[-2:]))
            shutil.copyfile(image_path, save_path)
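# annots_at_frame comes from vdetlib.utils.protocol; a minimal sketch of
# the assumed behavior, given that each annotation holds a 'track' list
# of per-frame dicts carrying a 'class_index':
def _annots_at_frame_sketch(annot_proto, frame_id):
    return [box for annot in annot_proto['annotations']
            for box in annot['track'] if box['frame'] == frame_id]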
args = parser.parse_args()

manual_boxes = [{"class": cls, "boxes": []}
                for cls in imagenet_vdet_classes[1:]]
for annot_file in [line.strip() for line in open(args.annot_list)]:
    print("Processing {}".format(annot_file))
    with open(annot_file) as f:
        annot = json.load(f)
    vid_file = os.path.join(args.vid_dir, annot['video'] + '.vid')
    assert os.path.isfile(vid_file)
    with open(vid_file) as f:
        vid_proto = json.load(f)
    assert vid_proto['video'] == annot['video']
    frames = [frame for track in annot['annotations']
              for frame in track['track']]
    for frame in frames:
        cls_idx = frame['class_index']
        assert manual_boxes[cls_idx - 1]['class'] == frame['class']
        if frame['generated'] == 1:
            # skip generated boxes
            continue
        # manually labeled boxes
        frame_path = frame_path_at(vid_proto, frame['frame'])
        manual_boxes[cls_idx - 1]['boxes'].append(
            [frame_path] + frame['bbox'])

for count in manual_boxes:
    save_file = os.path.join(args.save_dir,
                             count['class'] + '_manual_box_list.txt')
    with open(save_file, 'w') as f:
        print("Writing to {}".format(save_file))
        for box in count['boxes']:
            f.write('\t'.join(map(str, box)) + '\n')
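# Each *_manual_box_list.txt line written above is
# frame_path<TAB>x1<TAB>y1<TAB>x2<TAB>y2; a matching reader sketch:
def _read_manual_box_list(path):
    entries = []
    with open(path) as f:
        for line in f:
            fields = line.strip().split('\t')
            entries.append((fields[0], [float(v) for v in fields[1:]]))
    return entries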