def det_process(opts, frame_recv, det_res_send, w_img, h_img):
    """Detector worker loop: serve frame requests until told to exit.

    A detector is built from ``opts`` and warmed up on a blank
    ``(h_img, w_img, 3)`` uint8 image. The loop then handles three kinds of
    message read from ``frame_recv``:

    * a ``list`` -- treated as the image paths of a new video; every image is
      read up front and ``'ready'`` is sent on ``det_res_send``;
    * ``None`` -- exit flag, the loop stops;
    * anything else -- unpacked as ``(fidx, t1)``; the detector runs on
      ``frames[fidx]`` and ``[result, t_send_frame, t3]`` is sent back, where
      ``t_send_frame`` is the local ``perf_counter()`` reading minus ``t1``
      and ``t3`` is the ``perf_counter()`` reading after inference.

    Any exception raised inside the loop is not re-raised; instead an
    ``Exception`` whose message is the formatted traceback is sent on
    ``det_res_send``.
    """
    try:
        detector = init_detector(opts)

        # warm up the GPU
        blank = np.zeros((h_img, w_img, 3), np.uint8)
        _ = inference_detector(detector, blank)
        torch.cuda.synchronize()

        while True:
            msg = frame_recv.recv()

            if msg is None:
                # exit flag
                break

            if type(msg) is list:
                # new video, read all images in advance
                frames = [imread(path) for path in msg]
                # signal ready, no errors
                det_res_send.send('ready')
                continue

            fidx, t_sent = msg
            frame = frames[fidx]
            t_received = perf_counter()
            t_send_frame = t_received - t_sent

            result = inference_detector(
                detector, frame, gpu_pre=not opts.cpu_pre)
            torch.cuda.synchronize()

            t_done = perf_counter()
            det_res_send.send([result, t_send_frame, t_done])

    except Exception:
        # report all errors from the child process to the parent
        # forward traceback info as well
        det_res_send.send(Exception(traceback.format_exc()))
def det_test(opts, scale, imgs):
    """Return the mean per-image detector runtime over ``imgs``.

    ``opts`` is modified in place: ``opts.in_scale`` is set to ``scale`` and
    ``opts.no_mask`` to ``True`` before the detector is created. One untimed
    warm-up inference is run on ``imgs[0]``; afterwards each image is timed
    with ``perf_counter`` around inference plus a CUDA synchronize.

    ``imgs`` must be non-empty (checked with an ``assert``).
    """
    assert len(imgs)

    opts.in_scale = scale
    opts.no_mask = True
    detector = init_detector(opts)

    # warm up the GPU
    _ = inference_detector(detector, imgs[0])
    torch.cuda.synchronize()

    elapsed = []
    for frame in imgs:
        tic = perf_counter()
        _ = inference_detector(detector, frame, gpu_pre=not opts.cpu_pre)
        torch.cuda.synchronize()
        toc = perf_counter()
        elapsed.append(toc - tic)

    return sum(elapsed) / len(elapsed)
def main():
    """Run the detector on every annotated image, then save and evaluate.

    Steps:
      1. Parse options, create the output (and optional visualization)
         directories and load the annotation file through ``COCO``.
      2. Run ``inference_detector`` on each image in ``db.imgs``, keeping the
         raw per-image result and a flat per-instance list of result dicts
         (``image_id`` / ``bbox`` / ``score`` / ``category_id`` and, when
         masks are returned, ``segmentation``).
      3. Pickle both lists into ``opts.out_dir``. Unless ``opts.no_eval`` is
         set, run ``eval_ccf`` and pickle its summary; with
         ``opts.eval_mask`` a second ``iou_type='segm'`` summary is produced.

    Existing output files are only replaced when ``opts.overwrite`` is set.
    """
    assert torch.cuda.device_count() == 1  # mmdet only supports single GPU testing

    opts = parse_args()

    mkdir2(opts.out_dir)
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    if opts.no_class_mapping:
        coco_mapping = None
    else:
        coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    model = init_detector(opts)

    results_raw = []  # image based, all 80 COCO classes
    results_ccf = []  # instance based

    for iid, img in tqdm(db.imgs.items()):
        img_name = img['name']
        sid = img['sid']
        seq_name = seqs[sid]

        img_path = join(opts.data_root, seq_dirs[sid], img_name)
        image = imread(img_path)

        result = inference_detector(model, image, gpu_pre=not opts.cpu_pre)
        results_raw.append(result)
        bboxes, scores, labels, masks = \
            parse_det_result(result, coco_mapping, n_class)

        # Visualize before the boxes are converted below.
        if vis_out:
            # the last three characters of the file name are swapped for 'jpg'
            vis_path = join(opts.vis_dir, seq_name, img_name[:-3] + 'jpg')
            if opts.overwrite or not isfile(vis_path):
                vis_det(
                    image, bboxes, labels,
                    class_names, masks, scores,
                    out_scale=opts.vis_scale,
                    out_file=vis_path,
                )

        # convert to coco fmt
        n = len(bboxes)
        if n:
            # return value is unused -- presumably converts the boxes in place
            ltrb2ltwh_(bboxes)

        for i in range(n):
            result_dict = {
                'image_id': iid,
                'bbox': bboxes[i],
                'score': scores[i],
                'category_id': labels[i],
            }
            if masks is not None:
                result_dict['segmentation'] = masks[i]

            results_ccf.append(result_dict)

    # Context managers make sure every pickle file is flushed and closed.
    out_path = join(opts.out_dir, 'results_raw.pkl')
    if opts.overwrite or not isfile(out_path):
        with open(out_path, 'wb') as f:
            pickle.dump(results_raw, f)

    out_path = join(opts.out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        with open(out_path, 'wb') as f:
            pickle.dump(results_ccf, f)

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(opts.out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            with open(out_path, 'wb') as f:
                pickle.dump(eval_summary, f)

        if opts.eval_mask:
            print('Evaluating instance segmentation')
            eval_summary = eval_ccf(db, results_ccf, iou_type='segm')
            out_path = join(opts.out_dir, 'eval_summary_mask.pkl')
            if opts.overwrite or not isfile(out_path):
                with open(out_path, 'wb') as f:
                    pickle.dump(eval_summary, f)

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}"')
def main():
    """Time the detector against a wall-clock frame stream, per sequence.

    For each sequence all frames are loaded up front, then a loop polls
    ``perf_counter``: the elapsed time multiplied by ``opts.fps`` selects the
    latest available frame. A frame is skipped when it was already seen, when
    the schedule says so (``opts.dynamic_schedule``: more than half of the
    frame interval has already passed; otherwise: every ``opts.det_stride``-th
    newly seen frame is processed), or when inference finishes after the
    sequence's total duration ``n_frame / opts.fps``.

    With ``opts.mask_timing`` the detector is called with
    ``decode_mask=False`` and ``decode_mask`` is run only after the finishing
    timestamp ``t2`` has been taken.

    Per-sequence results go to ``<out_dir>/<seq>.pkl`` and aggregate timing to
    ``<out_dir>/time_info.pkl``; existing files are only replaced when
    ``opts.overwrite`` is set.
    """
    assert torch.cuda.device_count() == 1  # mmdet only supports single GPU testing

    opts = parse_args()
    # mask timing is only allowed while masks are enabled
    assert (not opts.mask_timing) or (opts.mask_timing and not opts.no_mask)

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    if opts.no_class_mapping:
        coco_mapping = None
    else:
        coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    model = init_detector(opts)

    # warm up the GPU
    # NOTE(review): assumes an image with id 0 exists in the annotations
    img = db.imgs[0]
    w_img, h_img = img['width'], img['height']
    _ = inference_detector(model, np.zeros((h_img, w_img, 3), np.uint8))
    torch.cuda.synchronize()

    runtime_all = []
    n_processed = 0
    n_total = 0

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]

        # load all frames in advance
        frames = []
        for img in frame_list:
            img_path = join(opts.data_root, seq_dirs[sid], img['name'])
            frames.append(imread(img_path))
        n_frame = len(frames)
        n_total += n_frame

        timestamps = []
        results_raw = []
        results_parsed = []
        input_fidx = []
        runtime = []
        last_fidx = None
        if not opts.dynamic_schedule:
            stride_cnt = 0

        t_total = n_frame/opts.fps
        t_start = perf_counter()
        while True:
            t1 = perf_counter()
            t_elapsed = t1 - t_start
            if t_elapsed >= t_total:
                break

            # identify latest available frame
            fidx_continous = t_elapsed*opts.fps
            fidx = int(np.floor(fidx_continous))
            if fidx == last_fidx:
                continue
            last_fidx = fidx

            if opts.dynamic_schedule:
                # skip a frame whose interval is already more than half over
                fidx_remainder = fidx_continous - fidx
                if fidx_remainder > 0.5:
                    continue
            else:
                # fixed stride: process one newly seen frame, skip the rest
                if stride_cnt % opts.det_stride == 0:
                    stride_cnt = 1
                else:
                    stride_cnt += 1
                    continue

            frame = frames[fidx]
            result = inference_detector(
                model, frame,
                gpu_pre=not opts.cpu_pre,
                decode_mask=not opts.mask_timing,
            )
            if opts.mask_timing:
                # keep the encoded masks aside; they are decoded after t2
                mask_encoded = result[2]
                result = result[:2]
                bboxes, scores, labels, _, sel = \
                    parse_det_result(result, coco_mapping, n_class,
                                     return_sel=True)
            else:
                bboxes, scores, labels, masks = \
                    parse_det_result(result, coco_mapping, n_class)

            torch.cuda.synchronize()

            t2 = perf_counter()
            t_elapsed = t2 - t_start
            if t_elapsed >= t_total:
                # finished after the end of the stream: drop this result
                break

            if opts.mask_timing:
                masks = decode_mask(mask_encoded, sel)

            timestamps.append(t_elapsed)
            results_raw.append(result)
            results_parsed.append((bboxes, scores, labels, masks))
            input_fidx.append(fidx)
            runtime.append(t2 - t1)

        out_path = join(opts.out_dir, seq + '.pkl')
        if opts.overwrite or not isfile(out_path):
            # context manager makes sure the file is flushed and closed
            with open(out_path, 'wb') as f:
                pickle.dump({
                    'results_raw': results_raw,
                    'results_parsed': results_parsed,
                    'timestamps': timestamps,
                    'input_fidx': input_fidx,
                    'runtime': runtime,
                }, f)

        runtime_all += runtime
        n_processed += len(results_raw)

    runtime_all_np = np.asarray(runtime_all)
    # number of inferences that took less than one frame interval
    n_small_runtime = (runtime_all_np < 1.0/opts.fps).sum()

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        with open(out_path, 'wb') as f:
            pickle.dump({
                'runtime_all': runtime_all,
                'n_processed': n_processed,
                'n_total': n_total,
                'n_small_runtime': n_small_runtime,
            }, f)

    # convert to ms for display
    s2ms = lambda x: 1e3*x

    print(f'{n_processed}/{n_total} frames processed')
    print_stats(runtime_all_np, 'Runtime (ms)', cvt=s2ms)
    print(f'Runtime smaller than unit time interval: '
          f'{n_small_runtime}/{n_processed} '
          f'({100.0*n_small_runtime/n_processed:.4g}%)')
def main():
    """Simulate real-time detection using runtimes drawn from a distribution.

    Instead of measuring wall-clock time, a simulated clock ``t_elapsed`` is
    advanced by ``runtime_dist.draw()`` after every processed frame. The
    distribution is built by ``dist_from_dict`` from the pickle at
    ``opts.runtime`` and ``opts.perf_factor``; ``np.random.seed(opts.seed)``
    is called beforehand.

    Detections come either from a cache (``opts.cached_res``; a file whose
    base name contains ``'_ccf'`` is read through ``result_from_ccf``,
    otherwise it is indexed by image id) or from running the detector, which
    requires exactly one CUDA device.

    Per-sequence results go to ``<out_dir>/<seq>.pkl`` and aggregate timing to
    ``<out_dir>/time_info.pkl``; existing files are only replaced when
    ``opts.overwrite`` is set.
    """
    opts = parse_args()

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    if opts.no_class_mapping:
        coco_mapping = None
    else:
        coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # opts.cached_res and opts.runtime must point to trusted files only.
    if opts.cached_res:
        cache_in_ccf = '_ccf' in basename(opts.cached_res)
        if cache_in_ccf:
            # speed up based on the assumption of sequential storage
            cache_end_idx = 0
        with open(opts.cached_res, 'rb') as f:
            cached_res = pickle.load(f)
    else:
        assert torch.cuda.device_count() == 1  # mmdet only supports single GPU testing
        model = init_detector(opts)

    np.random.seed(opts.seed)
    with open(opts.runtime, 'rb') as f:
        runtime_data = pickle.load(f)
    runtime_dist = dist_from_dict(runtime_data, opts.perf_factor)

    runtime_all = []
    n_processed = 0
    n_total = 0

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
        n_frame = len(frame_list)
        n_total += n_frame

        if not opts.cached_res:
            # load all frames in advance
            frames = []
            for img in frame_list:
                img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                frames.append(imread(img_path))

        timestamps = []
        results_parsed = []
        input_fidx = []
        runtime = []
        last_fidx = None
        # no raw results are stored when reading from a ccf cache
        if opts.cached_res and cache_in_ccf:
            results_raw = None
        else:
            results_raw = []

        t_total = n_frame/opts.fps
        t_elapsed = 0
        if opts.dynamic_schedule:
            # mean runtime expressed in frame intervals
            mean_rtf = runtime_dist.mean()*opts.fps
        else:
            stride_cnt = 0

        while True:
            if t_elapsed >= t_total:
                break

            # identify latest available frame
            fidx_continous = t_elapsed*opts.fps
            fidx = int(np.floor(fidx_continous))
            if fidx == last_fidx:
                # algorithm is fast and has some idle time
                fidx += 1
                if fidx == n_frame:
                    break
                # jump the simulated clock to the arrival of the next frame
                t_elapsed = fidx/opts.fps

            last_fidx = fidx

            if opts.dynamic_schedule:
                if mean_rtf > 1:
                    # when runtime <= 1, it should always process every frame
                    fidx_remainder = fidx_continous - fidx
                    if mean_rtf < np.floor(fidx_remainder + mean_rtf):
                        # wait till next frame
                        continue
            else:
                # fixed stride: process one newly seen frame, skip the rest
                if stride_cnt % opts.det_stride == 0:
                    stride_cnt = 1
                else:
                    stride_cnt += 1
                    continue

            if opts.cached_res:
                img = frame_list[fidx]
                if cache_in_ccf:
                    cache_end_idx, bboxes, scores, labels, masks = \
                        result_from_ccf(cached_res, img['id'], cache_end_idx)
                    ltwh2ltrb_(bboxes)
                else:
                    result = cached_res[img['id']]
                    bboxes, scores, labels, masks = \
                        parse_det_result(result, coco_mapping, n_class)
            else:
                frame = frames[fidx]
                result = inference_detector(model, frame)
                bboxes, scores, labels, masks = \
                    parse_det_result(result, coco_mapping, n_class)

            # advance the simulated clock by a sampled runtime
            rt_this = runtime_dist.draw()
            t_elapsed += rt_this
            if t_elapsed >= t_total:
                # finished after the end of the stream: drop this result
                break

            timestamps.append(t_elapsed)
            if results_raw is not None:
                results_raw.append(result)
            results_parsed.append((bboxes, scores, labels, masks))
            input_fidx.append(fidx)
            runtime.append(rt_this)

        out_path = join(opts.out_dir, seq + '.pkl')
        if opts.overwrite or not isfile(out_path):
            out_dict = {
                'results_parsed': results_parsed,
                'timestamps': timestamps,
                'input_fidx': input_fidx,
                'runtime': runtime,
            }
            if results_raw is not None:
                out_dict['results_raw'] = results_raw
            # context manager makes sure the file is flushed and closed
            with open(out_path, 'wb') as f:
                pickle.dump(out_dict, f)

        runtime_all += runtime
        n_processed += len(results_parsed)

    runtime_all_np = np.array(runtime_all)
    # number of sampled runtimes shorter than one frame interval
    n_small_runtime = (runtime_all_np < 1.0/opts.fps).sum()

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        with open(out_path, 'wb') as f:
            pickle.dump({
                'runtime_all': runtime_all,
                'n_processed': n_processed,
                'n_total': n_total,
                'n_small_runtime': n_small_runtime,
            }, f)

    # convert to ms for display
    s2ms = lambda x: 1e3*x

    print(f'{n_processed}/{n_total} frames processed')
    print_stats(runtime_all_np, 'Runtime (ms)', cvt=s2ms)
    print(f'Runtime smaller than unit time interval: '
          f'{n_small_runtime}/{n_processed} '
          f'({100.0*n_small_runtime/n_processed:.4g}%)')
def main():
    """Profile the detector while it runs against a wall-clock frame stream.

    For each sequence all frames are loaded up front, then a loop polls
    ``perf_counter``; the elapsed time multiplied by ``opts.fps`` selects the
    latest available frame, and frames already seen are skipped. Only the
    inference call plus the CUDA synchronize are wrapped by the ``cProfile``
    profiler. A result is dropped when inference finishes after the
    sequence's total duration ``n_frame / opts.fps``.

    Outputs: ``<out_dir>/<seq>.pkl`` holding the tuple
    ``(results, timestamps, input_fidx, runtime)``, ``<out_dir>/time_info.pkl``
    and the profiler stats at a hard-coded path. Pickle files are only
    replaced when ``opts.overwrite`` is set.
    """
    import cProfile

    assert torch.cuda.device_count() == 1  # mmdet only supports single GPU testing

    opts = parse_args()
    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    model = init_detector(opts)

    # warm up the GPU
    # NOTE(review): assumes an image with id 0 exists in the annotations
    img = db.imgs[0]
    w_img, h_img = img['width'], img['height']
    _ = inference_detector(model, np.zeros((h_img, w_img, 3), np.uint8))
    torch.cuda.synchronize()

    runtime_all = []
    n_processed = 0
    n_total = 0

    pr = cProfile.Profile()

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]

        # load all frames in advance
        frames = []
        for img in frame_list:
            img_path = join(opts.data_root, seq_dirs[sid], img['name'])
            frames.append(imread(img_path))
        n_frame = len(frames)
        n_total += n_frame

        timestamps = []
        results = []
        input_fidx = []
        runtime = []
        last_fidx = None

        t_total = n_frame / opts.fps
        t_start = perf_counter()
        while True:
            t1 = perf_counter()
            t_elapsed = t1 - t_start
            if t_elapsed >= t_total:
                break

            # identify latest available frame
            #   t_elapsed/t_total * n_frame
            # = t_elapsed*opts.fps
            fidx = int(np.floor(t_elapsed * opts.fps))
            if fidx == last_fidx:
                continue
            last_fidx = fidx
            frame = frames[fidx]

            # profile only the inference call and the synchronize
            pr.enable()
            result = inference_detector(model, frame)
            torch.cuda.synchronize()
            pr.disable()

            t2 = perf_counter()
            t_elapsed = t2 - t_start
            if t_elapsed >= t_total:
                # finished after the end of the stream: drop this result
                break

            timestamps.append(t_elapsed)
            results.append(result)
            input_fidx.append(fidx)
            runtime.append(t2 - t1)

        out_path = join(opts.out_dir, seq + '.pkl')
        if opts.overwrite or not isfile(out_path):
            # context manager makes sure the file is flushed and closed
            with open(out_path, 'wb') as f:
                pickle.dump((
                    results, timestamps,
                    input_fidx, runtime,
                ), f)

        runtime_all += runtime
        n_processed += len(results)

    # NOTE(review): hard-coded output path; the '_par_' directory must exist
    pr.dump_stats('_par_/mrcnn50_s0.5_1080ti_gpupre_blocking.prof')

    runtime_all_np = np.array(runtime_all)
    # number of inferences that took less than one frame interval
    n_small_runtime = (runtime_all_np < 1.0 / opts.fps).sum()

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        with open(out_path, 'wb') as f:
            pickle.dump(
                {
                    'runtime_all': runtime_all,
                    'n_processed': n_processed,
                    'n_total': n_total,
                    'n_small_runtime': n_small_runtime,
                }, f)

    print(f'{n_processed}/{n_total} frames processed')
    if not n_processed:
        # min()/max() of an empty array raise ValueError; nothing to report
        return

    # convert to ms for display
    runtime_all_np *= 1e3

    print('Runtime (ms): mean: %g; std: %g; min: %g; max: %g' % (
        runtime_all_np.mean(),
        runtime_all_np.std(ddof=1),
        runtime_all_np.min(),
        runtime_all_np.max(),
    ))
    print(f'Runtime smaller than unit time interval: '
          f'{n_small_runtime}/{n_processed} '
          f'({100.0*n_small_runtime/n_processed:.4g}%)')
def main():
    """Simulate detection where every frame is processed independently.

    Frame ``ii`` of a sequence starts at ``ii / opts.fps`` and finishes after
    a runtime sampled from ``runtime_dist.draw()``; no frame is skipped. The
    distribution is built by ``dist_from_dict`` from the pickle at
    ``opts.runtime`` and ``opts.perf_factor``; ``np.random.seed(opts.seed)``
    is called beforehand. Because finishing times need not be monotonic, all
    per-frame lists are reordered by finishing timestamp before being saved.

    Detections come either from a cache (``opts.cached_res``; a file whose
    base name contains ``'_ccf'`` is read through ``result_from_ccf``,
    otherwise it is indexed by image id) or from running the detector, which
    requires exactly one CUDA device.

    Per-sequence results go to ``<out_dir>/<seq>.pkl`` and aggregate timing to
    ``<out_dir>/time_info.pkl``; existing files are only replaced when
    ``opts.overwrite`` is set.
    """
    opts = parse_args()

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # opts.cached_res and opts.runtime must point to trusted files only.
    if opts.cached_res:
        cache_in_ccf = '_ccf' in basename(opts.cached_res)
        if cache_in_ccf:
            # speed up based on the assumption of sequential storage
            cache_end_idx = 0
        with open(opts.cached_res, 'rb') as f:
            cached_res = pickle.load(f)
    else:
        assert torch.cuda.device_count() == 1  # mmdet only supports single GPU testing
        model = init_detector(opts)

    np.random.seed(opts.seed)
    with open(opts.runtime, 'rb') as f:
        runtime_data = pickle.load(f)
    runtime_dist = dist_from_dict(runtime_data, opts.perf_factor)

    runtime_all = []
    n_processed = 0
    n_total = 0

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
        n_frame = len(frame_list)
        n_total += n_frame

        if not opts.cached_res:
            # load all frames in advance
            frames = []
            for img in frame_list:
                img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                frames.append(imread(img_path))

        timestamps = []
        results_parsed = []
        input_fidx = []
        runtime = []
        # no raw results are stored when reading from a ccf cache
        if opts.cached_res and cache_in_ccf:
            results_raw = None
        else:
            results_raw = []

        for ii in range(n_frame):
            # arrival time of frame ii
            t = ii / opts.fps

            if opts.cached_res:
                img = frame_list[ii]
                if cache_in_ccf:
                    cache_end_idx, bboxes, scores, labels, masks = \
                        result_from_ccf(cached_res, img['id'], cache_end_idx)
                    ltwh2ltrb_(bboxes)
                else:
                    result = cached_res[img['id']]
                    bboxes, scores, labels, masks = \
                        parse_det_result(result, coco_mapping, n_class)
            else:
                frame = frames[ii]
                result = inference_detector(model, frame)
                bboxes, scores, labels, masks = \
                    parse_det_result(result, coco_mapping, n_class)

            rt_this = runtime_dist.draw()
            t_finishing = t + rt_this

            timestamps.append(t_finishing)
            if results_raw is not None:
                results_raw.append(result)
            results_parsed.append((bboxes, scores, labels, masks))
            input_fidx.append(ii)
            runtime.append(rt_this)

        # since parallel execution, the order is not guaranteed
        idx = np.argsort(timestamps)
        timestamps = [timestamps[i] for i in idx]
        if results_raw is not None:
            results_raw = [results_raw[i] for i in idx]
        results_parsed = [results_parsed[i] for i in idx]
        input_fidx = [input_fidx[i] for i in idx]
        runtime = [runtime[i] for i in idx]

        out_path = join(opts.out_dir, seq + '.pkl')
        if opts.overwrite or not isfile(out_path):
            out_dict = {
                'results_parsed': results_parsed,
                'timestamps': timestamps,
                'input_fidx': input_fidx,
                'runtime': runtime,
            }
            if results_raw is not None:
                out_dict['results_raw'] = results_raw
            # context manager makes sure the file is flushed and closed
            with open(out_path, 'wb') as f:
                pickle.dump(out_dict, f)

        runtime_all += runtime
        n_processed += len(results_parsed)

    runtime_all_np = np.array(runtime_all)
    # number of sampled runtimes shorter than one frame interval
    n_small_runtime = (runtime_all_np < 1.0 / opts.fps).sum()

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        with open(out_path, 'wb') as f:
            pickle.dump(
                {
                    'runtime_all': runtime_all,
                    'n_processed': n_processed,
                    'n_total': n_total,
                    'n_small_runtime': n_small_runtime,
                }, f)

    # convert to ms for display
    s2ms = lambda x: 1e3 * x

    print(f'{n_processed}/{n_total} frames processed')
    print_stats(runtime_all_np, 'Runtime (ms)', cvt=s2ms)
    print(f'Runtime smaller than unit time interval: '
          f'{n_small_runtime}/{n_processed} '
          f'({100.0*n_small_runtime/n_processed:.4g}%)')