Exemplo n.º 1
0
def main():
    opts = parse_args()

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = None if opts.no_class_mapping else db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    if opts.cached_res:
        cache_in_ccf = '_ccf' in basename(opts.cached_res)
        if cache_in_ccf:
            # speed up based on the assumption of sequential storage
            cache_end_idx = 0
        cached_res = pickle.load(open(opts.cached_res, 'rb'))
    else:
        assert torch.cuda.device_count() == 1 # mmdet only supports single GPU testing
        model = init_detector(opts)

    np.random.seed(opts.seed)
    runtime = pickle.load(open(opts.runtime, 'rb'))
    runtime_dist = dist_from_dict(runtime, opts.perf_factor)

    runtime_all = []
    n_processed = 0
    n_total = 0

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
        n_frame = len(frame_list)
        n_total += n_frame

        if not opts.cached_res:
            # load all frames in advance
            frames = []
            for img in frame_list:
                img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                frames.append(imread(img_path))
        
        timestamps = []
        results_parsed = []
        input_fidx = []
        runtime = []
        last_fidx = None
        if opts.cached_res and cache_in_ccf:
            results_raw = None
        else:
            results_raw = []
        
        t_total = n_frame/opts.fps
        t_elapsed = 0
        if opts.dynamic_schedule:
            mean_rtf = runtime_dist.mean()*opts.fps
        else:
            stride_cnt = 0

        while 1:
            if t_elapsed >= t_total:
                break

            # identify latest available frame
            fidx_continous = t_elapsed*opts.fps
            fidx = int(np.floor(fidx_continous))
            if fidx == last_fidx:
                # algorithm is fast and has some idle time
                fidx += 1
                if fidx == n_frame:
                    break
                t_elapsed = fidx/opts.fps
                
            last_fidx = fidx

            if opts.dynamic_schedule:
                if mean_rtf > 1:
                    # when runtime <= 1, it should always process every frame
                    fidx_remainder = fidx_continous - fidx
                    if mean_rtf < np.floor(fidx_remainder + mean_rtf):
                        # wait till next frame
                        continue
            else:
                if stride_cnt % opts.det_stride == 0:
                    stride_cnt = 1
                else:
                    stride_cnt += 1
                    continue

            if opts.cached_res:
                img = frame_list[fidx]
                if cache_in_ccf:
                    cache_end_idx, bboxes, scores, labels, masks = \
                        result_from_ccf(cached_res, img['id'], cache_end_idx)
                    ltwh2ltrb_(bboxes)
                else:
                    result = cached_res[img['id']]
                    bboxes, scores, labels, masks = \
                        parse_det_result(result, coco_mapping, n_class)
            else:
                frame = frames[fidx]
                result = inference_detector(model, frame)
                bboxes, scores, labels, masks = \
                    parse_det_result(result, coco_mapping, n_class)

            rt_this = runtime_dist.draw()
            t_elapsed += rt_this
            if t_elapsed >= t_total:
                break
            
            timestamps.append(t_elapsed)
            if results_raw is not None:
                results_raw.append(result)
            results_parsed.append((bboxes, scores, labels, masks))
            input_fidx.append(fidx)
            runtime.append(rt_this)

        out_path = join(opts.out_dir, seq + '.pkl')
        if opts.overwrite or not isfile(out_path):
            out_dict = {
                'results_parsed': results_parsed,
                'timestamps': timestamps,
                'input_fidx': input_fidx,
                'runtime': runtime,
            }
            if results_raw is not None:
                out_dict['results_raw'] = results_raw
            pickle.dump(out_dict, open(out_path, 'wb'))

        runtime_all += runtime
        n_processed += len(results_parsed)

    runtime_all_np = np.array(runtime_all)
    n_small_runtime = (runtime_all_np < 1.0/opts.fps).sum()

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump({
            'runtime_all': runtime_all,
            'n_processed': n_processed,
            'n_total': n_total,
            'n_small_runtime': n_small_runtime,
        }, open(out_path, 'wb'))  

    # convert to ms for display
    s2ms = lambda x: 1e3*x

    print(f'{n_processed}/{n_total} frames processed')
    print_stats(runtime_all_np, 'Runtime (ms)', cvt=s2ms)
    print(f'Runtime smaller than unit time interval: '
        f'{n_small_runtime}/{n_processed} '
        f'({100.0*n_small_runtime/n_processed:.4g}%)')
Exemplo n.º 2
0
def main():
    opts = parse_args()
    assert opts.forecast_before_assoc, "Not implemented"

    mkdir2(opts.out_dir)
    vis_out = bool(opts.vis_dir)
    if vis_out:
        mkdir2(opts.vis_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    results_ccf = []
    in_time = 0
    miss = 0
    shifts = 0

    given_tracks = opts.assoc == 'given'
    assert not given_tracks, "Not implemented"

    t_assoc = []
    t_forecast = []
    # tt = []

    with torch.no_grad():
        kf_F = torch.eye(8)
        # kf_F[3, 7] = 1
        kf_Q = torch.eye(8)
        kf_R = 10 * torch.eye(4)
        kf_P_init = 100 * torch.eye(8).unsqueeze(0)

        for sid, seq in enumerate(tqdm(seqs)):
            frame_list = [img for img in db.imgs.values() if img['sid'] == sid]

            results = pickle.load(open(join(opts.in_dir, seq + '.pkl'), 'rb'))
            # use raw results when possible in case we change class subset during evaluation
            results_raw = results.get('results_raw', None)
            if results_raw is None:
                results_parsed = results['results_parsed']
            timestamps = results['timestamps']
            input_fidx = results['input_fidx']

            # t1 -> det1, t2 -> det2, interpolate at t3 (t3 is the current time)
            det_latest_p1 = 0  # latest detection index + 1
            det_t2 = None  # detection index at t2
            kf_x = torch.empty((0, 8, 1))
            kf_P = torch.empty((0, 8, 8))
            n_matched12 = 0

            if not given_tracks:
                tkidx = 0  # track starting index

            for ii, img in enumerate(frame_list):
                # pred, gt association by time
                t = (ii - opts.eta) / opts.fps
                while det_latest_p1 < len(
                        timestamps) and timestamps[det_latest_p1] <= t:
                    det_latest_p1 += 1
                if det_latest_p1 == 0:
                    # no detection output
                    miss += 1
                    bboxes_t3, scores, labels, tracks = [], [], [], []
                else:
                    det_latest = det_latest_p1 - 1
                    ifidx = input_fidx[det_latest]
                    in_time += int(ii == ifidx)
                    shifts += ii - ifidx

                    if det_latest != det_t2:
                        # new detection
                        # we can now throw away old result (t1)
                        # the old one is kept for forecasting purpose

                        if len(kf_x) and opts.forecast_before_assoc:
                            dt = ifidx - input_fidx[det_t2]
                            dt = int(
                                dt
                            )  # convert from numpy to basic python format
                            w_img, h_img = img['width'], img['height']

                            kf_F = make_F(kf_F, dt)
                            kf_Q = make_Q(kf_Q, dt)

                            kf_x, kf_P = batch_kf_predict(
                                kf_F, kf_x, kf_P, kf_Q)
                            bboxes_f = x2bbox(kf_x)

                        det_t2 = det_latest
                        if results_raw is None:
                            result_t2 = results_parsed[det_t2]
                            bboxes_t2, scores_t2, labels_t2, _ = result_t2[:4]
                            if not given_tracks and len(result_t2) > 4:
                                tracks_t2 = np.asarray(result_t2[4])
                            else:
                                tracks_t2 = np.empty((0, ), np.uint32)
                        else:
                            result_t2 = results_raw[det_t2]
                            bboxes_t2, scores_t2, labels_t2, _ = \
                                parse_det_result(result_t2, coco_mapping, n_class)

                        t1 = perf_counter()
                        n = len(bboxes_t2)
                        if n:
                            if not given_tracks:
                                # put high scores det first for better iou matching
                                score_argsort = np.argsort(scores_t2)[::-1]
                                bboxes_t2 = bboxes_t2[score_argsort]
                                scores_t2 = scores_t2[score_argsort]
                                labels_t2 = labels_t2[score_argsort]

                            ltrb2ltwh_(bboxes_t2)

                            if given_tracks:
                                raise NotImplementedError
                                order1, order2, n_matched12 = track_based_shuffle(
                                    tracks, tracks_t2, no_unmatched1=True)
                                tracks = tracks[order2]
                            else:
                                updated = False
                                if len(kf_x):
                                    order1, order2, n_matched12, tracks, tkidx = iou_assoc(
                                        bboxes_f,
                                        labels,
                                        tracks,
                                        tkidx,
                                        bboxes_t2,
                                        labels_t2,
                                        opts.match_iou_th,
                                        no_unmatched1=True,
                                    )

                                    if n_matched12:
                                        kf_x = kf_x[order1]
                                        kf_P = kf_P[order1]
                                        kf_x, kf_P = batch_kf_update(
                                            bbox2z(bboxes_t2[
                                                order2[:n_matched12]]),
                                            kf_x,
                                            kf_P,
                                            kf_R,
                                        )

                                        kf_x_new = bbox2x(
                                            bboxes_t2[order2[n_matched12:]])
                                        n_unmatched2 = len(
                                            bboxes_t2) - n_matched12
                                        kf_P_new = kf_P_init.expand(
                                            n_unmatched2, -1, -1)
                                        kf_x = torch.cat((kf_x, kf_x_new))
                                        kf_P = torch.cat((kf_P, kf_P_new))
                                        labels = labels_t2[order2]
                                        scores = scores_t2[order2]
                                        updated = True

                                if not updated:
                                    # start from scratch
                                    kf_x = bbox2x(bboxes_t2)
                                    kf_P = kf_P_init.expand(
                                        len(bboxes_t2), -1, -1)
                                    labels = labels_t2
                                    scores = scores_t2
                                    if not given_tracks:
                                        tracks = np.arange(tkidx,
                                                           tkidx + n,
                                                           dtype=np.uint32)
                                        tkidx += n

                            t2 = perf_counter()
                            t_assoc.append(t2 - t1)

                    t3 = perf_counter()
                    if len(kf_x):
                        dt = ii - ifidx
                        w_img, h_img = img['width'], img['height']

                        # PyTorch small matrix multiplication is slow
                        # use numpy instead

                        # kf_F = make_F(kf_F, dt)
                        # bboxes_t3 = x2bbox(batch_kf_predict_only(kf_F, kf_x))
                        # t5 = perf_counter()
                        kf_x_np = kf_x[:, :, 0].numpy()
                        # t6 = perf_counter()
                        bboxes_t3 = kf_x_np[:n_matched12, :
                                            4] + dt * kf_x_np[:n_matched12, 4:]
                        if n_matched12 < len(kf_x):
                            bboxes_t3 = np.concatenate(
                                (bboxes_t3, kf_x_np[n_matched12:, :4]))

                        bboxes_t3, keep = extrap_clean_up(bboxes_t3,
                                                          w_img,
                                                          h_img,
                                                          lt=True)
                        # torch_keep = torch.from_numpy(keep) # boolean mask
                        # kf_x = kf_x[torch_keep]
                        # kf_P = kf_P[torch_keep]
                        # labels = labels[keep]
                        # scores = scores[keep]
                        # tracks = tracks[keep]

                        labels_t3 = labels[keep]
                        scores_t3 = scores[keep]
                        tracks_t3 = tracks[keep]
                        # tt.append(t6 - t5)

                    t4 = perf_counter()
                    t_forecast.append(t4 - t3)

                n = len(bboxes_t3)
                for i in range(n):
                    result_dict = {
                        'image_id': img['id'],
                        'bbox': bboxes_t3[i],
                        'score': scores_t3[i],
                        'category_id': labels_t3[i],
                    }
                    results_ccf.append(result_dict)

                if vis_out:
                    img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                    I = imread(img_path)
                    vis_path = join(opts.vis_dir, seq,
                                    img['name'][:-3] + 'jpg')

                    bboxes = bboxes_t3.copy()
                    if n:
                        ltwh2ltrb_(bboxes)
                    if opts.overwrite or not isfile(vis_path):
                        vis_track(
                            I,
                            bboxes,
                            tracks,
                            labels,
                            class_names,
                            None,
                            scores,
                            out_scale=opts.vis_scale,
                            out_file=vis_path,
                        )

    # pr.disable()
    # pr.dump_stats('pps_iou_lin_extrap.prof')

    # pickle.dump([t_assoc, t_forecast], open('pps_iou_lin_forecast_time', 'wb'))
    s2ms = lambda x: 1e3 * x
    if len(t_assoc):
        print_stats(t_assoc, "RT association (ms)", cvt=s2ms)
    if len(t_forecast):
        print_stats(t_forecast, "RT forecasting (ms)", cvt=s2ms)
    # if len(tt):
    #     print_stats(tt, "RT forecasting2 (ms)", cvt=s2ms)

    out_path = join(opts.out_dir, 'results_ccf.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(results_ccf, open(out_path, 'wb'))

    if not opts.no_eval:
        eval_summary = eval_ccf(db, results_ccf)
        out_path = join(opts.out_dir, 'eval_summary.pkl')
        if opts.overwrite or not isfile(out_path):
            pickle.dump(eval_summary, open(out_path, 'wb'))

    if vis_out:
        print(f'python vis/make_videos.py "{opts.vis_dir}" --fps {opts.fps}')
Exemplo n.º 3
0
def main():
    assert torch.cuda.device_count() == 1 # mmdet only supports single GPU testing

    opts = parse_args()
    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    img = db.imgs[0]
    w_img, h_img = img['width'], img['height']

    mp.set_start_method('spawn')
    frame_recv, frame_send = mp.Pipe(False)
    det_res_recv, det_res_send = mp.Pipe(False)
    det_proc = mp.Process(target=det_process, args=(opts, frame_recv, det_res_send, w_img, h_img))
    det_proc.start()

    if opts.dynamic_schedule:
        runtime = pickle.load(open(opts.runtime, 'rb'))
        runtime_dist = dist_from_dict(runtime, opts.perf_factor)
        mean_rtf = runtime_dist.mean()*opts.fps

    n_total = 0
    t_det_all = []
    t_send_frame_all = []
    t_recv_res_all = []
    t_assoc_all = []
    t_forecast_all = []

    with torch.no_grad():
        kf_F = torch.eye(8)
        kf_Q = torch.eye(8)
        kf_R = 10*torch.eye(4)
        kf_P_init = 100*torch.eye(8).unsqueeze(0)

        for sid, seq in enumerate(tqdm(seqs)):
            frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
            frame_list = [join(opts.data_root, seq_dirs[sid], img['name']) for img in frame_list]
            n_frame = len(frame_list)
            n_total += n_frame
            
            timestamps = []
            results_parsed = []
            input_fidx = []
            
            processing = False
            fidx_t2 = None            # detection input index at t2
            fidx_latest = None
            tkidx = 0                 # track starting index
            kf_x = torch.empty((0, 8, 1))
            kf_P = torch.empty((0, 8, 8))
            n_matched12 = 0

            # let detector process to read all the frames
            frame_send.send(frame_list)
            # it is possible that unfetched results remain in the pipe
            while 1:
                msg = det_res_recv.recv() # wait till the detector is ready
                if msg == 'ready':
                    break
                elif isinstance(msg, Exception):
                    raise msg

            t_total = n_frame/opts.fps
            t_unit = 1/opts.fps
            t_start = perf_counter()
            while 1:
                t1 = perf_counter()
                t_elapsed = t1 - t_start
                if t_elapsed >= t_total:
                    break

                # identify latest available frame
                fidx_continous = t_elapsed*opts.fps
                fidx = int(np.floor(fidx_continous))
                if fidx == fidx_latest:
                    # algorithm is fast and has some idle time
                    wait_for_next = True
                else:
                    wait_for_next = False
                    if opts.dynamic_schedule:
                        if mean_rtf >= 1:
                            # when runtime < 1, it should always process every frame
                            fidx_remainder = fidx_continous - fidx
                            if mean_rtf < np.floor(fidx_remainder + mean_rtf):
                                # wait till next frame
                                wait_for_next = True

                if wait_for_next:
                    # sleep
                    continue

                if not processing:
                    t_start_frame = perf_counter()
                    frame_send.send((fidx, t_start_frame))
                    fidx_latest = fidx
                    processing = True
  
                # wait till query - forecast-rt-ub
                wait_time = t_unit - opts.forecast_rt_ub
                if det_res_recv.poll(wait_time): # wait
                    # new result
                    result = det_res_recv.recv() 
                    if isinstance(result, Exception):
                        raise result
                    result, t_send_frame, t_start_res = result
                    bboxes_t2, scores_t2, labels_t2, _ = \
                        parse_det_result(result, coco_mapping, n_class)
                    processing = False
                    t_det_end = perf_counter()
                    t_det_all.append(t_det_end - t_start_frame)
                    t_send_frame_all.append(t_send_frame)
                    t_recv_res_all.append(t_det_end - t_start_res)

                    # associate across frames
                    t_assoc_start = perf_counter()
                    if len(kf_x):
                        dt = fidx_latest - fidx_t2

                        kf_F = make_F(kf_F, dt)
                        kf_Q = make_Q(kf_Q, dt)
                        kf_x, kf_P = batch_kf_predict(kf_F, kf_x, kf_P, kf_Q)
                        bboxes_f = x2bbox(kf_x)
                                        
                    fidx_t2 = fidx_latest

                    n = len(bboxes_t2)
                    if n:
                        # put high scores det first for better iou matching
                        score_argsort = np.argsort(scores_t2)[::-1]
                        bboxes_t2 = bboxes_t2[score_argsort]
                        scores_t2 = scores_t2[score_argsort]
                        labels_t2 = labels_t2[score_argsort]

                        ltrb2ltwh_(bboxes_t2)

                    updated = False
                    if len(kf_x):
                        order1, order2, n_matched12, tracks, tkidx = iou_assoc(
                            bboxes_f, labels, tracks, tkidx,
                            bboxes_t2, labels_t2, opts.match_iou_th,
                            no_unmatched1=True,
                        )

                        if n_matched12:
                            kf_x = kf_x[order1]
                            kf_P = kf_P[order1]
                            kf_x, kf_P = batch_kf_update(
                                bbox2z(bboxes_t2[order2[:n_matched12]]),
                                kf_x,
                                kf_P,
                                kf_R,
                            )
                    
                            kf_x_new = bbox2x(bboxes_t2[order2[n_matched12:]])
                            n_unmatched2 = len(bboxes_t2) - n_matched12
                            kf_P_new = kf_P_init.expand(n_unmatched2, -1, -1)
                            kf_x = torch.cat((kf_x, kf_x_new))
                            kf_P = torch.cat((kf_P, kf_P_new))
                            labels = labels_t2[order2]
                            scores = scores_t2[order2]
                            updated = True

                    if not updated:
                        # start from scratch
                        kf_x = bbox2x(bboxes_t2)
                        kf_P = kf_P_init.expand(len(bboxes_t2), -1, -1)
                        labels = labels_t2
                        scores = scores_t2
                        tracks = np.arange(tkidx, tkidx + n, dtype=np.uint32)
                        tkidx += n

                    t_assoc_end = perf_counter()
                    t_assoc_all.append(t_assoc_end - t_assoc_start)

                # apply forecasting for the current query
                t_forecast_start = perf_counter()
                query_pointer = fidx + opts.eta + 1
                
                if len(kf_x):
                    dt = (query_pointer - fidx_t2)

                    kf_x_np = kf_x[:, :, 0].numpy()
                    bboxes_t3 = kf_x_np[:n_matched12, :4] + dt*kf_x_np[:n_matched12, 4:]
                    if n_matched12 < len(kf_x):
                        bboxes_t3 = np.concatenate((bboxes_t3, kf_x_np[n_matched12:, :4]))
                        
                    bboxes_t3, keep = extrap_clean_up(bboxes_t3, w_img, h_img, lt=True)
                    labels_t3 = labels[keep]
                    scores_t3 = scores[keep]
                    tracks_t3 = tracks[keep]

                else:
                    bboxes_t3 = np.empty((0, 4), dtype=np.float32)
                    scores_t3 = np.empty((0,), dtype=np.float32)
                    labels_t3 = np.empty((0,), dtype=np.int32)
                    tracks_t3 = np.empty((0,), dtype=np.int32)

                t_forecast_end = perf_counter()
                t_forecast_all.append(t_forecast_end - t_forecast_start)
                
                t3 = perf_counter()
                t_elapsed = t3 - t_start
                if t_elapsed >= t_total:
                    break

                if len(bboxes_t3):
                    ltwh2ltrb_(bboxes_t3)
                if fidx_t2 is not None:
                    timestamps.append(t_elapsed)
                    results_parsed.append((bboxes_t3, scores_t3, labels_t3, None, tracks_t3))
                    input_fidx.append(fidx_t2)

            out_path = join(opts.out_dir, seq + '.pkl')
            if opts.overwrite or not isfile(out_path):
                pickle.dump({
                    'results_parsed': results_parsed,
                    'timestamps': timestamps,
                    'input_fidx': input_fidx,
                }, open(out_path, 'wb'))

    # terminates the child process
    frame_send.send(None)

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump({
            'n_total': n_total,
            't_det': t_det_all,
            't_send_frame': t_send_frame_all,
            't_recv_res': t_recv_res_all,
            't_assoc': t_assoc_all,
            't_forecast': t_forecast_all,
        }, open(out_path, 'wb'))
 
    # convert to ms for display
    s2ms = lambda x: 1e3*x
    print_stats(t_det_all, 'Runtime detection (ms)', cvt=s2ms)
    print_stats(t_send_frame_all, 'Runtime sending the frame (ms)', cvt=s2ms)
    print_stats(t_recv_res_all, 'Runtime receiving the result (ms)', cvt=s2ms)
    print_stats(t_assoc_all, "Runtime association (ms)", cvt=s2ms)
    print_stats(t_forecast_all, "Runtime forecasting (ms)", cvt=s2ms)
def main():
    opts = parse_args()

    mkdir2(opts.out_dir)

    db = COCO(opts.annot_path)
    class_names = [c['name'] for c in db.dataset['categories']]
    n_class = len(class_names)
    coco_mapping = db.dataset.get('coco_mapping', None)
    if coco_mapping is not None:
        coco_mapping = np.asarray(coco_mapping)
    seqs = db.dataset['sequences']
    seq_dirs = db.dataset['seq_dirs']

    if opts.cached_res:
        cache_in_ccf = '_ccf' in basename(opts.cached_res)
        if cache_in_ccf:
            # speed up based on the assumption of sequential storage
            cache_end_idx = 0
        cached_res = pickle.load(open(opts.cached_res, 'rb'))
    else:
        assert torch.cuda.device_count(
        ) == 1  # mmdet only supports single GPU testing
        model = init_detector(opts)

    np.random.seed(opts.seed)
    runtime = pickle.load(open(opts.runtime, 'rb'))
    runtime_dist = dist_from_dict(runtime, opts.perf_factor)

    runtime_all = []
    n_processed = 0
    n_total = 0

    for sid, seq in enumerate(tqdm(seqs)):
        frame_list = [img for img in db.imgs.values() if img['sid'] == sid]
        n_frame = len(frame_list)
        n_total += n_frame

        if not opts.cached_res:
            # load all frames in advance
            frames = []
            for img in frame_list:
                img_path = join(opts.data_root, seq_dirs[sid], img['name'])
                frames.append(imread(img_path))

        timestamps = []
        results_raw = []
        results_parsed = []
        input_fidx = []
        runtime = []
        if opts.cached_res and cache_in_ccf:
            results_raw = None
        else:
            results_raw = []

        for ii in range(n_frame):
            t = ii / opts.fps

            if opts.cached_res:
                img = frame_list[ii]
                if cache_in_ccf:
                    cache_end_idx, bboxes, scores, labels, masks = \
                        result_from_ccf(cached_res, img['id'], cache_end_idx)
                    ltwh2ltrb_(bboxes)
                else:
                    result = cached_res[img['id']]
                    bboxes, scores, labels, masks = \
                        parse_det_result(result, coco_mapping, n_class)
            else:
                frame = frames[ii]
                result = inference_detector(model, frame)
                bboxes, scores, labels, masks = \
                    parse_det_result(result, coco_mapping, n_class)

            rt_this = runtime_dist.draw()
            t_finishing = t + rt_this

            timestamps.append(t_finishing)
            if results_raw is not None:
                results_raw.append(result)
            results_parsed.append((bboxes, scores, labels, masks))
            input_fidx.append(ii)
            runtime.append(rt_this)

        # since parallel excecution, the order is not guaranteed
        idx = np.argsort(timestamps)
        timestamps = [timestamps[i] for i in idx]
        if results_raw is not None:
            results_raw = [results_raw[i] for i in idx]
        results_parsed = [results_parsed[i] for i in idx]
        input_fidx = [input_fidx[i] for i in idx]
        runtime = [runtime[i] for i in idx]

        out_path = join(opts.out_dir, seq + '.pkl')
        if opts.overwrite or not isfile(out_path):
            out_dict = {
                'results_parsed': results_parsed,
                'timestamps': timestamps,
                'input_fidx': input_fidx,
                'runtime': runtime,
            }
            if results_raw is not None:
                out_dict['results_raw'] = results_raw
            pickle.dump(out_dict, open(out_path, 'wb'))

        runtime_all += runtime
        n_processed += len(results_parsed)

    runtime_all_np = np.array(runtime_all)
    n_small_runtime = (runtime_all_np < 1.0 / opts.fps).sum()

    out_path = join(opts.out_dir, 'time_info.pkl')
    if opts.overwrite or not isfile(out_path):
        pickle.dump(
            {
                'runtime_all': runtime_all,
                'n_processed': n_processed,
                'n_total': n_total,
                'n_small_runtime': n_small_runtime,
            }, open(out_path, 'wb'))

    # convert to ms for display
    s2ms = lambda x: 1e3 * x

    print(f'{n_processed}/{n_total} frames processed')
    print_stats(runtime_all_np, 'Runtime (ms)', cvt=s2ms)
    print(f'Runtime smaller than unit time interval: '
          f'{n_small_runtime}/{n_processed} '
          f'({100.0*n_small_runtime/n_processed:.4g}%)')