def run_seq(self, mot_tracker, seq):
        """
        Core function of tracking along a sequence using mot_tracker
        """
        frame = 0
        batch_time = tu.AverageMeter()
        frames_hypo = []
        frames_anno = []
        for i_f, data in tqdm(enumerate(seq), disable=not self.args.verbose):
            if i_f % self.args.skip != 0:
                continue
            frame += 1  # detection and frame numbers begin at 1

            end = time.time()
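            # One association step: update the tracker with this frame's
            # data and collect its current track hypotheses.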
            trackers = mot_tracker.update(data)
            batch_time.update(time.time() - end)

            # save gt frame annotations
            gt_anno = mot_tracker.frame_annotation
            frame_gt = {
                'timestamp': i_f,
                'num': i_f,
                'im_path': data['im_path'],
                'class': 'frame',
                'annotations': gt_anno
            }
            frames_anno.append(frame_gt)

            # save detect results
            frame_hypo = {
                'timestamp': i_f,
                'num': i_f,
                'im_path': data['im_path'],
                'class': 'frame',
                'hypotheses': trackers
            }
            frames_hypo.append(frame_hypo)

        if self.args.verbose:
            print('Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '.format(
                batch_time=batch_time))

        return frames_anno, frames_hypo

    def eval_parallel(self, seq_gt_path, seq_pd_path):
        aos_meter = tu.AverageMeter()
        dim_meter = tu.AverageMeter()
        cen_meter = tu.AverageMeter()
        dm = []

        seq_gt = [json.load(open(_l, 'r'))
                  for _l in json.load(open(seq_gt_path, 'r'))]
        seq_pd = json.load(open(seq_pd_path, 'r'))

        for i, (frame_gt, frame_pd) in enumerate(zip(seq_gt, seq_pd)):
            labels = frame_gt['labels']
            cam_calib = np.array(frame_gt['intrinsics']['cali'])
            predictions = frame_pd['prediction']
            if len(predictions) == 0 or len(labels) == 0:
                continue

            box_gt = bh.get_box2d_array(labels)
            box_pd = bh.get_box2d_array(predictions)

            # Dim: H, W, L
            dim_pd = bh.get_label_array(
                predictions, ['box3d', 'dimension'], (0, 3)).astype(float)
            dim_gt = bh.get_label_array(
                labels, ['box3d', 'dimension'], (0, 3)).astype(float)
            # Alpha: -pi ~ pi
            alpha_pd = bh.get_label_array(
                predictions, ['box3d', 'alpha'], (0)).astype(float)
            alpha_gt = bh.get_label_array(
                labels, ['box3d', 'alpha'], (0)).astype(float)
            # Location in cam coord: x-right, y-down, z-front
            loc_pd = bh.get_label_array(
                predictions, ['box3d', 'location'], (0, 3)).astype(float)
            loc_gt = bh.get_label_array(
                labels, ['box3d', 'location'], (0, 3)).astype(float)
            # Depth
            depth_pd = np.maximum(0, loc_pd[:, 2])
            depth_gt = np.maximum(0, loc_gt[:, 2])
            center_pd = bh.get_cen_array(predictions)
            center_gt = tu.cameratoimage(loc_gt, cam_calib)

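            # Hedged reading of tu.get_iou from its usage here: it appears
            # to return, for each GT box, the index of its best-overlapping
            # prediction (idx) and a validity flag for matches whose IoU
            # exceeds 0.8, so box_pd[idx] lines up row-for-row with the GT
            # arrays below.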
            if len(box_gt) > 0:
                iou, idx, valid = tu.get_iou(box_gt, box_pd[:, :4], 0.8)
            else:
                valid = np.array([False])

            if valid.any():
                # TODO: unmatched prediction and ground truth
                box_pd_v = box_pd[idx]
                alpha_pd_v = alpha_pd[idx]
                dim_pd_v = dim_pd[idx]
                depth_pd_v = depth_pd[idx]
                center_pd_v = center_pd[idx]

                aos_meter.update(np.mean(nu.compute_os(alpha_gt, alpha_pd_v)),
                                 alpha_gt.shape[0])
                dim_meter.update(np.mean(nu.compute_dim(dim_gt, dim_pd_v)),
                                 dim_gt.shape[0])
                w = (box_pd_v[:, 2:3] - box_pd_v[:, 0:1] + 1)
                h = (box_pd_v[:, 3:4] - box_pd_v[:, 1:2] + 1)
                cen_meter.update(
                    np.mean(nu.compute_cen(center_gt, center_pd_v, w, h)),
                    center_gt.shape[0])

                # Avoid zero in calculating a1, a2, a3
                mask = np.logical_and(depth_gt > self.min_depth,
                                      depth_gt < self.max_depth)
                mask = np.logical_and(mask, depth_pd_v > 0)
                if mask.any():
                    dm.append(
                        nu.compute_depth_errors(depth_gt[mask], depth_pd_v[mask]))
                else:
                    print("Not a valid depth range in GT")

        result = {'aos': aos_meter, 'dim': dim_meter, 'cen': cen_meter, 'dm': dm}
        return result
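

# tu.AverageMeter is used throughout this file for running statistics. A
# minimal sketch of the interface these call sites rely on (val/avg
# attributes and an update(val, n) method); the real implementation in tu
# may differ:
class _AverageMeterSketch(object):
    """Illustrative stand-in for tu.AverageMeter; not used by the pipeline."""

    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        # Record the latest value and fold it into the running average,
        # weighted by the number of samples it summarizes.
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
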
def test_model(model, args):
    assert args.batch_size == 1
    # get data list for tracking
    tracking_list_seq = []
    tracking_list = []
    batch_time = tu.AverageMeter()
    data_time = tu.AverageMeter()

    # resume from a checkpoint
    nu.load_checkpoint(model, args.resume, is_test=True)

    cudnn.benchmark = True

    dataset = Dataset(args.json_path, 'test', args.data_split,
                      args.set == 'kitti', args.percent, args.is_tracking,
                      args.is_normalizing, args.n_box_limit)

    print("Number of image to test: {}".format(dataset.__len__()))

    # Data loading code
    test_loader = DataLoader(dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.workers,
                             pin_memory=True,
                             drop_last=True)

    model.eval()

    end = time.time()
    for i, (image, box_info) in enumerate(test_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        end = time.time()

        with torch.no_grad():
            box_output, targets = model(image, box_info, args.device, 'test')

        batch_time.update(time.time() - end)

        (rois_gt, dim_gt_, alpha_gt_, dep_gt_, cen_gt_,
         loc_gt_, ignore_, tid_gt) = targets

        cam_calib = box_info['cam_calib'].cpu().data.numpy().reshape(3, 4)
        cam_rot = box_info['cam_rot'].cpu().data.numpy()
        cam_loc = box_info['cam_loc'].cpu().data.numpy()
        box_gt = rois_gt.cpu().data.numpy()
        box_pd = box_output['rois'].cpu().data.numpy()
        dim_gt = dim_gt_.cpu().data.numpy()
        dim_pd = box_output['dim'].cpu().data.numpy()
        alpha_gt = alpha_gt_.cpu().data.numpy()
        alpha_pd = nu.get_alpha(box_output['rot'].cpu().data.numpy())
        depth_gt = dep_gt_.cpu().data.numpy()
        depth_pd = box_output['dep'].cpu().data.numpy()
        center_gt = cen_gt_.cpu().data.numpy()
        center_pd = box_output['cen'].cpu().data.numpy()
        loc_gt = loc_gt_.cpu().data.numpy()
        loc_pd = box_output['loc'].cpu().data.numpy()

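        # Build an L2-normalized 128-d appearance embedding per box by
        # average-pooling the 7x7 ROI feature map; it is stored below,
        # presumably for appearance-based association in the tracker.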
        feature = F.normalize(
            F.avg_pool2d(box_output['feat'], (7, 7)).view(-1, 128))
        feature_np = feature.cpu().data.numpy()

        tracking_list.append({
            'im_path': box_info['im_path'],
            'endvid': box_info['endvid'].cpu().data.numpy(),
            'rois_pd': box_pd,
            'rois_gt': box_gt,
            'feature': feature_np,
            'dim_pd': dim_pd,
            'alpha_pd': alpha_pd,
            'depth_pd': depth_pd,
            'center_pd': center_pd,
            'loc_pd': loc_pd,
            'dim_gt': dim_gt,
            'alpha_gt': alpha_gt,
            'depth_gt': depth_gt,
            'center_gt': center_gt,
            'loc_gt': loc_gt,
            'cam_calib': cam_calib,
            'cam_rot': cam_rot,
            'cam_loc': cam_loc,
            'ignore': ignore_.cpu().data.numpy(),
            'tid_gt': tid_gt.cpu().data.numpy(),
        })

        # The last batch has index len(test_loader) - 1; flush the final
        # sequence there so it is not silently dropped.
        if box_info['endvid'].cpu().data.numpy().any() \
                or i == len(test_loader) - 1:
            tracking_list_seq.append(tracking_list)
            tracking_list = []

        if i % 100 == 0 and i != 0:
            print('Processed {} frames'.format(i))
        end = time.time()

    if args.track_name is None:
        trk_name = os.path.join(
            cfg.OUTPUT_PATH, '{}_{}_{}_bdd_roipool_output.pkl'.format(
                args.session,
                str(args.start_epoch).zfill(3), args.set))
    else:
        trk_name = os.path.join(cfg.OUTPUT_PATH, args.track_name)

    with open(trk_name, 'wb') as f:
        print("Saving {} with total {} sequences...".format(
            trk_name, len(tracking_list_seq)))
        pickle.dump(tracking_list_seq, f)
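

def _load_tracking_pkl_sketch(trk_name):
    # Hedged sketch (not part of the original pipeline) of how the pickle
    # written by test_model() can be read back. The outer list holds one
    # entry per video sequence; each sequence is a list of per-frame dicts
    # with keys such as 'rois_pd', 'feature', and 'cam_calib'.
    with open(trk_name, 'rb') as f:
        tracking_list_seq = pickle.load(f)
    for seq in tracking_list_seq:
        for frame_data in seq:
            _ = frame_data['rois_pd']  # predicted 2D boxes for one frame
    return tracking_list_seq
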
def val_model(args, val_loader, model, epoch, phase, logger):
    aos_meter = tu.AverageMeter()
    dim_meter = tu.AverageMeter()
    cen_meter = tu.AverageMeter()
    dm = []
    name_line = "{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, " \
                "{:>10}, {:>10}, {:>10}".format(
        'abs_rel', 'sq_rel', 'rms', 'log_rms', \
        'a1', 'a2', 'a3', 'AOS', 'DIM', 'CEN')

    for i, (image, box_info) in enumerate(val_loader):

        with torch.no_grad():
            box_output, targets = model(image, box_info, args.device, 'test')

        (rois_gt, dim_gt_, alpha_gt_, dep_gt_, cen_gt_,
         loc_gt_, ignore_, tid_gt) = targets

        box_gt = rois_gt.cpu().data.numpy()
        box_pd = box_output['rois'].cpu().data.numpy()
        dim_gt = dim_gt_.cpu().data.numpy()
        dim_pd = box_output['dim'].cpu().data.numpy()
        alpha_gt = alpha_gt_.cpu().data.numpy()
        alpha_pd = nu.get_alpha(box_output['rot'].cpu().data.numpy())
        depth_gt = dep_gt_.cpu().data.numpy()
        depth_pd = box_output['dep'].cpu().data.numpy()
        center_gt = cen_gt_.cpu().data.numpy()
        center_pd = box_output['cen'].cpu().data.numpy()

        if len(box_gt) > 0:
            iou, idx, valid = tu.get_iou(box_gt, box_pd[:, :4], 0.85)
        else:
            valid = np.array([False])
        if valid.any():
            box_pd_v = box_pd[idx]
            alpha_pd_v = alpha_pd[idx]
            dim_pd_v = dim_pd[idx]
            depth_pd_v = depth_pd[idx]
            center_pd_v = center_pd[idx]

            aos_meter.update(np.mean(nu.compute_os(alpha_gt, alpha_pd_v)),
                             alpha_gt.shape[0])
            dim_meter.update(np.mean(nu.compute_dim(dim_gt, dim_pd_v)),
                             dim_gt.shape[0])
            w = (box_pd_v[:, 2:3] - box_pd_v[:, 0:1] + 1)
            h = (box_pd_v[:, 3:4] - box_pd_v[:, 1:2] + 1)
            cen_meter.update(
                np.mean(nu.compute_cen(center_gt, center_pd_v, w, h)),
                center_gt.shape[0])

            # Avoid zero in calculating a1, a2, a3
            mask = np.logical_and(depth_gt > args.min_depth,
                                  depth_gt < args.max_depth)
            mask = np.logical_and(mask, depth_pd_v > 0)
            if mask.any():
                dm.append(
                    nu.compute_depth_errors(depth_gt[mask], depth_pd_v[mask]))
            else:
                print("Not a valid depth range in GT")

        # Guard against an empty dm (no valid depth yet), which would make
        # np.mean return NaN and the indexing below fail.
        if i % 100 == 0 and i != 0 and len(dm) > 0:
            depth_metrics = np.mean(dm, axis=0)
            data_line = ("{:10.3f}, {:10.3f}, {:10.3f}, {:10.3f}, {:10.3f}, "
                         "{:10.3f}, {:10.3f}, {:10.3f}, {:10.3f}, "
                         "{:10.3f}").format(
                depth_metrics[0].mean(), depth_metrics[1].mean(),
                depth_metrics[2].mean(), depth_metrics[3].mean(),
                depth_metrics[5].mean(), depth_metrics[6].mean(),
                depth_metrics[7].mean(),
                aos_meter.avg, dim_meter.avg, cen_meter.avg)
            print('Iteration {}'.format(i))
            print(name_line)
            print(data_line)

    print("Validation Result:")
    depth_metrics = np.mean(dm, axis=0)
    data_line = ("{:10.3f}, {:10.3f}, {:10.3f}, {:10.3f}, {:10.3f}, "
                 "{:10.3f}, {:10.3f}, {:10.3f}, {:10.3f}, "
                 "{:10.3f}").format(
        depth_metrics[0].mean(), depth_metrics[1].mean(),
        depth_metrics[2].mean(), depth_metrics[3].mean(),
        depth_metrics[5].mean(), depth_metrics[6].mean(),
        depth_metrics[7].mean(),
        aos_meter.avg, dim_meter.avg, cen_meter.avg)
    print(name_line)
    print(data_line)

    return (depth_metrics[5].mean() + aos_meter.avg + dim_meter.avg +
            cen_meter.avg)
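

def _depth_errors_sketch(gt, pd):
    # Hedged sketch of the depth metrics consumed by val_model's printing
    # code; the real implementation is nu.compute_depth_errors, which may
    # order or define them differently. Index 4 is never printed above, so
    # it is filled here with a log10 error as a placeholder guess.
    thresh = np.maximum(gt / pd, pd / gt)
    a1 = (thresh < 1.25).mean()
    a2 = (thresh < 1.25 ** 2).mean()
    a3 = (thresh < 1.25 ** 3).mean()
    abs_rel = np.mean(np.abs(gt - pd) / gt)
    sq_rel = np.mean(((gt - pd) ** 2) / gt)
    rms = np.sqrt(np.mean((gt - pd) ** 2))
    log_rms = np.sqrt(np.mean((np.log(gt) - np.log(pd)) ** 2))
    log10 = np.mean(np.abs(np.log10(gt) - np.log10(pd)))
    return abs_rel, sq_rel, rms, log_rms, log10, a1, a2, a3
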
def train_model(args, train_loader, model, optimizer, epoch, phase, logger):
    batch_time = tu.AverageMeter()
    data_time = tu.AverageMeter()
    losses = tu.AverageMeter()
    losses_dim = tu.AverageMeter()
    losses_rot = tu.AverageMeter()
    losses_dep = tu.AverageMeter()
    losses_cen = tu.AverageMeter()
    data_size = len(train_loader)

    end = time.time()
    for i, (image, box_info) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        end = time.time()

        optimizer.zero_grad()

        # track history only when in the train phase
        _, losses_ = model(image, box_info, args.device, phase)

        loss_dim = torch.mean(losses_[0])
        loss_rot = torch.mean(losses_[1])
        loss_dep = torch.mean(losses_[2])
        loss_cen = torch.mean(losses_[3])

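        # Weighted sum of the four heads: only the depth term is scaled
        # (by args.depth_weight); the others enter with unit weight.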
        loss = loss_dim + loss_rot + loss_dep * args.depth_weight + loss_cen

        # record per-head and total loss values
        losses.update(loss.cpu().data.numpy().item())
        losses_dim.update(loss_dim.cpu().data.numpy().item())
        losses_rot.update(loss_rot.cpu().data.numpy().item())
        losses_dep.update(loss_dep.cpu().data.numpy().item())
        losses_cen.update(loss_cen.cpu().data.numpy().item())

        # compute gradient and do SGD step
        loss.backward()
        optimizer.step()

        if args.use_tfboard:
            loss_info = {
                'L_all': losses.avg,
                'L_dim': losses_dim.avg,
                'L_rot': losses_rot.avg,
                'L_dep': losses_dep.avg,
                'L_cen': losses_cen.avg,
            }
            logger.add_scalars("loss_{}/".format(args.session), loss_info,
                               epoch * data_size + i)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('[{NAME} - {SESS} - {PHASE}][{EP}][{IT}/{TO}] '
                  'Time {batch_time.val:.2f} ({batch_time.avg:.2f}) '
                  'Data {data_time.val:.2f} ({data_time.avg:.2f}) '
                  'Loss {loss.val:.3f} ({loss.avg:.3f}) '
                  'Dim {dim.val:.3f} ({dim.avg:.3f}) '
                  'Alpha {alpha.val:.3f} ({alpha.avg:.3f}) '
                  'Depth {depth.val:.3f} ({depth.avg:.3f}) '
                  'Center {center.val:.3f} ({center.avg:.3f}) '.format(
                      NAME=args.set.upper(),
                      PHASE=phase,
                      SESS=args.session,
                      EP=epoch,
                      IT=i,
                      TO=data_size,
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      dim=losses_dim,
                      alpha=losses_rot,
                      depth=losses_dep,
                      center=losses_cen))
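

def _train_val_loop_sketch(args, model, optimizer, train_loader, val_loader,
                           logger):
    # Hedged usage sketch of how train_model and val_model pair up in an
    # epoch loop; args.num_epochs and the checkpointing step are assumptions
    # here, not names taken from this repo.
    best_score = float('-inf')
    for epoch in range(args.start_epoch, args.num_epochs):
        train_model(args, train_loader, model, optimizer, epoch, 'train',
                    logger)
        score = val_model(args, val_loader, model, epoch, 'val', logger)
        if score > best_score:
            best_score = score
            # e.g. save a checkpoint here via the repo's nu helpers
    return best_score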