Example #1
def initLatent(loader, model, Y, nViews, S, AVG = False):
  model.eval()
  nIters = len(loader)
  N = loader.dataset.nImages 
  M = np.zeros((N, ref.J, 3))
  bar = Bar('==>', max=nIters)
  sum_sigma2 = 0
  cnt_sigma2 = 1
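  # sum_sigma2 / cnt_sigma2 tracks the running mean pairwise distance reported as 'Dis' in the progress bar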
  for i, (input, target, meta) in enumerate(loader):
    output = (model(torch.autograd.Variable(input)).data).cpu().numpy()
    G = output.shape[0] // nViews  # integer division so the reshape below gets an int group count
    output = output.reshape(G, nViews, ref.J, 3)
    if AVG:
      for g in range(G):
        id = int(meta[g * nViews, 1])
        for j in range(nViews):
          RR, tt = horn87(output[g, j].transpose(), output[g, 0].transpose())
          MM = (np.dot(RR, output[g, j].transpose())).transpose().copy()
          M[id] += MM.copy() / nViews
    else:
      for g in range(G):
        #assert meta[g * nViews, 0] > 1 + ref.eps
        p = np.zeros(nViews)
        sigma2 = 0.1
        for j in range(nViews):
          for kk in range(Y.shape[0] // S):
            k = kk * S
            d = Dis(Y[k], output[g, j])
            sum_sigma2 += d 
            cnt_sigma2 += 1
            p[j] += np.exp(- d / 2 / sigma2)
            
        id = int(meta[g * nViews, 1])
        M[id] = output[g, p.argmax()]
        
        if DEBUG and g == 0:
          print('M[id]', id, M[id], p.argmax())
          debugger = Debugger()
          for j in range(nViews):
            RR, tt = horn87(output[g, j].transpose(), output[g, p.argmax()].transpose())
            MM = (np.dot(RR, output[g, j].transpose())).transpose().copy()
            debugger.addPoint3D(MM, 'b')
            debugger.addImg(input[g * nViews + j].numpy().transpose(1, 2, 0), j)
          debugger.showAllImg()
          debugger.addPoint3D(M[id], 'r')
          debugger.show3D()
        
    
    Bar.suffix = 'Init    : [{0:3}/{1:3}] | Total: {total:} | ETA: {eta:} | Dis: {dis:.6f}'.format(i, nIters, total=bar.elapsed_td, eta=bar.eta_td, dis = sum_sigma2 / cnt_sigma2)
    bar.next()
  bar.finish()
  #print 'mean sigma2', sum_sigma2 / cnt_sigma2
  return M
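
Both initLatent and stepLatent (Example #14) call a Dis(A, B) helper that is not shown in these snippets. A minimal sketch of such a rotation-invariant squared distance, assuming horn87 returns the aligning rotation and translation exactly as in the calls above, might look like:

def Dis(A, B):
  # hypothetical helper: squared distance between two J x 3 keypoint sets
  # after rotating A onto B, mirroring the horn87 alignment used in initLatent
  R, t = horn87(A.transpose(), B.transpose())
  A_aligned = np.dot(R, A.transpose()).transpose()
  return ((A_aligned - B) ** 2).sum()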
Example #2
def main():
    opt = opts().parse()
    if opt.loadModel == '':
        opt.loadModel = '../models/Pascal3D-cpu.pth'
    model = torch.load(opt.loadModel)
    img = cv2.imread(opt.demo)
    s = max(img.shape[0], img.shape[1]) * 1.0
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.])
    img = Crop(img, c, s, 0, ref.inputRes).astype(np.float32).transpose(
        2, 0, 1) / 256.
    input = torch.from_numpy(img.copy()).float()
    input = input.view(1, input.size(0), input.size(1), input.size(2))
    input_var = torch.autograd.Variable(input).float()
    if opt.GPU > -1:
        model = model.cuda(opt.GPU)
        input_var = input_var.cuda(opt.GPU)

    output = model(input_var)
    hm = output[-1].data.cpu().numpy()

    debugger = Debugger()
    img = (input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8).copy()
    inp = img.copy()
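    # 'star': up-sample the first heat-map channel to the input resolution and alpha-blend it onto the image for visualization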
    star = (cv2.resize(hm[0, 0], (ref.inputRes, ref.inputRes)) * 255)
    star[star > 255] = 255
    star[star < 0] = 0
    star = np.tile(star, (3, 1, 1)).transpose(1, 2, 0)
    trans = 0.8
    star = (trans * star + (1. - trans) * img).astype(np.uint8)

    ps = parseHeatmap(hm[0], thresh=0.1)
    canonical, pred, color, score = [], [], [], []
    for k in range(len(ps[0])):
        x, y, z = ((hm[0, 1:4, ps[0][k], ps[1][k]] + 0.5) *
                   ref.outputRes).astype(np.int32)
        dep = ((hm[0, 4, ps[0][k], ps[1][k]] + 0.5) * ref.outputRes).astype(
            np.int32)
        canonical.append([x, y, z])
        pred.append([ps[1][k], ref.outputRes - dep, ref.outputRes - ps[0][k]])
        score.append(hm[0, 0, ps[0][k], ps[1][k]])
        color.append((1.0 * x / ref.outputRes, 1.0 * y / ref.outputRes,
                      1.0 * z / ref.outputRes))
        cv2.circle(img, (ps[1][k] * 4, ps[0][k] * 4), 4, (255, 255, 255), -1)
        cv2.circle(img, (ps[1][k] * 4, ps[0][k] * 4), 2,
                   (int(z * 4), int(y * 4), int(x * 4)), -1)

    pred = np.array(pred).astype(np.float32)
    canonical = np.array(canonical).astype(np.float32)

    pointS = canonical * 1.0 / ref.outputRes
    pointT = pred * 1.0 / ref.outputRes
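    # horn87 presumably solves Horn's (1987) absolute-orientation problem: the rotation R, translation t
    # and scale s that best align the canonical keypoints to the predictions, with 'score' as per-point weights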
    R, t, s = horn87(pointS.transpose(), pointT.transpose(), score)

    rotated_pred = s * np.dot(
        R, canonical.transpose()).transpose() + t * ref.outputRes

    debugger.addImg(inp, 'inp')
    debugger.addImg(star, 'star')
    debugger.addImg(img, 'nms')
    debugger.addPoint3D(canonical / ref.outputRes - 0.5, c=color, marker='^')
    debugger.addPoint3D(pred / ref.outputRes - 0.5, c=color, marker='x')
    debugger.addPoint3D(rotated_pred / ref.outputRes - 0.5,
                        c=color,
                        marker='*')

    debugger.showAllImg(pause=True)
    debugger.show3D()
Example #3
    def debug(self, batch, output, iter_id, dataset):
        opt = self.opt
        if 'pre_hm' in batch:
            output.update({'pre_hm': batch['pre_hm']})
        dets = generic_decode(output, K=opt.K, opt=opt)
        for k in dets:
            dets[k] = dets[k].detach().cpu().numpy()
        dets_gt = batch['meta']['gt_det']
        for i in range(1):
            debugger = Debugger(opt=opt, dataset=dataset)
            img = batch['image'][i].detach().cpu().numpy().transpose(1, 2, 0)
            img = np.clip(((img * dataset.std + dataset.mean) * 255.), 0,
                          255).astype(np.uint8)
            pred = debugger.gen_colormap(
                output['hm'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hm')
            debugger.add_blend_img(img, gt, 'gt_hm')

            if 'pre_img' in batch:
                pre_img = batch['pre_img'][i].detach().cpu().numpy().transpose(
                    1, 2, 0)
                pre_img = np.clip(
                    ((pre_img * dataset.std + dataset.mean) * 255), 0,
                    255).astype(np.uint8)
                debugger.add_img(pre_img, 'pre_img_pred')
                debugger.add_img(pre_img, 'pre_img_gt')
                if 'pre_hm' in batch:
                    pre_hm = debugger.gen_colormap(
                        batch['pre_hm'][i].detach().cpu().numpy())
                    debugger.add_blend_img(pre_img, pre_hm, 'pre_hm')

            debugger.add_img(img, img_id='out_pred')
            if 'ltrb_amodal' in opt.heads:
                debugger.add_img(img, img_id='out_pred_amodal')
                debugger.add_img(img, img_id='out_gt_amodal')

            # Predictions
            for k in range(len(dets['scores'][i])):
                if dets['scores'][i, k] > opt.vis_thresh:
                    debugger.add_coco_bbox(dets['bboxes'][i, k] *
                                           opt.down_ratio,
                                           dets['clses'][i, k],
                                           dets['scores'][i, k],
                                           img_id='out_pred')

                    if 'ltrb_amodal' in opt.heads:
                        debugger.add_coco_bbox(dets['bboxes_amodal'][i, k] *
                                               opt.down_ratio,
                                               dets['clses'][i, k],
                                               dets['scores'][i, k],
                                               img_id='out_pred_amodal')

                    if 'hps' in opt.heads and int(dets['clses'][i, k]) == 0:
                        debugger.add_coco_hp(dets['hps'][i, k] *
                                             opt.down_ratio,
                                             img_id='out_pred')

                    if 'tracking' in opt.heads:
                        debugger.add_arrow(dets['cts'][i][k] * opt.down_ratio,
                                           dets['tracking'][i][k] *
                                           opt.down_ratio,
                                           img_id='out_pred')
                        debugger.add_arrow(dets['cts'][i][k] * opt.down_ratio,
                                           dets['tracking'][i][k] *
                                           opt.down_ratio,
                                           img_id='pre_img_pred')

            # Ground truth
            debugger.add_img(img, img_id='out_gt')
            for k in range(len(dets_gt['scores'][i])):
                if dets_gt['scores'][i][k] > opt.vis_thresh:
                    debugger.add_coco_bbox(dets_gt['bboxes'][i][k] *
                                           opt.down_ratio,
                                           dets_gt['clses'][i][k],
                                           dets_gt['scores'][i][k],
                                           img_id='out_gt')

                    if 'ltrb_amodal' in opt.heads:
                        debugger.add_coco_bbox(dets_gt['bboxes_amodal'][i, k] *
                                               opt.down_ratio,
                                               dets_gt['clses'][i, k],
                                               dets_gt['scores'][i, k],
                                               img_id='out_gt_amodal')

                    if 'hps' in opt.heads and \
                      (int(dets['clses'][i, k]) == 0):
                        debugger.add_coco_hp(dets_gt['hps'][i][k] *
                                             opt.down_ratio,
                                             img_id='out_gt')

                    if 'tracking' in opt.heads:
                        debugger.add_arrow(
                            dets_gt['cts'][i][k] * opt.down_ratio,
                            dets_gt['tracking'][i][k] * opt.down_ratio,
                            img_id='out_gt')
                        debugger.add_arrow(
                            dets_gt['cts'][i][k] * opt.down_ratio,
                            dets_gt['tracking'][i][k] * opt.down_ratio,
                            img_id='pre_img_gt')

            if 'hm_hp' in opt.heads:
                pred = debugger.gen_colormap_hp(
                    output['hm_hp'][i].detach().cpu().numpy())
                gt = debugger.gen_colormap_hp(
                    batch['hm_hp'][i].detach().cpu().numpy())
                debugger.add_blend_img(img, pred, 'pred_hmhp')
                debugger.add_blend_img(img, gt, 'gt_hmhp')

            if 'rot' in opt.heads and 'dim' in opt.heads and 'dep' in opt.heads:
                dets_gt = {k: dets_gt[k].cpu().numpy() for k in dets_gt}
                calib = batch['meta']['calib'].detach().numpy() \
                 if 'calib' in batch['meta'] else None
                det_pred = generic_post_process(
                    opt, dets, batch['meta']['c'].cpu().numpy(),
                    batch['meta']['s'].cpu().numpy(), output['hm'].shape[2],
                    output['hm'].shape[3], self.opt.num_classes, calib)
                det_gt = generic_post_process(opt, dets_gt,
                                              batch['meta']['c'].cpu().numpy(),
                                              batch['meta']['s'].cpu().numpy(),
                                              output['hm'].shape[2],
                                              output['hm'].shape[3],
                                              self.opt.num_classes, calib)

                debugger.add_3d_detection(batch['meta']['img_path'][i],
                                          batch['meta']['flipped'][i],
                                          det_pred[i],
                                          calib[i],
                                          vis_thresh=opt.vis_thresh,
                                          img_id='add_pred')
                debugger.add_3d_detection(batch['meta']['img_path'][i],
                                          batch['meta']['flipped'][i],
                                          det_gt[i],
                                          calib[i],
                                          vis_thresh=opt.vis_thresh,
                                          img_id='add_gt')
                debugger.add_bird_views(det_pred[i],
                                        det_gt[i],
                                        vis_thresh=opt.vis_thresh,
                                        img_id='bird_pred_gt')

            if opt.debug == 4:
                debugger.save_all_imgs(opt.debug_dir,
                                       prefix='{}'.format(iter_id))
            else:
                debugger.show_all_imgs(pause=True)
Example #4
def step(split, epoch, opt, data_loader, model, optimizer=None):
    if split == 'train':
        model.train()
    else:
        model.eval()

    crit = torch.nn.MSELoss()
    crit_3d = FusionLoss(opt.device, opt.weight_3d, opt.weight_var)

    acc_idxs = data_loader.dataset.acc_idxs
    edges = data_loader.dataset.edges
    edges_3d = data_loader.dataset.edges_3d
    shuffle_ref = data_loader.dataset.shuffle_ref
    mean = data_loader.dataset.mean
    std = data_loader.dataset.std
    convert_eval_format = data_loader.dataset.convert_eval_format

    Loss, Loss3D = AverageMeter(), AverageMeter()
    Acc, MPJPE = AverageMeter(), AverageMeter()
    data_time, batch_time = AverageMeter(), AverageMeter()
    preds = []
    time_str = ''

    nIters = len(data_loader)
    bar = Bar('{}'.format(opt.exp_id), max=nIters)

    end = time.time()
    for i, batch in enumerate(data_loader):
        data_time.update(time.time() - end)
        for k in batch:
            if k != 'meta':
                batch[k] = batch[k].cuda(device=opt.device, non_blocking=True)
        gt_2d = batch['meta']['pts_crop'].cuda(
            device=opt.device, non_blocking=True).float() / opt.output_h
        output = model(batch['input'])

        loss = crit(output[-1]['hm'], batch['target'])
        loss_3d = crit_3d(output[-1]['depth'], batch['reg_mask'],
                          batch['reg_ind'], batch['reg_target'], gt_2d)
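        # intermediate supervision: add the heat-map loss for each earlier stack
        # (note the 3-D loss here is recomputed from the last stack's depth output)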
        for k in range(opt.num_stacks - 1):
            loss += crit(output[k], batch['target'])
            loss_3d = crit_3d(output[-1]['depth'], batch['reg_mask'],
                              batch['reg_ind'], batch['reg_target'], gt_2d)
        loss += loss_3d

        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            input_ = batch['input'].cpu().numpy().copy()
            input_[0] = flip(input_[0]).copy()[np.newaxis, ...]
            input_flip_var = torch.from_numpy(input_).cuda(device=opt.device,
                                                           non_blocking=True)
            output_flip_ = model(input_flip_var)
            output_flip = shuffle_lr(
                flip(output_flip_[-1]['hm'].detach().cpu().numpy()[0]),
                shuffle_ref)
            output_flip = output_flip.reshape(1, opt.num_output, opt.output_h,
                                              opt.output_w)
            output_depth_flip = shuffle_lr(
                flip(output_flip_[-1]['depth'].detach().cpu().numpy()[0]),
                shuffle_ref)
            output_depth_flip = output_depth_flip.reshape(
                1, opt.num_output, opt.output_h, opt.output_w)
            output_flip = torch.from_numpy(output_flip).cuda(device=opt.device,
                                                             non_blocking=True)
            output_depth_flip = torch.from_numpy(output_depth_flip).cuda(
                device=opt.device, non_blocking=True)
            output[-1]['hm'] = (output[-1]['hm'] + output_flip) / 2
            output[-1]['depth'] = (output[-1]['depth'] + output_depth_flip) / 2
            # pred, amb_idx = get_preds(output[-1]['hm'].detach().cpu().numpy())
            # preds.append(convert_eval_format(pred, conf, meta)[0])

        Loss.update(loss.item(), batch['input'].size(0))
        Loss3D.update(loss_3d.item(), batch['input'].size(0))
        Acc.update(
            accuracy(output[-1]['hm'].detach().cpu().numpy(),
                     batch['target'].detach().cpu().numpy(), acc_idxs))
        mpjpe_batch, mpjpe_cnt = mpjpe(
            output[-1]['hm'].detach().cpu().numpy(),
            output[-1]['depth'].detach().cpu().numpy(),
            batch['meta']['gt_3d'].detach().numpy(),
            convert_func=convert_eval_format)
        MPJPE.update(mpjpe_batch, mpjpe_cnt)

        batch_time.update(time.time() - end)
        end = time.time()
        if not opt.hide_data_time:
            time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \
                       ' |Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)

        Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:} '\
                     '|Loss {loss.avg:.5f} |Loss3D {loss_3d.avg:.5f}'\
                     '|Acc {Acc.avg:.4f} |MPJPE {MPJPE.avg:.2f}'\
                     '{time_str}'.format(epoch, i, nIters, total=bar.elapsed_td,
                                         eta=bar.eta_td, loss=Loss, Acc=Acc,
                                         split=split, time_str=time_str,
                                         MPJPE=MPJPE, loss_3d=Loss3D)
        if opt.print_iter > 0:
            if i % opt.print_iter == 0:
                print('{}| {}'.format(opt.exp_id, Bar.suffix))
        else:
            bar.next()
        if opt.debug >= 2:
            gt, amb_idx = get_preds(batch['target'].cpu().numpy())
            gt *= 4
            pred, amb_idx = get_preds(output[-1]['hm'].detach().cpu().numpy())
            pred *= 4
            debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges)
            img = (batch['input'][0].cpu().numpy().transpose(1, 2, 0) * std +
                   mean) * 256
            img = img.astype(np.uint8).copy()
            debugger.add_img(img)
            debugger.add_mask(
                cv2.resize(batch['target'][0].cpu().numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'target')
            debugger.add_mask(
                cv2.resize(
                    output[-1]['hm'][0].detach().cpu().numpy().max(axis=0),
                    (opt.input_w, opt.input_h)), img, 'pred')
            debugger.add_point_2d(gt[0], (0, 0, 255))
            debugger.add_point_2d(pred[0], (255, 0, 0))
            debugger.add_point_3d(batch['meta']['gt_3d'].detach().numpy()[0],
                                  'r',
                                  edges=edges_3d)
            pred_3d, ignore_idx = get_preds_3d(
                output[-1]['hm'].detach().cpu().numpy(),
                output[-1]['depth'].detach().cpu().numpy(), amb_idx)
            debugger.add_point_3d(convert_eval_format(pred_3d[0]),
                                  'b',
                                  edges=edges_3d)
            debugger.show_all_imgs(pause=False)
            debugger.show_3d()

    bar.finish()
    return {
        'loss': Loss.avg,
        'acc': Acc.avg,
        'mpjpe': MPJPE.avg,
        'time': bar.elapsed_td.total_seconds() / 60.
    }, preds
Example #5
import _init_paths

import numpy as np

from opts import opts
from datasets.dataset.yolo import YOLO
from utils.debugger import Debugger

if __name__ == '__main__':
    opt = opts().parse()
    dataset = YOLO(opt.data_dir, opt.flip, opt.vflip, opt.rotate, opt.scale,
                   opt.shear, opt, 'train')
    opt = opts().update_dataset_info_and_set_heads(opt, dataset)
    for i in range(len(dataset)):
        debugger = Debugger(dataset=opt.names)
        data = dataset[i]
        img = data['input'].transpose(1, 2, 0)
        hm = data['hm']
        dets_gt = data['meta']['gt_det']
        dets_gt[:, :4] *= opt.down_ratio
        img = np.clip(((img * dataset.std + dataset.mean) * 255.), 0,
                      255).astype(np.uint8)
        pred = debugger.gen_colormap(hm)
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_img(img, img_id='out_pred')
        for k in range(len(dets_gt)):
            debugger.add_coco_bbox(dets_gt[k, :4],
                                   dets_gt[k, -1],
                                   dets_gt[k, 4],
                                   img_id='out_pred')
Example #6
    pred[1] = (pred[1] - opt.numBins / 2) * PI / (opt.numBins / 2.)
    pred[2] = (pred[2] - opt.numBins / 2) * PI / (opt.numBins / 2.)

    bestR = angle2dcm(pred)

    R_gt = angle2dcm(gt_view)
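    # geodesic rotation error: ||logm(R_pred^T R_gt)||_F / sqrt(2), converted from radians to degrees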
    err_ = ((logm(np.dot(np.transpose(bestR), R_gt))**
             2).sum())**0.5 / (2.**0.5) * 180 / PI

    num[class_name] += 1
    acc[class_name] += 1 if err_ <= 30. else 0
    err[class_name].append(err_)

    if DEBUG:
        input, target, mask, view = dataset[index]
        debugger = Debugger()
        img = (input[:3].transpose(1, 2, 0) * 256).astype(np.uint8).copy()
        debugger.addImg(img)
        debugger.showAllImg(pause=False)

accAll = 0.
numAll = 0.
mid = {}
err_all = []
for k, v in ref.pascalClassName.items():
    accAll += acc[v]
    numAll += num[v]
    acc[v] = 1.0 * acc[v] / num[v]
    mid[v] = np.sort(np.array(err[v]))[len(err[v]) // 2]
    err_all = err_all + err[v]
print('Acc', acc)
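
The tail of this fragment accumulates accAll, numAll, mid and err_all without reporting them; a short follow-up summary (an assumption, not part of the original) could be:

print('AccAll', accAll / numAll)
print('Median err', mid)
print('Mean err', np.mean(err_all))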
Example #7
def demo(opt):
    # create folders to save the results
    os.mkdir('../Detection/bboxes_{}'.format(opt.arch))
    os.mkdir('../visualization/{}'.format(opt.arch))

    ###
    class_map = {1: 1, 2: 2}  # color for boundingbox
    ###

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str

    ###
    opt.debug = max(opt.debug, 1)
    ###

    Detector = detector_factory[opt.task]
    detector = Detector(opt)

    assert os.path.isdir(opt.demo), 'Need path to videos directory.'
    video_paths = [
        os.path.join(opt.demo, video_name)
        for video_name in os.listdir(opt.demo)
        if video_name.split('.')[-1] == 'mp4'
    ]

    # video_paths = [
    #     os.path.join(opt.demo, 'cam_2.mp4')
    # ]
    ###
    debugger = Debugger(dataset=opt.dataset, theme=opt.debugger_theme)
    ###

    for video_path in sorted(video_paths):
        bboxes = []
        video = cv2.VideoCapture(video_path)
        width, height = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(
            video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        ###
        bbox_video = cv2.VideoWriter(
            filename='../visualization/{}/'.format(opt.arch) +
            os.path.basename(video_path),
            fourcc=cv2.VideoWriter_fourcc(*'mp4v'),
            fps=float(30),
            frameSize=(width, height),
            isColor=True)
        ###

        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        for i in tqdm(range(num_frames)):
            # skip_frame
            if opt.skip_frame > 0:
                if i % opt.skip_frame == 0:
                    continue

            _, img = video.read()

            ret = detector.run(img)
            bboxes.append(ret['results'])

            ###
            debugger.add_img(img, img_id='default')
            for class_id in class_map.keys():
                for bbox in ret['results'][class_id]:
                    if bbox[4] > opt.vis_thresh:
                        debugger.add_coco_bbox(bbox[:4],
                                               class_map[class_id],
                                               bbox[4],
                                               img_id='default')
            bbox_img = debugger.imgs['default']
            bbox_video.write(bbox_img)

            ###

        with open(
                '../Detection/bboxes_{}'.format(opt.arch) + '/' +
                os.path.basename(video_path) + '.pkl', 'wb') as f:
            pickle.dump(bboxes, f)
Example #8
 def debug(self, batch, output, iter_id):
     opt = self.opt
     detections = self.decode(output['hm_t'], output['hm_l'],
                              output['hm_b'], output['hm_r'],
                              output['hm_c']).detach().cpu().numpy()
     detections[:, :, :4] *= opt.input_res / opt.output_res
     for i in range(1):
         debugger = Debugger(
             dataset=opt.dataset, ipynb=(opt.debug == 3), theme=opt.debugger_theme)
         pred_hm = np.zeros(
             (opt.input_res, opt.input_res, 3), dtype=np.uint8)
         gt_hm = np.zeros((opt.input_res, opt.input_res, 3), dtype=np.uint8)
         img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
         img = ((img * self.opt.std + self.opt.mean) * 255.).astype(np.uint8)
         for p in self.parts:
             tag = 'hm_{}'.format(p)
             pred = debugger.gen_colormap(
                 output[tag][i].detach().cpu().numpy())
             gt = debugger.gen_colormap(
                 batch[tag][i].detach().cpu().numpy())
             if p != 'c':
                 pred_hm = np.maximum(pred_hm, pred)
                 gt_hm = np.maximum(gt_hm, gt)
             if p == 'c' or opt.debug > 2:
                 debugger.add_blend_img(img, pred, 'pred_{}'.format(p))
                 debugger.add_blend_img(img, gt, 'gt_{}'.format(p))
         debugger.add_blend_img(img, pred_hm, 'pred')
         debugger.add_blend_img(img, gt_hm, 'gt')
         debugger.add_img(img, img_id='out')
         for k in range(len(detections[i])):
             if detections[i, k, 4] > 0.1:
                 debugger.add_coco_bbox(detections[i, k, :4], detections[i, k, -1],
                                        detections[i, k, 4], img_id='out')
         if opt.debug == 4:
             debugger.save_all_imgs(
                 opt.debug_dir, prefix='{}'.format(iter_id))
         else:
             debugger.show_all_imgs(pause=True)
Example #9
    def run(self, image_or_path_or_tensor, meta=None):
        load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
        merge_time, tot_time = 0, 0
        debugger = Debugger(dataset=self.opt.dataset,
                            ipynb=(self.opt.debug == 3),
                            theme=self.opt.debugger_theme)

        start_time = time.time()
        pre_processed = False
        if isinstance(image_or_path_or_tensor, np.ndarray):
            image = image_or_path_or_tensor
        elif type(image_or_path_or_tensor) == type(''):
            image = cv2.imread(image_or_path_or_tensor)
        else:
            image = image_or_path_or_tensor['image'][0].numpy()
            pre_processed_images = image_or_path_or_tensor
            pre_processed = True

        loaded_time = time.time()
        load_time += (loaded_time - start_time)

        detections = []
        for scale in self.scales:
            scale_start_time = time.time()
            if not pre_processed:
                print("no pre_processed")
                # intrinsic =[338.158, 0, 319.077, 0, 0, 338.158, 242.885, 0, 0, 0, 1, 0] #depth
                intrinsic = [
                    614.678, 0, 318.892, 0, 0, 614.93, 240.121, 0, 0, 0, 1, 0
                ]
                calib = np.array(intrinsic, dtype=np.float32)
                calib = calib.reshape(3, 4)
                images, meta = self.pre_process(image, scale, calib)
            else:
                # import pdb; pdb.set_trace()
                images = pre_processed_images['images'][scale][0]
                meta = pre_processed_images['meta'][scale]
                meta = {k: v.numpy()[0] for k, v in meta.items()}
            images = images.to(self.opt.device)
            torch.cuda.synchronize()
            pre_process_time = time.time()
            pre_time += pre_process_time - scale_start_time

            output, dets, forward_time = self.process(images, return_time=True)

            torch.cuda.synchronize()
            net_time += forward_time - pre_process_time
            decode_time = time.time()
            dec_time += decode_time - forward_time

            if self.opt.debug >= 2:
                self.debug(debugger, images, dets, output, scale)

            dets = self.post_process(dets, meta, scale)
            torch.cuda.synchronize()
            post_process_time = time.time()
            post_time += post_process_time - decode_time

            detections.append(dets)

        results = self.merge_outputs(detections)
        torch.cuda.synchronize()
        end_time = time.time()
        merge_time += end_time - post_process_time
        tot_time += end_time - start_time

        if self.opt.debug >= 1:
            image_id = str(1234321)
            self.show_results(debugger, image, results, image_id)

        return {
            'results': results,
            'tot': tot_time,
            'load': load_time,
            'pre': pre_time,
            'net': net_time,
            'dec': dec_time,
            'post': post_time,
            'merge': merge_time
        }
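
A caller typically consumes this return value the way Example #7 does; a minimal sketch (the image path is hypothetical):

ret = detector.run('path/to/image.jpg')
results = ret['results']
print('tot {:.3f}s | net {:.3f}s'.format(ret['tot'], ret['net']))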
Example #10
def step(split, epoch, opt, data_loader, model, optimizer=None):
    if split == 'train':
        model.train()
    else:
        model.eval()

    crit = torch.nn.MSELoss()

    acc_idxs = data_loader.dataset.acc_idxs
    edges = data_loader.dataset.edges
    shuffle_ref = data_loader.dataset.shuffle_ref
    mean = data_loader.dataset.mean
    std = data_loader.dataset.std
    convert_eval_format = data_loader.dataset.convert_eval_format

    Loss, Acc = AverageMeter(), AverageMeter()
    data_time, batch_time = AverageMeter(), AverageMeter()
    preds = []

    nIters = len(data_loader)
    bar = Bar('{}'.format(opt.exp_id), max=nIters)

    end = time.time()
    for i, batch in enumerate(data_loader):
        data_time.update(time.time() - end)
        input, target, meta = batch['input'], batch['target'], batch['meta']
        input_var = input.cuda(device=opt.device, non_blocking=True)
        target_var = target.cuda(device=opt.device, non_blocking=True)

        output = model(input_var)

        loss = crit(output[-1]['hm'], target_var)
        for k in range(opt.num_stacks - 1):
            loss += crit(output[k], target_var)

        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            input_ = input.cpu().numpy().copy()
            input_[0] = flip(input_[0]).copy()[np.newaxis, ...]
            input_flip_var = torch.from_numpy(input_).cuda(device=opt.device,
                                                           non_blocking=True)
            output_flip = model(input_flip_var)
            output_flip = shuffle_lr(
                flip(output_flip[-1]['hm'].detach().cpu().numpy()[0]),
                shuffle_ref)
            output_flip = output_flip.reshape(1, opt.num_output, opt.output_h,
                                              opt.output_w)
            # output_ = (output[-1].detach().cpu().numpy() + output_flip) / 2
            output_flip = torch.from_numpy(output_flip).cuda(device=opt.device,
                                                             non_blocking=True)
            output[-1]['hm'] = (output[-1]['hm'] + output_flip) / 2
            pred, conf = get_preds(output[-1]['hm'].detach().cpu().numpy(),
                                   True)
            preds.append(convert_eval_format(pred, conf, meta)[0])

        Loss.update(loss.item(), input.size(0))
        Acc.update(
            accuracy(output[-1]['hm'].detach().cpu().numpy(),
                     target_var.detach().cpu().numpy(), acc_idxs))

        batch_time.update(time.time() - end)
        end = time.time()
        if not opt.hide_data_time:
            time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \
                       ' |Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
        else:
            time_str = ''
        Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:}' \
                     '|Loss {loss.avg:.5f} |Acc {Acc.avg:.4f}'\
                     '{time_str}'.format(epoch, i, nIters, total=bar.elapsed_td,
                                         eta=bar.eta_td, loss=Loss, Acc=Acc,
                                         split = split, time_str = time_str)
        if opt.print_iter > 0:
            if i % opt.print_iter == 0:
                print('{}| {}'.format(opt.exp_id, Bar.suffix))
        else:
            bar.next()
        if opt.debug >= 2:
            gt = get_preds(target.cpu().numpy()) * 4
            pred = get_preds(output[-1]['hm'].detach().cpu().numpy()) * 4
            debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges)
            img = (input[0].numpy().transpose(1, 2, 0) * std + mean) * 256
            img = img.astype(np.uint8).copy()
            debugger.add_img(img)
            debugger.add_mask(
                cv2.resize(target[0].numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'target')
            debugger.add_mask(
                cv2.resize(
                    output[-1]['hm'][0].detach().cpu().numpy().max(axis=0),
                    (opt.input_w, opt.input_h)), img, 'pred')
            debugger.add_point_2d(pred[0], (255, 0, 0))
            debugger.add_point_2d(gt[0], (0, 0, 255))
            debugger.show_all_imgs(pause=True)

    bar.finish()
    return {
        'loss': Loss.avg,
        'acc': Acc.avg,
        'time': bar.elapsed_td.total_seconds() / 60.
    }, preds
Example #11
def demo_image(image, model, opt):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image,
                         trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]
    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                           out['depth'].detach().cpu().numpy())[0]

    pdb.set_trace()

    debugger = Debugger()
    debugger.add_img(image)
    debugger.add_point_2d(pred, (255, 0, 0))
    debugger.add_point_3d(pred_3d, 'b')
    debugger.show_all_imgs(pause=True)
    debugger.show_3d()
Example #12
    def debug(self, batch, output, iter_id):
        opt = self.opt
        reg = output['reg'] if opt.reg_offset else None
        #dets = ctdet_decode(
        #  output['hm'], output['wh'], reg=reg,
        #  cat_spec_wh=opt.cat_spec_wh, K=opt.K)

        dets = gridneighbordet_decode(output['hm'],
                                      output['wh'],
                                      opt.point_flags,
                                      reg=reg,
                                      cat_spec_wh=opt.cat_spec_wh,
                                      K=opt.K)
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
        dets[:, :, :4] *= opt.down_ratio
        dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
        dets_gt[:, :, :4] *= opt.down_ratio
        for i in range(1):
            debugger = Debugger(dataset=opt.dataset,
                                ipynb=(opt.debug == 3),
                                theme=opt.debugger_theme)
            img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
            img = np.clip(((img * opt.std + opt.mean) * 255.), 0,
                          255).astype(np.uint8)
            pred = debugger.gen_colormap(
                output['hm'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hm')
            debugger.add_blend_img(img, gt, 'gt_hm')
            debugger.add_img(img, img_id='out_pred')
            for k in range(len(dets[i])):
                if dets[i, k, 4] > opt.center_thresh:
                    debugger.add_coco_bbox(dets[i, k, :4],
                                           dets[i, k, -1],
                                           dets[i, k, 4],
                                           img_id='out_pred')

            debugger.add_img(img, img_id='out_gt')
            for k in range(len(dets_gt[i])):
                if dets_gt[i, k, 4] > opt.center_thresh:
                    debugger.add_coco_bbox(dets_gt[i, k, :4],
                                           dets_gt[i, k, -1],
                                           dets_gt[i, k, 4],
                                           img_id='out_gt')

            if opt.debug == 4:
                debugger.save_all_imgs(opt.debug_dir,
                                       prefix='{}'.format(iter_id))
            else:
                debugger.show_all_imgs(pause=True)
Example #13
    def debug(self, batch, output, iter_id):
        opt = self.opt
        reg = output['reg'] if opt.reg_offset else None
        # print(output)
        dets = circledet_decode(output['hm'],
                                output['cl'],
                                reg=reg,
                                cat_spec_wh=opt.cat_spec_wh,
                                K=opt.K)

        # print(dets)
        if opt.filter_boarder:
            output_h = self.opt.default_resolution[
                0] // self.opt.down_ratio  #hard coded
            output_w = self.opt.default_resolution[
                1] // self.opt.down_ratio  #hard coded
            for i in range(dets.shape[1]):
                cp = [0, 0]
                cp[0] = dets[0, i, 0]
                cp[1] = dets[0, i, 1]
                cr = dets[0, i, 2]
                if cp[0] - cr < 0 or cp[0] + cr > output_w:
                    dets[0, i, 3] = 0
                    continue
                if cp[1] - cr < 0 or cp[1] + cr > output_h:
                    dets[0, i, 3] = 0
                    continue

        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
        dets[:, :, :3] *= opt.down_ratio
        dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
        dets_gt[:, :, :3] *= opt.down_ratio
        for i in range(1):
            debugger = Debugger(dataset=opt.dataset,
                                ipynb=(opt.debug == 3),
                                theme=opt.debugger_theme)
            img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
            img = np.clip(((img * opt.std + opt.mean) * 255.), 0,
                          255).astype(np.uint8)
            pred = debugger.gen_colormap(
                output['hm'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hm')
            debugger.add_blend_img(img, gt, 'gt_hm')
            debugger.add_img(img, img_id='out_pred')

            for k in range(len(dets[i])):
                # print('risk = %f' % dets[i, k, 3])
                if dets[i, k, 3] > opt.center_thresh:
                    debugger.add_coco_circle(dets[i, k, :3],
                                             dets[i, k, -1],
                                             dets[i, k, 3],
                                             img_id='out_pred')

            debugger.add_img(img, img_id='out_gt')
            for k in range(len(dets_gt[i])):
                if dets_gt[i, k, 3] > opt.center_thresh:
                    debugger.add_coco_circle(dets_gt[i, k, :3],
                                             dets_gt[i, k, -1],
                                             dets_gt[i, k, 3],
                                             img_id='out_gt')

            if opt.debug == 4:
                debugger.save_all_imgs(opt.debug_dir,
                                       prefix='{}'.format(iter_id))
            else:
                debugger.show_all_imgs(pause=True)
Example #14
def stepLatent(loader, model, M_, Y, nViews, lamb, mu, S):
  model.eval()
  nIters = len(loader)
  if nIters == 0:
    return None
  N = loader.dataset.nImages
  M = np.zeros((N, ref.J, 3))
    
  bar = Bar('==>', max=nIters)
  ids = []
  Mij = np.zeros((N, ref.J, 3))
  err, num = 0, 0
  for i, (input, target, meta) in enumerate(loader):
    output = (model(torch.autograd.Variable(input)).data).cpu().numpy()
    G = output.shape[0] // nViews  # integer division so the reshape below gets an int group count
    output = output.reshape(G, nViews, ref.J, 3)
    for g in range(G):
      #assert meta[g * nViews, 0] > 1 + ref.eps
      id = int(meta[g * nViews, 1])
      ids.append(id)
      #debugger = Debugger()
      for j in range(nViews):
        Rij, tt = horn87(output[g, j].transpose(), M_[id].transpose())
        Mj = (np.dot(Rij, output[g, j].transpose()).copy()).transpose().copy()
        err += ((Mj - M_[id]) ** 2).sum()
        num += 1
        Mij[id] = Mij[id] + Mj / nViews 
        #print 'id, j, nViews', id, j, nViews
        #debugger.addPoint3D(Mj, 'b')
      #debugger.addPoint3D(M_[id], 'r')
      #debugger.show3D()
      
    Bar.suffix = 'Step Mij: [{0:3}/{1:3}] | Total: {total:} | ETA: {eta:} | Err : {err:.6f}'.format(i, nIters, total=bar.elapsed_td, eta=bar.eta_td, err = err / num)
    bar.next()
  bar.finish()
  if mu < ref.eps:
    for id in ids:
      M[id] = Mij[id]
    return M
  
  Mi = np.zeros((N, ref.J, 3))
  bar = Bar('==>', max=len(ids))
  err, num = 0, 0
  for i, id in enumerate(ids):
    dis = np.ones((Y.shape[0])) * oo
    for kk in range(Y.shape[0] // S):
      k = kk * S
      dis[k] = Dis(Y[k], M_[id])
    minK = np.argmin(dis)
    Ri, tt = horn87(Y[minK].transpose(), M_[id].transpose())
    Mi_ = (np.dot(Ri, Y[minK].transpose())).transpose()
    Mi[id] = Mi[id] + Mi_
    err += dis[minK]
    num += 1
    Bar.suffix = 'Step Mi : [{0:3}/{1:3}] | Total: {total:} | ETA: {eta:} | Err: {err:.6f}'.format(i, len(ids), total=bar.elapsed_td, eta=bar.eta_td, err = err / num)
    bar.next()
  bar.finish()
  
  tI = np.zeros((Y.shape[0] // S, 3))
  MI = np.zeros((N, ref.J, 3))
  cnt = np.zeros(N)
  bar = Bar('==>', max=Y.shape[0] // S)
  err, num = 0, 0
  for kk in range(Y.shape[0] // S):
    k = kk * S
    dis = np.ones((N)) * oo
    for id in ids:
      dis[id] = Dis(Y[k], M_[id])
    minI = np.argmin(dis)
    RI, tt = horn87(Y[k].transpose(1, 0), M_[minI].transpose(1, 0))
    MI_ = (np.dot(RI, Y[k].transpose())).transpose()
    err += ((MI_ - M_[minI]) ** 2).sum()
    num += 1
    MI[minI] = MI[minI] + MI_
    cnt[minI] += 1
    Bar.suffix = 'Step MI : [{0:3}/{1:3}] | Total: {total:} | ETA: {eta:} | Err: {err:.6f}'.format(kk, Y.shape[0] / S, total=bar.elapsed_td, eta=bar.eta_td, err = err / num)
    bar.next()
  bar.finish()
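  # the update below combines the per-view consensus Mij, the nearest prior shape Mi and the
  # prior shapes assigned to this id (MI), weighted by lamb / mu and the normalized assignment counts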
  
  for id in ids:
    M[id] = (Mij[id] * (lamb / mu) + Mi[id] + MI[id] / (Y.shape[0] / S) * len(ids)) / (lamb / mu + 1 + cnt[id] / (Y.shape[0] / S) * (len(ids)))
  if DEBUG:
    for id in ids:
      debugger = Debugger()
      debugger.addPoint3D(M[id], 'b')
      debugger.addPoint3D(M_[id], 'r')
      debugger.show3D()
  return M
Example #15
    def run(self, image_or_path_or_tensor, meta=None):
        load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
        merge_time, tot_time = 0, 0
        debugger = Debugger(dataset=self.opt.dataset,
                            ipynb=(self.opt.debug == 3),
                            theme=self.opt.debugger_theme)
        start_time = time.time()
        pre_processed = False
        if isinstance(image_or_path_or_tensor, np.ndarray):
            image = image_or_path_or_tensor
        elif type(image_or_path_or_tensor) == type(''):
            image = cv2.imread(image_or_path_or_tensor)
        else:
            image = image_or_path_or_tensor['image'][0].numpy()
            pre_processed_images = image_or_path_or_tensor
            pre_processed = True

        loaded_time = time.time()
        load_time += (loaded_time - start_time)

        detections = []
        for scale in self.scales:
            scale_start_time = time.time()
            #pre_processed= False
            if not pre_processed:
                images, meta = self.pre_process(image, scale, meta)
            else:
                images = pre_processed_images['images'][scale][0]
                meta = pre_processed_images['meta'][scale]
                meta = {k: v.numpy()[0] for k, v in meta.items()}
            images = images.to(self.opt.device)
            torch.cuda.synchronize()
            pre_process_time = time.time()
            pre_time += pre_process_time - scale_start_time
            # feed the images in and get the process() results out
            output, dets, forward_time = self.process(images, return_time=True)

            torch.cuda.synchronize()
            net_time += forward_time - pre_process_time
            decode_time = time.time()
            dec_time += decode_time - forward_time

            if self.opt.debug >= 2:
                self.debug(debugger, images, dets, output, scale)

            dets = self.post_process(dets, meta, scale)
            torch.cuda.synchronize()
            post_process_time = time.time()
            post_time += post_process_time - decode_time

            detections.append(dets)

        results = self.merge_outputs(detections)
        torch.cuda.synchronize()
        end_time = time.time()
        merge_time += end_time - post_process_time
        tot_time += end_time - start_time

        if self.opt.debug == 1:
            self.show_results(debugger, image, results)
        if self.opt.debug == 2:
            self.generate_results(debugger, image, results)

        # if debug == 7, only output the bounding-box coordinates and do not show the images
        # if self.opt.debug == -2:
        # self.generate_results(debugger, image, results)

        return {
            'results': results,
            'tot': tot_time,
            'load': load_time,
            'pre': pre_time,
            'net': net_time,
            'dec': dec_time,
            'post': post_time,
            'merge': merge_time
        }
Example #16
    def run(self,
            image_or_path_or_tensor_l,
            image_or_path_or_tensor_r,
            mono_est,
            meta=None):
        load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
        merge_time, tot_time = 0, 0
        debugger = Debugger(dataset=self.opt.dataset,
                            ipynb=(self.opt.debug == 3),
                            theme=self.opt.debugger_theme)
        start_time = time.time()
        pre_processed = False
        if isinstance(image_or_path_or_tensor_l, np.ndarray):
            image = image_or_path_or_tensor_l
        elif type(image_or_path_or_tensor_l) == type(''):
            self.image_path = image_or_path_or_tensor_l
            image_l = cv2.imread(image_or_path_or_tensor_l)
            image_r = cv2.imread(image_or_path_or_tensor_r)

            calib_path = os.path.join(
                self.opt.calib_dir, image_or_path_or_tensor_l[-10:-3] + 'txt')
            calib = self.read_clib(calib_path)
            calib = torch.from_numpy(calib).unsqueeze(0).to(self.opt.device)
            calib3 = self.read_clib3(calib_path)
            calib3 = torch.from_numpy(calib3).unsqueeze(0).to(self.opt.device)
        else:
            image = image_or_path_or_tensor_l['image'][0].numpy()
            pre_processed_images = image_or_path_or_tensor_l
            pre_processed = True

        loaded_time = time.time()
        load_time += (loaded_time - start_time)
        # cv2.imshow('s',image_l)
        # cv2.waitKey(0)
        detections = []
        for scale in self.scales:
            scale_start_time = time.time()
            if not pre_processed:
                images_l, meta = self.pre_process(image_l, scale, meta)
                images_r, _ = self.pre_process(image_r, scale, meta)
                meta['imag_name'] = image_or_path_or_tensor_l[-10:-3]
                meta['trans_output_l'] = meta['trans_output_l'].to(
                    self.opt.device)
                meta['trans_output_r'] = meta['trans_output_r'].to(
                    self.opt.device)
            else:
                # import pdb; pdb.set_trace()
                images = pre_processed_images['images'][scale][0]
                meta = pre_processed_images['meta'][scale]
                meta = {k: v.numpy()[0] for k, v in meta.items()}
            meta['calib_l'] = calib
            meta['calib_r'] = calib3
            images_l = images_l.to(self.opt.device)
            images_r = images_r.to(self.opt.device)
            self.read_est_from_mono(mono_est, meta)
            torch.cuda.synchronize()

            meta['input'] = images_l
            meta['input_r'] = images_r
            pre_process_time = time.time()
            pre_time += pre_process_time - scale_start_time
            output, dets, forward_time = self.process(meta, return_time=True)
            net_time += forward_time  #- pre_process_time
            torch.cuda.synchronize()

            decode_time = time.time()
            dec_time += decode_time - pre_process_time

            if self.opt.debug >= 2:
                self.debug(debugger, images_l, dets, output, scale)

            #dets = self.post_process(dets, meta, scale)
            torch.cuda.synchronize()
            post_process_time = time.time()
            post_time += post_process_time - decode_time

            detections.append(dets)

        #results = self.merge_outputs(detections)
        torch.cuda.synchronize()
        end_time = time.time()
        merge_time += end_time - post_process_time
        tot_time += end_time - start_time

        if self.opt.debug >= 1:
            self.show_results(debugger, image_l, image_r, dets, calib)

        return {
            'results': dets,
            'tot': tot_time,
            'load': load_time,
            'pre': pre_time,
            'net': net_time,
            'dec': dec_time,
            'post': post_time,
            'merge': merge_time
        }
Example #17
    def run(self, image_or_path_or_tensor, meta=None):
        load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
        merge_time, tot_time = 0, 0
        debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug==3),
                            theme=self.opt.debugger_theme)
        start_time = time.time()
        pre_processed = False
        if isinstance(image_or_path_or_tensor, np.ndarray):
            image = image_or_path_or_tensor
        elif type(image_or_path_or_tensor) == type (''): 
            image = cv2.imread(image_or_path_or_tensor)
        else:
            image = image_or_path_or_tensor['image'][0].numpy()
            pre_processed_images = image_or_path_or_tensor
            pre_processed = True

        try:
            _, _, _ = image.shape
        except AttributeError:
            print("Nonetype image at {}".format(image_or_path_or_tensor))

        loaded_time = time.time()
        load_time += (loaded_time - start_time)
        
        detections = []
        for scale in self.scales:
            scale_start_time = time.time()
            if not pre_processed:
                images, meta = self.pre_process(image, scale, meta)
            else:
                # import pdb; pdb.set_trace()
                images = pre_processed_images['images'][scale][0]
                meta = pre_processed_images['meta'][scale]
                meta = {k: v.numpy()[0] for k, v in meta.items()}
            images = images.to(self.opt.device)
            if 'cpu' not in self.opt.device.type:
                torch.cuda.synchronize()
            pre_process_time = time.time()
            pre_time += pre_process_time - scale_start_time

            output, dets, forward_time = self.process(images, return_time=True)

            if 'cpu' not in self.opt.device.type:
                torch.cuda.synchronize()
            net_time += forward_time - pre_process_time
            decode_time = time.time()
            dec_time += decode_time - forward_time
            
            if self.opt.debug >= 2:
                self.debug(debugger, images, dets, output, scale)
            
            dets = self.post_process(dets, meta, scale)
            if 'cpu' not in self.opt.device.type:
                torch.cuda.synchronize()
            post_process_time = time.time()
            post_time += post_process_time - decode_time

            detections.append(dets)
        
        results = self.merge_outputs(detections)
        if 'cpu' not in self.opt.device.type:
            torch.cuda.synchronize()
        end_time = time.time()
        merge_time += end_time - post_process_time
        tot_time += end_time - start_time

        # parse to our general format:
        cate_dict = OrderedDict()
        num_classes = 80
        if self.opt.dataset == 'semantic_line_kaist':
            num_classes = 14
        for i in range(num_classes):
            cate_dict.update({str(i): debugger.names[i]})
        xml_pth = self.opt.save_path.replace("Images", "Preds")
        if not os.path.exists(xml_pth):
            os.makedirs(xml_pth)
        if self.opt.task == 'ctdet_line':
            bboxesToxml = parseLineCenterNet(results, image)
            bboxes_dict_to_xml = parseLineDict(bboxesToxml['detection_boxes'],
                                               bboxesToxml['detection_classes'],
                                               bboxesToxml['detection_scores'],
                                               bboxesToxml['detection_directs'],
                                               cate_dict,
                                               min_score_thresh=self.opt.vis_thresh)
            writeXml_line(box_dict_in=bboxes_dict_to_xml,
                          image_filename=image_or_path_or_tensor,
                          image_in=image,
                          image_dir=image_or_path_or_tensor.split('/')[-2],
                          image_dst_in=self.opt.save_path,
                          xml_dst_in=xml_pth)
        else:
            bboxesToxml = parseBBoxesCornerNetLite(results, image)
            bboxes_dict_to_xml = parseBBoxDict(bboxesToxml['detection_boxes'],
                                               bboxesToxml['detection_classes'],
                                               bboxesToxml['detection_scores'],
                                               cate_dict,
                                               min_score_thresh=self.opt.vis_thresh)
            writeXml(box_dict_in=bboxes_dict_to_xml,
                     image_filename=image_or_path_or_tensor,
                     image_in=image,
                     image_dir=image_or_path_or_tensor.split('/')[-2],
                     image_dst_in=self.opt.save_path,
                     xml_dst_in=xml_pth)

        if self.opt.debug >= 1:
            self.show_results(debugger, image, results, self.image_counter, image_or_path_or_tensor)
        self.image_counter += 1
        return {'results': results, 'tot': tot_time, 'load': load_time,
                        'pre': pre_time, 'net': net_time, 'dec': dec_time,
                        'post': post_time, 'merge': merge_time}
Example #18
def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None):
    if split == 'train':
        model.train()
    else:
        model.eval()
    Loss, Acc, Mpjpe, Loss3D = \
        AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()

    nIters = len(dataLoader)
    bar = Bar('==>', max=nIters)

    for i, (input, target2D, target3D, meta) in enumerate(dataLoader):
        input_var = torch.autograd.Variable(input).float().cuda()
        target2D_var = torch.autograd.Variable(target2D).float().cuda()
        target3D_var = torch.autograd.Variable(target3D).float().cuda()

        output = model(input_var)
        reg = output[opt.nStack]
        if opt.DEBUG >= 2:
            gt = getPreds(target2D.cpu().numpy()) * 4
            pred = getPreds((output[opt.nStack - 1].data).cpu().numpy()) * 4
            debugger = Debugger()
            debugger.addImg(
                (input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8))
            debugger.addPoint2D(pred[0], (255, 0, 0))
            debugger.addPoint2D(gt[0], (0, 0, 255))
            debugger.showImg()
            debugger.saveImg('debug/{}.png'.format(i))

        loss = FusionCriterion(opt.regWeight, opt.varWeight)(reg, target3D_var)
        Loss3D.update(loss.item(), input.size(0))
        for k in range(opt.nStack):
            loss += criterion(output[k], target2D_var)

        Loss.update(loss.item(), input.size(0))
        Acc.update(
            Accuracy((output[opt.nStack - 1].data).cpu().numpy(),
                     (target2D_var.data).cpu().numpy()))
        mpjpe, num3D = MPJPE((output[opt.nStack - 1].data).cpu().numpy(),
                             (reg.data).cpu().numpy(), meta, opt)
        if num3D > 0:
            Mpjpe.update(mpjpe, num3D)
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        Bar.suffix = '{split} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | Loss3D {loss3d.avg:.6f} | Acc {Acc.avg:.6f} | Mpjpe {Mpjpe.avg:.6f} ({Mpjpe.val:.6f})'.format(
            epoch,
            i,
            nIters,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=Loss,
            Acc=Acc,
            split=split,
            Mpjpe=Mpjpe,
            loss3d=Loss3D)
        bar.next()

    bar.finish()
    return Loss.avg, Acc.avg, Mpjpe.avg, Loss3D.avg
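
A minimal sketch of the outer epoch loop that usually drives this step function; train_loader, val_loader and opt.nEpochs are assumptions and are not defined in the example above:

for epoch in range(1, opt.nEpochs + 1):
    # training pass: gradients are applied because an optimizer is passed
    loss, acc, mpjpe, loss3d = step('train', epoch, opt, train_loader,
                                    model, criterion, optimizer)
    # validation pass: no optimizer, so the split branch only evaluates
    val_loss, val_acc, val_mpjpe, val_loss3d = step(
        'val', epoch, opt, val_loader, model, criterion)
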
Exemplo n.º 19
0
    def run(self, image_or_path_or_tensor, meta=None):
        load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
        merge_time, tot_time = 0, 0
        debugger = Debugger(dataset=self.opt.dataset,
                            ipynb=(self.opt.debug == 3),
                            theme=self.opt.debugger_theme)
        start_time = time.time()
        pre_processed = False
        if isinstance(image_or_path_or_tensor, np.ndarray):  # not used in this demo path
            image = image_or_path_or_tensor
        elif type(image_or_path_or_tensor) == type(
                ''):  # after demo.py, image_or_path_or_tensor is the path of a single image
            # print("image_or_path_or_tensor:", image_or_path_or_tensor)
            image = cv2.imread(image_or_path_or_tensor)
        else:  # not used in this demo path
            image = image_or_path_or_tensor['image'][0].numpy()
            pre_processed_images = image_or_path_or_tensor
            pre_processed = True

        loaded_time = time.time()
        load_time += (loaded_time - start_time)

        detections = []
        for scale in self.scales:  # self.scales = 1; 2 or 0.5 also work, but 1 gives the best detections
            scale_start_time = time.time()
            if not pre_processed:
                images, meta = self.pre_process(image, scale, meta)
            else:
                # import pdb; pdb.set_trace()
                images = pre_processed_images['images'][scale][0]
                meta = pre_processed_images['meta'][scale]
                meta = {k: v.numpy()[0] for k, v in meta.items()}
            images = images.to(self.opt.device)
            torch.cuda.synchronize()
            pre_process_time = time.time()
            pre_time += pre_process_time - scale_start_time

            output, dets, forward_time = self.process(
                images, return_time=True)  # low-level detection step on the numpy array

            torch.cuda.synchronize()
            net_time += forward_time - pre_process_time
            decode_time = time.time()
            dec_time += decode_time - forward_time

            if self.opt.debug >= 2:
                self.debug(debugger, images, dets, output, scale)

            dets = self.post_process(dets, meta, scale)
            torch.cuda.synchronize()
            post_process_time = time.time()
            post_time += post_process_time - decode_time

            detections.append(dets)

        results = self.merge_outputs(detections)
        torch.cuda.synchronize()
        end_time = time.time()
        merge_time += end_time - post_process_time
        tot_time += end_time - start_time

        # if self.opt.debug == 0: # add by zengyuan
        #   pass
        # print("self.opt.debug", self.opt.debug)

        if self.opt.debug >= 1:  # show the result image for every detection; comment out these two lines to keep detecting without display
            self.show_results(debugger, image, results)

        return {
            'results': results,
            'tot': tot_time,
            'load': load_time,
            'pre': pre_time,
            'net': net_time,
            'dec': dec_time,
            'post': post_time,
            'merge': merge_time
        }
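
The torch.cuda.synchronize() calls around the timers in this run method are needed because CUDA kernels launch asynchronously; without them, time.time() would mostly measure launch overhead. A minimal illustration of the pattern, assuming model and images are already on the GPU:

import time
import torch

torch.cuda.synchronize()            # wait for all previously queued GPU work
start = time.time()
with torch.no_grad():
    output = model(images)          # kernel launches return almost immediately
torch.cuda.synchronize()            # block until the forward pass has really finished
forward_time = time.time() - start
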
Exemplo n.º 20
0
    def run(self, image_or_path_or_tensor, meta=None):
        load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
        merge_time, tot_time = 0, 0
        debugger = Debugger((self.cfg.DEBUG == 3),
                            theme=self.cfg.DEBUG_THEME,
                            num_classes=self.cfg.MODEL.NUM_CLASSES,
                            dataset=self.cfg.SAMPLE_METHOD,
                            down_ratio=self.cfg.MODEL.DOWN_RATIO)
        start_time = time.time()
        pre_processed = False
        if isinstance(image_or_path_or_tensor, np.ndarray):
            image = image_or_path_or_tensor
        elif type(image_or_path_or_tensor) == type(''):
            image = cv2.imread(image_or_path_or_tensor)
        else:
            image = image_or_path_or_tensor['image'][0].numpy()
            pre_processed_images = image_or_path_or_tensor
            pre_processed = True

        loaded_time = time.time()
        load_time += (loaded_time - start_time)

        detections = []
        for scale in self.scales:
            scale_start_time = time.time()
            if not pre_processed:
                images, meta = self.pre_process(image, scale, meta)
            else:
                images = pre_processed_images['images'][scale][0]
                meta = pre_processed_images['meta'][scale]
                meta = {k: v.numpy()[0] for k, v in meta.items()}
            images = images.to(torch.device('cuda'))
            torch.cuda.synchronize()
            pre_process_time = time.time()
            pre_time += pre_process_time - scale_start_time

            output, dets, forward_time = self.process(images, return_time=True)

            torch.cuda.synchronize()
            net_time += forward_time - pre_process_time
            decode_time = time.time()
            dec_time += decode_time - forward_time

            if self.cfg.DEBUG >= 2:
                self.debug(debugger, images, dets, output, scale)

            dets = self.post_process(dets, meta, scale)
            torch.cuda.synchronize()
            post_process_time = time.time()
            post_time += post_process_time - decode_time

            detections.append(dets)

        results = self.merge_outputs(detections)
        torch.cuda.synchronize()
        end_time = time.time()
        merge_time += end_time - post_process_time
        tot_time += end_time - start_time

        if self.cfg.DEBUG >= 1:
            self.show_results(debugger, image, results)

        return {
            'results': results,
            'tot': tot_time,
            'load': load_time,
            'pre': pre_time,
            'net': net_time,
            'dec': dec_time,
            'post': post_time,
            'merge': merge_time
        }
Exemplo n.º 21
0
class Detector(object):
    def __init__(self, opt):
        if opt.gpus[0] >= 0:
            opt.device = torch.device('cuda')
        else:
            opt.device = torch.device('cpu')

        print('Creating model...')
        self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
        self.model = load_model(self.model, opt.load_model, opt)
        self.model = self.model.to(opt.device)
        self.model.eval()

        self.opt = opt
        self.trained_dataset = get_dataset(opt.dataset)
        self.mean = np.array(self.trained_dataset.mean,
                             dtype=np.float32).reshape(1, 1, 3)
        self.std = np.array(self.trained_dataset.std,
                            dtype=np.float32).reshape(1, 1, 3)
        self.pause = not opt.no_pause
        self.rest_focal_length = self.trained_dataset.rest_focal_length \
          if self.opt.test_focal_length < 0 else self.opt.test_focal_length
        self.flip_idx = self.trained_dataset.flip_idx
        self.cnt = 0
        self.pre_images = None
        self.pre_image_ori = None
        self.tracker = Tracker(opt)
        self.debugger = Debugger(opt=opt, dataset=self.trained_dataset)

    def run(self, image_or_path_or_tensor, meta={}):
        load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
        merge_time, track_time, tot_time, display_time = 0, 0, 0, 0
        self.debugger.clear()
        start_time = time.time()

        # read image
        pre_processed = False
        if isinstance(image_or_path_or_tensor, np.ndarray):
            image = image_or_path_or_tensor
        elif type(image_or_path_or_tensor) == type(''):
            image = cv2.imread(image_or_path_or_tensor)
        else:
            image = image_or_path_or_tensor['image'][0].numpy()
            pre_processed_images = image_or_path_or_tensor
            pre_processed = True

        loaded_time = time.time()
        load_time += (loaded_time - start_time)

        detections = []

        # for multi-scale testing
        for scale in self.opt.test_scales:
            scale_start_time = time.time()
            if not pre_processed:
                # not prefetch testing or demo
                images, meta = self.pre_process(image, scale, meta)
            else:
                # prefetch testing
                images = pre_processed_images['images'][scale][0]
                meta = pre_processed_images['meta'][scale]
                meta = {k: v.numpy()[0] for k, v in meta.items()}
                if 'pre_dets' in pre_processed_images['meta']:
                    meta['pre_dets'] = pre_processed_images['meta']['pre_dets']
                if 'cur_dets' in pre_processed_images['meta']:
                    meta['cur_dets'] = pre_processed_images['meta']['cur_dets']

            images = images.to(self.opt.device,
                               non_blocking=self.opt.non_block_test)

            # initializing tracker
            pre_hms, pre_inds = None, None
            if self.opt.tracking:
                # initialize the first frame
                if self.pre_images is None:
                    print('Initialize tracking!')
                    self.pre_images = images
                    self.tracker.init_track(meta['pre_dets'] if 'pre_dets' in
                                            meta else [])
                if self.opt.pre_hm:
                    # render input heatmap from tracker status
                    # pre_inds is not used in the current version.
                    # We used pre_inds for learning an offset from previous image to
                    # the current image.
                    pre_hms, pre_inds = self._get_additional_inputs(
                        self.tracker.tracks,
                        meta,
                        with_hm=not self.opt.zero_pre_hm)

            pre_process_time = time.time()
            pre_time += pre_process_time - scale_start_time

            # run the network
            # output: the output feature maps, only used for visualizing
            # dets: output tensors after extracting peaks
            output, dets, forward_time = self.process(images,
                                                      self.pre_images,
                                                      pre_hms,
                                                      pre_inds,
                                                      return_time=True)
            net_time += forward_time - pre_process_time
            decode_time = time.time()
            dec_time += decode_time - forward_time

            # convert the cropped and 4x downsampled output coordinate system
            # back to the input image coordinate system
            result = self.post_process(dets, meta, scale)
            post_process_time = time.time()
            post_time += post_process_time - decode_time

            detections.append(result)
            if self.opt.debug >= 2:
                self.debug(self.debugger,
                           images,
                           result,
                           output,
                           scale,
                           pre_images=self.pre_images
                           if not self.opt.no_pre_img else None,
                           pre_hms=pre_hms)

        # merge multi-scale testing results
        results = self.merge_outputs(detections)
        torch.cuda.synchronize()
        end_time = time.time()
        merge_time += end_time - post_process_time

        # if self.opt.tracking:
        #   # public detection mode in MOT challenge
        #   public_det = meta['cur_dets'] if self.opt.public_det else None
        #   # add tracking id to results
        #   results = self.tracker.step(results, public_det)
        #   self.pre_images = images

        tracking_time = time.time()
        track_time += tracking_time - end_time
        tot_time += tracking_time - start_time

        # if self.opt.debug >= 1:
        #   self.show_results(self.debugger, image, results)
        # self.cnt += 1

        show_results_time = time.time()
        display_time += show_results_time - end_time

        # return results and run time
        ret = {
            'results': results,
            'tot': tot_time,
            'load': load_time,
            'pre': pre_time,
            'net': net_time,
            'dec': dec_time,
            'post': post_time,
            'merge': merge_time,
            'track': track_time,
            'display': display_time
        }
        if self.opt.save_video:
            try:
                # return debug image for saving video
                ret.update({'generic': self.debugger.imgs['generic']})
            except:
                pass
        return ret

    def _transform_scale(self, image, scale=1):
        '''
        Prepare the input image in different testing modes.
        Currently supported: fix the short side / center crop to a fixed size /
        keep the original resolution but pad to a multiple of 32.
        '''
        height, width = image.shape[0:2]
        new_height = int(height * scale)
        new_width = int(width * scale)
        if self.opt.fix_short > 0:
            if height < width:
                inp_height = self.opt.fix_short
                inp_width = (int(width / height * self.opt.fix_short) +
                             63) // 64 * 64
            else:
                inp_height = (int(height / width * self.opt.fix_short) +
                              63) // 64 * 64
                inp_width = self.opt.fix_short
            c = np.array([width / 2, height / 2], dtype=np.float32)
            s = np.array([width, height], dtype=np.float32)
        elif self.opt.fix_res:
            inp_height, inp_width = self.opt.input_h, self.opt.input_w
            c = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
            s = max(height, width) * 1.0
            # s = np.array([inp_width, inp_height], dtype=np.float32)
        else:
            inp_height = (new_height | self.opt.pad) + 1
            inp_width = (new_width | self.opt.pad) + 1
            c = np.array([new_width // 2, new_height // 2], dtype=np.float32)
            s = np.array([inp_width, inp_height], dtype=np.float32)
        resized_image = cv2.resize(image, (new_width, new_height))
        return resized_image, c, s, inp_width, inp_height, height, width

    def pre_process(self, image, scale, input_meta={}):
        '''
        Crop, resize, and normalize image. Gather meta data for post processing
        and tracking.
        '''
        resized_image, c, s, inp_width, inp_height, height, width = \
          self._transform_scale(image)
        trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
        out_height = inp_height // self.opt.down_ratio
        out_width = inp_width // self.opt.down_ratio
        trans_output = get_affine_transform(c, s, 0, [out_width, out_height])

        inp_image = cv2.warpAffine(resized_image,
                                   trans_input, (inp_width, inp_height),
                                   flags=cv2.INTER_LINEAR)
        inp_image = ((inp_image / 255. - self.mean) / self.std).astype(
            np.float32)

        images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height,
                                                      inp_width)
        if self.opt.flip_test:
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)
        meta = {'calib': np.array(input_meta['calib'], dtype=np.float32) \
                 if 'calib' in input_meta else \
                 self._get_default_calib(width, height)}
        meta.update({
            'c': c,
            's': s,
            'height': height,
            'width': width,
            'out_height': out_height,
            'out_width': out_width,
            'inp_height': inp_height,
            'inp_width': inp_width,
            'trans_input': trans_input,
            'trans_output': trans_output
        })
        if 'pre_dets' in input_meta:
            meta['pre_dets'] = input_meta['pre_dets']
        if 'cur_dets' in input_meta:
            meta['cur_dets'] = input_meta['cur_dets']
        return images, meta

    def _trans_bbox(self, bbox, trans, width, height):
        '''
        Transform bounding boxes according to image crop.
        '''
        bbox = np.array(copy.deepcopy(bbox), dtype=np.float32)
        bbox[:2] = affine_transform(bbox[:2], trans)
        bbox[2:] = affine_transform(bbox[2:], trans)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, width - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, height - 1)
        return bbox

    def _get_additional_inputs(self, dets, meta, with_hm=True):
        '''
        Render input heatmap from previous trackings.
        '''
        trans_input, trans_output = meta['trans_input'], meta['trans_output']
        inp_width, inp_height = meta['inp_width'], meta['inp_height']
        out_width, out_height = meta['out_width'], meta['out_height']
        input_hm = np.zeros((1, inp_height, inp_width), dtype=np.float32)

        output_inds = []
        for det in dets:
            if det['score'] < self.opt.pre_thresh or det['active'] == 0:
                continue
            bbox = self._trans_bbox(det['bbox'], trans_input, inp_width,
                                    inp_height)
            bbox_out = self._trans_bbox(det['bbox'], trans_output, out_width,
                                        out_height)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if (h > 0 and w > 0):
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                if with_hm:
                    draw_umich_gaussian(input_hm[0], ct_int, radius)
                ct_out = np.array([(bbox_out[0] + bbox_out[2]) / 2,
                                   (bbox_out[1] + bbox_out[3]) / 2],
                                  dtype=np.int32)
                output_inds.append(ct_out[1] * out_width + ct_out[0])
        if with_hm:
            input_hm = input_hm[np.newaxis]
            if self.opt.flip_test:
                input_hm = np.concatenate((input_hm, input_hm[:, :, :, ::-1]),
                                          axis=0)
            input_hm = torch.from_numpy(input_hm).to(self.opt.device)
        output_inds = np.array(output_inds, np.int64).reshape(1, -1)
        output_inds = torch.from_numpy(output_inds).to(self.opt.device)
        return input_hm, output_inds

    def _get_default_calib(self, width, height):
        calib = np.array([[self.rest_focal_length, 0, width / 2, 0],
                          [0, self.rest_focal_length, height / 2, 0],
                          [0, 0, 1, 0]])
        return calib

    def _sigmoid_output(self, output):
        if 'hm' in output:
            output['hm'] = output['hm'].sigmoid_()
        if 'hm_bdd' in output:
            output['hm_bdd'] = output['hm_bdd'].sigmoid_()
        if 'hm_tl' in output:
            output['hm_tl'] = output['hm_tl'].sigmoid_()
        if 'hm_hp' in output:
            output['hm_hp'] = output['hm_hp'].sigmoid_()
        if 'dep' in output:
            output['dep'] = 1. / (output['dep'].sigmoid() + 1e-6) - 1.
            output['dep'] *= self.opt.depth_scale
        return output

    def _flip_output(self, output):
        average_flips = ['hm', 'wh', 'dep', 'dim']
        neg_average_flips = ['amodel_offset']
        single_flips = [
            'ltrb', 'nuscenes_att', 'velocity', 'ltrb_amodal', 'reg',
            'hp_offset', 'rot', 'tracking', 'pre_hm'
        ]
        for head in output:
            if head in average_flips:
                output[head] = (output[head][0:1] +
                                flip_tensor(output[head][1:2])) / 2
            if head in neg_average_flips:
                flipped_tensor = flip_tensor(output[head][1:2])
                flipped_tensor[:, 0::2] *= -1
                output[head] = (output[head][0:1] + flipped_tensor) / 2
            if head in single_flips:
                output[head] = output[head][0:1]
            if head == 'hps':
                output['hps'] = (output['hps'][0:1] + flip_lr_off(
                    output['hps'][1:2], self.flip_idx)) / 2
            if head == 'hm_hp':
                output['hm_hp'] = (output['hm_hp'][0:1] + \
                  flip_lr(output['hm_hp'][1:2], self.flip_idx)) / 2

        return output

    def process(self,
                images,
                pre_images=None,
                pre_hms=None,
                pre_inds=None,
                return_time=False):
        with torch.no_grad():
            torch.cuda.synchronize()
            output = self.model(images, pre_images, pre_hms)[-1]
            output = self._sigmoid_output(output)
            output.update({'pre_inds': pre_inds})
            if self.opt.flip_test:
                output = self._flip_output(output)
            torch.cuda.synchronize()
            forward_time = time.time()

            dets = generic_decode_custom_tl(output, K=self.opt.K, opt=self.opt)
            torch.cuda.synchronize()
            for k in dets:
                dets[k] = dets[k].detach().cpu().numpy()
        if return_time:
            return output, dets, forward_time
        else:
            return output, dets

    def post_process(self, dets, meta, scale=1):
        dets = generic_post_process(self.opt, dets, [meta['c']], [meta['s']],
                                    meta['out_height'], meta['out_width'],
                                    self.opt.num_classes, [meta['calib']],
                                    meta['height'], meta['width'])
        self.this_calib = meta['calib']

        if scale != 1:
            for i in range(len(dets[0])):
                for k in ['bbox', 'hps']:
                    if k in dets[0][i]:
                        dets[0][i][k] = (np.array(dets[0][i][k], np.float32) /
                                         scale).tolist()
        return dets[0]

    def merge_outputs(self, detections):
        assert len(self.opt.test_scales) == 1, 'multi_scale not supported!'
        results = []
        for i in range(len(detections[0])):
            if detections[0][i]['score'] > self.opt.out_thresh:
                results.append(detections[0][i])
        return results

    def debug(self,
              debugger,
              images,
              dets,
              output,
              scale=1,
              pre_images=None,
              pre_hms=None):
        img = images[0].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * self.std + self.mean) * 255.), 0,
                      255).astype(np.uint8)
        pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hm')
        if 'hm_hp' in output:
            pred = debugger.gen_colormap_hp(
                output['hm_hp'][0].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hmhp')

        if pre_images is not None:
            pre_img = pre_images[0].detach().cpu().numpy().transpose(1, 2, 0)
            pre_img = np.clip(((pre_img * self.std + self.mean) * 255.), 0,
                              255).astype(np.uint8)
            debugger.add_img(pre_img, 'pre_img')
            if pre_hms is not None:
                pre_hm = debugger.gen_colormap(
                    pre_hms[0].detach().cpu().numpy())
                debugger.add_blend_img(pre_img, pre_hm, 'pre_hm')

    def show_results(self, debugger, image, results):
        debugger.add_img(image, img_id='generic')
        if self.opt.tracking:
            debugger.add_img(self.pre_image_ori
                             if self.pre_image_ori is not None else image,
                             img_id='previous')
            self.pre_image_ori = image

        for j in range(len(results)):
            if results[j]['score'] > self.opt.vis_thresh:
                if 'active' in results[j] and results[j]['active'] == 0:
                    continue
                item = results[j]
                if ('bbox' in item):
                    sc = item['score'] if self.opt.demo == '' or \
                      not ('tracking_id' in item) else item['tracking_id']
                    sc = item[
                        'tracking_id'] if self.opt.show_track_color else sc

                    debugger.add_coco_bbox(item['bbox'],
                                           item['class'] - 1,
                                           sc,
                                           img_id='generic')

                if 'tracking' in item:
                    debugger.add_arrow(item['ct'],
                                       item['tracking'],
                                       img_id='generic')

                tracking_id = item[
                    'tracking_id'] if 'tracking_id' in item else -1
                if 'tracking_id' in item and self.opt.demo == '' and \
                  not self.opt.show_track_color:
                    debugger.add_tracking_id(item['ct'],
                                             item['tracking_id'],
                                             img_id='generic')

                if (item['class'] in [1, 2]) and 'hps' in item:
                    debugger.add_coco_hp(item['hps'],
                                         tracking_id=tracking_id,
                                         img_id='generic')

        if len(results) > 0 and \
          'dep' in results[0] and 'alpha' in results[0] and 'dim' in results[0]:
            debugger.add_3d_detection(
                image if not self.opt.qualitative else cv2.resize(
                    debugger.imgs['pred_hm'],
                    (image.shape[1], image.shape[0])),
                False,
                results,
                self.this_calib,
                vis_thresh=self.opt.vis_thresh,
                img_id='ddd_pred')
            debugger.add_bird_view(results,
                                   vis_thresh=self.opt.vis_thresh,
                                   img_id='bird_pred',
                                   cnt=self.cnt)
            if self.opt.show_track_color and self.opt.debug == 4:
                del debugger.imgs['generic'], debugger.imgs['bird_pred']
        if 'ddd_pred' in debugger.imgs:
            debugger.imgs['generic'] = debugger.imgs['ddd_pred']
        if self.opt.debug == 4:
            debugger.save_all_imgs(self.opt.debug_dir,
                                   prefix='{}'.format(self.cnt))
        else:
            pass
            # debugger.show_all_imgs(pause=self.pause)

    def reset_tracking(self):
        self.tracker.reset()
        self.pre_images = None
        self.pre_image_ori = None
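
A minimal usage sketch for this tracking-style Detector; the option values and frame paths are assumptions, and opt would normally come from the repository's own option parser:

opt = opts().parse()                      # assumed option parser, as in the other examples
opt.load_model = 'model_last.pth'         # hypothetical checkpoint path
detector = Detector(opt)

for frame_path in ['frame_0001.jpg', 'frame_0002.jpg']:   # hypothetical video frames
    ret = detector.run(frame_path)
    print('detections: {} | net: {:.3f}s'.format(len(ret['results']), ret['net']))

detector.reset_tracking()                 # clear tracker state before the next video
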
Exemplo n.º 22
0
def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None):
    if split == 'train':
        model.train()
    else:
        model.eval()
    Loss, Acc = AverageMeter(), AverageMeter()
    preds = []

    nIters = len(dataLoader)
    bar = Bar('{}'.format(opt.expID), max=nIters)

    for i, (input, targets, action, meta) in enumerate(dataLoader):
        input_var = torch.autograd.Variable(input).float().cuda(opt.GPU)
        target_var = []
        for t in range(len(targets)):
            target_var.append(
                torch.autograd.Variable(targets[t]).float().cuda(opt.GPU))
        z = []
        for k in range(opt.numNoise):
            noise = torch.autograd.Variable(
                torch.randn((input_var.shape[0], 1, 64, 64))).cuda(opt.GPU)
            z.append(noise)

        output, samples = model(input_var, z, action)
        pred_sample = maximumExpectedUtility(samples, criterion)
        target = maximumExpectedUtility(target_var, criterion)

        if opt.DEBUG >= 2:
            gt = getPreds(target.cpu().numpy()) * 4
            pred = getPreds((pred_sample.data).cpu().numpy()) * 4
            debugger = Debugger()
            img = (input[0].numpy().transpose(1, 2, 0) * 256).astype(
                np.uint8).copy()
            debugger.addImg(img)
            debugger.addPoint2D(pred[0], (255, 0, 0))
            debugger.addPoint2D(gt[0], (0, 0, 255))
            debugger.showAllImg(pause=True)

        loss = DiscoLoss(output, samples, target_var, criterion)

        Loss.update(loss.item(), input.size(0))
        Acc.update(
            Accuracy((pred_sample.data).cpu().numpy(),
                     (target.data).cpu().numpy()))
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            input_ = input.cpu().numpy()
            input_[0] = Flip(input_[0]).copy()
            inputFlip_var = torch.autograd.Variable(
                torch.from_numpy(input_).view(1, input_.shape[1], ref.inputRes,
                                              ref.inputRes)).float().cuda(
                                                  opt.GPU)
            _, samplesFlip = model(inputFlip_var, z, action)
            pred_sample_flip = maximumExpectedUtility(samplesFlip, criterion)
            outputFlip = ShuffleLR(
                Flip((pred_sample_flip.data).cpu().numpy()[0])).reshape(
                    1, ref.nJoints, ref.outputRes, ref.outputRes)
            output_ = old_div(((pred_sample.data).cpu().numpy() + outputFlip),
                              2)
            preds.append(
                finalPreds(output_, meta['center'], meta['scale'],
                           meta['rotate'])[0])

        Bar.suffix = '{split} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | Acc {Acc.avg:.6f} ({Acc.val:.6f})'.format(
            epoch,
            i,
            nIters,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=Loss,
            Acc=Acc,
            split=split)
        bar.next()

    bar.finish()
    return {'Loss': Loss.avg, 'Acc': Acc.avg}, preds
Exemplo n.º 23
0
    num[class_name] += 1
    acc30[class_name] += 1 if err_ <= 30. else 0
    acc10[class_name] += 1 if err_ <= 10. else 0
    err[class_name].append(err_)
    Acc30 += 1 if err_ <= 30. else 0
    Acc10 += 1 if err_ <= 10. else 0
    bar.suffix = '[{0}/{1}]|Total: {total:} | ETA: {eta:} | Acc_10: {Acc10:.6f} | Acc_30: {Acc30:.6f}'.format(
        idx,
        n,
        total=bar.elapsed_td,
        eta=bar.eta_td,
        Acc10=Acc10 / (idx + 1.),
        Acc30=Acc30 / (idx + 1.))
    next(bar)
    if DEBUG:
        debugger = Debugger()
        input, target, mask = dataset[index]
        img = (input[:3].transpose(1, 2, 0) * 256).astype(np.uint8).copy()
        star = (cv2.resize(hm[0, 0], (ref.inputRes, ref.inputRes)) * 255)
        star[star > 255] = 255
        star[star < 0] = 0
        star = star.astype(np.uint8)

        for k in range(len(ps[0])):
            x, y, z = ((hm[0, 1:4, ps[0][k], ps[1][k]] + 0.5) *
                       ref.outputRes).astype(np.int32)
            dep = ((hm[0, 4, ps[0][k], ps[1][k]] + 0.5) *
                   ref.outputRes).astype(np.int32)
            color.append((1.0 * x / ref.outputRes, 1.0 * y / ref.outputRes,
                          1.0 * z / ref.outputRes))
            cv2.circle(img, (ps[1][k] * 4, ps[0][k] * 4), 6,
Exemplo n.º 24
0
def _debug(image, t_heat, l_heat, b_heat, r_heat, ct_heat):
    debugger = Debugger(num_classes=3)
    k = 0

    t_heat = torch.sigmoid(t_heat)
    l_heat = torch.sigmoid(l_heat)
    b_heat = torch.sigmoid(b_heat)
    r_heat = torch.sigmoid(r_heat)

    aggr_weight = 0.1
    t_heat = _h_aggregate(t_heat, aggr_weight=aggr_weight)
    print("[exkp.py _debug] final t_heat", t_heat.shape)
    l_heat = _v_aggregate(l_heat, aggr_weight=aggr_weight)
    b_heat = _h_aggregate(b_heat, aggr_weight=aggr_weight)
    r_heat = _v_aggregate(r_heat, aggr_weight=aggr_weight)
    t_heat[t_heat > 1] = 1
    l_heat[l_heat > 1] = 1
    b_heat[b_heat > 1] = 1
    r_heat[r_heat > 1] = 1

    ct_heat = torch.sigmoid(ct_heat)

    t_hm = debugger.gen_colormap(t_heat[k].cpu().data.numpy())
    l_hm = debugger.gen_colormap(l_heat[k].cpu().data.numpy())
    b_hm = debugger.gen_colormap(b_heat[k].cpu().data.numpy())
    r_hm = debugger.gen_colormap(r_heat[k].cpu().data.numpy())
    ct_hm = debugger.gen_colormap(ct_heat[k].cpu().data.numpy())

    hms = np.maximum(np.maximum(t_hm, l_hm), np.maximum(b_hm, r_hm))
    # debugger.add_img(hms, 'hms')
    if image is not None:
        mean = np.array([0.40789654, 0.44719302, 0.47026115],
                        dtype=np.float32).reshape(3, 1, 1)
        std = np.array([0.28863828, 0.27408164, 0.27809835],
                       dtype=np.float32).reshape(3, 1, 1)
        img = (image[k].cpu().data.numpy() * std + mean) * 255
        img = img.astype(np.uint8).transpose(1, 2, 0)
        debugger.add_img(img, 'img')
        # debugger.add_blend_img(img, t_hm, 't_hm')
        # debugger.add_blend_img(img, l_hm, 'l_hm')
        # debugger.add_blend_img(img, b_hm, 'b_hm')
        # debugger.add_blend_img(img, r_hm, 'r_hm')
        debugger.add_blend_img(img, hms, 'extreme')
        debugger.add_blend_img(img, ct_hm, 'center')
    debugger.show_all_imgs(pause=False)
Exemplo n.º 25
0
    def debug(self, batch, output, iter_id):
        opt = self.opt
        reg = output['reg'] if opt.reg_offset else None
        hm_hp = output['hm_hp'] if opt.hm_hp else None
        hp_offset = output['hp_offset'] if opt.reg_hp_offset else None
        dets = multi_pose_decode(output['hm'],
                                 output['wh'],
                                 output['hps'],
                                 reg=reg,
                                 hm_hp=hm_hp,
                                 hp_offset=hp_offset,
                                 K=opt.K)
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])

        dets[:, :, :4] *= opt.input_res / opt.output_res
        dets[:, :, 5:39] *= opt.input_res / opt.output_res
        dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
        dets_gt[:, :, :4] *= opt.input_res / opt.output_res
        dets_gt[:, :, 5:39] *= opt.input_res / opt.output_res
        for i in range(1):
            debugger = Debugger(dataset=opt.dataset,
                                ipynb=(opt.debug == 3),
                                theme=opt.debugger_theme)
            img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
            img = np.clip(((img * opt.std + opt.mean) * 255.), 0,
                          255).astype(np.uint8)
            pred = debugger.gen_colormap(
                output['hm'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hm')
            debugger.add_blend_img(img, gt, 'gt_hm')

            debugger.add_img(img, img_id='out_pred')
            for k in range(len(dets[i])):
                if dets[i, k, 4] > opt.center_thresh:
                    debugger.add_coco_bbox(dets[i, k, :4],
                                           dets[i, k, -1],
                                           dets[i, k, 4],
                                           img_id='out_pred')
                    debugger.add_coco_hp(dets[i, k, 5:39], img_id='out_pred')

            debugger.add_img(img, img_id='out_gt')
            for k in range(len(dets_gt[i])):
                if dets_gt[i, k, 4] > opt.center_thresh:
                    debugger.add_coco_bbox(dets_gt[i, k, :4],
                                           dets_gt[i, k, -1],
                                           dets_gt[i, k, 4],
                                           img_id='out_gt')
                    debugger.add_coco_hp(dets_gt[i, k, 5:39], img_id='out_gt')

            if opt.hm_hp:
                pred = debugger.gen_colormap_hp(
                    output['hm_hp'][i].detach().cpu().numpy())
                gt = debugger.gen_colormap_hp(
                    batch['hm_hp'][i].detach().cpu().numpy())
                debugger.add_blend_img(img, pred, 'pred_hmhp')
                debugger.add_blend_img(img, gt, 'gt_hmhp')

            if opt.debug == 4:
                debugger.save_all_imgs(opt.debug_dir,
                                       prefix='{}'.format(iter_id))
            else:
                debugger.show_all_imgs(pause=True)
Exemplo n.º 26
0
    def debug(self, batch, output, iter_id):
        cfg = self.cfg
        reg = output[3] if cfg.LOSS.REG_OFFSET else None
        hm_hp = output[4] if cfg.LOSS.HM_HP else None
        hp_offset = output[5] if cfg.LOSS.REG_HP_OFFSET else None
        dets = multi_pose_decode(output[0],
                                 output[1],
                                 output[2],
                                 reg=reg,
                                 hm_hp=hm_hp,
                                 hp_offset=hp_offset,
                                 K=cfg.TEST.TOPK)
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])

        dets[:, :, :4] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES
        dets[:, :, 5:39] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES
        dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
        dets_gt[:, :, :4] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES
        dets_gt[:, :, 5:39] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES
        for i in range(1):
            debugger = Debugger(dataset=cfg.SAMPLE_METHOD,
                                ipynb=(cfg.DEBUG == 3),
                                theme=cfg.DEBUG_THEME)
            img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
            img = np.clip(((img * np.array(cfg.DATASET.STD).reshape(
                1, 1, 3).astype(np.float32) + cfg.DATASET.MEAN) * 255.), 0,
                          255).astype(np.uint8)
            pred = debugger.gen_colormap(output[0][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hm')
            debugger.add_blend_img(img, gt, 'gt_hm')

            debugger.add_img(img, img_id='out_pred')
            for k in range(len(dets[i])):
                if dets[i, k, 4] > cfg.MODEL.CENTER_THRESH:
                    debugger.add_coco_bbox(dets[i, k, :4],
                                           dets[i, k, -1],
                                           dets[i, k, 4],
                                           img_id='out_pred')
                    debugger.add_coco_hp(dets[i, k, 5:39], img_id='out_pred')

            debugger.add_img(img, img_id='out_gt')
            for k in range(len(dets_gt[i])):
                if dets_gt[i, k, 4] > cfg.MODEL.CENTER_THRESH:
                    debugger.add_coco_bbox(dets_gt[i, k, :4],
                                           dets_gt[i, k, -1],
                                           dets_gt[i, k, 4],
                                           img_id='out_gt')
                    debugger.add_coco_hp(dets_gt[i, k, 5:39], img_id='out_gt')

            if cfg.LOSS.HM_HP:
                pred = debugger.gen_colormap_hp(
                    output[4][i].detach().cpu().numpy())
                gt = debugger.gen_colormap_hp(
                    batch['hm_hp'][i].detach().cpu().numpy())
                debugger.add_blend_img(img, pred, 'pred_hmhp')
                debugger.add_blend_img(img, gt, 'gt_hmhp')

            if cfg.DEBUG == 4:
                debugger.save_all_imgs(cfg.LOG_DIR,
                                       prefix='{}'.format(iter_id))
            else:
                debugger.show_all_imgs(pause=True)
Exemplo n.º 27
0
    def debug(self, batch, output, iter_id):
        opt = self.opt
        wh = output['wh'] if opt.reg_bbox else None
        reg = output['reg'] if opt.reg_offset else None
        dets = ddd_decode(output['hm'],
                          output['rot'],
                          output['dep'],
                          output['dim'],
                          wh=wh,
                          reg=reg,
                          K=opt.K)

        # x, y, score, r1-r8, depth, dim1-dim3, cls
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
        calib = batch['meta']['calib'].detach().numpy()
        # x, y, score, rot, depth, dim1, dim2, dim3
        # if opt.dataset == 'gta':
        #   dets[:, 12:15] /= 3
        dets_pred = ddd_post_process(dets.copy(),
                                     batch['meta']['c'].detach().numpy(),
                                     batch['meta']['s'].detach().numpy(),
                                     calib, opt)
        dets_gt = ddd_post_process(
            batch['meta']['gt_det'].detach().numpy().copy(),
            batch['meta']['c'].detach().numpy(),
            batch['meta']['s'].detach().numpy(), calib, opt)
        #for i in range(input.size(0)):
        for i in range(1):
            debugger = Debugger(dataset=opt.dataset,
                                ipynb=(opt.debug == 3),
                                theme=opt.debugger_theme)
            img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
            img = ((img * self.opt.std + self.opt.mean) * 255.).astype(
                np.uint8)
            pred = debugger.gen_colormap(
                output['hm'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'hm_pred')
            debugger.add_blend_img(img, gt, 'hm_gt')
            # decode
            debugger.add_ct_detection(img,
                                      dets[i],
                                      show_box=opt.reg_bbox,
                                      center_thresh=opt.center_thresh,
                                      img_id='det_pred')
            debugger.add_ct_detection(
                img,
                batch['meta']['gt_det'][i].cpu().numpy().copy(),
                show_box=opt.reg_bbox,
                img_id='det_gt')
            debugger.add_3d_detection(batch['meta']['image_path'][i],
                                      dets_pred[i],
                                      calib[i],
                                      center_thresh=opt.center_thresh,
                                      img_id='add_pred')
            debugger.add_3d_detection(batch['meta']['image_path'][i],
                                      dets_gt[i],
                                      calib[i],
                                      center_thresh=opt.center_thresh,
                                      img_id='add_gt')
            # debugger.add_bird_view(
            #   dets_pred[i], center_thresh=opt.center_thresh, img_id='bird_pred')
            # debugger.add_bird_view(dets_gt[i], img_id='bird_gt')
            debugger.add_bird_views(dets_pred[i],
                                    dets_gt[i],
                                    center_thresh=opt.center_thresh,
                                    img_id='bird_pred_gt')

            # debugger.add_blend_img(img, pred, 'out', white=True)
            debugger.compose_vis_add(batch['meta']['image_path'][i],
                                     dets_pred[i],
                                     calib[i],
                                     opt.center_thresh,
                                     pred,
                                     'bird_pred_gt',
                                     img_id='out')
            # debugger.add_img(img, img_id='out')
            if opt.debug == 4:
                debugger.save_all_imgs(opt.debug_dir,
                                       prefix='{}'.format(iter_id))
            else:
                debugger.show_all_imgs(pause=True)
Exemplo n.º 28
0
        scale = 1.0 * h / mheight
        new_im = image.resize((int(w / scale), int(h / scale)),
                              Image.ANTIALIAS)
    new_im.save(filename)
    new_im.close()


#opt = opts().parse()

imageName = './images/test3.jpg'

#process_image(imageName)

model = torch.load('../model/Stage3/model_10.pth',
                   map_location=lambda storage, loc: storage)
img = cv2.imread(imageName)
print(type(np.array(img)))
input = torch.from_numpy(img.transpose(2, 0, 1)).float() / 256.
input = input.view(1, input.size(0), input.size(1), input.size(2))
input_var = torch.autograd.Variable(input).float()
output = model(input_var)
pred = getPreds((output[-2].data).cpu().numpy())[0] * 4
reg = (output[-1].data).cpu().numpy().reshape(pred.shape[0], 1)
print(pred, (reg + 1) / 2. * 256)
debugger = Debugger()
debugger.addImg((input[0].numpy().transpose(1, 2, 0) * 256).astype(np.uint8))
debugger.addPoint2D(pred, (255, 0, 0))
debugger.addPoint3D(np.concatenate([pred, (reg + 1) / 2. * 256], axis=1))
debugger.showImg(pause=True)
debugger.show3D()
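
The factor 4 applied to getPreds and the (reg + 1) / 2. * 256 mapping follow the convention used throughout these examples: heatmap peaks live at 1/4 of the input resolution, and the regressed depth is in [-1, 1]. A small illustration of that coordinate mapping (the numbers are made up):

import numpy as np

heatmap_peak = np.array([[12., 30.]])      # (x, y) peak in 64 x 64 heatmap coordinates
pixel_coords = heatmap_peak * 4            # -> (48., 120.) in 256 x 256 input pixels

reg_value = np.array([[-0.25]])            # depth regression output in [-1, 1]
depth_pixels = (reg_value + 1) / 2. * 256  # -> 96.0, same convention as the print above
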
Exemplo n.º 29
0
    def run(self, image_or_path_or_tensor, meta=None):
        load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
        merge_time, tot_time = 0, 0
        debugger = Debugger(dataset=self.opt.dataset,
                            ipynb=(self.opt.debug == 3),
                            theme=self.opt.debugger_theme)
        start_time = time.time()
        pre_processed = False
        if isinstance(image_or_path_or_tensor, np.ndarray):
            image = image_or_path_or_tensor
        elif type(image_or_path_or_tensor) == type(
                ''):  # whether the CLI input is an image path or a folder, demo.py converts it to single image paths
            image = cv2.imread(image_or_path_or_tensor)
            image_name = image_or_path_or_tensor.split('/')[-1]
        else:
            image = image_or_path_or_tensor['image'][0].numpy()
            pre_processed_images = image_or_path_or_tensor
            pre_processed = True

        loaded_time = time.time()
        load_time += (loaded_time - start_time)

        detections = []
        for scale in self.scales:
            scale_start_time = time.time()
            if not pre_processed:
                images, meta = self.pre_process(image, scale, meta)
            else:
                # import pdb; pdb.set_trace()
                images = pre_processed_images['images'][scale][0]
                meta = pre_processed_images['meta'][scale]
                meta = {k: v.numpy()[0] for k, v in meta.items()}
            images = images.to(self.opt.device)
            torch.cuda.synchronize()
            pre_process_time = time.time()
            pre_time += pre_process_time - scale_start_time

            output, dets, forward_time = self.process(images, return_time=True)

            torch.cuda.synchronize()
            net_time += forward_time - pre_process_time
            decode_time = time.time()
            dec_time += decode_time - forward_time

            if self.opt.debug >= 2:
                self.debug(debugger, images, dets, output, scale)

            dets = self.post_process(dets, meta, scale)
            torch.cuda.synchronize()
            post_process_time = time.time()
            post_time += post_process_time - decode_time

            detections.append(dets)

        results = self.merge_outputs(detections)
        torch.cuda.synchronize()
        end_time = time.time()
        merge_time += end_time - post_process_time
        tot_time += end_time - start_time

        if self.opt.debug >= 1:
            self.save_results(debugger, image, results, image_name)

        return {
            'results': results,
            'tot': tot_time,
            'load': load_time,
            'pre': pre_time,
            'net': net_time,
            'dec': dec_time,
            'post': post_time,
            'merge': merge_time
        }
Exemplo n.º 30
0
    def run(self, image_or_path_or_tensor, out, meta=None):
        load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
        merge_time, tot_time = 0, 0
        debugger = Debugger(dataset=self.opt.dataset,
                            ipynb=(self.opt.debug == 3),
                            theme=self.opt.debugger_theme)
        start_time = time.time()
        pre_processed = False  # image loading: decide whether the command line gave an image, a path, or a tensor
        if isinstance(image_or_path_or_tensor, np.ndarray):
            image = image_or_path_or_tensor
        elif type(image_or_path_or_tensor) == type(''):
            image = cv2.imread(image_or_path_or_tensor)
        else:
            image = image_or_path_or_tensor['image'][0].numpy()
            pre_processed_images = image_or_path_or_tensor
            pre_processed = True

        loaded_time = time.time()
        load_time += (loaded_time - start_time)  # image loading time

        detections = []
        for scale in self.scales:  # scales: the image is resized by each factor before detection
            # print(self.scales)
            scale_start_time = time.time()
            if not pre_processed:  # preprocess if given an image or a path
                images, meta = self.pre_process(image, scale, meta)
            else:
                # import pdb; pdb.set_trace()
                images = pre_processed_images['images'][scale][0]
                meta = pre_processed_images['meta'][scale]
                meta = {k: v.numpy()[0] for k, v in meta.items()}
            images = images.to(self.opt.device)  # move images to the GPU
            torch.cuda.synchronize()  # synchronize CUDA kernels so the timings are accurate
            pre_process_time = time.time()
            pre_time += pre_process_time - scale_start_time  # pre-processing time

            output, dets, forward_time = self.process(
                images, return_time=True
            )  # forward pass: raw output, decoded boxes, and the current time; dets has one N x 5 ndarray per class (80 classes)

            torch.cuda.synchronize()
            net_time += forward_time - pre_process_time  # heatmap prediction (network forward) time
            decode_time = time.time()
            dec_time += decode_time - forward_time  # heatmap decoding time

            if self.opt.debug >= 2:  # with debug >= 2, output three images: prediction, resized prediction and heatmap
                self.debug(debugger, images, dets, output, scale)

            dets = self.post_process(dets, meta, scale)
            torch.cuda.synchronize()
            post_process_time = time.time()
            post_time += post_process_time - decode_time  # time to map coordinates back (post-processing)

            detections.append(dets)
            # print(detections)
        results = self.merge_outputs(detections)  # map back to original image coordinates
        torch.cuda.synchronize()
        end_time = time.time()
        merge_time += end_time - post_process_time  # time to map back to original coordinates
        tot_time += end_time - start_time  # total time

        # facial expression recognition hook
        # emotion_labels = {'0':'angry', '1':'disgust', '2':'fear', '3':'happy', '4':'sad', '5':'surprise', '6':'netural'}
        # emotion_model_path = '../models/emotion_models/simple_CNN.985-0.66.hdf5'
        # emotion_classifier = load_keras_model(emotion_model_path)

        # img = cv2.imread('depressed_412.jpg')
        # img = cv2.resize(img,(224,224))
        # img = transforms.ToTensor()(img)
        # with torch.no_grad():
        # pt = self.model1(img)
        visualize_model(self.model1, num_images=2)

        # print(pt)
        # for detection in detections:
        # faces = self.gray_preprocess(results)
        # print(faces)
        # for face in faces:
        # emotion_predict = self.model1(face)
        # print(emotion_predict)
        # emotion_text = emotion_labels[emotion_predict]

        if self.opt.debug >= 1:
            self.show_results(debugger, image, results, out=out)

        return {
            'results': results,
            'tot': tot_time,
            'load': load_time,
            'pre': pre_time,
            'net': net_time,
            'dec': dec_time,
            'post': post_time,
            'merge': merge_time
        }
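
Since the comment in this run method describes the detections as one N x 5 ndarray per class (bounding box plus score), a minimal sketch of filtering such a per-class result dictionary by the visualization threshold; the names and the threshold value are assumptions:

import numpy as np

vis_thresh = 0.3                            # assumed threshold, mirrors opt.vis_thresh
kept = {}
for cls_id, boxes in results.items():       # results: {class_id: N x 5 ndarray}
    boxes = np.asarray(boxes)
    if boxes.size == 0:
        continue
    keep = boxes[boxes[:, 4] > vis_thresh]  # column 4 is the detection score
    if len(keep) > 0:
        kept[cls_id] = keep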