Example No. 1
def frameAP(opt, print_info=True):
    redo = opt.redo
    th = opt.th
    split = 'val'
    model_name = opt.model_name
    Dataset = get_dataset(opt.dataset)
    dataset = Dataset(opt, split)

    inference_dirname = opt.inference_dir
    print('inference_dirname is ', inference_dirname)
    print('threshold is ', th)

    vlist = dataset._test_videos[opt.split - 1]
    # load per-frame detections
    frame_detections_file = os.path.join(inference_dirname,
                                         'frame_detections.pkl')
    if os.path.isfile(frame_detections_file) and not redo:
        with open(frame_detections_file, 'rb') as fid:
            alldets = pickle.load(fid)
    else:
        alldets = load_frame_detections(opt, dataset, opt.K, vlist,
                                        inference_dirname)
        try:
            with open(frame_detections_file, 'wb') as fid:
                pickle.dump(alldets, fid, protocol=4)
        except OverflowError:
            print(
                "OverflowError: cannot serialize a bytes object larger than 4 GiB"
            )

    results = {}
    # compute AP for each class
    for ilabel, label in enumerate(dataset.labels):
        # detections of this class
        detections = alldets[alldets[:, 2] == ilabel, :]

        # load ground-truth of this class
        gt = {}
        for iv, v in enumerate(vlist):
            tubes = dataset._gttubes[v]

            if ilabel not in tubes:
                continue

            for tube in tubes[ilabel]:
                for i in range(tube.shape[0]):
                    k = (iv, int(tube[i, 0]))
                    if k not in gt:
                        gt[k] = []
                    gt[k].append(tube[i, 1:5].tolist())

        for k in gt:
            gt[k] = np.array(gt[k])

        # pr will be an array containing precision-recall values
        pr = np.empty((detections.shape[0] + 1, 2),
                      dtype=np.float32)  # precision,recall
        pr[0, 0] = 1.0
        pr[0, 1] = 0.0
        fn = sum([g.shape[0] for g in gt.values()])  # false negatives
        fp = 0  # false positives
        tp = 0  # true positives

        for i, j in enumerate(np.argsort(-detections[:, 3])):
            k = (int(detections[j, 0]), int(detections[j, 1]))
            box = detections[j, 4:8]
            ispositive = False

            if k in gt:
                ious = iou2d(gt[k], box)
                amax = np.argmax(ious)

                if ious[amax] >= th:
                    ispositive = True
                    gt[k] = np.delete(gt[k], amax, 0)

                    if gt[k].size == 0:
                        del gt[k]

            if ispositive:
                tp += 1
                fn -= 1
            else:
                fp += 1

            pr[i + 1, 0] = float(tp) / float(tp + fp)
            pr[i + 1, 1] = float(tp) / float(tp + fn)

        results[label] = pr

    # display results
    ap = 100 * np.array([pr_to_ap(results[label]) for label in dataset.labels])
    frameap_result = np.mean(ap)
    if print_info:
        log_file = open(os.path.join(opt.root_dir, 'result', opt.exp_id), 'a+')
        log_file.write('\nTask_{} frameAP_{}\n'.format(model_name, th))
        print('Task_{} frameAP_{}\n'.format(model_name, th))
        log_file.write("\n{:20s} {:8.2f}\n\n".format("mAP", frameap_result))
        log_file.close()
        print("{:20s} {:8.2f}".format("mAP", frameap_result))

    return frameap_result
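
Note: the evaluation above reduces each precision-recall array to a single number with pr_to_ap, which is imported from elsewhere and not shown in this listing. A minimal sketch, assuming pr is an Nx2 array with precision in column 0 and non-decreasing recall in column 1 (as built above), could integrate the curve trapezoidally:

import numpy as np

def pr_to_ap_sketch(pr):
    # pr: Nx2 array, column 0 = precision, column 1 = recall (non-decreasing);
    # AP approximated as the area under the curve via the trapezoidal rule
    recall_diff = pr[1:, 1] - pr[:-1, 1]
    precision_sum = pr[1:, 0] + pr[:-1, 0]
    return np.sum(recall_diff * precision_sum * 0.5)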
Example No. 2
def videoAP(opt, print_info=True):

    th = opt.th
    model_name = opt.model_name
    split = 'val'
    Dataset = get_dataset(opt.dataset)
    dataset = Dataset(opt, split)

    inference_dirname = opt.inference_dir

    vlist = dataset._test_videos[opt.split - 1]
    # load detections
    # alldets: for each label in 0..nlabels-1, a list of tuples (v, score, tube array)
    alldets = {ilabel: [] for ilabel in range(len(dataset.labels))}
    for v in vlist:
        tubename = os.path.join(inference_dirname, v + '_tubes.pkl')
        if not os.path.isfile(tubename):
            print("ERROR: Missing extracted tubes " + tubename)
            sys.exit()

        with open(tubename, 'rb') as fid:
            tubes = pickle.load(fid)
        for ilabel in range(len(dataset.labels)):
            ltubes = tubes[ilabel]
            idx = nms3dt(ltubes, 0.3)
            alldets[ilabel] += [(v, ltubes[i][1], ltubes[i][0]) for i in idx]

    # compute AP for each class
    res = {}
    for ilabel in range(len(dataset.labels)):
        detections = alldets[ilabel]
        # load ground-truth
        gt = {}
        for v in vlist:
            tubes = dataset._gttubes[v]

            if ilabel not in tubes:
                continue

            gt[v] = tubes[ilabel]

            if len(gt[v]) == 0:
                del gt[v]

        # precision,recall
        pr = np.empty((len(detections) + 1, 2), dtype=np.float32)
        pr[0, 0] = 1.0
        pr[0, 1] = 0.0

        fn = sum([len(g) for g in gt.values()])  # false negatives
        fp = 0  # false positives
        tp = 0  # true positives

        for i, j in enumerate(
                np.argsort(-np.array([dd[1] for dd in detections]))):
            v, score, tube = detections[j]
            ispositive = False

            if v in gt:
                ious = [iou3dt(g, tube) for g in gt[v]]
                amax = np.argmax(ious)
                if ious[amax] >= th:
                    ispositive = True
                    del gt[v][amax]
                    if len(gt[v]) == 0:
                        del gt[v]

            if ispositive:
                tp += 1
                fn -= 1
            else:
                fp += 1

            pr[i + 1, 0] = float(tp) / float(tp + fp)
            pr[i + 1, 1] = float(tp) / float(tp + fn)

        res[dataset.labels[ilabel]] = pr

    # display results
    ap = 100 * np.array([pr_to_ap(res[label]) for label in dataset.labels])
    videoap_result = np.mean(ap)

    if print_info:
        log_file = open(os.path.join(opt.root_dir, 'result', opt.exp_id), 'a+')
        log_file.write('\nTask_{} VideoAP_{}\n'.format(model_name, th))
        print('Task_{} VideoAP_{}\n'.format(opt.model_name, th))
        # for il, _ in enumerate(dataset.labels):
        # print("{:20s} {:8.2f}".format('', ap[il]))
        # log_file.write("{:20s} {:8.2f}\n".format('', ap[il]))
        log_file.write("\n{:20s} {:8.2f}\n\n".format("mAP", videoap_result))
        log_file.close()
        print("{:20s} {:8.2f}".format("mAP", videoap_result))
    return videoap_result
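
Note: videoAP scores a detected tube against a ground-truth tube with iou3dt, which this listing only imports. Below is a rough sketch, assuming a tube is an Nx5 array (<frame> <x1> <y1> <x2> <y2>, one row per frame, frames contiguous and sorted); the repository's actual implementation may differ:

import numpy as np

def iou3dt_sketch(b1, b2):
    # spatio-temporal IoU: mean per-frame box IoU over the temporally
    # overlapping frames, scaled by the temporal IoU of the two spans
    tmin = max(b1[0, 0], b2[0, 0])
    tmax = min(b1[-1, 0], b2[-1, 0])
    if tmax < tmin:
        return 0.0  # no temporal overlap
    t_inter = tmax - tmin + 1
    t_union = max(b1[-1, 0], b2[-1, 0]) - min(b1[0, 0], b2[0, 0]) + 1
    # restrict both tubes to the overlapping frame range
    sb1 = b1[(b1[:, 0] >= tmin) & (b1[:, 0] <= tmax), 1:5]
    sb2 = b2[(b2[:, 0] >= tmin) & (b2[:, 0] <= tmax), 1:5]
    xmin = np.maximum(sb1[:, 0], sb2[:, 0])
    ymin = np.maximum(sb1[:, 1], sb2[:, 1])
    xmax = np.minimum(sb1[:, 2], sb2[:, 2])
    ymax = np.minimum(sb1[:, 3], sb2[:, 3])
    inter = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)
    area1 = (sb1[:, 2] - sb1[:, 0]) * (sb1[:, 3] - sb1[:, 1])
    area2 = (sb2[:, 2] - sb2[:, 0]) * (sb2[:, 3] - sb2[:, 1])
    spatial_iou = np.mean(inter / (area1 + area2 - inter))
    return spatial_iou * t_inter / t_union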
Example No. 3
def main(opt):
    set_seed(opt.seed)

    torch.backends.cudnn.benchmark = True
    print()
    print('dataset: ' + opt.dataset + '   task:  ' + opt.task)
    Dataset = get_dataset(opt.dataset)
    opt = opts().update_dataset(opt, Dataset)

    train_writer = tensorboardX.SummaryWriter(
        log_dir=os.path.join(opt.log_dir, 'train'))
    epoch_train_writer = tensorboardX.SummaryWriter(
        log_dir=os.path.join(opt.log_dir, 'train_epoch'))
    val_writer = tensorboardX.SummaryWriter(
        log_dir=os.path.join(opt.log_dir, 'val'))
    epoch_val_writer = tensorboardX.SummaryWriter(
        log_dir=os.path.join(opt.log_dir, 'val_epoch'))

    logger = Logger(opt, epoch_train_writer, epoch_val_writer)

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')

    model = create_model(opt.arch, opt.branch_info, opt.head_conv, opt.K)
    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    start_epoch = opt.start_epoch

    if opt.pretrain_model == 'coco':
        model = load_coco_pretrained_model(opt, model)
    else:
        model = load_imagenet_pretrained_model(opt, model)

    if opt.load_model != '':
        model, optimizer, _, _ = load_model(model, opt.load_model, optimizer,
                                            opt.lr, opt.ucf_pretrain)

    trainer = MOCTrainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    train_loader = torch.utils.data.DataLoader(Dataset(opt, 'train'),
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=opt.pin_memory,
                                               drop_last=True,
                                               worker_init_fn=worker_init_fn)
    val_loader = torch.utils.data.DataLoader(Dataset(opt, 'val'),
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.num_workers,
                                             pin_memory=opt.pin_memory,
                                             drop_last=True,
                                             worker_init_fn=worker_init_fn)

    print('training...')
    print('GPU allocation:', opt.chunk_sizes)
    best_ap = 0
    best_epoch = 0
    stop_step = 0
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        print('epoch is ', epoch)
        log_dict_train = trainer.train(epoch, train_loader, train_writer)
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('epoch/{}'.format(k), v, epoch, 'train')
            logger.write('train: {} {:8f} | '.format(k, v))
        logger.write('\n')
        if opt.save_all and not opt.auto_stop:
            time_str = time.strftime('%Y-%m-%d-%H-%M')
            model_name = 'model_[{}]_{}.pth'.format(epoch, time_str)
            save_model(os.path.join(opt.save_dir, model_name), model,
                       optimizer, epoch, log_dict_train['loss'])
        else:
            model_name = 'model_last.pth'
            save_model(os.path.join(opt.save_dir, model_name), model,
                       optimizer, epoch, log_dict_train['loss'])

        # this step evaluates the model
        if opt.val_epoch:
            with torch.no_grad():
                log_dict_val = trainer.val(epoch, val_loader, val_writer)
            for k, v in log_dict_val.items():
                logger.scalar_summary('epoch/{}'.format(k), v, epoch, 'val')
                logger.write('val: {} {:8f} | '.format(k, v))
        logger.write('\n')

        if opt.auto_stop:
            tmp_rgb_model = opt.rgb_model
            tmp_flow_model = opt.flow_model
            if opt.rgb_model != '':
                opt.rgb_model = os.path.join(opt.rgb_model, model_name)
            if opt.flow_model != '':
                opt.flow_model = os.path.join(opt.flow_model, model_name)
            stream_inference(opt)
            ap = frameAP(opt, print_info=opt.print_log)
            os.system("rm -rf tmp")
            if ap > best_ap:
                best_ap = ap
                best_epoch = epoch
                saved1 = os.path.join(opt.save_dir, model_name)
                saved2 = os.path.join(opt.save_dir, 'model_best.pth')
                os.system("cp " + str(saved1) + " " + str(saved2))
            if stop_step < len(
                    opt.lr_step) and epoch >= opt.lr_step[stop_step]:
                model, optimizer, _, _ = load_model(
                    model, os.path.join(opt.save_dir, 'model_best.pth'),
                    optimizer, opt.lr)
                opt.lr = opt.lr * 0.1
                logger.write('Drop LR to ' + str(opt.lr) + '\n')
                print('Drop LR to ' + str(opt.lr))
                print('load epoch is ', best_epoch)
                for param_group in optimizer.param_groups:
                    param_group['lr'] = opt.lr
                torch.cuda.empty_cache()
                trainer = MOCTrainer(opt, model, optimizer)
                trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)
                stop_step = stop_step + 1

            opt.rgb_model = tmp_rgb_model
            opt.flow_model = tmp_flow_model

        else:
            # this step drops the lr
            if epoch in opt.lr_step:
                lr = opt.lr * (0.1**(opt.lr_step.index(epoch) + 1))
                logger.write('Drop LR to ' + str(lr) + '\n')
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
    if opt.auto_stop:
        print('best epoch is ', best_epoch)

    logger.close()
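
Note: this training loop depends on set_seed and worker_init_fn for reproducibility, neither of which appears in the excerpt. A minimal sketch of what such helpers typically do (an assumption, not the repository code):

import random
import numpy as np
import torch

def set_seed_sketch(seed):
    # seed every RNG the training loop touches
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

def worker_init_fn_sketch(worker_id):
    # give each DataLoader worker a distinct but deterministic numpy seed
    np.random.seed((torch.initial_seed() + worker_id) % 2**32)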
Example No. 4
def frameAP_error(opt, redo=False):
    th = opt.th
    split = 'val'
    Dataset = get_dataset(opt.dataset)
    dataset = Dataset(opt, split)
    inference_dirname = opt.inference_dir
    print('inference_dirname is ', inference_dirname)
    print('threshold is ', th)
    eval_file = os.path.join(inference_dirname,
                             "frameAP{:g}ErrorAnalysis.pkl".format(th))

    if os.path.isfile(eval_file) and not redo:
        with open(eval_file, 'rb') as fid:
            res = pickle.load(fid)
    else:
        vlist = dataset._test_videos[opt.split - 1]
        # load per-frame detections
        frame_detections_file = os.path.join(inference_dirname,
                                             'frame_detections.pkl')
        if os.path.isfile(frame_detections_file) and not redo:
            print('load frameAP pre-result')
            with open(frame_detections_file, 'rb') as fid:
                alldets = pickle.load(fid)
        else:
            alldets = load_frame_detections(opt, dataset, opt.K, vlist,
                                            inference_dirname)
            with open(frame_detections_file, 'wb') as fid:
                pickle.dump(alldets, fid)
        res = {}
        # alldets: list of numpy array with <video_index> <frame_index> <ilabel> <score> <x1> <y1> <x2> <y2>
        # compute AP for each class
        print(len(dataset.labels))
        for ilabel, label in enumerate(dataset.labels):
            # detections of this class
            detections = alldets[alldets[:, 2] == ilabel, :]

            gt = {}
            othergt = {}
            labellist = {}

            # iv,v : 0 Basketball/v_Basketball_g01_c01
            for iv, v in enumerate(vlist):
                # tubes: dict {ilabel: (list of)<frame number> <x1> <y1> <x2> <y2>}
                tubes = dataset._gttubes[v]
                # labellist[iv]: label list for v
                labellist[iv] = tubes.keys()

                for il in tubes:
                    # tube: list of <frame number> <x1> <y1> <x2> <y2>
                    for tube in tubes[il]:
                        for i in range(tube.shape[0]):
                            # k: (video_index, frame_index)
                            k = (iv, int(tube[i, 0]))
                            if il == ilabel:
                                if k not in gt:
                                    gt[k] = []
                                gt[k].append(tube[i, 1:5].tolist())
                            else:
                                if k not in othergt:
                                    othergt[k] = []
                                othergt[k].append(tube[i, 1:5].tolist())

            for k in gt:
                gt[k] = np.array(gt[k])
            for k in othergt:
                othergt[k] = np.array(othergt[k])

            dupgt = deepcopy(gt)

            # pr will be an array containing precision-recall values and 4 types of errors:
            # localization, classification, timing, others
            pr = np.empty((detections.shape[0] + 1, 6),
                          dtype=np.float32)  # precision, recall, EL, EC, ET, EO
            pr[0, 0] = 1.0
            pr[0, 1:] = 0.0

            fn = sum([g.shape[0] for g in gt.values()])  # false negatives
            fp = 0  # false positives
            tp = 0  # true positives
            EL = 0  # localization errors
            EC = 0  # classification error: overlap >= th with an object of another class
            EO = 0  # other errors
            ET = 0  # timing error: the video contains the action but not at this frame

            for i, j in enumerate(np.argsort(-detections[:, 3])):
                k = (int(detections[j, 0]), int(detections[j, 1]))
                box = detections[j, 4:8]
                ispositive = False

                if k in dupgt:
                    if k in gt:
                        ious = iou2d(gt[k], box)
                        amax = np.argmax(ious)
                    if k in gt and ious[amax] >= th:
                        ispositive = True
                        gt[k] = np.delete(gt[k], amax, 0)
                        if gt[k].size == 0:
                            del gt[k]
                    else:
                        EL += 1

                elif k in othergt:
                    ious = iou2d(othergt[k], box)
                    if np.max(ious) >= th:
                        EC += 1
                    else:
                        EO += 1
                elif ilabel in labellist[k[0]]:
                    ET += 1
                else:
                    EO += 1
                if ispositive:
                    tp += 1
                    fn -= 1
                else:
                    fp += 1

                pr[i + 1, 0] = float(tp) / float(tp + fp)  # precision
                pr[i + 1, 1] = float(tp) / float(tp + fn)  # recall
                pr[i + 1, 2] = float(EL) / float(tp + fp)
                pr[i + 1, 3] = float(EC) / float(tp + fp)
                pr[i + 1, 4] = float(ET) / float(tp + fp)
                pr[i + 1, 5] = float(EO) / float(tp + fp)

            res[label] = pr

        # save results
        with open(eval_file, 'wb') as fid:
            pickle.dump(res, fid)

    # display results
    AP = 100 * np.array(
        [pr_to_ap(res[label][:, [0, 1]]) for label in dataset.labels])
    othersap = [
        100 *
        np.array([pr_to_ap(res[label][:, [j, 1]]) for label in dataset.labels])
        for j in range(2, 6)
    ]

    EL = othersap[0]
    EC = othersap[1]
    ET = othersap[2]
    EO = othersap[3]
    # missed detections = 1 - recall
    EM = 100 - 100 * np.array([res[label][-1, 1] for label in dataset.labels])

    LIST = [AP, EL, EC, ET, EO, EM]

    print('Error Analysis')

    print("")
    print("{:20s} {:8s} {:8s} {:8s} {:8s} {:8s} {:8s}".format(
        'label', '   AP   ', '  Loc.  ', '  Cls.  ', '  Time  ', ' Other ',
        ' missed '))
    print("")
    for il, label in enumerate(dataset.labels):
        print("{:20s} ".format(label) +
              " ".join(["{:8.2f}".format(L[il]) for L in LIST]))

    print("")
    print("{:20s} ".format("mean") +
          " ".join(["{:8.2f}".format(np.mean(L)) for L in LIST]))
    print("")
Example No. 5
def main(opt):
    # added to specify gpu id; the gpus arg in the provided code does not work
    torch.cuda.set_device(opt.gpus[0])

    set_seed(opt.seed)

    print('dataset: ' + opt.dataset + '   task:  ' + opt.task)
    Dataset = get_dataset(opt.dataset)
    opt = opts().update_dataset(opt, Dataset)

    train_writer = tensorboardX.SummaryWriter(
        log_dir=os.path.join(opt.log_dir, 'train'))
    epoch_train_writer = tensorboardX.SummaryWriter(
        log_dir=os.path.join(opt.log_dir, 'train_epoch'))
    val_writer = tensorboardX.SummaryWriter(
        log_dir=os.path.join(opt.log_dir, 'val'))
    epoch_val_writer = tensorboardX.SummaryWriter(
        log_dir=os.path.join(opt.log_dir, 'val_epoch'))

    logger = Logger(opt, epoch_train_writer, epoch_val_writer)

    opt.device = torch.device('cuda')

    is_pa = opt.pa_model != ''
    model = create_model(opt.arch,
                         opt.branch_info,
                         opt.head_conv,
                         opt.K,
                         is_pa=is_pa,
                         pa_fuse_mode=opt.pa_fuse_mode,
                         rgb_w3=opt.rgb_w3)

    # TODO: Compute grad magnitude (maybe check youssef's snippet)
    # TODO: Log grad to TB
    # default (single set of hyperparam)

    # Complexity analysis
    '''
    with torch.cuda.device(1):
      macs, params = get_model_complexity_info(model, (15, 288, 288), input_constructor=prepare_input, as_strings=True,
                                               print_per_layer_stat=True, verbose=True)
      print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
      print('{:<30}  {:<8}'.format('Number of parameters: ', params))
    '''

    # orig
    #optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    # custom
    lr_factor = 1.0
    if opt.pa_model != '':
        optimizer = torch.optim.Adam([{
            "params": model.pa.parameters(),
            "lr": opt.lr * lr_factor
        }, {
            "params": model.backbone.parameters(),
            "lr": opt.lr
        }, {
            "params": model.deconv_layer.parameters(),
            "lr": opt.lr
        }, {
            "params": model.branch.parameters(),
            "lr": opt.lr
        }], opt.lr)
    else:  # rgb model
        optimizer = torch.optim.Adam([{
            "params": model.backbone.parameters(),
            "lr": opt.lr
        }, {
            "params": model.deconv_layer.parameters(),
            "lr": opt.lr
        }, {
            "params": model.branch.parameters(),
            "lr": opt.lr
        }], opt.lr)

    start_epoch = opt.start_epoch

    # ADDED: allow automatic lr dropping when resuming a training run
    step_count = 0
    for step in range(len(opt.lr_step)):
        if start_epoch >= opt.lr_step[step]:
            step_count += 1
    opt.lr = opt.lr * (opt.lr_drop**step_count)

    if opt.pretrain_model == 'coco':
        model = load_coco_pretrained_model(opt, model)
    elif opt.pretrain_model == 'imagenet':
        model = load_imagenet_pretrained_model(opt, model)
    else:
        model = load_custom_pretrained_model(opt, model)

    if opt.load_model != '':
        model, optimizer, _, _ = load_model(model, opt.load_model, optimizer,
                                            opt.lr, opt.ucf_pretrain)

    # placeholder loop kept from layer-freezing experiments; currently a no-op
    for i, child in enumerate(model.children()):
        pass
        #if i == 2 or i == 3: # unfreeze branch, deconv: reproducible! but not pa nor backbone
        #    for l, param in enumerate(child.parameters()):
        #            param.requires_grad = False
        '''
        if i == 0: # PA
            continue 
            #for l, param in enumerate(child.parameters()):
                #if l < 3: # 3: conv1 15: block2
                #param.requires_grad = False
        elif i == 1: # backbone
            continue
        
            #for l, param in enumerate(child.parameters()):
                
                #print ('layer {} shape: {}'.format(l, param.size()))
                #if l == 2 or l == 3 or l == 4: # 5: conv1 and conv1_5, 30: resnext_layer1
                    #param.requires_grad = False
        elif i == 2: # deconv
            for l, param in enumerate(child.parameters()):
                param.requires_grad = False
        '''
        #else:
        #for name, module in child.named_modules():
        #if name in list_of_lay_freeze:
        #for param in module.parameters():
        #param.requires_grad = False
        #if isinstance(module, torch.nn.ReLU):
        #break
        #print (name)

    trainer = MOCTrainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    train_loader = torch.utils.data.DataLoader(Dataset(opt, 'train'),
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=opt.pin_memory,
                                               drop_last=True,
                                               worker_init_fn=worker_init_fn)
    val_loader = torch.utils.data.DataLoader(Dataset(opt, 'val'),
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.num_workers,
                                             pin_memory=opt.pin_memory,
                                             drop_last=True,
                                             worker_init_fn=worker_init_fn)

    print('training...')
    print('GPU allocation:', opt.chunk_sizes)
    best_ap = 0
    best_epoch = 0
    stop_step = 0  # TODO: this needs to be adjusted otherwise lr is dropped incorrectly when resuming training! (can set to 1 now if resuming from drop-once)

    # added: ensure the lr is not decreased too early (for jh s1?)
    if stop_step == 0:
        drop_early_flag = False  # keep False for more reproducible results (e.g., jh s1)
    else:
        drop_early_flag = True

    set_seed(opt.seed)  #317

    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        print('epoch is ', epoch)
        log_dict_train = trainer.train(epoch, train_loader, train_writer)
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('epoch/{}'.format(k), v, epoch, 'train')
            logger.write('train: {} {:8f} | '.format(k, v))
        logger.write('\n')
        if opt.save_all and not opt.auto_stop:
            time_str = time.strftime('%Y-%m-%d-%H-%M')
            model_name = 'model_[{}]_{}.pth'.format(epoch, time_str)
            save_model(os.path.join(opt.save_dir, model_name), model,
                       optimizer, epoch, log_dict_train['loss'])
        else:
            model_name = 'model_last.pth'
            save_model(os.path.join(opt.save_dir, model_name), model,
                       optimizer, epoch, log_dict_train['loss'])

        # this step evaluates the model
        if opt.val_epoch:
            with torch.no_grad():
                log_dict_val = trainer.val(epoch, val_loader, val_writer)
            for k, v in log_dict_val.items():
                logger.scalar_summary('epoch/{}'.format(k), v, epoch, 'val')
                logger.write('val: {} {:8f} | '.format(k, v))
        logger.write('\n')

        if opt.auto_stop:
            tmp_rgb_model = opt.rgb_model
            tmp_flow_model = opt.flow_model
            tmp_pa_model = opt.pa_model
            if opt.rgb_model != '':
                opt.rgb_model = os.path.join(opt.rgb_model, model_name)
            if opt.flow_model != '':
                opt.flow_model = os.path.join(opt.flow_model, model_name)
            if opt.pa_model != '':
                opt.pa_model = os.path.join(opt.pa_model, model_name)

            # orig: difficult to handle with long-range mem
            #stream_inference(opt)
            normal_inference(opt)

            ap = frameAP(opt, print_info=opt.print_log)

            ### added for debug
            print('frame mAP: {}'.format(ap))

            os.system("rm -rf tmp")
            if ap > best_ap:
                best_ap = ap
                best_epoch = epoch
                saved1 = os.path.join(opt.save_dir, model_name)
                saved2 = os.path.join(opt.save_dir, 'model_best.pth')
                os.system("cp " + str(saved1) + " " + str(saved2))
            if stop_step < len(
                    opt.lr_step) and epoch >= opt.lr_step[stop_step]:

                # added: avoid dropping the lr too early just because the mAP was higher there ...
                # seemed to create problems for jh s1
                if drop_early_flag is False:
                    model, optimizer, _, _ = load_model(
                        model, os.path.join(opt.save_dir, 'model_last.pth'),
                        optimizer, opt.lr)  # model_best -> model_last?
                    drop_early_flag = True
                    print('load epoch is ', epoch)

                else:  # after the first drop, the rest could drop based on mAP
                    model, optimizer, _, _ = load_model(
                        model, os.path.join(opt.save_dir, 'model_best.pth'),
                        optimizer, opt.lr)  # model_best -> model_last?
                    print('load epoch is ', best_epoch)

                opt.lr = opt.lr * opt.lr_drop
                logger.write('Drop LR to ' + str(opt.lr) + '\n')

                for ii, param_group in enumerate(optimizer.param_groups):
                    if ii >= 1:  # backbone, deconv, branch
                        param_group['lr'] = opt.lr
                    else:  # first group: pa (or backbone when there is no pa model)
                        param_group['lr'] = opt.lr * lr_factor

                print('Drop PA LR to ' + str(opt.lr * lr_factor))
                print('Drop backbone LR to ' + str(opt.lr))
                print('Drop branch LR to ' + str(opt.lr))

                torch.cuda.empty_cache()
                trainer = MOCTrainer(opt, model, optimizer)
                trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)
                stop_step = stop_step + 1

            opt.rgb_model = tmp_rgb_model
            opt.flow_model = tmp_flow_model
            opt.pa_model = tmp_pa_model

        else:
            # this step drops the lr
            if epoch in opt.lr_step:
                lr = opt.lr * (opt.lr_drop**(opt.lr_step.index(epoch) + 1))
                logger.write('Drop LR to ' + str(lr) + '\n')

                # added for debug
                print('Drop LR to ' + str(lr) + '\n')

                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
    if opt.auto_stop:
        print('best epoch is ', best_epoch)

    logger.close()
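
Note: both training loops checkpoint through save_model and load_model, whose definitions are not shown here. A hypothetical sketch of the interface they appear to expose (a single .pth file holding model, optimizer, epoch, and loss; names and layout are assumptions):

import torch

def save_model_sketch(path, model, optimizer, epoch, loss):
    # one checkpoint file holding everything needed to resume (assumed layout)
    torch.save({
        'epoch': epoch,
        'loss': loss,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }, path)

def load_model_sketch(model, path, optimizer=None, lr=None, ucf_pretrain=False):
    # restore model weights and, optionally, the optimizer state;
    # returns four values to mirror how load_model is unpacked above;
    # ucf_pretrain only mirrors the call site, the real helper presumably uses it
    checkpoint = torch.load(path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer'])
        if lr is not None:
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
    return model, optimizer, checkpoint.get('epoch'), checkpoint.get('loss')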
Example No. 6
def videoAP(opt, print_info=True):

    th = opt.th
    model_name = opt.model_name
    split = 'val'
    Dataset = get_dataset(opt.dataset)
    dataset = Dataset(opt, split)

    inference_dirname = opt.inference_dir

    vlist = dataset._test_videos[opt.split - 1]
    # load detections
    # alldets: for each label in 0..nlabels-1, a list of tuples (v, score, tube array)
    alldets = {ilabel: [] for ilabel in range(len(dataset.labels))}
    for v in vlist:
        tubename = os.path.join(inference_dirname, v + '_tubes.pkl')
        if not os.path.isfile(tubename):
            print("ERROR: Missing extracted tubes " + tubename)
            sys.exit()

        with open(tubename, 'rb') as fid:
            tubes = pickle.load(fid)
        for ilabel in range(len(dataset.labels)):
            ltubes = tubes[ilabel]
            idx = nms3dt(ltubes, 0.3)
            alldets[ilabel] += [(v, ltubes[i][1], ltubes[i][0]) for i in idx]

    # compute AP for each class
    #target_class = [5]

    res = {}
    for ilabel in range(len(dataset.labels)):

        #if ilabel not in target_class:
        #   continue

        detections = alldets[ilabel]
        # load ground-truth
        gt = {}
        for v in vlist:
            tubes = dataset._gttubes[v]

            if ilabel not in tubes:
                continue

            gt[v] = tubes[ilabel]

            if len(gt[v]) == 0:
                del gt[v]

        num_gt = sum([len(g) for g in gt.values()])  # total number of gt tubes
        # precision,recall
        pr = np.empty((len(detections) + 1, 2), dtype=np.float32)
        pr[0, 0] = 1.0
        pr[0, 1] = 0.0

        fn = sum([len(g) for g in gt.values()])  # false negatives
        fp = 0  # false positives
        tp = 0  # true positives

        for i, j in enumerate(
                np.argsort(-np.array([dd[1] for dd in detections]))):
            v, score, tube = detections[j]
            ispositive = False

            #if v == 'jump/Gregoire_Airman_showreel_2008_jump_f_cm_np1_le_bad_2' or v == 'jump/Gregoire_Airman_showreel_2008_jump_f_cm_np1_ri_bad_5' or v == 'jump/Sommerland_Syd_sprung_in_den_tod_jump_f_cm_np1_fr_bad_0':
            #print()

            if v in gt:
                ious = [iou3dt(g, tube) for g in gt[v]]
                amax = np.argmax(ious)
                if ious[amax] >= th:
                    ispositive = True
                    del gt[v][amax]
                    if len(gt[v]) == 0:
                        del gt[v]

            if ispositive:
                tp += 1
                fn -= 1
            else:
                fp += 1

            pr[i + 1, 0] = float(tp) / float(tp + fp)
            pr[i + 1, 1] = float(tp) / float(tp + fn)

        #if fn != 0:
        print('Failed to detect {}/{} tubes for class {}'.format(
            fn, num_gt, ilabel))
        res[dataset.labels[ilabel]] = pr

    # display results
    ap = 100 * np.array([pr_to_ap(res[label]) for label in dataset.labels])

    # ADDED: display individual class performance
    frameap_percls = {}
    for cl, cls_name in enumerate(dataset.labels):
        frameap_percls[cls_name] = ap[cl]

    for key, value in frameap_percls.items():
        print(key, ':', value)

    videoap_result = np.mean(ap)

    if print_info:
        log_file = open(os.path.join(opt.root_dir, 'result', opt.exp_id), 'a+')
        log_file.write('\nTask_{} VideoAP_{}\n'.format(model_name, th))
        print('Task_{} VideoAP_{}\n'.format(opt.model_name, th))
        # for il, _ in enumerate(dataset.labels):
        # print("{:20s} {:8.2f}".format('', ap[il]))
        # log_file.write("{:20s} {:8.2f}\n".format('', ap[il]))
        log_file.write("\n{:20s} {:8.2f}\n\n".format("mAP", videoap_result))
        log_file.close()
        print("{:20s} {:8.2f}".format("mAP", videoap_result))
    return videoap_result
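
Note: tube-level NMS here is nms3dt(ltubes, 0.3), where each element of ltubes is a (tube, score) pair. A greedy sketch reusing iou3dt_sketch from the note after Example No. 2 (again an assumption, not the repository code):

import numpy as np

def nms3dt_sketch(tubes, overlap=0.3):
    # tubes: list of (tube, score) pairs; returns indices of kept tubes
    if len(tubes) == 0:
        return np.array([], dtype=np.int32)
    order = np.argsort(-np.array([score for _, score in tubes]))
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # drop every remaining tube that overlaps the kept one too strongly
        ious = np.array([iou3dt_sketch(tubes[i][0], tubes[j][0])
                         for j in order[1:]])
        order = order[1:][ious < overlap]
    return np.array(keep, dtype=np.int32)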
Example No. 7
def frameAP(opt, print_info=True):
    redo = opt.redo
    th = opt.th
    split = 'val'
    model_name = opt.model_name
    Dataset = get_dataset(opt.dataset)
    dataset = Dataset(opt, split)

    inference_dirname = opt.inference_dir
    print('inference_dirname is ', inference_dirname)
    print('threshold is ', th)

    # ORIG
    vlist = dataset._test_videos[opt.split - 1]
    #vlist = dataset._train_videos[opt.split - 1]
    '''
    # ADDED: to analyze a specific class
    tar_class = 'wave'
    vlist_filt = []
    for vv in range(len(vlist)):
        cls_name, clip_name = vlist[vv].split('/')
        if cls_name == tar_class:
            vlist_filt.append(vlist[vv])
    vlist = vlist_filt
    '''

    # load per-frame detections
    frame_detections_file = os.path.join(inference_dirname,
                                         'frame_detections.pkl')
    if os.path.isfile(frame_detections_file) and not redo:
        print('load previous linking results...')
        print('if you want to reproduce it, please add --redo')
        with open(frame_detections_file, 'rb') as fid:
            alldets = pickle.load(fid)
    else:

        if opt.inference_mode == 'stream':
            alldets = load_frame_detections_stream(opt, dataset, opt.K, vlist,
                                                   inference_dirname)
        else:
            alldets = load_frame_detections(opt, dataset, opt.K, vlist,
                                            inference_dirname)
        try:
            with open(frame_detections_file, 'wb') as fid:
                pickle.dump(alldets, fid, protocol=4)
        except OverflowError:
            print(
                "OverflowError: cannot serialize a bytes object larger than 4 GiB"
            )

    results = {}
    # compute AP for each class
    for ilabel, label in enumerate(dataset.labels):  # e.g.: 0, 'brush_hair'
        # detections of this class
        detections = alldets[alldets[:, 2] == ilabel, :]

        # load ground-truth of this class
        gt = {}
        for iv, v in enumerate(vlist):
            tubes = dataset._gttubes[v]

            if ilabel not in tubes:
                continue

            for tube in tubes[ilabel]:
                for i in range(tube.shape[0]):  # for each frame
                    k = (iv, int(tube[i, 0]))  # video id, frame id
                    if k not in gt:  # if not yet added to gt
                        gt[k] = []
                    gt[k].append(tube[i, 1:5].tolist())

        for k in gt:
            gt[k] = np.array(gt[k])

        # added: keep a copy of the original gt of this class (it is never deleted or modified during evaluation)
        if opt.evaluation_mode == 'trimmed':
            gt_past = copy.deepcopy(gt)
            gt_keys_list = list(gt.keys())
            gt_vid = []
            for vv in gt_keys_list:
                if vv[0] in gt_vid:
                    continue
                gt_vid.append(vv[0])

        # pr will be an array containing precision-recall values
        #pr = np.empty((detections.shape[0] + 1, 2), dtype=np.float32)  # precision,recall
        pr = -1 * np.ones(
            (detections.shape[0] + 1, 2), dtype=np.float32)  # precision,recall
        pr[0, 0] = 1.0
        pr[0, 1] = 0.0
        fn = sum(
            [g.shape[0] for g in gt.values()]
        )  # false negatives # ALPHA: == number of frames (each frame has exactly 1 action instance)
        fp = 0  # false positives
        tp = 0  # true positives
        '''
        # Below may not be needed now if detection is conducted on all frames
        # ADDED: remove potential fn (when not evaluating all frames)?
        # Confirmed: can still be used when evaluating the whole set (at least for JHMDB)
        if opt.dataset == 'hmdb':
            num_tp = 0
            prev_k = (-1, -1)
            for ii, jj in enumerate(detections): 
                k = (int(detections[ii, 0]), int(detections[ii, 1])) # (video id, frame id)
                if k in gt and k != prev_k:
                    num_tp += 1
                prev_k = k
            fn = num_tp
        '''
        for i, j in enumerate(np.argsort(-detections[:, 3])):  # j: index of the det (highest to lowest score)
            k = (int(detections[j, 0]), int(detections[j, 1]))  # (video id, frame id)
            box = detections[j, 4:8]
            ispositive = False

            if k in gt:
                ious = iou2d(gt[k], box)
                amax = np.argmax(ious)

                if ious[amax] >= th:
                    ispositive = True
                    gt[k] = np.delete(gt[k], amax, 0)

                    if gt[k].size == 0:
                        del gt[k]

            # trimmed evaluation (for untrimmed datasets like ucf24):
            # skip a detection whose video contains the action but whose frame is not in the original gt

            if opt.evaluation_mode == 'trimmed':
                if k[0] in gt_vid and not (k in gt_past):
                    continue

            if ispositive:
                tp += 1
                fn -= 1
            else:
                fp += 1

            # ADDED: guard against division by zero; is it needed?
            if tp + fp == 0 or tp + fn == 0:
                continue

            pr[i + 1, 0] = float(tp) / float(tp + fp)
            pr[i + 1, 1] = float(tp) / float(tp + fn)

        pr_trimmed = pr[pr[:, 0] != -1]
        results[label] = pr_trimmed

    # display results
    ap = 100 * np.array([pr_to_ap(results[label]) for label in dataset.labels])

    # ADDED: display individual class performance
    frameap_percls = {}
    for cl, cls_name in enumerate(dataset.labels):
        frameap_percls[cls_name] = ap[cl]

    for key, value in frameap_percls.items():
        print(key, ':', value)

    frameap_result = np.mean(ap)
    if print_info:
        log_file = open(os.path.join(opt.root_dir, 'result', opt.exp_id), 'a+')
        log_file.write('\nTask_{} frameAP_{}\n'.format(model_name, th))
        print('Task_{} frameAP_{}\n'.format(model_name, th))
        log_file.write("\n{:20s} {:8.2f}\n\n".format("mAP", frameap_result))
        log_file.close()
        print("{:20s} {:8.2f}".format("mAP", frameap_result))

    return frameap_result
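
Note: all of these evaluation entry points consume a single opt namespace. For orientation, here is a hypothetical minimal invocation listing the attributes this frameAP variant actually reads; the values are illustrative only, and the Dataset constructor will read further fields not listed here:

from types import SimpleNamespace

opt = SimpleNamespace(
    redo=False,                   # reuse frame_detections.pkl if it exists
    th=0.5,                       # IoU threshold for counting a detection as positive
    model_name='MOC',
    dataset='hmdb',
    split=1,                      # which train/test split to evaluate
    K=7,                          # tubelet length
    inference_dir='./inference',
    inference_mode='normal',      # or 'stream'
    evaluation_mode='untrimmed',  # or 'trimmed'
    root_dir='.',
    exp_id='exp1',
)
mAP = frameAP(opt, print_info=True)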
Example No. 8
def BuildTubes(opt):
    redo = opt.redo
    if not redo:
        print('load previous linking results...')
        print('if you want to reproduce it, please add --redo')
    Dataset = get_dataset(opt.dataset)
    inference_dirname = opt.inference_dir
    K = opt.K
    split = 'val'
    dataset = Dataset(opt, split)

    print('inference_dirname is ', inference_dirname)
    vlist = dataset._test_videos[opt.split - 1]
    bar = Bar('{}'.format('BuildTubes'), max=len(vlist))
    
    # DEBUG: target a certain video / class to build tubes
    #'shoot_ball/ImprovingBasketballSkills-BasketballTurnaroundFadeAway_shoot_ball_f_nm_np1_ri_med_0', 'shoot_ball/KELVIN_shoot_ball_u_cm_np1_ba_med_0', 'shoot_ball/KELVIN_shoot_ball_u_cm_np1_ba_med_2', 
    #target_video = ['jump/Gregoire_Airman_showreel_2008_jump_f_cm_np1_le_bad_2']
    
    for iv, v in enumerate(vlist):
       
        #if v not in target_video:
        #    continue
        
        outfile = os.path.join(inference_dirname, v + "_tubes.pkl")

        if os.path.isfile(outfile) and not redo:
            continue

        RES = {}
        nframes = dataset._nframes[v]

        # TODO: hardcoded for jhmdb for now
        if nframes >= K * opt.ninput:
            ok_frame_inds = [16, 21, 26, 31, 36, nframes]
        
        else:
            print('video: {}; Number of frames: {}'.format(v, nframes))
            ok_frame_inds = [nframes - opt.ninput + 1, nframes]
            #ok_frame_inds = []
            #for kk in range(opt.K):
            #    ok_frame_inds.append(max(dataset._nframes[v] - kk*opt.ninput, 1))
            #ok_frame_inds.reverse()
        
        if opt.inference_mode == 'stream':
            # record the last K frame indices (used to allocate detections for the final frame)
            last_k_ind = []
            last_k_ind_init = min(K * opt.ninput, nframes) - opt.ninput + 1
            last_k_ind.append(last_k_ind_init)
            for _ in range(opt.K - 1):
                last_k_ind_init = max(1, last_k_ind_init - opt.ninput)
                last_k_ind.append(last_k_ind_init)
            last_k_ind.reverse()
        
        # load detected tubelets
        VDets = {}
        # orig: for startframe in range(1, nframes + 2 - K):
        for startframe in range(min(K * opt.ninput, nframes) - opt.ninput + 1, nframes + 1):

            if opt.inference_mode == 'stream':  # otherwise ignore
                if startframe not in ok_frame_inds:
                    continue

            if startframe != min(K * opt.ninput, nframes) - opt.ninput + 1:  # not the initial frame (e.g., 16)
                if opt.inference_mode == 'stream':  # otherwise ignore
                    last_k_ind.append(startframe)
                    if len(last_k_ind) > opt.K:  # only keep the last K indices
                        del last_k_ind[0]
            
            resname = os.path.join(inference_dirname, v, "{:0>5}.pkl".format(startframe))
            if not os.path.isfile(resname):
                print("ERROR: Missing extracted tubelets " + resname)
                sys.exit()

            with open(resname, 'rb') as fid:
                VDets[startframe] = pickle.load(fid)
        
        # added: may not be correct, but proceed with tube building
        first_endframe = list(VDets.keys())[0]
               
        for ilabel in range(len(dataset.labels)):
            FINISHED_TUBES = []
            CURRENT_TUBES = []  # tubes is a list of tuples (frame, ltubelets)
            # calculate average scores of tubelets in tubes

            def tubescore(tt):
                # a tube can contain multiple mini-tubes linked over time, hence the loop
                return np.mean(np.array([tt[i][1][-1] for i in range(len(tt))]))

            # orig: for frame in range(1, dataset._nframes[v] + 2 - K):
            for frame in range(min(K * opt.ninput, nframes) - opt.ninput + 1, nframes + 1):
                # load boxes of the new frame and do nms while keeping Nkeep highest scored
                if opt.inference_mode == 'stream':
                    if frame not in ok_frame_inds:
                        continue

                ltubelets = VDets[frame][ilabel + 1]  # [:,range(4*K) + [4*K + 1 + ilabel]]  Nx(4K+1) with (x1 y1 x2 y2)*K ilabel-score

                ltubelets = nms_tubelets(ltubelets, 0.6, top_k=10)

                # just start new tubes
                if frame == first_endframe: # orig: 1 
                    for i in range(ltubelets.shape[0]):
                        CURRENT_TUBES.append([(first_endframe, ltubelets[i, :])]) # orig: 1
                    continue

                # sort current tubes according to average score
                avgscore = [tubescore(t) for t in CURRENT_TUBES]
                argsort = np.argsort(-np.array(avgscore))
                CURRENT_TUBES = [CURRENT_TUBES[i] for i in argsort]
                # loop over tubes
                finished = []
                for it, t in enumerate(CURRENT_TUBES):
                    # compute ious between the last box of t and ltubelets
                    # (my interpretation: each tube in memory is matched against the candidate tubelets of the current frame)
                    # t[-1] is the most recent tubelet, since a tube can contain multiple mini-tubes linked over time
                    last_endframe, last_tubelet = t[-1]
                    ious = []
                    offset = round((frame - last_endframe) / opt.ninput)  # orig: frame - last_endframe
                    if offset < K:
                        nov = K - offset  # number of overlapping frames
                        ious = sum([
                            iou2d(ltubelets[:, 4 * iov:4 * iov + 4],
                                  last_tubelet[4 * (iov + offset):4 * (iov + offset + 1)])
                            for iov in range(nov)
                        ]) / float(nov)
                    else:
                        ious = iou2d(ltubelets[:, :4], last_tubelet[4 * K - 4:4 * K])  # head and tail matching

                    valid = np.where(ious >= 0.5)[0]  # 0.5

                    if valid.size > 0:  # greedily match the best candidate tubelet to this tube, then remove it from the pool
                        # take the one with maximum score
                        idx = valid[np.argmax(ltubelets[valid, -1])]
                        CURRENT_TUBES[it].append((frame, ltubelets[idx, :]))
                        ltubelets = np.delete(ltubelets, idx, axis=0)
                    else:
                        if offset >= opt.K:
                            finished.append(it)

                # finished tubes that are done
                for it in finished[::-1]:  # process in reverse order so deletions keep the remaining indices valid
                    FINISHED_TUBES.append(CURRENT_TUBES[it][:])
                    del CURRENT_TUBES[it]

                # start new tubes
                for i in range(ltubelets.shape[0]):
                    CURRENT_TUBES.append([(frame, ltubelets[i, :])])

            # remaining tubes are treated as finished
            FINISHED_TUBES += CURRENT_TUBES

            # build real tubes
            output = []
            
            for t_i, t in enumerate(FINISHED_TUBES):
                # DEBUG
                #print(t_i) 
                score = tubescore(t)

                # discard tubes with a very low average score
                if score < 0.005:
                    continue

                beginframe = max(t[0][0] - opt.ninput * (K - 1), 1)  # orig: t[0][0]  # TODO: needs care (forward vs backward)
                endframe = t[-1][0]  # orig: t[-1][0] + K - 1  # TODO
                length = endframe + 1 - beginframe

                # delete tubes with short duration (contributing to many fp?)
                # if length < min(nframes, (K * opt.ninput - opt.ninput + 1) + opt.ninput * 3):  # 15
                #     continue
                if length < nframes // 2:  # orig: 15
                    continue
                # build final tubes by averaging the tubelets
                out = np.zeros((length, 6), dtype=np.float32)
                out[:, 0] = np.arange(beginframe, endframe + 1)
                n_per_frame = np.zeros((length, 1), dtype=np.int32)  # how many tubelets cover each frame
                
                for i in range(len(t)):
                    frame, box = t[i] # frame: end frame of a tube
                    n_mem = K - 1
                    
                    if opt.inference_mode == 'stream':
                        # for stream detection
                        if frame != nframes:
                            for k in range(K):
                                out[max(frame - k*opt.ninput, 1) - beginframe, 1:5] += box[4 * n_mem:4 * n_mem + 4]
                                out[max(frame - k*opt.ninput, 1) - beginframe, -1] += box[-1] 
                                n_per_frame[max(frame - k*opt.ninput, 1) - beginframe, 0] += 1
                                n_mem -= 1
                            
                        else:  # for the last frame
                            for k in reversed(range(0, K)):
                                out[last_k_ind[k] - beginframe, 1:5] += box[4 * n_mem:4 * n_mem + 4]
                                out[last_k_ind[k] - beginframe, -1] += box[-1] 
                                n_per_frame[last_k_ind[k] - beginframe, 0] += 1
                                n_mem -= 1
                    
                    else:
                        for k in range(K):
                            out[max(frame - k*opt.ninput, 1) - beginframe, 1:5] += box[4 * n_mem:4 * n_mem + 4]
                            out[max(frame - k*opt.ninput, 1) - beginframe, -1] += box[-1] 
                            n_per_frame[max(frame - k*opt.ninput, 1) - beginframe, 0] += 1
                            n_mem -= 1
                        
                ''' orig
                for i in range(len(t)):
                    frame, box = t[i]
                    for k in range(K):
                        out[frame - beginframe + k, 1:5] += box[4 * k:4 * k + 4] # avg effect on the box coord; more stable?
                        out[frame - beginframe + k, -1] += box[-1]  # single frame confidence
                        n_per_frame[frame - beginframe + k, 0] += 1
                '''
                
                nonzero_ind = n_per_frame != 0  # sparse! avoid dividing the many zero entries
                out[nonzero_ind[:, 0], 1:] /= n_per_frame[nonzero_ind[:, 0]]
                # orig:
                # out[:, 1:] /= n_per_frame
                
                if 0 in out[:, -1]:  # some frames were never filled (this was creating issues); interpolate them
                    # print('Frame detection interpolation takes place!')
                    # ADDED: extrapolation?
                    nonzero_ind = np.where(nonzero_ind)[0]
                    nz_v_prev = -5
                    nonzero_nonlink = []  # (prev, next) pairs of filled frames with a gap between them
                    for nz_i, nz_v in enumerate(nonzero_ind):
                        nz_offset = nz_v - nz_v_prev

                        if nz_offset == 1:
                            nz_v_prev = nz_v
                            continue

                        if nz_i > 0:
                            nonzero_nonlink.append((nz_v_prev, nz_v))
                        nz_v_prev = nz_v
                    
                    for idx, lo_hi in enumerate(nonzero_nonlink):
                        lo_hi_dist = lo_hi[1] - lo_hi[0]
                        lo_box = out[lo_hi[0], 1:]
                        hi_box = out[lo_hi[1], 1:]
                        # use a separate name so the tube score appended below is not overwritten
                        interp_score = (out[lo_hi[0], -1] + out[lo_hi[1], -1]) / 2.
                        diff_box = (hi_box - lo_box) / lo_hi_dist

                        for offset in range(1, lo_hi_dist):
                            if out[lo_hi[0] + offset, -1] == 0:  # if the cell was not filled before
                                out[lo_hi[0] + offset, 1:] = lo_box + offset * diff_box
                                out[lo_hi[0] + offset, -1] = interp_score
                            
                
                output.append([out, score])
                
                # out: [num_frames, (frame idx, x1, y1, x2, y2, score)]

            RES[ilabel] = output
        # RES: {ilabel: [(out[length, 6], score), ...]} for each label
        with open(outfile, 'wb') as fid:
            pickle.dump(RES, fid)
        Bar.suffix = '[{0}/{1}]:{2}|Tot: {total:} |ETA: {eta:} '.format(
            iv + 1, len(vlist), v, total=bar.elapsed_td, eta=bar.eta_td)
        bar.next()
    bar.finish()
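
Note: both BuildTubes variants prune per-frame candidates with nms_tubelets(ltubelets, 0.6, top_k=10), where each row of ltubelets is (x1 y1 x2 y2)*K followed by a score. A greedy sketch that measures overlap as the mean IoU across the K boxes, reusing iou2d_sketch from the note after Example No. 4 (an assumption, not the repository code):

import numpy as np

def nms_tubelets_sketch(dets, overlap=0.6, top_k=10):
    # dets: Nx(4K+1) array; greedy NMS keeping at most top_k tubelets
    if dets.shape[0] == 0:
        return dets
    K = (dets.shape[1] - 1) // 4
    order = np.argsort(-dets[:, -1])
    keep = []
    while order.size > 0 and len(keep) < top_k:
        i = order[0]
        keep.append(i)
        rest = order[1:]
        # overlap scored as the mean per-frame IoU over the K boxes
        ious = np.zeros(rest.size)
        for k in range(K):
            ious += iou2d_sketch(dets[rest, 4 * k:4 * k + 4],
                                 dets[i, 4 * k:4 * k + 4])
        ious /= K
        order = rest[ious < overlap]
    return dets[keep, :]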
Example No. 9
def BuildTubes(opt):
    redo = opt.redo
    if not redo:
        print('load previous linking results...')
        print('if you want to reproduce it, please add --redo')
    Dataset = get_dataset(opt.dataset)
    inference_dirname = opt.inference_dir
    K = opt.K
    split = 'val'
    dataset = Dataset(opt, split)

    print('inference_dirname is ', inference_dirname)
    vlist = dataset._test_videos[opt.split - 1]
    bar = Bar('{}'.format('BuildTubes'), max=len(vlist))
    for iv, v in enumerate(vlist):
        outfile = os.path.join(inference_dirname, v + "_tubes.pkl")
        if os.path.isfile(outfile) and not redo:
            continue

        RES = {}
        nframes = dataset._nframes[v]

        # load detected tubelets
        VDets = {}
        for startframe in range(1, nframes + 2 - K):
            resname = os.path.join(inference_dirname, v,
                                   "{:0>5}.pkl".format(startframe))
            if not os.path.isfile(resname):
                print("ERROR: Missing extracted tubelets " + resname)
                sys.exit()

            with open(resname, 'rb') as fid:
                VDets[startframe] = pickle.load(fid)
        for ilabel in range(len(dataset.labels)):
            FINISHED_TUBES = []
            CURRENT_TUBES = []  # tubes is a list of tuples (frame, ltubelets)

            # calculate average scores of tubelets in tubes

            def tubescore(tt):
                return np.mean(np.array([tt[i][1][-1]
                                         for i in range(len(tt))]))

            for frame in range(1, dataset._nframes[v] + 2 - K):
                # load boxes of the new frame and do nms while keeping Nkeep highest scored
                ltubelets = VDets[frame][ilabel + 1]  # [:,range(4*K) + [4*K + 1 + ilabel]]  Nx(4K+1) with (x1 y1 x2 y2)*K ilabel-score

                ltubelets = nms_tubelets(ltubelets, 0.6, top_k=10)

                # just start new tubes
                if frame == 1:
                    for i in range(ltubelets.shape[0]):
                        CURRENT_TUBES.append([(1, ltubelets[i, :])])
                    continue

                # sort current tubes according to average score
                avgscore = [tubescore(t) for t in CURRENT_TUBES]
                argsort = np.argsort(-np.array(avgscore))
                CURRENT_TUBES = [CURRENT_TUBES[i] for i in argsort]
                # loop over tubes
                finished = []
                for it, t in enumerate(CURRENT_TUBES):
                    # compute ious between the last box of t and ltubelets
                    last_frame, last_tubelet = t[-1]
                    ious = []
                    offset = frame - last_frame
                    if offset < K:
                        nov = K - offset
                        ious = sum([
                            iou2d(
                                ltubelets[:, 4 * iov:4 * iov + 4],
                                last_tubelet[4 * (iov + offset):4 *
                                             (iov + offset + 1)])
                            for iov in range(nov)
                        ]) / float(nov)
                    else:
                        ious = iou2d(ltubelets[:, :4],
                                     last_tubelet[4 * K - 4:4 * K])

                    valid = np.where(ious >= 0.5)[0]

                    if valid.size > 0:
                        # take the one with maximum score
                        idx = valid[np.argmax(ltubelets[valid, -1])]
                        CURRENT_TUBES[it].append((frame, ltubelets[idx, :]))
                        ltubelets = np.delete(ltubelets, idx, axis=0)
                    else:
                        if offset >= opt.K:
                            finished.append(it)

                # finished tubes that are done
                for it in finished[::-1]:  # process in reverse order so deletions keep the remaining indices valid
                    FINISHED_TUBES.append(CURRENT_TUBES[it][:])
                    del CURRENT_TUBES[it]

                # start new tubes
                for i in range(ltubelets.shape[0]):
                    CURRENT_TUBES.append([(frame, ltubelets[i, :])])

            # remaining tubes are treated as finished
            FINISHED_TUBES += CURRENT_TUBES

            # build real tubes
            output = []
            for t in FINISHED_TUBES:
                score = tubescore(t)

                # discard tubes with a very low average score
                if score < 0.005:
                    continue

                beginframe = t[0][0]
                endframe = t[-1][0] + K - 1
                length = endframe + 1 - beginframe

                # delete tubes with short duration
                if length < 15:
                    continue

                # build final tubes by averaging the tubelets
                out = np.zeros((length, 6), dtype=np.float32)
                out[:, 0] = np.arange(beginframe, endframe + 1)
                n_per_frame = np.zeros((length, 1), dtype=np.int32)
                for i in range(len(t)):
                    frame, box = t[i]
                    for k in range(K):
                        out[frame - beginframe + k,
                            1:5] += box[4 * k:4 * k + 4]
                        out[frame - beginframe + k,
                            -1] += box[-1]  # single frame confidence
                        n_per_frame[frame - beginframe + k, 0] += 1
                out[:, 1:] /= n_per_frame
                output.append([out, score])
                # out: [num_frames, (frame idx, x1, y1, x2, y2, score)]

            RES[ilabel] = output
        # RES: {ilabel: [(out[length, 6], score), ...]} for each label
        with open(outfile, 'wb') as fid:
            pickle.dump(RES, fid)
        Bar.suffix = '[{0}/{1}]:{2}|Tot: {total:} |ETA: {eta:} '.format(
            iv + 1, len(vlist), v, total=bar.elapsed_td, eta=bar.eta_td)
        bar.next()
    bar.finish()
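
Note: to make the tubelet-averaging step at the end of this BuildTubes concrete, here is a tiny standalone illustration with toy numbers: two linked tubelets of length K=2 vote on the frames they cover, and each frame's box and score are divided by the number of votes, exactly as in the loop above:

import numpy as np

K = 2
# two tubelets starting at frames 1 and 2, each row (x1 y1 x2 y2)*K + score
t = [(1, np.array([0, 0, 10, 10, 1, 1, 11, 11, 0.9], dtype=np.float32)),
     (2, np.array([1, 1, 11, 11, 2, 2, 12, 12, 0.8], dtype=np.float32))]

beginframe, endframe = 1, 3
length = endframe + 1 - beginframe
out = np.zeros((length, 6), dtype=np.float32)
out[:, 0] = np.arange(beginframe, endframe + 1)
n_per_frame = np.zeros((length, 1), dtype=np.int32)

for frame, box in t:
    for k in range(K):
        out[frame - beginframe + k, 1:5] += box[4 * k:4 * k + 4]
        out[frame - beginframe + k, -1] += box[-1]
        n_per_frame[frame - beginframe + k, 0] += 1

out[:, 1:] /= n_per_frame  # frame 2 is covered by both tubelets, so it is averaged
print(out)  # row for frame 2: box (1, 1, 11, 11), score 0.85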