Example #1
def main():
    rank, world_size = dist_init()
    logger.info("init done")

    # load cfg
    cfg.merge_from_file(args.cfg)
    if rank == 0:
        if not os.path.exists(cfg.TRAIN.LOG_DIR):
            os.makedirs(cfg.TRAIN.LOG_DIR)
        init_log('global', logging.INFO)
        if cfg.TRAIN.LOG_DIR:
            add_file_handler('global',
                             os.path.join(cfg.TRAIN.LOG_DIR, 'logs.txt'),
                             logging.INFO)

        logger.info("Version Information: \n{}\n".format(commit()))
        logger.info("config \n{}".format(json.dumps(cfg, indent=4)))

    # create model
    model = ModelBuilder().train()
    dist_model = nn.DataParallel(model).cuda()

    # load pretrained backbone weights
    if cfg.BACKBONE.PRETRAINED:
        cur_path = os.path.dirname(os.path.realpath(__file__))
        backbone_path = os.path.join(cur_path, '../', cfg.BACKBONE.PRETRAINED)
        load_pretrain(model.backbone, backbone_path)

    # create tensorboard writer
    if rank == 0 and cfg.TRAIN.LOG_DIR:
        tb_writer = SummaryWriter(cfg.TRAIN.LOG_DIR)
    else:
        tb_writer = None

    # build dataset loader
    train_loader = build_data_loader()

    # build optimizer and lr_scheduler
    optimizer, lr_scheduler = build_opt_lr(dist_model.module,
                                           cfg.TRAIN.START_EPOCH)

    # resume training
    if cfg.TRAIN.RESUME:
        logger.info("resume from {}".format(cfg.TRAIN.RESUME))
        assert os.path.isfile(cfg.TRAIN.RESUME), \
            '{} is not a valid file.'.format(cfg.TRAIN.RESUME)
        model, optimizer, cfg.TRAIN.START_EPOCH = \
            restore_from(model, optimizer, cfg.TRAIN.RESUME)
    # load pretrain
    elif cfg.TRAIN.PRETRAINED:
        load_pretrain(model, cfg.TRAIN.PRETRAINED)

    dist_model = nn.DataParallel(model)

    logger.info(lr_scheduler)
    logger.info("model prepare done")

    # start training
    train(train_loader, dist_model, optimizer, lr_scheduler, tb_writer)
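
Note: this variant wraps the model in nn.DataParallel, while Examples #3 and #14 below use pysot's DistModule on top of the same dist_init() rank/world_size setup; the final training call is otherwise identical.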
Example #2
def save_siamese_rpn():
    # load config

    rpn_path = root_dir + 'experiments/siamrpn_alex_dwxcorr_16gpu/pre_train/checkpoint_e45.pth'
    rpn_cfg = root_dir + 'experiments/siamrpn_alex_dwxcorr_16gpu/config.yaml'
    cfg.merge_from_file(rpn_cfg)
    # create model
    model_rpn = ModelBuilder()
    model_rpn = load_pretrain(model_rpn, rpn_path).cuda().eval()

    gru_path = root_dir + 'experiments/siamrpn_alex_dwxcorr_16gpu/gru_snapshot/gru_10.pth'
    gru_cfg = root_dir + 'experiments/siamrpn_alex_dwxcorr_16gpu/config_gru.yaml'
    cfg.merge_from_file(gru_cfg)
    # create model
    model_gru = ModelBuilder()
    model_gru = load_pretrain(model_gru, gru_path).cuda().eval()

    # inspect parameter names/shapes of both models
    for key, item in model_gru.named_parameters():
        print(key, item.shape)

    for key, item in model_rpn.named_parameters():
        print(key, item.shape)

    model_gru_dict = model_gru.state_dict()
    model_rpn_dict = model_rpn.state_dict()

    # keep the GRU-specific weights; take everything else from the RPN model
    for key in model_gru_dict:
        if "grus" in key:
            print("keep:", key)
        else:
            print("replace:", key)
            model_gru_dict[key] = model_rpn_dict[key]

    torch.save(model_gru_dict, "siamese_gru10_rpn45.pth")
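
A minimal sketch of loading the merged weights back for inference (an assumption, not part of the original script; it presumes the GRU config merged above is still the active cfg):

model = ModelBuilder()
model.load_state_dict(torch.load("siamese_gru10_rpn45.pth"))
model = model.cuda().eval()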
Example #3
def main():
    rank, world_size = dist_init()
    logger.info("init done")

    # load cfg
    cfg.merge_from_file(args.cfg)
    if rank == 0:
        if not os.path.exists(cfg.TRAIN.LOG_DIR):
            os.makedirs(cfg.TRAIN.LOG_DIR)
        init_log('global', logging.INFO)
        if cfg.TRAIN.LOG_DIR:
            add_file_handler('global',
                             os.path.join(cfg.TRAIN.LOG_DIR, 'logs.txt'),
                             logging.INFO)

        logger.info("Version Information: \n{}\n".format(commit()))
        logger.info("config \n{}".format(json.dumps(cfg, indent=4)))

    # create model
    model = Template_Enhance().cuda().train()
    dist_model = DistModule(model)

    # load pretrained SiamRPN++ model
    if cfg.TSA.MODELBUILD_PATH:
        cur_path = os.path.dirname(os.path.realpath(__file__))
        backbone_path = os.path.join(cur_path, '../', cfg.TSA.MODELBUILD_PATH)
        load_pretrain(model.model, backbone_path)

    # create tensorboard writer
    if rank == 0 and cfg.TRAIN.LOG_DIR:
        tb_writer = SummaryWriter(cfg.TRAIN.LOG_DIR)
    else:
        tb_writer = None

    # build dataset loaders
    train_loader, val_loader = build_data_loader()

    # build optimizer and lr_scheduler
    optimizer, scheduler = build_opt_lr(dist_model.module)

    logger.info(scheduler)
    logger.info("model prepare done")

    # start estimation
    # train(train_loader, dist_model, optimizer, scheduler, tb_writer)
    estimation = Estimator(train_loader, val_loader, dist_model, optimizer, scheduler, tb_writer)
    # estimation.evaluate(5000, 'loss')
    estimation.train()
Example #4
def setup_tracker():
    cfg.merge_from_file(cfg_file)

    model = ModelBuilder()
    model = load_pretrain(model, model_file).cuda().eval()

    tracker = build_tracker(model)
    warmup(model)
    return tracker
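
A hedged usage sketch for the returned tracker, following the init/track pattern of the demo examples below (frame and init_rect are assumed to come from the caller):

tracker = setup_tracker()
tracker.init(frame, init_rect)   # init_rect is an (x, y, w, h) rectangle
outputs = tracker.track(frame)   # returns a dict with at least 'bbox'
bbox = list(map(int, outputs['bbox']))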
Example #5
def main():
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    # model.load_state_dict(torch.load(args.snapshot,
    #     map_location=lambda storage, loc: storage.cpu()))
    # model.eval().to(device)
    model = load_pretrain(model, args.snapshot).eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)
    for frame in get_frames(args.video_name):
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except Exception:
                exit()
            tracker.init(frame, init_rect)
            first_frame = False
        else:
            outputs = tracker.track(frame)
            if 'polygon' in outputs:
                polygon = np.array(outputs['polygon']).astype(np.int32)
                cv2.polylines(frame, [polygon.reshape((-1, 1, 2))], True,
                              (0, 255, 0), 3)
                # note: MASK_THERSHOLD is the spelling used by pysot's config
                mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                mask = mask.astype(np.uint8)
                mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
                frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
            else:
                bbox = list(map(int, outputs['bbox']))
                cv2.rectangle(frame, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 0), 3)
            cv2.imshow(video_name, frame)
            cv2.waitKey(40)
Example #6
    def __init__(self):
        super(DROL, self).__init__("DROL")

        # load config
        cfg.merge_from_file(path_config.DROL_CONFIG)
        seed_torch(cfg.TRACK.SEED)

        # create model
        model = ModelBuilder()

        # load model
        model = load_pretrain(model, path_config.DROL_SNAPSHOT).cuda().eval()

        # build tracker
        self.tracker = build_tracker(model)
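
seed_torch is not shown in this snippet; a common implementation is sketched below (an assumption, not necessarily this repository's version):

import random
import numpy as np
import torch

def seed_torch(seed):
    # seed every RNG that can influence tracking results
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)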
Example #7
    def __init__(self, lr_u=0.2, lr_v=0.2, lambda_u=0.1, lambda_v=10.0,
                 x_padding=0.5, z_ratio=1.2, features='gray', kernel='gaussian'):
        super(SFKCF, self).__init__()
        self.x_padding = x_padding
        self.lambda_ = 1e-4
        self.features = features
        self.w2c = None
        if self.features == 'hog':
            self.interp_factor = 0.02
            self.sigma = 0.5
            self.cell_size = 4
            self.output_sigma_factor = 0.1

        elif self.features == 'sfres50':
            self.interp_factor = 0.02
            self.sigma = 0.5
            self.cell_size = 8.0
            self.output_sigma_factor = 0.1
            model = ModelBuilder()
            model = load_pretrain(model, cfg.BACKBONE.PRETRAINED).backbone
            self.model = model.cuda().eval()

        elif self.features == 'gray' or self.features == 'color':
            self.interp_factor = 0.075
            self.sigma = 0.2
            self.cell_size = 1
            self.output_sigma_factor = 0.1

        elif self.features == 'cn':
            self.interp_factor = 0.075
            self.sigma = 0.2
            self.cell_size = 1
            self.output_sigma_factor = 1. / 16
            self.padding = 1

        else:
            raise NotImplementedError

        self.kernel = kernel
        self.U = None
        self.V = None
        self.lr_u = lr_u
        self.lr_v = lr_v
        self.lambda_v = lambda_v
        self.lambda_u = lambda_u
        self.z_padding = z_ratio * x_padding
        self.vis = None
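
In the 'sfres50' branch the pysot model serves only as a frozen deep-feature extractor for the correlation filter; cell_size = 8.0 presumably matches the stride-8 feature maps of pysot's dilated ResNet-50 backbone.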
Example #8
def load_pysot_model(tracker_type):
    configpath = "./week3/kalman/pysot/experiments/" + PYSOT_TRACKERS[tracker_type] + \
                 "/config.yaml"
    modelpath = "./week3/kalman/pysot/models/" + PYSOT_TRACKERS[
        tracker_type] + ".pth"

    cfg.merge_from_file(configpath)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # load model (weights are loaded once, via load_pretrain)
    model = ModelBuilder()
    model = load_pretrain(model, modelpath)
    return model.eval().to(device)
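
Note that load_pretrain, unlike a raw load_state_dict round-trip, also unwraps DataParallel-style 'module.' key prefixes before loading, which is why the examples here prefer it for published snapshots.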
Example #9
    def __init__(self, dataset=''):
        if 'OTB' in dataset:
            cfg_file = os.path.join(project_path_, 'pysot/experiments/siamrpn_r50_l234_dwxcorr_otb/config.yaml')
            snapshot = os.path.join(project_path_, 'pysot/experiments/siamrpn_r50_l234_dwxcorr_otb/model.pth')
        elif 'LT' in dataset:
            cfg_file = os.path.join(project_path_, 'pysot/experiments/siamrpn_r50_l234_dwxcorr_lt/config.yaml')
            snapshot = os.path.join(project_path_, 'pysot/experiments/siamrpn_r50_l234_dwxcorr_lt/model.pth')
        else:
            cfg_file = os.path.join(project_path_, 'pysot/experiments/siamrpn_r50_l234_dwxcorr/config.yaml')
            snapshot = os.path.join(project_path_, 'pysot/experiments/siamrpn_r50_l234_dwxcorr/model.pth')
        # load config
        cfg.merge_from_file(cfg_file)
        # create model
        self.model = ModelBuilder()  # a torch.nn.Module
        # load model
        self.model = load_pretrain(self.model, snapshot).cuda().eval()
Example #10
def main():
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).eval().to(device)

    # build tracker
    tracker = SiamAPNTracker(model, cfg.TRACK)

    hp = {'lr': 0.3, 'penalty_k': 0.04, 'window_lr': 0.4}

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)
    for frame in get_frames(args.video_name):
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except Exception:
                exit()
            tracker.init(frame, init_rect)
            first_frame = False
        else:
            outputs = tracker.track(frame, hp)
            bbox = list(map(int, outputs['bbox']))
            cv2.rectangle(frame, (bbox[0], bbox[1]),
                          (bbox[0] + bbox[2], bbox[1] + bbox[3]), (0, 255, 0),
                          3)
            cv2.imshow(video_name, frame)
            cv2.waitKey(40)
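
The hp dict supplies test-time hyper-parameters to track(): judging by the names, lr is the bbox update rate, penalty_k the scale/ratio change penalty, and window_lr the cosine-window influence; these correspond to the knobs exposed as cfg.TRACK.LR, cfg.TRACK.PENALTY_K, and cfg.TRACK.WINDOW_INFLUENCE in Example #16.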
Example #11
def save_siamese_rpn():
    # load config

    cfg.merge_from_file(args.config)
    cfg.BACKBONE.TYPE = 'alexnetlegacy'
    # create model
    model_legacy = ModelBuilder()
    # load model
    model_legacy = load_pretrain(model_legacy, args.snapshot).cuda().eval()

    cfg.BACKBONE.TYPE = 'alexnet'
    # create model
    model_alexnet = ModelBuilder()

    for key, item in model_alexnet.named_parameters():
        print(key, item.shape)
    name_map = {}
    model_legacy_dict = model_legacy.state_dict()
    model_alexnet_dict = model_alexnet.state_dict()
    for para1, para2 in zip(model_legacy.named_parameters(),
                            model_alexnet.named_parameters()):
        # print(para1[0],para1[1].shape)
        print(para1[0])
        print(para2[0])
        print(para1[1].shape)
        print(para2[1].shape)
        print("--" * 40)
        # print("['{}'--->'{}']".format(para1[0], para2[0]),para1[1].shape, para2[1].shape)
        name_map[para1[0]] = para2[0]
    print(name_map)

    for key, val in name_map.items():
        model_alexnet_dict[val] = model_legacy_dict[key]

    torch.save(model_alexnet_dict, "siamese_alexnet_rpn.pth")
Example #12
def train(video, v_idx, attack_region, template_dir):
    n_epochs = args.epochs
    epsilon = args.epsilon
    lr = args.lr

    track_model = ModelBuilder()
    track_model = load_pretrain(track_model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(track_model)
    # h, w, _ = video[0][0].shape
    attacker = ModelAttacker(args.batch, args.epsilon).cuda().train()
    optimizer = optim.Adam(attacker.parameters(), lr=lr)
    # optimizer = optim.SGD(attacker.parameters(), lr=lr, momentum=0.9)

    # freeze the tracker; only the attacker's perturbation is optimized
    for param in tracker.model.parameters():
        param.requires_grad_(False)

    # load pretrained
    start_epoch = 0
    checkpoint_dir = os.path.join(args.savedir, args.dataset, 'checkpoint',
                                  attack_region, video.name)
    if os.path.exists(checkpoint_dir):
        entries = os.listdir(checkpoint_dir)
        if len(entries) > 0:
            entries.sort()
            start_epoch = int(
                entries[-1][-7:-4]) if entries[-1][-7:-4].isnumeric() else 0

            if start_epoch == args.epochs:
                return

    if start_epoch > 0:
        state = torch.load(os.path.join(checkpoint_dir, entries[-1]))
        attacker.load_state_dict(state['attacker'])
        optimizer.load_state_dict(state['optimizer'])

    # elif attack_region == 'search':
    #     checkpoint_dir = os.path.join(args.savedir, args.dataset, 'checkpoint', 'template', video.name)
    #     assert os.path.exists(
    #         os.path.join(checkpoint_dir, 'checkpoint_100.pth')), ' missing ' + checkpoint_dir + ' in ' + video.name
    #     state = torch.load(os.path.join(checkpoint_dir, 'checkpoint_100.pth'))
    #     attacker.load_state_dict(state['attacker'])
    #     optimizer.load_state_dict(state['optimizer'])

    # generate cropping offsets for the whole video
    if attack_region in ('template', 'search'):
        tracker.generate_transition(0, len(video))

    # freeze the perturbation that is not being attacked
    if attack_region == 'template':
        attacker.adv_x.requires_grad = False
    elif attack_region == 'search':
        attacker.adv_z.requires_grad = False

    training_data = MyDataset()
    num_frames = len(video)

    it = math.ceil((num_frames - 1) / args.batch)
    params = {'batch_size': args.batch, 'shuffle': False, 'num_workers': 6}

    for i in range(0, it - 1):
        for j in range(0, args.batch):
            indx = i * args.batch + j + 1
            training_data.add([video[indx][0], video[indx][1]])

    for j in range(args.batch * (it - 1), num_frames - 1):
        training_data.add([video[j + 1][0], video[j + 1][1]])

    img_names = [
        x.replace(args.dataset_dir, args.fabricated_dir)
        for x in video.img_names
    ]
    del img_names[0]

    data_loader = torch.utils.data.DataLoader(training_data, **params)

    toc = 0

    for epoch in range(0, args.epochs):

        if epoch < start_epoch:
            continue

        # initial frame
        img, gt_bbox = video[0]
        if attack_region == 'search':
            # img_name = video.img_names[0].replace(args.dataset_dir, args.fabricated_dir)
            img_name = os.path.join(args.savedir, args.dataset, video.name,
                                    '000099.jpg')
            img = cv2.imread(img_name)
        gt_bbox, gt_bbox_ = gt_bbox_adaptor(gt_bbox)

        state = {'img': img, 'gt_bbox': gt_bbox_, 'video_name': video.name}

        state, loss = adversarial_train(0, state, attacker, tracker, optimizer,
                                        gt_bbox_, attack_region, template_dir,
                                        epoch)
        pbar = tqdm(enumerate(data_loader), position=0, leave=True)
        _loss = []
        if attack_region == 'template':
            adv_z = []

        for idx, (imgs, gt_bboxes) in pbar:
            if len(gt_bboxes[0]) == 4:
                gt_bboxes = (gt_bboxes[:, 0], gt_bboxes[:, 1], gt_bboxes[:, 0],
                             gt_bboxes[:, 1] + gt_bboxes[:, 3] - 1,
                             gt_bboxes[:, 0] + gt_bboxes[:, 2] - 1,
                             gt_bboxes[:, 1] + gt_bboxes[:, 3] - 1,
                             gt_bboxes[:, 0] + gt_bboxes[:, 2] - 1,
                             gt_bboxes[:, 1])

            gt_bboxes = torch.stack(gt_bboxes).float()
            cx, cy, w, h = get_axis_aligned_bbox_tensor(gt_bboxes)
            gt_bboxes_ = torch.stack([cx, cy, w, h])

            tic = cv2.getTickCount()

            state['img'] = imgs

            state, loss = adversarial_train(args.batch * idx + 1, state,
                                            attacker, tracker, optimizer,
                                            gt_bboxes_, attack_region,
                                            template_dir, epoch)

            toc += cv2.getTickCount() - tic

            if idx < 60:
                if idx > 0:
                    _loss.append(loss)
                    pbar.set_postfix_str('Video(%d): %s epoch: %d ' %
                                         (v_idx + 1, video.name, epoch + 1))

                if attack_region == 'search':

                    fabricated_dir = os.path.dirname(img_names[0])
                    if not os.path.exists(fabricated_dir):
                        os.makedirs(fabricated_dir)

                    for i in range(len(imgs)):
                        x_adv = attacker.add_noise(tracker.x_crops[i],
                                                   attacker.adv_x[i])
                        x_adv = x_adv.unsqueeze(0)
                        save(imgs[i].data.cpu().numpy(),
                             x_adv,
                             state['s_x'],
                             gt_bboxes_[:, i],
                             img_names[args.batch * idx + i],
                             shift=tracker.shift[:, args.batch * idx + i +
                                                 1].numpy(),
                             region=attack_region,
                             save=True)

        toc /= cv2.getTickFrequency()

        if attack_region == 'template':
            z_adv = attacker.add_noise(tracker.z_crop, attacker.adv_z, epsilon)
            img_dir = os.path.join(args.savedir, args.dataset,
                                   state['video_name'])
            if not os.path.exists(img_dir):
                os.makedirs(img_dir)
            save(state['zimg'],
                 z_adv,
                 state['sz'],
                 state['init_gt'],
                 attack_region,
                 os.path.join(img_dir,
                              str(epoch).zfill(6) + '.jpg'),
                 shift=None,
                 region=attack_region,
                 save=True)

        _loss = np.asarray(_loss)
        _loss_v = _loss.mean(axis=0)
        pbar.clear()
        print('%d. Video: %s Time: %.2fs  epoch: %d total %.3f %.3f %.3f' %
              (v_idx + 1, video.name, toc, epoch + 1, _loss_v[0], _loss_v[1],
               _loss_v[2]))

        # save state dict
        state_dict = {
            'attacker': attacker.state_dict(),
            'optimizer': optimizer.state_dict(),
            'epoch': epoch + 1
        }

        checkpoint_path = os.path.join(args.savedir, args.dataset,
                                       'checkpoint', attack_region, video.name)
        if not os.path.exists(checkpoint_path):
            os.makedirs(checkpoint_path)
        torch.save(state_dict, os.path.join(checkpoint_path, 'checkpoint.pth'))
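
A short sketch of restoring this checkpoint, mirroring the resume logic at the top of train():

state = torch.load(os.path.join(checkpoint_path, 'checkpoint.pth'))
attacker.load_state_dict(state['attacker'])
optimizer.load_state_dict(state['optimizer'])
start_epoch = state['epoch']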
Example #13
def main():
    # load config
    cfg.merge_from_file(args.config)
    
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)
    
    # create model
    model = Model2021()
    
    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()
    
    # build tracker
    tracker = build_tracker(model)
    
    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)
    
    model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0 
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            overlaps1 = []
            vars1 = []
            vars0 = []
            occl1 = []
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
    
            frame_width = 960   # img.shape[1]
            frame_height = 540  # img.shape[0]
            video_loc = os.path.join('../results', model_name, video.name)
    
            out = cv2.VideoWriter(video_loc + '.avi',
                                  cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                                  10, (frame_width, frame_height), True)
            # skip videos without occlusion annotations
            if video.tags['occlusion'] == [] or (np.array(video.tags['occlusion']) == 1).sum() == 0:
                print("\t\tdiscard occlusion")
                continue
    
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    # convert (x, y, w, h) to the 8-value corner polygon
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                               gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]]
                tic = cv2.getTickCount()

                if idx == frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx-(w-1)/2, cy-(h-1)/2, w, h]
                    box1 = gt_bbox_
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                    if idx == 0:
                        print(img.shape)
                elif idx > frame_counter:
                    outputs = tracker.track(img, mode)  # `mode` is expected at module scope
                    pred_bbox = outputs['bbox']
                    
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox, (img.shape[1], img.shape[0]))

                    # per-frame attribute statistics
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    box2 = gt_bbox_
                    w1, h1 = box1[2], box1[3]
                    w2, h2 = box2[2], box2[3]
                    cx1, cy1 = (img.shape[1] // 2, img.shape[0] // 2)
                    cx2, cy2 = (box2[2] / 2 + box2[0], box2[3] / 2 + box2[1])

                    # scale variation
                    s1 = np.sqrt(w1 * h1)
                    s2 = np.sqrt(w2 * h2)
                    sv = max(s1 / s2, s2 / s1)

                    # aspect ratio variation
                    r1, r2 = h1 / w1, h2 / w2
                    arv = max(r1 / r2, r2 / r1)

                    # fast motion
                    fm = np.sqrt((cx2 - cx1) ** 2 + (cy2 - cy1) ** 2) / np.sqrt(s1 * s2)
                    vars0.append(np.array([sv, arv, fm, outputs['cls2']]))

                    # occlusion
                    overlaps1.append(overlap)
                    vars1.append(outputs['cls2'])
                    if idx < len(video.tags['occlusion']):
                        occl1.append(video.tags['occlusion'][idx])
                    else:
                        occl1.append(np.zeros(idx - len(video.tags['occlusion'])))
                    if overlap > 0.0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                        for _ in range(5):
                            vars1.append(-0.2)
                            occl1.append(-0.2)
                else:
                    pred_bboxes.append(0)
                    
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    cv2.polylines(img, [np.array(gt_bbox, int).reshape((-1, 1, 2))],
                                  True, (255, 0, 0), 3)
                    if cfg.MASK.MASK:
                        cv2.polylines(img, [np.array(pred_bbox, int).reshape((-1, 1, 2))],
                                      True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]), (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    print(idx)
                    cv2.putText(img, 'occl_gt:' + str(video.tags['occlusion'][idx - 1]), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
                    cv2.putText(img, 'proposed_TL:' + str(lost_number), (40, 160),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, 'occl_pred:' + str(vars1[idx - 1]), (40, 120),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    out.write(img)
                    cv2.imwrite(video_loc + str(idx) + '.png', img)
                    cv2.waitKey(1)
    
            toc /= cv2.getTickFrequency()
            # save results
            out.release()
            video_path = os.path.join(args.results, args.dataset, model_name,
                    'baseline', video.name)
    
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i) for i in x])+'\n')
            print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}, mIOU: {:0.4f}'.format(
                    v_idx+1, video.name, toc, idx / toc, lost_number, np.array(overlaps1).mean()))
            overlaps2.append(np.array(overlaps1).mean())
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
    

    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx-(w-1)/2, cy-(h-1)/2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append((cv2.getTickCount() - tic)/cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                                  (gt_bbox[0]+gt_bbox[2], gt_bbox[1]+gt_bbox[3]), (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0]+pred_bbox[2], pred_bbox[1]+pred_bbox[3]), (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            if 'VOT2018-LT' == args.dataset:
                video_path = os.path.join('../results', args.dataset, model_name,
                        'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                        '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x])+'\n')
                result_path = os.path.join(video_path,
                        '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        f.write('\n') if x is None else f.write("{:.6f}\n".format(x))
                result_path = os.path.join(video_path,
                        '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                video_path = os.path.join('../results', args.dataset, model_name, video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x])+'\n')
                result_path = os.path.join(video_path,
                        '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                model_path = os.path.join('../results', args.dataset, model_name)
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path, '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x])+'\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
                v_idx+1, video.name, toc, idx / toc ))
Example #14
def main():
    rank, world_size = dist_init()
    logger.info("init done")

    # load cfg
    cfg.merge_from_file(args.cfg)
    if rank == 0:
        if not os.path.exists(cfg.TRAIN.LOG_DIR):
            os.makedirs(cfg.TRAIN.LOG_DIR)
        init_log('global', logging.INFO)
        if cfg.TRAIN.LOG_DIR:
            add_file_handler('global',
                             os.path.join(cfg.TRAIN.LOG_DIR, 'logs.txt'),
                             logging.INFO)

#        logger.info("Version Information: \n{}\n".format(commit()))
        logger.info("config \n{}".format(json.dumps(cfg, indent=4)))

    # create model
    model2 = SiamNet_D().cuda().train()
    dist_model2 = DistModule(model2)
    # load pretrained backbone weights
    if cfg.BACKBONE.PRETRAINED:
        cur_path = os.path.dirname(os.path.realpath(__file__))
        backbone_path = os.path.join(cur_path, '../', cfg.BACKBONE.PRETRAINED)
        load_pretrain(model2.netG.backbone, backbone_path)

    # create tensorboard writer
    if rank == 0 and cfg.TRAIN.LOG_DIR:
        tb_writer = SummaryWriter(cfg.TRAIN.LOG_DIR)
    else:
        tb_writer = None

    # build dataset loader
    train_loader = build_data_loader()

    # build optimizer and lr_scheduler
    #    optimizer, lr_scheduler, paramsG = build_opt_lr(dist_model2, cfg.TRAIN.START_EPOCH)
    optimizer2, lr_scheduler2 = build_opt_lr2(dist_model2,
                                              cfg.TRAIN.START_EPOCH)
    #    print("gen:",paramsG)
    #    print("disc",paramsD)
    # resume training
    #    if cfg.TRAIN.RESUME:
    #        logger.info("resume from {}".format(cfg.TRAIN.RESUME))
    #        assert os.path.isfile(cfg.TRAIN.RESUME), \
    #            '{} is not a valid file.'.format(cfg.TRAIN.RESUME)
    #        model2, optimizer2, cfg.TRAIN.START_EPOCH = \
    #            restore_from(model2.module.netG, optimizer2, cfg.TRAIN.RESUME)
    # load pretrain
    #    if cfg.TRAIN.PRETRAINED:
    #        load_pretrain(model2.netG, cfg.TRAIN.PRETRAINED)
    #    dist_model = DistModule(model)
    dist_model2 = DistModule(model2)
    #    logger.info(lr_scheduler)
    logger.info("model prepare done")

    # start training
    train(train_loader, tb_writer, dist_model2, optimizer2, lr_scheduler2)
Example #15
def test(video, v_idx, model_name, template_dir=None, img_names=None):
    # create model
    track_model = ModelBuilder()
    # load model
    track_model = load_pretrain(track_model, args.snapshot).cuda().eval()
    # build tracker
    tracker = build_tracker(track_model)

    # set writing video parameters
    height, width, channels = video[0][0].shape
    out = cv2.VideoWriter(
        os.path.join(args.savedir, args.dataset, video.name + '.avi'),
        cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 15, (width, height))
    frame_counter = 0
    toc = 0
    pred_bboxes_adv = []
    adv_z = []

    pbar = tqdm(enumerate(video), position=0, leave=True)

    for idx, (img, gt_bbox) in pbar:

        gt_bbox, gt_bbox_ = gt_bbox_adaptor(gt_bbox)

        tic = cv2.getTickCount()
        pred_bbox, _lost, frame_counter = stoa_track(idx, frame_counter, img,
                                                     gt_bbox, tracker,
                                                     template_dir, img_names)
        pred_bboxes_adv.append(pred_bbox)
        toc += cv2.getTickCount() - tic

        if idx > 0:
            bbox = list(map(int, pred_bbox))
            cv2.rectangle(img, (bbox[0], bbox[1]),
                          (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                          (0, 255, 255), 3)

        __gt_bbox = list(map(int, gt_bbox_))
        cv2.rectangle(
            img, (__gt_bbox[0], __gt_bbox[1]),
            (__gt_bbox[0] + __gt_bbox[2], __gt_bbox[1] + __gt_bbox[3]),
            (0, 0, 0), 3)

        cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (0, 0, 0), 2)

        out.write(img)

    # save results
    if args.dataset not in ['VOT2016', 'VOT2018', 'VOT2019']:
        model_path = os.path.join('results', args.dataset, model_name)
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        result_path = os.path.join(model_path, '{}.txt'.format(video.name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes_adv:
                f.write(','.join([str(i) for i in x]) + '\n')
    else:
        video_path = os.path.join('results', args.dataset, model_name,
                                  'baseline', video.name)
        if not os.path.isdir(video_path):
            os.makedirs(video_path)
        result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))

        with open(result_path, 'w') as f:
            for x in pred_bboxes_adv:
                if isinstance(x, int):
                    f.write("{:d}\n".format(x))
                else:
                    f.write(','.join([vot_float2str("%.4f", i)
                                      for i in x]) + '\n')
Example #16
def main():
    # load config
    cfg.merge_from_file(args.config)

    #!!! input your dataset path
    dataset_root = os.path.join(your_dataset_path, args.dataset)

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.snapshot.split('/')[-1].split('.')[0]
    model_name = model_name + '_pk-{:.3f}'.format(
        cfg.TRACK.PENALTY_K) + '_wi-{:.3f}'.format(
            cfg.TRACK.WINDOW_INFLUENCE) + '_lr-{:.3f}'.format(cfg.TRACK.LR)
    total_lost = 0
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue

            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    gt_bbox = [
                        gt_bbox[0], gt_bbox[1], gt_bbox[0],
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1,
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]
                    ]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic

            toc /= cv2.getTickFrequency()
            # save results
            video_path = os.path.join('results', args.dataset, model_name,
                                      'baseline', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path,
                                       '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i)
                                          for i in x]) + '\n')
            print(
                '({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'
                .format(v_idx + 1, video.name, toc, idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append(
                    (cv2.getTickCount() - tic) / cv2.getTickFrequency())

            toc /= cv2.getTickFrequency()
            # save results
            if 'VOT2018-LT' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                                          'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(
                    video_path, '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        f.write('\n') if x is None else f.write(
                            "{:.6f}\n".format(x))
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                                          video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                model_path = os.path.join('results', args.dataset, model_name)
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path,
                                           '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.
                  format(v_idx + 1, video.name, toc, idx / toc))
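
Encoding PENALTY_K, WINDOW_INFLUENCE, and LR into model_name keeps results from different hyper-parameter sweeps in separate directories, e.g. a (hypothetical) results/VOT2018/model_pk-0.040_wi-0.400_lr-0.300/ tree.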
Example #17
def main():
    # load config
    cfg.merge_from_file(args.config)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(args.dataset_dir, args.dataset)

    epsilon = args.epsilon

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().train()

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False,
                                            config=cfg)
    # dataset layout:
    #   vid.name in {'ants1', 'ants3', ...}
    #   img, bbox, cls, delta, delta_weight = vid[0][0], vid[0][1], vid[0][2], vid[0][3], vid[0][4]

    model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0

    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:

        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue

            # set writing video parameters
            height, width, channels = video[0][0].shape
            out = cv2.VideoWriter(
                os.path.join(args.savedir, video.name + '.avi'),
                cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 15,
                (width, height))
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            data = {'template': None, 'search': None}
            for idx, (img, gt_bbox, cls, delta_cls, delta_w, _bbox, cls_s, delta_cls_s, delta_w_s, _bbox_s) \
                    in enumerate(video):
                if len(gt_bbox) == 4:
                    gt_bbox = [
                        gt_bbox[0], gt_bbox[1], gt_bbox[0],
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1,
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]
                    ]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))

                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)

                    nimg, sz, box, _ = tracker.crop(img,
                                                    bbox=gt_bbox_,
                                                    im_name='exemplar')
                    data['template'] = torch.autograd.Variable(
                        nimg, requires_grad=True).cuda()
                elif idx > frame_counter:

                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    nimg, sz, box, pad = tracker.crop(img,
                                                      bbox=gt_bbox_,
                                                      is_template=False,
                                                      im_name='search' +
                                                      str(idx))
                    [bT, bB, bL, bR] = box
                    sz = int(sz)
                    data['search'] = torch.autograd.Variable(
                        nimg, requires_grad=True).cuda()
                    data['label_cls'] = torch.Tensor(cls_s).type(
                        torch.LongTensor).cuda()
                    data['label_loc'] = torch.Tensor(delta_cls_s).type(
                        torch.FloatTensor).cuda()
                    data['label_loc_weight'] = torch.Tensor(delta_w_s).cuda()

                    outputs = model(data)

                    cls_loss = outputs['cls_loss']
                    loc_loss = outputs['loc_loss']
                    total_loss = outputs['total_loss']
                    total_loss.backward()

                    data_grad = data['search'].grad


                    perturb_data = fgsm_attack(data['search'], epsilon,
                                               data_grad)
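                    # fgsm_attack above is assumed to implement the standard
                    # fast gradient sign method, x_adv = x + epsilon * sign(grad),
                    # clamped to the valid pixel range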
                    # cv2.imwrite(os.path.join(args.savedir, 'original_' + str(idx) + '.jpg'), img)

                    # _img = perturb_data.data.cpu().numpy().squeeze().transpose([1, 2, 0])
                    # cv2.imwrite(os.path.join(args.savedir, 'perturb_' + str(idx) + '.jpg'), _img)

                    if not np.array_equal(cfg.TRACK.INSTANCE_SIZE, sz):
                        perturb_data = F.interpolate(perturb_data, size=sz)

                    _img = perturb_data.data.cpu().numpy().squeeze().transpose(
                        [1, 2, 0])
                    # cv2.imwrite(os.path.join(args.savedir, 'crop_full_' + str(idx) + '.jpg'), _img)
                    nh, nw, _ = _img.shape
                    img[bT:bB + 1, bL:bR + 1, :] = _img[pad[0]:nh - pad[1],
                                                        pad[2]:nw - pad[3], :]
                    # cv2.imwrite(os.path.join(args.savedir, 'perturb_full_' + str(idx) + '.jpg'), img)

                    outputs = tracker.track(img)

                    pred_bbox = outputs['bbox']
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        print('*************** lost ***************')
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1

                    print(idx, torch.sum(data_grad, (2, 3)))
                    print(
                        idx,
                        torch.sum(torch.abs(torch.sum(data_grad, (2, 3))),
                                  (0, 1)))

                else:
                    pred_bboxes.append(0)

                toc += cv2.getTickCount() - tic

                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    cv2.polylines(
                        img, [np.array(gt_bbox, int).reshape(
                            (-1, 1, 2))], True, (0, 255, 0), 3)
                    if cfg.MASK.MASK:
                        cv2.polylines(
                            img,
                            [np.array(pred_bbox, int).reshape(
                                (-1, 1, 2))], True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)

                # save tracking image
                bbox = list(map(int, pred_bbox))
                cv2.rectangle(img, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (0, 255, 255), 2)
                cv2.putText(img, str(lost_number), (40, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                # cv2.imwrite(os.path.join(args.savedir, 'track_' + str(idx) + '.jpg'), img)
                out.write(img)

            toc /= cv2.getTickFrequency()
            # save results
            video_path = os.path.join('results', args.dataset, model_name,
                                      'baseline', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path,
                                       '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i)
                                          for i in x]) + '\n')
            print(
                '({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'
                .format(v_idx + 1, video.name, toc, idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append(
                    (cv2.getTickCount() - tic) / cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(
                        img, (gt_bbox[0], gt_bbox[1]),
                        (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                        (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2],
                                   pred_bbox[1] + pred_bbox[3]), (0, 255, 255),
                                  3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            if 'VOT2018-LT' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                                          'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(
                    video_path, '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        if x is None:
                            f.write('\n')
                        else:
                            f.write("{:.6f}\n".format(x))
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                                          video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                model_path = os.path.join('results', args.dataset, model_name)
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path,
                                           '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.
                  format(v_idx + 1, video.name, toc, idx / toc))
Exemplo n.º 18
def main():
    '''change save_path to yours'''
    save_path = '/home/masterbin-iiau/Desktop/AdvTrack-project/supplementary/%s' % args.video
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    # load config
    cfg.merge_from_file(args.config)

    dataset_root = os.path.join(dataset_root_, args.dataset)
    # create model
    '''a model is a neural network (a torch.nn.Module)'''
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    '''a tracker is an object that combines the network with some post-processing'''
    tracker = build_tracker(model)
    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    # model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
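                # VOT ground truth may be axis-aligned (x, y, w, h); expand it
                # to the 8-number corner format used by the rotated-box protocol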
                if len(gt_bbox) == 4:
                    gt_bbox = [
                        gt_bbox[0], gt_bbox[1], gt_bbox[0],
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1,
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]
                    ]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    '''GAN'''
                    outputs = tracker.track_supp(img, GAN, save_path, idx)
                    pred_bbox = outputs['bbox']
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
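                    # region-constrained overlap between prediction and gt within the image bounds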
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    cv2.polylines(
                        img, [np.array(gt_bbox, int).reshape(
                            (-1, 1, 2))], True, (0, 255, 0), 3)
                    if cfg.MASK.MASK:
                        cv2.polylines(
                            img,
                            [np.array(pred_bbox, int).reshape(
                                (-1, 1, 2))], True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()

    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track_supp(img, GAN, save_path, idx)
                    pred_bbox = outputs['bbox']
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append(
                    (cv2.getTickCount() - tic) / cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(
                        img, (gt_bbox[0], gt_bbox[1]),
                        (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                        (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2],
                                   pred_bbox[1] + pred_bbox[3]), (0, 255, 255),
                                  3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
Exemplo n.º 19
from pysot.utils.model_load import load_pretrain
from pysot.models.model_builder import ModelBuilder
from pysot.core.config import cfg
from pysot.utils.bbox import get_axis_aligned_bbox
from pysot.tracker.tracker_builder import build_tracker
from toolkit.datasets import DatasetFactory
from matplotlib.image import imread

from PIL import Image
from got10k.datasets import GOT10k
from got10k.utils.viz import show_frame

if __name__ == '__main__':
    cfg.merge_from_file('/home/sourabhswain/Documents/SiamRPN/config.yaml')
    net_path = '/home/sourabhswain/Documents/SiamRPN/model.pth'
    model = ModelBuilder()
    model = load_pretrain(model, net_path).cuda().eval()
    tracker = build_tracker(model)

    dataset = GOT10k(root_dir='/home/sourabhswain/Documents/SiamRPN/dataset',
                     subset='val')

    #dataset = DatasetFactory.create_dataset(name='GOT-10k',
    #                                        dataset_root='/home/sourabhswain/Documents/SiamRPN/dataset',
    #                                        load_img=False)
    """
    for v_idx, (video, anno) in enumerate(dataset):


        toc = 0
        pred_bboxes = []
        scores = []
Exemplo n.º 20
def main(frame_interval, interpolation_rate):
    # load config
    cfg.merge_from_file(args.config)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    gt_bbox = [
                        gt_bbox[0], gt_bbox[1], gt_bbox[0],
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1,
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]
                    ]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    cv2.polylines(
                        img, [np.array(gt_bbox, int).reshape(
                            (-1, 1, 2))], True, (0, 0, 255), 3)
                    if cfg.MASK.MASK:
                        cv2.polylines(
                            img,
                            [np.array(pred_bbox, int).reshape(
                                (-1, 1, 2))], True, (255, 0, 0), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    window_name = "Result"
                    cv2.moveWindow(window_name, 100, 100)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # # save results
            # video_path = os.path.join('results', args.dataset, model_name,
            #         'baseline', video.name)
            # if not os.path.isdir(video_path):
            #     os.makedirs(video_path)
            # result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
            # with open(result_path, 'w') as f:
            #     for x in pred_bboxes:
            #         if isinstance(x, int):
            #             f.write("{:d}\n".format(x))
            #         else:
            #             f.write(','.join([vot_float2str("%.4f", i) for i in x])+'\n')
            # print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'.format(
            #         v_idx+1, video.name, toc, idx / toc, lost_number))
            # total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
    else:
        # FPS List
        fps_list = []

        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []

            # PARAMETERS
            # frame_interval = 2
            # interpolation_rate = 0.005

            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w,
                                h]  # (left-top width height)
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']  # (left-top width height)
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])

                    ######################################
                    # Adaptive Template(exemplar update) #
                    ######################################
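                    # update_z presumably blends the stored exemplar with a crop
                    # around the current prediction at rate interpolation_rate,
                    # i.e. z <- (1 - rate) * z + rate * z_new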
                    if idx % frame_interval == 0:
                        tracker.update_z(img,
                                         pred_bbox,
                                         interpolation_rate=interpolation_rate)

                toc += cv2.getTickCount() - tic
                track_times.append(
                    (cv2.getTickCount() - tic) / cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(
                        img, (gt_bbox[0], gt_bbox[1]),
                        (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                        (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2],
                                   pred_bbox[1] + pred_bbox[3]), (255, 0, 0),
                                  3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    window_name = "Result"
                    cv2.moveWindow(window_name, 20, 20)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()

            # save results
            if 'VOT2018-LT' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                                          'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(
                    video_path, '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        if x is None:
                            f.write('\n')
                        else:
                            f.write("{:.6f}\n".format(x))
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                                          video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            # OTB-100 and other OPE-style datasets
            else:
                result_folder_name = "results_{0:d}frame_exemplar_update_rate_{1:s}".format(
                    frame_interval, str(interpolation_rate))
                model_path = os.path.join(result_save_base_path,
                                          result_folder_name, args.dataset,
                                          model_name)
                # model_path = os.path.join(result_save_base_path, 'results', args.dataset, model_name)
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path,
                                           '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')

            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.
                  format(v_idx + 1, video.name, toc, idx / toc))

            # FPS Result
            fps = idx / toc
            fps_list.append(fps)

        # Make FPS Result Path
        fps_array = np.asarray(fps_list).reshape(-1, 1)
        fps_file_name = "model_fps__[{:3.1f}].txt".format(
            np.average(fps_array))
        model_fps_file = os.path.join(os.path.dirname(model_path), "../",
                                      fps_file_name)
        np.savetxt(model_fps_file, fps_array)
Exemplo n.º 21
if __name__ == "__main__":

    torch.set_num_threads(1)

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id

    # load config
    #cfg.merge_from_file(args.config)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    #dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)
    dataset_root = os.path.join("/ssd", args.dataset)
    # create model
    net = models.__dict__[args.arch](anchors_nums=args.anchor_nums,
                                     cls_type=args.cls_type)
    net = load_pretrain(net, args.snapshot)
    net.eval()
    net = net.cuda()

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    # Eval dataset
    # root = os.path.realpath(os.path.join(os.path.dirname(__file__),
    #                                      '../testing_dataset'))
    root = "/ssd"
    root = os.path.join(root, args.dataset)
    if 'OTB' in args.dataset:
        dataset_eval = OTBDataset(args.dataset, root)
Exemplo n.º 22
def main():
    # load config
    cfg.merge_from_file(args.config)

    # cur_dir = os.path.dirname(os.path.realpath(__file__))
    # dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)
    # UAVDTdataset = '/media/li/CA5CF8AE5CF89683/research/UAVDT/'
    # UAVDTdataset = '/media/li/DATA/VisDrone2019-SOT/'
    UAVDTdataset = args.datasetroot
    dataset_root = os.path.join(UAVDTdataset)
    
    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker_f(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.snapshot.split('/')[-2].split('.')[0]
    torch.cuda.synchronize()

    # OPE tracking
    for v_idx, video in enumerate(dataset):
        o_path = os.path.join('results_rt_raw_f', args.dataset, model_name)
        if not os.path.isdir(o_path):
            os.makedirs(o_path)
        out_path = os.path.join(o_path, video.name + '.pkl')
        if os.path.isfile(out_path):
            print('({:3d}) Video: {:12s} already done!'.format(
                v_idx + 1, video.name))
            continue
        if args.video != '':
            # test one special video
            if video.name != args.video:
                continue
        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        input_fidx = []
        runtime = []
        timestamps = []
        last_fidx = None
        n_frame = len(video)
        t_total = n_frame / args.fps
        t_start = perf_counter()
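        # simulated real-time evaluation: frames "arrive" at args.fps; each
        # iteration processes the latest available frame, waiting when the
        # tracker outruns the stream and skipping frames when it falls behind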
        while True:
            t1 = perf_counter()
            t_elapsed = t1 - t_start
            if t_elapsed > t_total:
                break
            # identify the latest available frame
            fidx_continuous = t_elapsed * args.fps
            fidx = int(np.floor(fidx_continuous))
            # if the tracker finished this frame before the next one arrived, keep waiting
            if fidx == last_fidx:
                continue
            last_fidx = fidx
            tic = cv2.getTickCount()
            img, gt_bbox = video[fidx]
            if fidx == 0:
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                tracker.init(img, gt_bbox_)
                torch.cuda.synchronize()
                t2 = perf_counter()
                t_elapsed = t2 - t_start
                timestamps.append(t_elapsed)
                runtime.append(t2 - t1)
                pred_bbox = gt_bbox_
                scores.append(None)
                pred_bboxes.append(pred_bbox)
                input_fidx.append(fidx)
            else:
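                # extrapolate the last predicted box from the last processed
                # frame index to the current one before tracking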
                box_f = tracker.forecaster.forecast(fidx, input_fidx[-1], np.array([pred_bboxes[-1]]))
                outputs = tracker.track(img,box_f[0])
                torch.cuda.synchronize()
                t2 = perf_counter()
                t_elapsed = t2 - t_start
                timestamps.append(t_elapsed)
                runtime.append(t2 - t1)
                pred_bbox = outputs['bbox']
                pred_bboxes.append(pred_bbox)
                scores.append(outputs['best_score'])
                input_fidx.append(fidx)
            if t_elapsed > t_total:
                break
            toc += cv2.getTickCount() - tic
            track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency())

        # save results and runtimes
        if args.overwrite or not os.path.isfile(out_path):
            pickle.dump({
                'results_raw': pred_bboxes,
                'timestamps': timestamps,
                'input_fidx': input_fidx,
                'runtime': runtime,
            }, open(out_path, 'wb'))
        toc /= cv2.getTickFrequency()
        # save results
        # model_path = os.path.join('results', args.dataset, model_name)
        # if not os.path.isdir(model_path):
        #     os.makedirs(model_path)
        # result_path = os.path.join(model_path, '{}.txt'.format(video.name))
        # with open(result_path, 'w') as f:
        #     for x in pred_bboxes:
        #         f.write(','.join([str(i) for i in x])+'\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, video.name, toc, fidx / toc))
Exemplo n.º 23
0
def main():
    is_gpu_cuda_available = torch.cuda.is_available()
    if not is_gpu_cuda_available:
        raise RuntimeError(
            'Failed to locate a CUDA GPU. Program cannot continue.')
    num_gpus = torch.cuda.device_count()
    gpu_type = torch.cuda.get_device_name(0)
    print(f"You have {num_gpus} available of type: {gpu_type}")
    print("This might take a few minutes...Grab a cup of coffee\n")

    # load config
    cfg.merge_from_file(args.config)
    dataset_root = os.path.join(args.dataset_directory, args.dataset)
    print(f"dataset root-->{dataset_root}")

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.model_name
    print(f"Model name is {model_name}")

    total_lost = 0
    if args.dataset in vot_like_dataset:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    gt_bbox = [
                        gt_bbox[0], gt_bbox[1], gt_bbox[0],
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1,
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]
                    ]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
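                    # note: stricter than the usual VOT criterion (overlap > 0)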
                    if overlap > 0.85:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + args.skip_frames  # skip args.skip_frames frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    cv2.polylines(
                        img, [np.array(gt_bbox, int).reshape(
                            (-1, 1, 2))], True, (0, 255, 0), 3)
                    if cfg.MASK.MASK:
                        cv2.polylines(
                            img,
                            [np.array(pred_bbox, int).reshape(
                                (-1, 1, 2))], True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            save_path = os.path.join(args.results_path, args.dataset,
                                     model_name, args.experiment_name,
                                     video.name)
            if not os.path.isdir(save_path):
                os.makedirs(save_path)
            result_path = os.path.join(save_path,
                                       '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i)
                                          for i in x]) + '\n')
            with open(os.path.join(save_path, '..', 'lost.txt'), 'a+') as f:
                f.write(
                    f"{v_idx+1} Class: {video.name} | Time: {toc}s | Speed: {idx/toc}fps | Lost:{lost_number}  \n"
                )

            print(
                '({:3d}) Class: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'
                .format(v_idx + 1, video.name, toc, idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
        with open(os.path.join(save_path, '..', 'lost.txt'), 'a+') as f:
            f.write(
                f"Model architeture used --> {model_name} \ntotal lost: {total_lost} \n"
            )
            f.write(f"SKIP FRAMES USED --> {args.skip_frames}")
    else:
        # OPE tracking
        # will be implemented if needed in future
        pass
Exemplo n.º 24
def main():
    # load config
    # save_siamese_rpn()
    cfg.merge_from_file(args.config)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    # dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)
    dataset_root = datasets_root + args.dataset

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # save_backbone(model)

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0

    # multi-pass tracking: the evaluation protocol that re-initializes the tracker after a loss
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0

            # pred_bboxes holds two kinds of entries. Type 1: the integers 1, 2
            # and 0, marking tracking start, tracking lost, and the placeholder
            # frames skipped after a loss. Type 2: float bboxes, i.e. the
            # actual tracking results.
            pred_bboxes = []

            gru_seq_len = tracker.model.grus.seq_in_len
            video_len = len(video)

            for idx, (img, gt_bbox) in enumerate(video):

                # if gt is given as [x, y, w, h], expand it to the 8 corner
                # coordinates (x1, y1, x2, y2, x3, y3, x4, y4)
                if len(gt_bbox) == 4:
                    gt_bbox = [
                        gt_bbox[0], gt_bbox[1], gt_bbox[0],
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1,
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]
                    ]
                tic = cv2.getTickCount()

                # tracker initialization
                if idx == frame_counter:  # initialize on the first frame
                    # indices of the gru_seq_len frames starting at idx
                    idxs = [idx + k for k in range(gru_seq_len)]
                    # clamp the indices so they stay inside the video
                    idxs = [min(k, video_len - 1) for k in idxs]

                    tracker.template_idx = 0  # first frame used for template initialization
                    for k in idxs:
                        init_img, init_gt_bbox = video[k]  # initialize with gru_seq_len consecutive frames
                        # init_img, init_gt_bbox = video[idxs[0]]  # use only one frame for initialization

                        # convert the 4 rotated-box corners to an axis-aligned
                        # bbox in center form (cx, cy, w, h)
                        cx, cy, w, h = get_axis_aligned_bbox(
                            np.array(init_gt_bbox))
                        # convert from center form (cx, cy) to top-left form
                        init_gt_bbox = [
                            cx - (w - 1) / 2, cy - (h - 1) / 2, w, h
                        ]

                        tracker.init_gru(init_img, init_gt_bbox, k)

                    if k == 0:
                        pred_bbox = init_gt_bbox
                        pred_bboxes.append(1)

                # continuous tracking of the following frames
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']

                    # update the template only when the output score is very high
                    if outputs['best_score'] > 0.95:
                        tracker.init_gru(img, pred_bbox, idx)

                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))

                    # inspect the relation between IoU and score on the first
                    # frame after re-initialization:
                    # if tracker.template_idx == 4:
                    #     print("{:3.2f}\t{:3.2f}".format(overlap, outputs['best_score']))

                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)

                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()

                # draw the outputs: gt and mask as polygons, the tracked bbox as a rectangle
                if args.vis and idx > frame_counter:
                    # draw the polygonal gt
                    cv2.polylines(
                        img, [np.array(gt_bbox, int).reshape(
                            (-1, 1, 2))], True, (0, 255, 0), 3)
                    # draw the polygon predicted by the mask branch (SiamMask)
                    if cfg.MASK.MASK:
                        cv2.polylines(
                            img,
                            [np.array(pred_bbox, int).reshape(
                                (-1, 1, 2))], True, (0, 255, 255), 3)
                    # draw the output rectangle
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)

                    # annotate the image with the frame index and the lost count
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            video_path = os.path.join('results', args.dataset, model_name,
                                      'baseline', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            # result path layout: ./results/VOT2018/model/baseline/ants1/ants1_001.txt
            result_path = os.path.join(video_path,
                                       '{}_001.txt'.format(video.name))

            # pred_bboxes holds two kinds of entries: integers (1 = start, 2 = lost,
            # 0 = placeholder for skipped frames after a loss) and float bboxes,
            # i.e. the actual tracking results.
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):  # integers mark a start or a loss
                        f.write("{:d}\n".format(x))
                    else:  # floats are the actual bboxes
                        f.write(','.join([vot_float2str("%.4f", i)
                                          for i in x]) + '\n')
            print(
                '({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'
                .format(v_idx + 1, video.name, toc, idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))

    # OPE tracking: the evaluation protocol that does not re-initialize after a loss
    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:

                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append(
                    (cv2.getTickCount() - tic) / cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(
                        img, (gt_bbox[0], gt_bbox[1]),
                        (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                        (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2],
                                   pred_bbox[1] + pred_bbox[3]), (0, 255, 255),
                                  3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            if 'VOT2018-LT' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                                          'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(
                    video_path, '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        if x is None:
                            f.write('\n')
                        else:
                            f.write("{:.6f}\n".format(x))
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                                          video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                model_path = os.path.join('results', args.dataset, model_name)
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path,
                                           '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.
                  format(v_idx + 1, video.name, toc, idx / toc))
Exemplo n.º 25
def main():
    # load config
    cfg.merge_from_file(args.config)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(cur_dir, '../test_dataset', args.dataset)
    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = SiamAPNTracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.snapshot.split('/')[-1].split('.')[0] + str(cfg.TRACK.w1)

    # OPE tracking
    for v_idx, video in enumerate(dataset):
        if args.video != '':
            # test one special video
            if video.name != args.video:
                continue
        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        for idx, (img, gt_bbox) in enumerate(video):
            tic = cv2.getTickCount()
            if idx == 0:
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                tracker.init(img, gt_bbox_)
                pred_bbox = gt_bbox_
                scores.append(None)
                if 'VOT2018-LT' == args.dataset:
                    pred_bboxes.append([1])
                else:
                    pred_bboxes.append(pred_bbox)
            else:
                outputs = tracker.track(img)
                pred_bbox = outputs['bbox']
                pred_bboxes.append(pred_bbox)
                scores.append(outputs['best_score'])
            toc += cv2.getTickCount() - tic
            track_times.append(
                (cv2.getTickCount() - tic) / cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                gt_bbox = list(map(int, gt_bbox))
                pred_bbox = list(map(int, pred_bbox))
                cv2.rectangle(
                    img, (gt_bbox[0], gt_bbox[1]),
                    (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                    (0, 255, 0), 3)
                cv2.rectangle(
                    img, (pred_bbox[0], pred_bbox[1]),
                    (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                    (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (0, 255, 255), 2)
                cv2.imshow(video.name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()
        # save results

        model_path = os.path.join('results', args.dataset, model_name)
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        result_path = os.path.join(model_path, '{}.txt'.format(video.name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, video.name, toc, idx / toc))
Exemplo n.º 26
def main():
    mode = args.mode
    # load config
    cfg.merge_from_file(args.config)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(args.dataset_dir, args.dataset)

    epsilon = args.epsilon

    # create model
    track_model = ModelBuilder()
    track_model0 = ModelBuilder()
    lr = args.lr

    # load model
    track_model = load_pretrain(track_model, args.snapshot).cuda().eval()
    track_model0 = load_pretrain(track_model0, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(track_model)
    tracker0 = build_tracker(track_model0)

    attacker = ModelAttacker().cuda().train()
    optimizer = optim.Adam(attacker.parameters(), lr=lr)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False,
                                            dataset_toolkit='oneshot',
                                            config=cfg)
    # vid.name in {'ants1', 'ants3', ...}
    # each item yields: img, bbox, cls, delta, delta_weight
    # vid[0][0], vid[0][1], vid[0][2], vid[0][3], vid[0][4]

    model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0
    n_epochs = args.epochs

    for name, param in tracker.model.named_parameters():
        param.requires_grad_(False)

    # for name, param in attacker.named_parameters():
    #     if 'backbone' in name or 'neck' in name or 'rpn_head' in name:
    #         param.requires_grad_(False)

    # for name, param in tracker2.model.named_parameters():
    #     if 'backbone' in name or 'neck' in name or 'rpn_head' in name:
    #         param.requires_grad_(False)
    #     elif param.requires_grad:
    #         param.requires_grad_(True)
    #         # print(name, param.data)
    #         print('grad true ', name)
    #     else:
    #         print('grad false ', name)

    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019', 'OTB100']:

        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
                else:
                    if not os.path.exists(
                            os.path.join(args.savedir, video.name)):
                        os.mkdir(os.path.join(args.savedir, video.name))

            # set up the output video writer
            height, width, channels = video[0][0].shape
            out = cv2.VideoWriter(
                os.path.join(args.savedir, video.name + '.avi'),
                cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 15,
                (width, height))
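            # MJPG-encoded visualization video at 15 fps, sized to the first frame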
            frame_counter = 0
            frame_counter_adv = 0
            lost_number = 0
            lost_number_adv = 0
            toc = 0
            pred_bboxes = []
            pred_bboxes_adv = []

            pbar = tqdm(enumerate(video))
            for idx, (img, gt_bbox) in pbar:

                if len(gt_bbox) == 4:
                    gt_bbox = [
                        gt_bbox[0], gt_bbox[1], gt_bbox[0],
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1,
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]
                    ]

                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]

                tic = cv2.getTickCount()

                ##########################################
                # # #  for state of the art tracking # # #
                ##########################################
                if mode == 0:
                    pred_bbox, _lost, frame_counter = stoa_track(
                        idx, frame_counter, img, gt_bbox, tracker0)

                ##########################################
                # # # # #  adversarial tracking  # # # # #
                ##########################################
                if idx == 0:
                    state = {
                        'img': img,
                        'gt_bbox': gt_bbox,
                        'video_name': video.name
                    }
                else:
                    state['img'] = img

                if mode == 1:
                    optimizer, state, ad_bbox = \
                        adversarial_train(idx, frame_counter_adv, state, attacker, tracker, optimizer, pbar)
                    if idx == 0:
                        break

                toc += cv2.getTickCount() - tic

                if idx > 0 and mode == 0:
                    bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (bbox[0], bbox[1]),
                                  (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                  (0, 255, 255), 3)

                if idx > 0 and mode == 1:
                    ad_bbox = list(map(int, ad_bbox))
                    cv2.rectangle(
                        img, (ad_bbox[0], ad_bbox[1]),
                        (ad_bbox[0] + ad_bbox[2], ad_bbox[1] + ad_bbox[3]),
                        (0, 0, 255), 3)

                __gt_bbox = list(map(int, gt_bbox_))
                cv2.rectangle(
                    img, (__gt_bbox[0], __gt_bbox[1]),
                    (__gt_bbox[0] + __gt_bbox[2], __gt_bbox[1] + __gt_bbox[3]),
                    (0, 0, 0), 3)

                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (0, 0, 0), 2)

                out.write(img)

            toc /= cv2.getTickFrequency()

            if mode == 0:
                # save results
                if args.dataset == 'OTB100':
                    model_path = os.path.join('results', args.dataset,
                                              model_name)
                    if not os.path.isdir(model_path):
                        os.makedirs(model_path)
                    result_path = os.path.join(model_path,
                                               '{}.txt'.format(video.name))
                    with open(result_path, 'w') as f:
                        for x in pred_bboxes:
                            f.write(','.join([str(i) for i in x]) + '\n')
                else:
                    video_path = os.path.join('results', args.dataset,
                                              model_name, 'baseline',
                                              video.name)
                    if not os.path.isdir(video_path):
                        os.makedirs(video_path)
                    result_path = os.path.join(video_path,
                                               '{}_001.txt'.format(video.name))

                    with open(result_path, 'w') as f:
                        for x in pred_bboxes:
                            if isinstance(x, int):
                                f.write("{:d}\n".format(x))
                            else:
                                f.write(','.join(
                                    [vot_float2str("%.4f", i)
                                     for i in x]) + '\n')
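
The two writers above follow the VOT toolkit's result-file convention. As a minimal, self-contained sketch of that convention (write_vot_result is a made-up name for illustration, not a pysot helper):

def write_vot_result(path, records, fmt='%.4f'):
    # integers are protocol flags: 1 = init frame, 2 = tracking failure,
    # 0 = a frame skipped after a failure; anything else is a box/polygon
    with open(path, 'w') as f:
        for rec in records:
            if isinstance(rec, int):
                f.write('%d\n' % rec)
            else:
                f.write(','.join(fmt % v for v in rec) + '\n')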
Example no. 27
0
        raise NotImplementedError


if __name__ == '__main__':
    torch.backends.cudnn.benchmark = True
    cfg.merge_from_file(args.config)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)
    benchmark = EAOBenchmark(dataset)

    model = ModelBuilder()
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # resources available on this machine; object_store_memory is backed by
    # shared memory (/dev/shm)
    ray.init(num_gpus=1, num_cpus=8, object_store_memory=30_000_000_000)
    tune.register_trainable('fitness', fitness)

    # define search space
    params = {
        'penalty_k': hp.quniform('penalty_k', 0.001, 0.6, 0.001),
        'lr': hp.quniform('scale_lr', 0.1, 0.8, 0.001),
        'window_influence': hp.quniform('window_influence', 0.05, 0.65, 0.001),
        'search_region': hp.choice('search_region', [255]),
    }

    # stop condition for VOT and OTB
    if args.dataset.startswith('VOT'):
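
The example is truncated above. As a hedged sketch only (the scheduler, the kwargs, and the 'EAO' reward attribute are assumptions in the style of the older Ray Tune + hyperopt API, not the original script), a search over this space would be launched roughly like this:

from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest.hyperopt import HyperOptSearch

# 'fitness' was registered via tune.register_trainable above
algo = HyperOptSearch(params, max_concurrent=4, reward_attr='EAO')
scheduler = AsyncHyperBandScheduler(reward_attr='EAO')
tune.run('fitness', search_alg=algo, scheduler=scheduler,
         num_samples=1000, resources_per_trial={'gpu': 0.5, 'cpu': 2})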
Example no. 28
0
def main():
    # load config
    cfg.merge_from_file(args.config)

    # hp_search
    params = getattr(cfg.HP_SEARCH, args.dataset)
    hp = {'lr': params[0], 'penalty_k': params[1], 'window_lr': params[2]}
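    # cfg.HP_SEARCH is assumed to store a tuned [lr, penalty_k, window_lr]
    # triple per dataset; the hp dict built here is handed to
    # tracker.track(img, hp) below to override the tracker's defaults.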

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)

    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = SiamCARTracker(model, cfg.TRACK)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.snapshot.split('/')[-2] + str(hp['lr']) + '_' + str(
        hp['penalty_k']) + '_' + str(hp['window_lr'])

    # OPE tracking
    for v_idx, video in enumerate(dataset):
        if args.video != '':
            # test one special video
            if video.name != args.video:
                continue
        toc = 0
        pred_bboxes = []
        track_times = []
        for idx, (img, gt_bbox) in enumerate(video):
            tic = cv2.getTickCount()
            if idx == 0:
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                tracker.init(img, gt_bbox_)
                pred_bbox = gt_bbox_
                pred_bboxes.append(pred_bbox)
            else:
                outputs = tracker.track(img, hp)
                pred_bbox = outputs['bbox']
                pred_bboxes.append(pred_bbox)
            toc += cv2.getTickCount() - tic
            track_times.append(
                (cv2.getTickCount() - tic) / cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                if not any(map(math.isnan, gt_bbox)):
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(
                        img, (gt_bbox[0], gt_bbox[1]),
                        (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                        (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2],
                                   pred_bbox[1] + pred_bbox[3]), (0, 255, 255),
                                  3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
        toc /= cv2.getTickFrequency()
        # save results
        model_path = os.path.join('results', args.dataset, model_name)
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        result_path = os.path.join(model_path, '{}.txt'.format(video.name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, video.name, toc, idx / toc))
    os.chdir(model_path)
    save_file = '../%s' % args.dataset
    shutil.make_archive(save_file, 'zip')
    print('Records saved at', save_file + '.zip')
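
The '{}.txt' files written above hold one comma-separated x,y,w,h line per frame and are what the evaluation tools read back. A minimal reader, as a sketch only (load_result is not a pysot function):

import numpy as np

def load_result(path):
    with open(path) as f:
        return np.array([[float(v) for v in line.strip().split(',')]
                         for line in f if line.strip()])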
Example no. 29
0
def main():
    # load config
    cfg.merge_from_file(args.config)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)

    # create model
    model = ModelBuilder(cfg)

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                               gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1,
                               gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx-(w-1)/2, cy-(h-1)/2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox, (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5 # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
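                # VOT restart protocol: the result file records 1 on the init
                # frame, 2 on a failure frame, and 0 on the frames skipped
                # after a failure; tracking is re-initialized 5 frames past
                # the failure (frame_counter = idx + 5 above).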
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    cv2.polylines(img, [np.array(gt_bbox, int).reshape((-1, 1, 2))],
                            True, (0, 255, 0), 3)
                    if cfg.MASK.MASK:
                        cv2.polylines(img, [np.array(pred_bbox, int).reshape((-1, 1, 2))],
                                True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0]+bbox[2], bbox[1]+bbox[3]), (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            video_path = os.path.join('results', args.dataset, model_name,
                    'baseline', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i) for i in x])+'\n')
            print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'.format(
                    v_idx+1, video.name, toc, idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx-(w-1)/2, cy-(h-1)/2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append((cv2.getTickCount() - tic)/cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                                  (gt_bbox[0]+gt_bbox[2], gt_bbox[1]+gt_bbox[3]), (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0]+pred_bbox[2], pred_bbox[1]+pred_bbox[3]), (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            if 'VOT2018-LT' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name,
                        'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                        '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x])+'\n')
                result_path = os.path.join(video_path,
                        '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        f.write('\n' if x is None else "{:.6f}\n".format(x))
                result_path = os.path.join(video_path,
                        '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                video_path = os.path.join('results', args.dataset, model_name, video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x])+'\n')
                result_path = os.path.join(video_path,
                        '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                model_path = os.path.join('results', args.dataset, model_name)
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path, '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x])+'\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
                v_idx+1, video.name, toc, idx / toc))
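
Both protocols above lean on get_axis_aligned_bbox. The sketch below is consistent with pysot's helper, though the area-rescaling detail should be treated as an assumption: for an 8-value polygon it keeps the polygon's center and shrinks the enclosing axis-aligned box so its area matches the polygon's.

import numpy as np

def get_axis_aligned_bbox(region):
    # returns (cx, cy, w, h) for a 4-value [x, y, w, h] box or an
    # 8-value polygon given as a NumPy array
    if region.size == 8:
        cx, cy = np.mean(region[0::2]), np.mean(region[1::2])
        x1, x2 = min(region[0::2]), max(region[0::2])
        y1, y2 = min(region[1::2]), max(region[1::2])
        A1 = np.linalg.norm(region[0:2] - region[2:4]) * \
             np.linalg.norm(region[2:4] - region[4:6])
        A2 = (x2 - x1) * (y2 - y1)
        s = np.sqrt(A1 / A2)  # shrink factor toward the polygon's true area
        w, h = s * (x2 - x1) + 1, s * (y2 - y1) + 1
    else:
        x, y, w, h = region
        cx, cy = x + w / 2, y + h / 2
    return cx, cy, w, h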
Example no. 30
0
def main():
    # load config
    cfg.merge_from_file(args.config)

    # create model
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    video_path = '/home/yuuzhao/Documents/project/pysot/testing_dataset/VOT2016'

    category = os.listdir(video_path)
    category.sort()

    template_acc = []  # template features extracted at the ground-truth box
    template_cur = []  # template features extracted at the predicted box
    init0 = []         # frames elapsed since the last (re-)initialization
    init = []          # absolute frame index
    pre = []           # 0 marks initialization frames, 1 otherwise
    gt = []            # 0 marks the last frame of a sequence, 1 otherwise

    print("Category & Video:")
    for tmp_cat in category:
        tmp_cat_path = join(temp_path, tmp_cat)
        if not os.path.isdir(tmp_cat_path):
            os.makedirs(tmp_cat_path)

        print("Category:", tmp_cat)
        video = os.listdir(join(video_path, tmp_cat))
        video.sort()
        frame = 0

        gt_path = join(video_path, tmp_cat, 'groundtruth.txt')

        ground_truth = np.loadtxt(gt_path, delimiter=',')
        num_frames = frames_of_each_video
        img_path = join(video_path, tmp_cat)
        imgFiles = [
            join(img_path, '%08d.jpg' % i) for i in range(1, num_frames + 1)
        ]

        while frame < num_frames:
            print("frame:", frame)
            Polygon = ground_truth[frame]
            cx, cy, w, h = get_axis_aligned_bbox(Polygon)
            gt_rect = [cx, cy, w, h]

            image_file = imgFiles[frame]
            img = cv2.imread(image_file)  # HxWxC

            if frame == 0:
                tracker.init(img, gt_rect)
            if w * h != 0:
                zf_acc = tracker.get_zf(img, gt_rect)

                output = tracker.track(img)
                pre_rect = output['bbox']
                zf_pre = tracker.get_zf(img, pre_rect)

                template_acc.append(zf_acc)
                template_cur.append(zf_pre)

                print("ACC&PRE")
                init0.append(0)
                init.append(frame)
                frame_reset = 0
                pre.append(0)
                gt.append(1)
                while frame < (num_frames - 1):
                    print("while ", frame, "<", num_frames)
                    frame = frame + 1
                    frame_reset = frame_reset + 1
                    image_file = imgFiles[frame]
                    if not image_file:
                        break

                    Polygon = ground_truth[frame]
                    cx, cy, w, h = get_axis_aligned_bbox(Polygon)
                    gt_rect = [cx, cy, w, h]

                    img = cv2.imread(image_file)  # HxWxC
                    zf_acc = tracker.get_zf(img, gt_rect)

                    output = tracker.track(img)
                    pre_rect = output['bbox']
                    zf_pre = tracker.get_zf(img, pre_rect)

                    # print("zf_pre:",zf_pre.shape)
                    # print("zf_acc:",zf_acc.shape)
                    # pdb.set_trace()
                    template_acc.append(zf_acc)
                    template_cur.append(zf_pre)
                    init0.append(frame_reset)
                    init.append(frame)
                    pre.append(1)
                    if frame == (num_frames - 1):  # last frame
                        print("if frame == num_frames-1")
                        gt.append(0)
                    else:
                        gt.append(1)

                    pre_rect_arr = np.array(pre_rect)
                    cx, cy, w, h = get_axis_aligned_bbox(pre_rect_arr)
                    target_pos = np.array([cx, cy])
                    target_siz = np.array([w, h])

                    res = cxy_wh_2_rect(target_pos, target_siz)
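                    # cxy_wh_2_rect converts the (center, size) pair back to
                    # a 0-indexed [x, y, w, h] rectangle for the IoU check
                    # against the ground truth below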

                    if reset:
                        cx, cy, w, h = get_axis_aligned_bbox(
                            ground_truth[frame])
                        gt_rect = [cx, cy, w, h]
                        gt_rect = np.array(gt_rect)
                        iou = overlap_ratio(gt_rect, res)
                        if iou <= 0:
                            break
            else:
                print("else")
                template_acc.append(
                    torch.zeros([1, 3, 127, 127], dtype=torch.float32))
                template_cur.append(
                    torch.zeros([1, 3, 127, 127], dtype=torch.float32))
                init0.append(0)
                init.append(frame)
                pre.append(1)
                if frame == (num_frames - 1):  # last frame
                    gt.append(0)
                else:
                    gt.append(1)
            frame = frame + 1  # skip

        print("write for each video")
        np.save(tmp_cat_path + '/template', template_acc)
        np.save(tmp_cat_path + '/templatei', template_cur)
        np.save(tmp_cat_path + '/init0', init0)
        np.save(tmp_cat_path + '/init', init)
        np.save(tmp_cat_path + '/pre', pre)
        np.save(tmp_cat_path + '/gt', gt)
    print("template")