Example #1
def track_video(model, video):
    toc, regions = 0, []
    image_files, gt = video['image_files'], video['gt']
    for f, image_file in enumerate(image_files):
        im = cv2.imread(image_file)  # TODO: batch load
        tic = cv2.getTickCount()
        if f == 0:  # init
            target_pos, target_sz = rect_2_cxy_wh(gt[f])
            state = SiamRPN_init(im, target_pos, target_sz,
                                 model)  # init tracker
            location = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            regions.append(gt[f])
        elif f > 0:  # tracking
            state = SiamRPN_track(state, im)  # track
            # +1: OTB results use 1-based (MATLAB-style) coordinates
            location = cxy_wh_2_rect(state['target_pos'] + 1,
                                     state['target_sz'])
            regions.append(location)
        toc += cv2.getTickCount() - tic

        if args.visualization:  # visualization
            if f == 0:
                cv2.destroyAllWindows()
            if len(gt[f]) == 8:  # polygon ground truth (VOT-style)
                cv2.polylines(im,
                              [np.array(gt[f], np.int32).reshape((-1, 1, 2))],
                              True, (0, 255, 0), 3)
            else:
                cv2.rectangle(im, (int(gt[f, 0]), int(gt[f, 1])),
                              (int(gt[f, 0] + gt[f, 2]),
                               int(gt[f, 1] + gt[f, 3])), (0, 255, 0), 3)
            if len(location) == 8:
                cv2.polylines(im,
                              [np.array(location, np.int32).reshape((-1, 1, 2))],
                              True, (0, 255, 255), 3)
            else:
                location = [int(l) for l in location]  # cv2.rectangle needs ints
                cv2.rectangle(
                    im, (location[0], location[1]),
                    (location[0] + location[2], location[1] + location[3]),
                    (0, 255, 255), 3)
            cv2.putText(im, str(f), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 255, 255), 2)

            cv2.imshow(video['name'], im)
            cv2.waitKey(1)
    toc /= cv2.getTickFrequency()

    # save result
    video_path = join('../test', args.dataset, 'SiamRPN_AlexNet_OTB2015')
    if not isdir(video_path):
        makedirs(video_path)
    result_path = join(video_path, '{:s}.txt'.format(video['name']))
    with open(result_path, 'w') as fout:
        for x in regions:
            fout.write(','.join([str(i) for i in x]) + '\n')

    print('({:d}) Video: {:12s} Time: {:02.1f}s Speed: {:3.1f}fps'.format(
        v_id, video['name'], toc, f / toc))
    return f / toc
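All of these examples convert between corner-based boxes (x, y, w, h) and center-based ones via rect_2_cxy_wh and cxy_wh_2_rect. Neither helper is shown on this page; a minimal sketch following the DaSiamRPN-style utils convention (an assumption, not taken from the snippets) looks like this:

import numpy as np

def rect_2_cxy_wh(rect):
    # (x, y, w, h) with a top-left corner -> (center position, size)
    return (np.array([rect[0] + rect[2] / 2, rect[1] + rect[3] / 2]),
            np.array([rect[2], rect[3]]))

def cxy_wh_2_rect(pos, sz):
    # (center position, size) -> (x, y, w, h) with a top-left corner
    return np.concatenate((pos - sz / 2, sz))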
Example #2
def main(imagedir, gtdir):
    # load net
    net_file = join(realpath(dirname(__file__)), 'SiamRPNBIG.model')
    net = SiamRPNBIG()
    net.load_state_dict(torch.load(net_file))
    net.eval().cuda()

    # warm up (torch.autograd.Variable has been a no-op alias for Tensor
    # since PyTorch 0.4; plain tensors work just as well here)
    for i in range(10):
        net.temple(
            torch.autograd.Variable(torch.FloatTensor(1, 3, 127, 127)).cuda())
        net(torch.autograd.Variable(torch.FloatTensor(1, 3, 255, 255)).cuda())

    # start to track
    # get the first frame groundtruth
    gt_file = os.path.join(gtdir, 'gt.txt')
    with open(gt_file, 'r') as f:
        lines = f.readlines()
    gt = []
    for line in lines:
        # split() without arguments is robust to extra whitespace and newlines
        gt.append([int(float(x)) for x in line.split()])
    init_bbox = gt[0]  # top-left x y,w,h
    target_pos, target_sz = rect_2_cxy_wh(
        init_bbox)  # top-left x y,w,h --> center x y,w,h

    image_list = glob.glob(os.path.join(imagedir, '*.jpg'))
    image_list.sort()
    im = cv2.imread(image_list[0])  # HxWxC

    state = SiamRPN_init(im, target_pos, target_sz, net)  # init tracker
    bboxes = []
    for i in range(1, len(gt)):
        im = cv2.imread(image_list[i])  # HxWxC
        state = SiamRPN_track(state, im)  # track
        res = cxy_wh_2_rect(
            state['target_pos'],
            state['target_sz'])  # center x y,w,h --> top-left x y,w,h
        bboxes.append(res.tolist())

    _, precision, precision_auc, iou = _compile_results(gt[1:], bboxes)
    print(' -- Precision (20 px): %.2f -- Precision AUC: %.2f -- IOU: %.2f --'
          % (precision, precision_auc, iou))

    isSavebbox = True
    if isSavebbox:
        print('saving bbox...')
        res_bbox_file = 'results_bbox.json'
        with open(res_bbox_file, 'w') as f:
            json.dump(bboxes, f, indent=2)

    isSavevideo = True
    if isSavevideo:
        print('saving video...')
        save_video(image_list, bboxes)
    print('done')
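_compile_results is not included in the snippet; the printout suggests it returns per-frame distances plus OTB-style metrics. A minimal sketch under that assumption (center-error precision at 20 px, precision averaged over 0-50 px thresholds, and mean IoU, all in percent):

import numpy as np

def _compile_results(gt, bboxes, dist_threshold=20):
    # gt, bboxes: per-frame (x, y, w, h) boxes
    gt = np.asarray(gt, dtype=float)
    bb = np.asarray(bboxes, dtype=float)
    # center-location error per frame
    dist = np.linalg.norm((gt[:, :2] + gt[:, 2:] / 2) -
                          (bb[:, :2] + bb[:, 2:] / 2), axis=1)
    precision = 100 * np.mean(dist <= dist_threshold)
    # precision averaged over thresholds 0..50 px
    precision_auc = 100 * np.mean([np.mean(dist <= t) for t in range(51)])
    # mean intersection-over-union
    x1 = np.maximum(gt[:, 0], bb[:, 0])
    y1 = np.maximum(gt[:, 1], bb[:, 1])
    x2 = np.minimum(gt[:, 0] + gt[:, 2], bb[:, 0] + bb[:, 2])
    y2 = np.minimum(gt[:, 1] + gt[:, 3], bb[:, 1] + bb[:, 3])
    inter = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
    union = gt[:, 2] * gt[:, 3] + bb[:, 2] * bb[:, 3] - inter
    iou = 100 * np.mean(inter / union)
    return dist, precision, precision_auc, iou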
Example #3
    for jj in range(_data_provider.cur_img_num // 100):

        exemplar_list = [None] * batch_size
        source_list = [None] * batch_size
        instance_list = [None] * batch_size
        # build nested lists per element so the inner lists are not aliased
        exemplar_cxy_list = [[None, None] for _ in range(batch_size)]
        source_cxy_list = [[None, None] for _ in range(batch_size)]
        instance_cxy_list = [[None, None] for _ in range(batch_size)]

        for batch in range(batch_size):
            pairs, gts = _data_provider.rand_pick_pair()

            exemplar = cv2.imread(pairs[0])
            source = cv2.imread(pairs[1])
            instance = cv2.imread(pairs[2])
            exemplar_pos, exemplar_sz = rect_2_cxy_wh(gts[0])
            source_pos, source_sz = rect_2_cxy_wh(gts[1])
            instance_pos, instance_sz = rect_2_cxy_wh(gts[2])

            exemplar_list[batch] = exemplar
            source_list[batch] = source
            instance_list[batch] = instance
            exemplar_cxy_list[batch] = [exemplar_pos, exemplar_sz]
            source_cxy_list[batch] = [source_pos, source_sz]
            instance_cxy_list[batch] = [instance_pos, instance_sz]

        train_config = SiamRPN_init_batch(exemplar_list, exemplar_cxy_list,
                                          net)
Example #4
    total_failure = 0
    warped_images = []
    video_length = vot.get_frame_length(video_name)
    # ground-truth bounding box
    gts = vot.get_gts(video_name)
    frame_tags = vot.get_frame_tags(video_name)
    video_frames = vot.get_frames(video_name)
    flow_dir = os.path.join(flow_dirs, video_name + '.txt')
    img_dir = os.path.join(vot_dir, video_name, 'color')
    confidence_dir = os.path.join(
        '/home/jianingq/backward_flow_confidence_vot/', video_name)

    # initialize network: image and init box
    init_rbox = gts[0]
    if len(init_rbox) == 4:  # axis-aligned (x, y, w, h)
        [cx, cy], [w, h] = rect_2_cxy_wh(init_rbox)
    else:  # 8-value rotated polygon
        cx, cy, w, h = get_axis_aligned_bbox(init_rbox)

    # tracker init
    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    im = video_frames[0]  # HxWxC
    state = SiamRPN_init(im, target_pos, target_sz, net)

    detection_box = [int(cx - w / 2), int(cy - h / 2),
                     int(cx + w / 2), int(cy + h / 2)]

    for i in range(video_length - 1):
        # track
        im1 = np.copy(video_frames[i])
        im2 = np.copy(video_frames[i + 1])
        entropy_data = np.load(
            os.path.join(confidence_dir, format(i + 1, '08') + '_entropy.npy'))
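get_axis_aligned_bbox converts an 8-value rotated VOT polygon into an axis-aligned (cx, cy, w, h) box. The helper itself is not shown in the snippet; this sketch follows the common DaSiamRPN/pysot convention and should be treated as an assumption:

import numpy as np

def get_axis_aligned_bbox(region):
    # region: [x1, y1, x2, y2, x3, y3, x4, y4] polygon corners
    region = np.asarray(region, dtype=float)
    cx = np.mean(region[0::2])
    cy = np.mean(region[1::2])
    x1, x2 = np.min(region[0::2]), np.max(region[0::2])
    y1, y2 = np.min(region[1::2]), np.max(region[1::2])
    # scale the bounding rectangle so its area matches the polygon's
    A1 = np.linalg.norm(region[0:2] - region[2:4]) * \
         np.linalg.norm(region[2:4] - region[4:6])
    A2 = (x2 - x1) * (y2 - y1)
    s = np.sqrt(A1 / A2)
    w = s * (x2 - x1) + 1
    h = s * (y2 - y1) + 1
    return cx, cy, w, h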
Example #5
def rtaa_attack(net,
                x_init,
                x,
                gt,
                target_pos,
                target_sz,
                scale_z,
                p,
                eps=10,
                alpha=1,
                iteration=10,
                x_val_min=0,
                x_val_max=255):
    x = Variable(x.data)
    x_adv = Variable(x_init.data, requires_grad=True)

    # note: the alpha argument is overridden so that the total perturbation
    # budget eps is spread evenly across the iterations
    alpha = eps * 1.0 / iteration
    for i in range(iteration):
        delta, score = net(x_adv)

        score_temp = score.permute(1, 2, 3, 0).contiguous().view(2, -1)
        score = torch.transpose(score_temp, 0, 1)
        delta1 = delta.permute(1, 2, 3, 0).contiguous().view(4, -1)
        delta = delta.permute(1, 2, 3,
                              0).contiguous().view(4, -1).data.cpu().numpy()

        # calculate proposals
        gt_cen = rect_2_cxy_wh(gt)
        gt_cen = np.tile(gt_cen, (p.anchor.shape[0], 1))
        gt_cen[:, 0] = ((gt_cen[:, 0] - target_pos[0]) * scale_z -
                        p.anchor[:, 0]) / p.anchor[:, 2]
        gt_cen[:, 1] = ((gt_cen[:, 1] - target_pos[1]) * scale_z -
                        p.anchor[:, 1]) / p.anchor[:, 3]
        gt_cen[:, 2] = np.log(gt_cen[:, 2] * scale_z) / p.anchor[:, 2]
        gt_cen[:, 3] = np.log(gt_cen[:, 3] * scale_z) / p.anchor[:, 3]

        # create pseudo proposals randomly
        gt_cen_pseudo = rect_2_cxy_wh(gt)
        gt_cen_pseudo = np.tile(gt_cen_pseudo, (p.anchor.shape[0], 1))

        rate_xy1 = np.random.uniform(0.3, 0.5)
        rate_xy2 = np.random.uniform(0.3, 0.5)
        rate_wd = np.random.uniform(0.7, 0.9)

        gt_cen_pseudo[:, 0] = ((gt_cen_pseudo[:, 0] - target_pos[0] -
                                rate_xy1 * gt_cen_pseudo[:, 2]) * scale_z -
                               p.anchor[:, 0]) / p.anchor[:, 2]
        gt_cen_pseudo[:, 1] = ((gt_cen_pseudo[:, 1] - target_pos[1] -
                                rate_xy2 * gt_cen_pseudo[:, 3]) * scale_z -
                               p.anchor[:, 1]) / p.anchor[:, 3]
        gt_cen_pseudo[:, 2] = np.log(
            gt_cen_pseudo[:, 2] * rate_wd * scale_z) / p.anchor[:, 2]
        gt_cen_pseudo[:, 3] = np.log(
            gt_cen_pseudo[:, 3] * rate_wd * scale_z) / p.anchor[:, 3]

        delta[0, :] = (delta[0, :] * p.anchor[:, 2] +
                       p.anchor[:, 0]) / scale_z + target_pos[0]
        delta[1, :] = (delta[1, :] * p.anchor[:, 3] +
                       p.anchor[:, 1]) / scale_z + target_pos[1]
        delta[2, :] = (np.exp(delta[2, :]) * p.anchor[:, 2]) / scale_z
        delta[3, :] = (np.exp(delta[3, :]) * p.anchor[:, 3]) / scale_z
        location = np.array([
            delta[0] - delta[2] / 2, delta[1] - delta[3] / 2, delta[2],
            delta[3]
        ])

        label = overlap_ratio(location, gt)

        # set thresholds to define positive and negative samples, following the training step
        iou_hi = 0.6
        iou_low = 0.3

        # make labels
        y_pos = np.where(label > iou_hi, 1, 0)
        y_pos = torch.from_numpy(y_pos).cuda().long()
        y_neg = np.where(label < iou_low, 0, 1)
        y_neg = torch.from_numpy(y_neg).cuda().long()
        pos_index = np.where(y_pos.cpu() == 1)
        neg_index = np.where(y_neg.cpu() == 0)
        index = np.concatenate((pos_index, neg_index), axis=1)

        # make pseudo labels
        y_pos_pseudo = np.where(label > iou_hi, 0, 1)
        y_pos_pseudo = torch.from_numpy(y_pos_pseudo).cuda().long()
        y_neg_pseudo = np.where(label < iou_low, 1, 0)
        y_neg_pseudo = torch.from_numpy(y_neg_pseudo).cuda().long()

        y_truth = y_pos
        y_pseudo = y_pos_pseudo

        # calculate classification loss
        loss_truth_cls = -F.cross_entropy(score[index], y_truth[index])
        loss_pseudo_cls = -F.cross_entropy(score[index], y_pseudo[index])
        loss_cls = (loss_truth_cls - loss_pseudo_cls) * 1  # classification weight

        # calculate regression loss
        loss_truth_reg = -rpn_smoothL1(delta1, gt_cen, y_pos)
        loss_pseudo_reg = -rpn_smoothL1(delta1, gt_cen_pseudo, y_pos)
        loss_reg = (loss_truth_reg - loss_pseudo_reg) * 5  # regression weight

        # final adversarial loss
        loss = loss_cls + loss_reg

        # calculate the derivative
        net.zero_grad()
        if x_adv.grad is not None:
            x_adv.grad.data.fill_(0)
        loss.backward(retain_graph=True)

        adv_grad = where((x_adv.grad > 0) | (x_adv.grad < 0), x_adv.grad, 0)
        adv_grad = torch.sign(adv_grad)
        x_adv = x_adv - alpha * adv_grad

        x_adv = where(x_adv > x + eps, x + eps, x_adv)
        x_adv = where(x_adv < x - eps, x - eps, x_adv)
        x_adv = torch.clamp(x_adv, x_val_min, x_val_max)
        x_adv = Variable(x_adv.data, requires_grad=True)

    return x_adv
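The where used above is not torch.where: it is called with a plain Python scalar as its third argument, so the snippet presumably defines its own helper. A minimal sketch of a compatible definition (an assumption, since it is not shown in the source):

import torch

def where(cond, x, y):
    # elementwise select: take x where cond holds, y elsewhere;
    # accepts tensors or scalars for x and y
    cond = cond.float()
    return cond * x + (1.0 - cond) * y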
Example #6
net.load_state_dict(model_dict)
net.cuda().eval()

# warm up
for i in range(10):
    net.temple(torch.autograd.Variable(torch.ones(1, 3, 127, 127)).cuda(),
               torch.autograd.Variable(torch.ones(1, 3, 271, 271)).cuda())
    net(torch.autograd.Variable(torch.ones(1, 3, 271, 271)).cuda())

for seq in OTB_seqs:
    _data_provider.pick_seq(seq)
    exemplar_path, exemplar_gt, cur_img_num = \
        _data_provider.eval_pick_exemplar()

    exemplar = cv2.imread(exemplar_path)
    exemplar_pos, exemplar_sz = rect_2_cxy_wh(exemplar_gt)
    state = SiamRPN_init(exemplar, exemplar_pos, exemplar_sz, net)
    save_file = save_res_path + seq + '_ours.txt'
    tracking_res = open(save_file, 'w')

    for idx in range(cur_img_num):

        instance_path = _data_provider.eval_pick_instance()
        instance = cv2.imread(instance_path)
        state = SiamRPN_track(state, instance)
        print('seq:{}:{} , score:{}'.format(seq, idx, state['score']))
        res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
        tracking_res.write('{},{},{},{}\n'.format(res[0], res[1],
                                                  res[2], res[3]))
    tracking_res.close()