예제 #1
0
def init_actor(actor, image, gt):
    """Pre-train the actor network to regress box offsets around the ground truth.

    Args:
        actor: actor network; moved to GPU and optimized in place.
        image: first frame, array-like (H x W x C).
        gt: ground-truth box, length-4 array-like.

    Returns:
        (deta_flag, out_flag_first): deta_flag is 0 when training converged
        (loss < 1e-4) and 1 otherwise; out_flag_first is the out-of-view flag
        getbatch_actor reports for the ground-truth box.
    """
    batch_num = 64
    maxiter = 10
    actor = actor.cuda()
    actor.train()
    init_optimizer = torch.optim.Adam(actor.parameters(), lr=0.0001)
    loss_func = torch.nn.MSELoss()
    # Out-of-view flag for the ground-truth box itself.
    _, _, out_flag_first = getbatch_actor(np.array(image), np.array(gt).reshape([1, 4]))
    actor_samples = np.round(gen_samples(SampleGenerator('uniform', (image.shape[1], image.shape[0]), 0.3, 1.5, None),
                                         gt, 640, [0.6, 1], [0.9, 1.1]))
    idx = np.random.permutation(actor_samples.shape[0])
    batch_img_g, batch_img_l, _ = getbatch_actor(np.array(image), actor_samples)
    # Regression targets: offset of each sample from the ground truth.
    batch_distance = cal_distance(actor_samples, np.tile(gt, [actor_samples.shape[0], 1]))
    batch_distance = np.array(batch_distance).astype(np.float32)
    # Extend the index permutation so every mini-batch is fully populated.
    while len(idx) < batch_num * maxiter:
        idx = np.concatenate([idx, np.random.permutation(actor_samples.shape[0])])

    pointer = 0
    deta_flag = 1
    for _ in range(maxiter):
        end = pointer + batch_num
        cur_idx = idx[pointer:end]
        pointer = end
        feat = actor(batch_img_l[cur_idx], batch_img_g[cur_idx])
        loss = loss_func(feat, torch.FloatTensor(batch_distance[cur_idx]).cuda())
        actor.zero_grad()
        loss.backward()
        init_optimizer.step()
        if loss.item() < 0.0001:
            deta_flag = 0
            # BUG FIX: the early return used to yield a bare int while the
            # normal path returns a 2-tuple; callers unpack two values
            # (`deta_flag, out_flag_first = init_actor(...)`), so the
            # converged path crashed them. Return the same shape on both paths.
            return deta_flag, out_flag_first
    return deta_flag, out_flag_first
예제 #2
0
    def init_actor(self, image, gt):
        """Pre-train self.actor to regress bounding-box offsets around gt.

        Returns 0 when the loss drops below 1e-4 (converged), 1 otherwise.
        """
        # Fixed seeds keep sampling and training deterministic across runs.
        np.random.seed(123)
        torch.manual_seed(456)
        torch.cuda.manual_seed(789)

        batch_size = 64
        num_iters = 3
        self.actor.train()
        optimizer = torch.optim.Adam(self.actor.parameters(), lr=0.0001)
        mse = torch.nn.MSELoss()
        _, _, out_flag_first = getbatch_actor(np.array(image),
                                              np.array(gt).reshape([1, 4]))

        generator = SampleGenerator('gaussian',
                                    (image.shape[1], image.shape[0]), 0.2,
                                    1.1, None)
        actor_samples = np.round(gen_samples(generator, gt, 192, [0.6, 1],
                                             None))
        order = np.random.permutation(actor_samples.shape[0])

        batch_img_g, batch_img_l, _ = getbatch_actor(np.array(image),
                                                     actor_samples)
        # Regression targets: offset of each sample from the ground truth.
        targets = cal_distance(actor_samples,
                               np.tile(gt, [actor_samples.shape[0], 1]))
        targets = np.array(targets).astype(np.float32)
        # Grow the index sequence until it covers every mini-batch.
        while len(order) < batch_size * num_iters:
            order = np.concatenate(
                [order, np.random.permutation(actor_samples.shape[0])])

        cursor = 0
        deta_flag = 1
        for step in range(num_iters):
            chunk = order[cursor:cursor + batch_size]
            cursor += batch_size
            prediction = self.actor(batch_img_l[chunk], batch_img_g[chunk])
            loss = mse(prediction, torch.FloatTensor(targets[chunk]).cuda())

            self.actor.zero_grad()
            loss.backward()
            optimizer.step()
            if False:  # debug toggle, kept disabled
                print("init actor Iter %d, Loss %.10f" % (step, loss.item()))
            if loss.item() < 0.0001:
                deta_flag = 0
                return deta_flag
        return deta_flag
예제 #3
0
    def update(self, image):
        """Track one frame: choose a template with the policy network, run the
        SiamFC tracker, then refine the box with the actor for 5 iterations.
        """
        resized = cv2.resize(image, (255, 255), interpolation=cv2.INTER_AREA)
        np_img = np.array(resized).transpose(2, 0, 1)
        np_imgs = [np_img for _ in range(T_N)]

        with torch.no_grad():
            responses = self.siam(
                torch.Tensor(self.templates).permute(0, 3, 1,
                                                     2).float().cuda(),
                torch.Tensor(np_imgs).float().cuda())
            action = self.pi(
                responses.permute(1, 0, 2, 3).cuda()).cpu().detach().numpy()
        action_id = np.argmax(action)
        # Prefer a newer template only if its policy score, discounted by
        # 0.9, still beats template 0's score.
        if action[0][action_id] * 0.9 > action[0][0]:
            template = self.templates[action_id]
        else:
            template = self.templates[0]
        with torch.no_grad():
            siam_box = self.tracker.update(image, template)
        # Convert (x1, y1, x2, y2) into (x, y, w, h).
        siam_box = np.round([
            siam_box[0], siam_box[1], siam_box[2] - siam_box[0],
            siam_box[3] - siam_box[1]
        ])
        bbox = siam_box
        for _ in range(5):
            img_g, img_l, out_flag = getbatch_actor(
                np.array(image),
                np.array(bbox).reshape([1, 4]))
            with torch.no_grad():
                deta_pos = self.actor(img_l, img_g)
            deta_pos = deta_pos.data.clone().cpu().numpy()
            # Suppress implausibly large scale adjustments.
            if deta_pos[:, 2] > 0.2 or deta_pos[:, 2] < -0.2:
                deta_pos[:, 2] = 0
            if self.deta_flag or (out_flag and not self.out_flag_first):
                deta_pos[:, 2] = 0

            # Each refinement starts from the SiamFC box, not from bbox.
            bbox = np.round(
                move_crop_tracking(np.array(siam_box), deta_pos,
                                   (image.shape[1], image.shape[0]),
                                   self.rate))
        return bbox
예제 #4
0
def init_actor(actor, image, gt):
    """Pre-train the actor to regress box offsets on samples drawn around gt.

    Args:
        actor: actor network; moved to GPU and optimized in place.
        image: PIL image of the first frame.
        gt: ground-truth box, length-4 array-like.

    Returns:
        deta_flag: 0 when training converged (loss < 1e-4), 1 otherwise.
    """
    np.random.seed(123)
    torch.manual_seed(456)
    torch.cuda.manual_seed(789)

    batch_num = 64
    maxiter = 80
    actor = actor.cuda()
    actor.train()
    init_optimizer = torch.optim.Adam(actor.parameters(), lr=0.0001)
    loss_func = torch.nn.MSELoss()
    actor_samples = np.round(
        gen_samples(SampleGenerator('uniform', image.size, 0.3, 1.5, None), gt,
                    1500, [0.6, 1], [0.9, 1.1]))
    idx = np.random.permutation(actor_samples.shape[0])
    batch_img = getbatch_actor(np.array(image), actor_samples)
    # Regression targets: offset of each sample from the ground truth.
    batch_distance = cal_distance(actor_samples,
                                  np.tile(gt, [actor_samples.shape[0], 1]))
    batch_distance = np.array(batch_distance).astype(np.float32)
    # Extend the permutation so every mini-batch is fully populated.
    while len(idx) < batch_num * maxiter:
        idx = np.concatenate(
            [idx, np.random.permutation(actor_samples.shape[0])])

    pointer = 0
    # NOTE(review): Image.ANTIALIAS is deprecated in recent Pillow; confirm
    # the pinned Pillow version before migrating to Image.Resampling.LANCZOS.
    torch_image = loader(image.resize((225, 225),
                                      Image.ANTIALIAS)).unsqueeze(0).cuda()
    deta_flag = 1
    for step in range(maxiter):
        end = pointer + batch_num
        cur_idx = idx[pointer:end]
        pointer = end
        feat = actor(batch_img[cur_idx],
                     torch_image.repeat(batch_num, 1, 1, 1))
        # Variable() is a deprecated no-op wrapper; pass the tensor directly.
        loss = loss_func(feat,
                         torch.FloatTensor(batch_distance[cur_idx]).cuda())

        actor.zero_grad()
        loss.backward()
        init_optimizer.step()
        if opts['show_train']:
            print("Iter %d, Loss %.10f" % (step, loss.item()))
        if loss.item() < 0.0001:
            deta_flag = 0
            return deta_flag
    # BUG FIX: the final return used to sit INSIDE the loop body, so training
    # always stopped after one iteration instead of running maxiter (80).
    return deta_flag
예제 #5
0
def run_tracking(
        img_list,
        init_bbox,
        gt=None,
        savefig_dir='',
        display=False,
        siamfc_path="../models/siamfc_pretrained.pth",
        policy_path="../models/template_policy/11200_template_policy.pth",
        gpu_id=0):
    """Track a target through img_list with SiamFC + template policy + actor.

    Args:
        img_list: list of frame file paths.
        init_bbox: initial box (x, y, w, h).
        gt: optional per-frame ground-truth boxes for overlap reporting.
        savefig_dir: directory for saved visualizations (when enabled).
        display: show a live matplotlib view.
        siamfc_path: SiamFC checkpoint path.
        policy_path: template-policy checkpoint path.
            NOTE(review): currently unused — a hard-coded checkpoint is
            loaded below; confirm which one is intended.
        gpu_id: CUDA device for the SiamFC tracker.

    Returns:
        (result, fps): per-frame (x, y, w, h) boxes and mean frames/second.
    """
    rate = init_bbox[2] / init_bbox[3]  # aspect ratio of the initial box
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    actor = Actor()

    # Load only actor weights whose names match the current architecture.
    pretrained_act_dict = torch.load(
        "../models/Double_agent/95600_DA_actor.pth")
    actor_dict = actor.state_dict()
    pretrained_act_dict = {
        k: v
        for k, v in pretrained_act_dict.items() if k in actor_dict
    }
    actor_dict.update(pretrained_act_dict)
    actor.load_state_dict(actor_dict)

    siamfc = SiamFCTracker(model_path=siamfc_path, gpu_id=gpu_id)
    siamEmbed = SiameseNet(BaselineEmbeddingNet())
    T_N = opts['T_N']
    pi = T_Policy(T_N)
    weights_init(pi)
    # Load only policy weights whose names match the current architecture.
    pretrained_pi_dict = torch.load(
        '../models/template_policy/95600_template_policy.pth')
    pi_dict = pi.state_dict()
    pretrained_pi_dict = {
        k: v
        for k, v in pretrained_pi_dict.items() if k in pi_dict
    }
    pi_dict.update(pretrained_pi_dict)
    pi.load_state_dict(pi_dict)

    if opts['use_gpu']:
        actor = actor.cuda()
        siamEmbed = siamEmbed.cuda()
        pi = pi.cuda()

    image = cv2.cvtColor(cv2.imread(img_list[0]), cv2.COLOR_BGR2RGB)

    # Initialize the actor on the first frame and seed the template pool.
    deta_flag, out_flag_first = init_actor(actor, image, target_bbox)
    template = siamfc.init(image, target_bbox)
    templates = []
    for _ in range(T_N):
        templates.append(template)
    spf_total = 0
    # Display
    savefig = 0

    if display or savefig:
        dpi = 80.0
        figsize = (image.shape[1] / dpi, image.shape[0] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image)

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)

        rect = plt.Rectangle(tuple(result[0, :2]),
                             result[0, 2],
                             result[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)
    # Blur detector baseline: Laplacian variance on the first frame's crop.
    imageVar_first = cv2.Laplacian(
        crop_image_blur(np.array(image), target_bbox), cv2.CV_64F).var()
    for i in range(1, len(img_list)):

        tic = time.time()
        # Load image
        image = cv2.cvtColor(cv2.imread(img_list[i]), cv2.COLOR_BGR2RGB)
        np_img = np.array(
            cv2.resize(image, (255, 255),
                       interpolation=cv2.INTER_AREA)).transpose(2, 0, 1)
        np_imgs = []
        # BUG FIX: this inner loop used `i` as its variable, clobbering the
        # frame index of the enclosing loop — every frame was then stored at
        # result[T_N - 1], templates never refreshed on schedule, and gt/result
        # lookups below read the wrong row.
        for _ in range(T_N):
            np_imgs.append(np_img)
        if imageVar_first > 200:
            imageVar = cv2.Laplacian(
                crop_image_blur(np.array(image), target_bbox),
                cv2.CV_64F).var()
        else:
            imageVar = 200

        # Score all stored templates against the current frame.
        if opts['use_gpu']:
            responses = siamEmbed(
                torch.Tensor(templates).permute(0, 3, 1, 2).float().cuda(),
                torch.Tensor(np_imgs).float().cuda())
        else:
            responses = siamEmbed(
                torch.Tensor(templates).permute(0, 3, 1, 2).float(),
                torch.Tensor(np_imgs).float())
        if opts['use_gpu']:
            pi_input = torch.Tensor(responses.cpu()).permute(1, 0, 2, 3).cuda()
            action = pi(pi_input).cpu().detach().numpy()
        else:
            pi_input = torch.Tensor(responses).permute(1, 0, 2, 3)
            action = pi(pi_input).numpy()
        action_id = np.argmax(action)
        template = templates[action_id]
        siam_box = siamfc.update(image, templates[0])
        # Convert (x1, y1, x2, y2) into (x, y, w, h).
        siam_box = np.round([
            siam_box[0], siam_box[1], siam_box[2] - siam_box[0],
            siam_box[3] - siam_box[1]
        ])
        print(siam_box)
        # Estimate target bbox: refine the SiamFC box with the actor.
        img_g, img_l, out_flag = getbatch_actor(
            np.array(image),
            np.array(siam_box).reshape([1, 4]))
        deta_pos = actor(img_l, img_g)
        deta_pos = deta_pos.data.clone().cpu().numpy()
        # Suppress implausibly large scale adjustments.
        if deta_pos[:, 2] > 0.05 or deta_pos[:, 2] < -0.05:
            deta_pos[:, 2] = 0
        if deta_flag or (out_flag and not out_flag_first):
            deta_pos[:, 2] = 0

        pos_ = np.round(
            move_crop_tracking(np.array(siam_box), deta_pos,
                               (image.shape[1], image.shape[0]), rate))

        # Only commit the new box when the crop is not too blurry.
        if imageVar > 100:
            target_bbox = pos_
            result[i] = target_bbox
        # Refresh the template pool every 10 frames.
        if i % 10 == 0:
            template = siamfc.init(image, pos_)
            templates.append(template)
            templates.pop(1)

        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

            rect.set_xy(result[i, :2])
            rect.set_width(result[i, 2])
            rect.set_height(result[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '%04d.jpg' % (i)),
                            dpi=dpi)
        if display:
            # BUG FIX: these were Python-2-style prints — a bare `print`
            # expression followed by a parenthesized tuple — which printed
            # nothing in Python 3.
            if gt is None:
                print("Frame %d/%d,  Time %.3f" %
                      (i, len(img_list), spf))
            else:
                if opts['show_train']:
                    print("Frame %d/%d, Overlap %.3f, Time %.3f, box (%d,%d,%d,%d), var %d" %
                          (i, len(img_list), overlap_ratio(gt[i], result[i])[0], spf, target_bbox[0],
                           target_bbox[1], target_bbox[2], target_bbox[3], imageVar))

    fps = len(img_list) / spf_total
    return result, fps
예제 #6
0
def run_tracking(img_list, init_bbox, gt=None, savefig_dir='', display=False):
    """Track a target through img_list with MDNet (critic) + actor refinement.

    Args:
        img_list: list of frame file paths.
        init_bbox: initial box (x, y, w, h).
        gt: optional per-frame ground-truth boxes for overlap reporting.
        savefig_dir: directory for saved visualizations (when enabled).
        display: show a live matplotlib view.

    Returns:
        (result, result_bb, fps): raw per-frame boxes, bbox-regressed boxes,
        and mean frames/second.
    """
    # Fixed seeds keep sampling and training deterministic across runs.
    np.random.seed(123)
    torch.manual_seed(456)
    torch.cuda.manual_seed(789)

    rate = init_bbox[2] / init_bbox[3]  # aspect ratio of the initial box
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox
    success = 1
    # Init model
    actor = Actor()
    model = MDNet()

    if opts['use_gpu']:
        model = model.cuda()
        actor = actor.cuda()
    model.set_learnable_params(opts['fc_layers'])
    criterion = BinaryLoss()
    init_optimizer = set_optimizer(model, opts['lr_init'])
    update_optimizer = set_optimizer(model, opts['lr_update'])

    image = Image.open(img_list[0]).convert('RGB')

    # Train the bounding-box regressor on samples around the initial box.
    bbreg_examples = gen_samples(
        SampleGenerator('uniform', image.size, 0.3, 1.5, 1.1), target_bbox,
        opts['n_bbreg'], opts['overlap_bbreg'], opts['scale_bbreg'])
    bbreg_feats = forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)

    # Initial positive/negative samples for MDNet fine-tuning.
    pos_examples = gen_samples(
        SampleGenerator('gaussian', image.size, 0.1, 1.2), target_bbox,
        opts['n_pos_init'], opts['overlap_pos_init'])

    neg_examples = np.concatenate([
        gen_samples(SampleGenerator('uniform', image.size, 1, 2,
                                    1.1), target_bbox, opts['n_neg_init'] // 2,
                    opts['overlap_neg_init']),
        gen_samples(SampleGenerator('whole', image.size, 0, 1.2,
                                    1.1), target_bbox, opts['n_neg_init'] // 2,
                    opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    pos_feats = forward_samples(model, image, pos_examples)
    neg_feats = forward_samples(model, image, neg_examples)

    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])

    deta_flag, out_flag_first = init_actor(actor, image, target_bbox)

    # Sample generators reused throughout the sequence.
    init_generator = SampleGenerator('gaussian',
                                     image.size,
                                     opts['trans_f'],
                                     1,
                                     valid=False)
    sample_generator = SampleGenerator('gaussian',
                                       image.size,
                                       opts['trans_f'],
                                       opts['scale_f'],
                                       valid=False)
    pos_generator = SampleGenerator('gaussian', image.size, 0.1, 1.2)
    neg_generator = SampleGenerator('uniform', image.size, 1.5, 1.2)

    pos_feats_all = [pos_feats[:opts['n_pos_update']]]
    neg_feats_all = [neg_feats[:opts['n_neg_update']]]
    data_frame = [0]

    pos_score = forward_samples(model,
                                image,
                                np.array(init_bbox).reshape([1, 4]),
                                out_layer='fc6')
    # Sliding windows of recent frames/boxes/scores used for online updates.
    img_learn = [image]
    pos_learn = [init_bbox]
    score_pos = [pos_score.cpu().numpy()[0][1]]
    frame_learn = [0]
    pf_frame = []

    update_lenth = 10
    spf_total = 0
    # Display
    savefig = 0

    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image)

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)

        rect = plt.Rectangle(tuple(result_bb[0, :2]),
                             result_bb[0, 2],
                             result_bb[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)
    detetion = 0
    # Blur detector baseline: Laplacian variance on the first frame's crop.
    imageVar_first = cv2.Laplacian(
        crop_image_blur(np.array(image), target_bbox), cv2.CV_64F).var()
    for i in range(1, len(img_list)):

        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')
        if imageVar_first > 200:
            imageVar = cv2.Laplacian(
                crop_image_blur(np.array(image), target_bbox),
                cv2.CV_64F).var()
        else:
            imageVar = 200
        # Estimate target bbox: refine the previous box with the actor.
        img_g, img_l, out_flag = getbatch_actor(
            np.array(image),
            np.array(target_bbox).reshape([1, 4]))
        deta_pos = actor(img_l, img_g)
        deta_pos = deta_pos.data.clone().cpu().numpy()
        # Suppress implausibly large scale adjustments.
        if deta_pos[:, 2] > 0.05 or deta_pos[:, 2] < -0.05:
            deta_pos[:, 2] = 0
        if deta_flag or (out_flag and not out_flag_first):
            deta_pos[:, 2] = 0
        # No scale change right after a detection-failure frame.
        if len(pf_frame) and i == (pf_frame[-1] + 1):
            deta_pos[:, 2] = 0

        pos_ = np.round(
            move_crop_tracking(target_bbox, deta_pos,
                               (image.size[1], image.size[0]), rate))
        r = forward_samples(model,
                            image,
                            np.array(pos_).reshape([1, 4]),
                            out_layer='fc6')
        r = r.cpu().numpy()

        # Accept the actor's box when the critic approves and the crop is sharp.
        if r[0][1] > 0 and imageVar > 100:
            target_bbox = pos_
            target_score = r[0][1]
            bbreg_bbox = pos_
            success = 1
            if True:
                fin_score = r[0][1]
                img_learn.append(image)
                pos_learn.append(target_bbox)
                score_pos.append(fin_score)
                frame_learn.append(i)
                while len(img_learn) > update_lenth * 2:
                    del img_learn[0]
                    del pos_learn[0]
                    del score_pos[0]
                    del frame_learn[0]
            result[i] = target_bbox
            result_bb[i] = bbreg_bbox
        else:
            detetion += 1
            if len(pf_frame) == 0:
                pf_frame = [i]
            else:
                pf_frame.append(i)

        # Periodic online update of the MDNet critic from recent frames.
        if (len(frame_learn) == update_lenth * 2
                and data_frame[-1] not in frame_learn) or data_frame[-1] == 0:
            for num in range(max(0,
                                 img_learn.__len__() - update_lenth),
                             img_learn.__len__()):
                if frame_learn[num] not in data_frame:
                    gt_ = pos_learn[num]
                    image_ = img_learn[num]
                    pos_examples = np.round(
                        gen_samples(pos_generator, gt_, opts['n_pos_update'],
                                    opts['overlap_pos_update']))
                    neg_examples = np.round(
                        gen_samples(neg_generator, gt_, opts['n_neg_update'],
                                    opts['overlap_neg_update']))
                    pos_feats_ = forward_samples(model, image_, pos_examples)
                    neg_feats_ = forward_samples(model, image_, neg_examples)

                    pos_feats_all.append(pos_feats_)
                    neg_feats_all.append(neg_feats_)
                    data_frame.append(frame_learn[num])
                    if len(pos_feats_all) > 10:
                        del pos_feats_all[0]
                        del neg_feats_all[0]
                        del data_frame[0]
                else:
                    # Reuse features already extracted for this frame.
                    pos_feats_ = pos_feats_all[data_frame.index(
                        frame_learn[num])]
                    neg_feats_ = neg_feats_all[data_frame.index(
                        frame_learn[num])]

                if num == max(0, img_learn.__len__() - update_lenth):
                    pos_feats = pos_feats_
                    neg_feats = neg_feats_

                else:
                    pos_feats = torch.cat([pos_feats, pos_feats_], 0)
                    neg_feats = torch.cat([neg_feats, neg_feats_], 0)
            train(model, criterion, update_optimizer, pos_feats, neg_feats,
                  opts['maxiter_update'])

        # Widen the search radius after a failure.
        if success:
            sample_generator.set_trans_f(opts['trans_f'])
        else:
            sample_generator.set_trans_f(opts['trans_f_expand'])

        if imageVar < 100:
            samples = gen_samples(init_generator, target_bbox,
                                  opts['n_samples'])
        else:
            samples = gen_samples(sample_generator, target_bbox,
                                  opts['n_samples'])

            # Extra samples at the initial scale guard against drift early on
            # or after a large, suspicious scale change.
            if i < 20 or ((init_bbox[2] * init_bbox[3]) > 1000 and
                          (target_bbox[2] * target_bbox[3] /
                           (init_bbox[2] * init_bbox[3]) > 2.5
                           or target_bbox[2] * target_bbox[3] /
                           (init_bbox[2] * init_bbox[3]) < 0.4)):
                sample_generator.set_trans_f(opts['trans_f_expand'])
                samples_ = np.round(
                    gen_samples(
                        sample_generator,
                        np.hstack([
                            target_bbox[0:2] + target_bbox[2:4] / 2 -
                            init_bbox[2:4] / 2, init_bbox[2:4]
                        ]), opts['n_samples']))
                samples = np.vstack([samples, samples_])

            # Re-detect: average the 5 highest-scoring candidate boxes.
            sample_scores = forward_samples(model,
                                            image,
                                            samples,
                                            out_layer='fc6')
            top_scores, top_idx = sample_scores[:, 1].topk(5)
            top_idx = top_idx.cpu().numpy()
            target_score = top_scores.mean()
            target_bbox = samples[top_idx].mean(axis=0)
            success = target_score > opts['success_thr']

            # Bbox regression
            if success:
                bbreg_samples = samples[top_idx]
                bbreg_feats = forward_samples(model, image, bbreg_samples)
                bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
                bbreg_bbox = bbreg_samples.mean(axis=0)

                img_learn.append(image)
                pos_learn.append(target_bbox)
                score_pos.append(target_score)
                frame_learn.append(i)
                while len(img_learn) > 2 * update_lenth:
                    del img_learn[0]
                    del pos_learn[0]
                    del score_pos[0]
                    del frame_learn[0]

            else:
                bbreg_bbox = target_bbox

            # Copy previous result at failure
            if not success:
                target_bbox = result[i - 1]
                bbreg_bbox = result_bb[i - 1]

            # Save result
            result[i] = target_bbox
            result_bb[i] = bbreg_bbox

        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '%04d.jpg' % (i)),
                            dpi=dpi)
        if display:
            # BUG FIX: these were Python-2-style prints — a bare `print`
            # expression followed by a formatted string on the next line —
            # which printed nothing in Python 3.
            if gt is None:
                print("Frame %d/%d, Score %.3f, Time %.3f" %
                      (i, len(img_list), target_score, spf))
            else:
                if opts['show_train']:
                    print("Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f, box (%d,%d,%d,%d), var %d" %
                          (i, len(img_list), overlap_ratio(gt[i], result_bb[i])[0], target_score, spf, target_bbox[0],
                           target_bbox[1], target_bbox[2], target_bbox[3], imageVar))

    fps = len(img_list) / spf_total
    return result, result_bb, fps
예제 #7
0
    def update(self, image):
        """Track one frame: refine the previous box with the actor, score it
        with the MDNet critic, and fall back to re-detection on failure.

        Returns self.target_bbox, the (x, y, w, h) box for this frame.
        """
        self.frame += 1
        update_lenth = 10
        np_image = np.array(image)
        if self.imageVar_first > 200:
            imageVar = cv2.Laplacian(
                crop_image_blur(np_image, self.target_bbox), cv2.CV_64F).var()
        else:
            imageVar = 200
        # NOTE(review): getbatch_actor returns a 3-tuple elsewhere in this
        # file; confirm this single-value unpacking matches its signature.
        img_l = getbatch_actor(np_image, self.target_bbox.reshape([1, 4]))
        torch_image = loader(image.resize(
            (225, 225), Image.ANTIALIAS)).unsqueeze(0).cuda()
        deta_pos = self.actor(img_l, torch_image)
        deta_pos = deta_pos.data.clone().cpu().numpy()
        # Suppress implausibly large scale adjustments.
        if self.deta_flag:
            deta_pos[:, 2] = 0
        if deta_pos[:, 2] > 0.05 or deta_pos[:, 2] < -0.05:
            deta_pos[:, 2] = 0
        # No scale change right after a detection-failure frame.
        if len(self.pf_frame) and self.frame == (self.pf_frame[-1] + 1):
            deta_pos[:, 2] = 0
        pos_ = np.round(
            move_crop(self.target_bbox, deta_pos,
                      (image.size[1], image.size[0]), self.rate))
        r = forward_samples(self.model,
                            image,
                            np.array(pos_).reshape([1, 4]),
                            out_layer='fc6')
        r = r.cpu().numpy()
        # Accept the actor's box when the critic approves and the crop is sharp.
        if r[0][1] > 0 and imageVar > 100:
            self.target_bbox = pos_
            target_score = r[0][1]
            bbreg_bbox = pos_
            success = 1
            if True:
                fin_score = r[0][1]
                self.img_learn.append(image)
                self.pos_learn.append(self.target_bbox)
                self.score_pos.append(fin_score)
                self.frame_learn.append(self.frame)
                while len(self.img_learn) > update_lenth * 2:
                    del self.img_learn[0]
                    del self.pos_learn[0]
                    del self.score_pos[0]
                    del self.frame_learn[0]
            self.result[self.frame] = self.target_bbox
            self.result_bb[self.frame] = bbreg_bbox
        else:
            # BUG FIX: `success` was never assigned on this path but is read
            # below, raising NameError on a detection failure.
            success = 0
            self.detetion += 1
            if len(self.pf_frame) == 0:
                self.pf_frame = [self.frame]
            else:
                self.pf_frame.append(self.frame)

        # Periodic online update of the MDNet critic from recent frames.
        # NOTE(review): in the function version of this tracker, the
        # re-detection section below runs every frame, not only on update
        # frames; confirm this nesting is intentional.
        if (len(self.frame_learn) == update_lenth * 2 and self.data_frame[-1]
                not in self.frame_learn) or self.data_frame[-1] == 0:
            for num in range(max(0,
                                 self.img_learn.__len__() - update_lenth),
                             self.img_learn.__len__()):
                if self.frame_learn[num] not in self.data_frame:
                    gt_ = self.pos_learn[num]
                    image_ = self.img_learn[num]
                    pos_examples = np.round(
                        gen_samples(self.pos_generator, gt_,
                                    opts['n_pos_update'],
                                    opts['overlap_pos_update']))
                    neg_examples = np.round(
                        gen_samples(self.neg_generator, gt_,
                                    opts['n_neg_update'],
                                    opts['overlap_neg_update']))
                    pos_feats_ = forward_samples(self.model, image_,
                                                 pos_examples)
                    neg_feats_ = forward_samples(self.model, image_,
                                                 neg_examples)

                    self.pos_feats_all.append(pos_feats_)
                    self.neg_feats_all.append(neg_feats_)
                    self.data_frame.append(self.frame_learn[num])
                    if len(self.pos_feats_all) > 10:
                        del self.pos_feats_all[0]
                        del self.neg_feats_all[0]
                        del self.data_frame[0]
                else:
                    # Reuse features already extracted for this frame.
                    pos_feats_ = self.pos_feats_all[self.data_frame.index(
                        self.frame_learn[num])]
                    neg_feats_ = self.neg_feats_all[self.data_frame.index(
                        self.frame_learn[num])]

                if num == max(0, self.img_learn.__len__() - update_lenth):
                    pos_feats = pos_feats_
                    neg_feats = neg_feats_

                else:
                    pos_feats = torch.cat([pos_feats, pos_feats_], 0)
                    neg_feats = torch.cat([neg_feats, neg_feats_], 0)
            train(self.model, self.criterion, self.update_optimizer, pos_feats,
                  neg_feats, opts['maxiter_update'])

            # Widen the search radius after a failure.
            if success:
                self.sample_generator.set_trans_f(opts['trans_f'])
            else:
                self.sample_generator.set_trans_f(opts['trans_f_expand'])
            if imageVar < 100:
                samples = gen_samples(self.init_generator, self.target_bbox,
                                      opts['n_samples'])
            else:
                samples = gen_samples(self.sample_generator, self.target_bbox,
                                      opts['n_samples'])

                # Extra samples at the initial scale guard against drift early
                # on or after a large, suspicious scale change.
                # BUG FIX: this condition referenced an undefined local `i`
                # (copied from the function version); the frame index here is
                # self.frame.
                if self.frame < 20 or (
                        (self.init_bbox[2] * self.init_bbox[3]) > 1000 and
                        (self.target_bbox[2] * self.target_bbox[3] /
                         (self.init_bbox[2] * self.init_bbox[3]) > 2.5
                         or self.target_bbox[2] * self.target_bbox[3] /
                         (self.init_bbox[2] * self.init_bbox[3]) < 0.4)):
                    self.sample_generator.set_trans_f(opts['trans_f_expand'])
                    samples_ = np.round(
                        gen_samples(
                            self.sample_generator,
                            np.hstack([
                                self.target_bbox[0:2] +
                                self.target_bbox[2:4] / 2 -
                                self.init_bbox[2:4] / 2, self.init_bbox[2:4]
                            ]), opts['n_samples']))
                    samples = np.vstack([samples, samples_])

                # Re-detect: average the 5 highest-scoring candidate boxes.
                sample_scores = forward_samples(self.model,
                                                image,
                                                samples,
                                                out_layer='fc6')
                top_scores, top_idx = sample_scores[:, 1].topk(5)
                top_idx = top_idx.cpu().numpy()
                target_score = top_scores.mean()
                self.target_bbox = samples[top_idx].mean(axis=0)
                success = target_score > opts['success_thr']

                # Bbox regression
                if success:
                    bbreg_samples = samples[top_idx]
                    bbreg_feats = forward_samples(self.model, image,
                                                  bbreg_samples)
                    bbreg_samples = self.bbreg.predict(bbreg_feats,
                                                       bbreg_samples)
                    bbreg_bbox = bbreg_samples.mean(axis=0)

                    self.img_learn.append(image)
                    self.pos_learn.append(self.target_bbox)
                    # BUG FIX: used self.target_score (nonexistent attribute)
                    # and appended the undefined local `i` instead of the
                    # current frame index.
                    self.score_pos.append(target_score)
                    self.frame_learn.append(self.frame)
                    while len(self.img_learn) > 2 * update_lenth:
                        del self.img_learn[0]
                        del self.pos_learn[0]
                        del self.score_pos[0]
                        del self.frame_learn[0]

                else:
                    bbreg_bbox = self.target_bbox

                # Copy previous result at failure.
                # BUG FIX: the original assigned/read a local `target_bbox`
                # that is unbound on the success path; the tracked box lives
                # in self.target_bbox.
                if not success:
                    self.target_bbox = self.result[self.frame - 1]
                    bbreg_bbox = self.result_bb[self.frame - 1]

                # Save result
                self.result[self.frame] = self.target_bbox
                self.result_bb[self.frame] = bbreg_bbox

        return self.target_bbox
예제 #8
0
def _load_filtered_weights(module, checkpoint_path):
    """Load a checkpoint into ``module``, keeping only keys that also exist
    in the module's own state dict (tolerates extra/missing entries)."""
    state = module.state_dict()
    pretrained = torch.load(checkpoint_path)
    state.update({k: v for k, v in pretrained.items() if k in state})
    module.load_state_dict(state)


def run_tracking(img_list,
                 init_bbox,
                 gt=None,
                 savefig_dir='',
                 display=False,
                 siamfc_path="../models/siamfc_pretrained.pth",
                 gpu_id=0):
    """Track a target through ``img_list`` with SiamFC plus an actor network
    and a learned template-selection policy.

    Args:
        img_list: list of frame image paths, in temporal order.
        init_bbox: initial target box [x, y, w, h] for the first frame.
        gt: optional ground-truth boxes, indexable as ``gt[frame, :4]``.
            When given it bounds the number of tracked frames and is used to
            re-initialise the template pool every INTERVRAL frames; when
            None, every frame in ``img_list`` is tracked and no ground-truth
            re-initialisation takes place.
        savefig_dir: unused; kept for interface compatibility.
        display: when True, visualise the tracking with matplotlib.
        siamfc_path: path to the pretrained SiamFC weights.
        gpu_id: GPU index handed to SiamFCTracker.

    Returns:
        (result, fps): an (N, 4) array of per-frame boxes [x, y, w, h] and
        the mean tracking speed in frames per second.
    """
    rate = init_bbox[2] / init_bbox[3]  # fixed aspect ratio used by move_crop_tracking
    target_bbox = np.array(init_bbox)

    # Siamese matching network: scores stored templates against the frame.
    siam = SiameseNet(BaselineEmbeddingNet())
    weights_init(siam)
    _load_filtered_weights(siam, siamfc_path)

    # Policy that picks which stored template to track with.
    pi = T_Policy(T_N)
    _load_filtered_weights(pi, '../models/template_policy/95600_template_policy.pth')

    # Actor that outputs a small positional/scale correction for the SiamFC box.
    actor = Actor()
    _load_filtered_weights(actor, "../models/Double_agent/95600_DA_actor.pth")

    tracker = SiamFCTracker(model_path=siamfc_path, gpu_id=gpu_id)
    # Bug fix: bind `policy` unconditionally -- it used to be assigned only
    # inside the use_gpu branch, raising NameError on CPU-only runs.
    policy = pi
    if opts['use_gpu']:
        siam = siam.cuda()
        policy = policy.cuda()

    image = cv2.cvtColor(cv2.imread(img_list[0]), cv2.COLOR_BGR2RGB)
    result = np.zeros((len(img_list), 4))
    result[0] = target_bbox

    spf_total = 0
    if display:
        dpi = 80.0
        figsize = (image.shape[1] / dpi, image.shape[0] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image)

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)

        rect = plt.Rectangle(tuple(result[0, :2]),
                             result[0, 2],
                             result[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        siam_rect = plt.Rectangle(tuple(result[0, :2]),
                                  result[0, 2],
                                  result[0, 3],
                                  linewidth=3,
                                  edgecolor="#0000ff",
                                  zorder=1,
                                  fill=False)
        ax.add_patch(rect)
        ax.add_patch(siam_rect)

        plt.pause(.01)
        plt.draw()

    # Warm up the actor on the first frame; the returned flags steer whether
    # the actor's scale correction is trusted during tracking.
    deta_flag, out_flag_first = init_actor(actor, image, target_bbox)
    template = tracker.init(image, init_bbox)
    templates = [template] * T_N  # shared references, same as the original append loop

    # Robustness fix: the loop used to be range(1, len(gt)) and crashed with
    # a TypeError when gt was None despite gt defaulting to None.
    n_frames = len(img_list) if gt is None else len(gt)
    for frame in range(1, n_frames):
        tic = time.time()
        cv2_img = cv2.cvtColor(cv2.imread(img_list[frame]), cv2.COLOR_BGR2RGB)
        np_img = np.array(
            cv2.resize(cv2_img, (255, 255),
                       interpolation=cv2.INTER_AREA)).transpose(2, 0, 1)
        np_imgs = [np_img] * T_N

        # Score every stored template against the current frame and let the
        # policy choose the best one.
        with torch.no_grad():
            responses = siam(
                torch.Tensor(templates).permute(0, 3, 1, 2).float().cuda(),
                torch.Tensor(np_imgs).float().cuda())
            action = policy(responses.permute(
                1, 0, 2, 3).cuda()).cpu().detach().numpy()
        action_id = np.argmax(action)
        print(action_id)
        template = templates[action_id]

        with torch.no_grad():
            siam_box = tracker.update(cv2_img, template)
        # Convert corners [x1, y1, x2, y2] -> [x, y, w, h].
        siam_box = np.round([
            siam_box[0], siam_box[1], siam_box[2] - siam_box[0],
            siam_box[3] - siam_box[1]
        ])

        # NOTE(review): the actor crops come from `image` (frame 0), not
        # `cv2_img` (the current frame).  Behavior kept as-is, but this looks
        # suspicious -- confirm against the actor's training setup.
        img_g, img_l, out_flag = getbatch_actor(
            np.array(image),
            np.array(siam_box).reshape([1, 4]))
        with torch.no_grad():
            deta_pos = actor(img_l, img_g)
        deta_pos = deta_pos.data.clone().cpu().numpy()

        # Suppress implausibly large scale corrections; also suppress scale
        # when actor warm-up flagged trouble or the target left the crop.
        if deta_pos[:, 2] > 0.05 or deta_pos[:, 2] < -0.05:
            deta_pos[:, 2] = 0
        if deta_flag or (out_flag and not out_flag_first):
            deta_pos[:, 2] = 0

        pos_ = np.round(
            move_crop_tracking(np.array(siam_box), deta_pos,
                               (image.shape[1], image.shape[0]), rate))
        result[frame] = pos_
        spf_total += time.time() - tic

        if display:
            im.set_data(cv2_img)

            if gt is not None:
                gt_rect.set_xy(gt[frame, :2])
                gt_rect.set_width(gt[frame, 2])
                gt_rect.set_height(gt[frame, 3])

            rect.set_xy(result[frame, :2])
            rect.set_width(result[frame, 2])
            rect.set_height(result[frame, 3])

            siam_rect.set_xy(siam_box[:2])
            siam_rect.set_width(siam_box[2])
            siam_rect.set_height(siam_box[3])

            plt.pause(.01)
            plt.draw()

        # Periodically refresh the template pool from ground truth, always
        # preserving the very first template (index 0).
        if gt is not None and frame % INTERVRAL == 0:
            template = tracker.init(cv2_img, gt[frame])
            templates.append(template)
            templates.pop(1)

    # Guard against division by zero for degenerate single-frame sequences.
    fps = len(img_list) / spf_total if spf_total > 0 else 0.0
    return result, fps