def init_actor(actor, image, gt):
    """Warm-start the actor network on the first frame.

    Fits `actor` with an MSE loss to regress the offset (`cal_distance`)
    between boxes sampled around `gt` and `gt` itself.

    Args:
        actor: actor network; moved to GPU and trained in place.
        image: first frame (array-like, H x W x C; `image.shape` is read).
        gt: ground-truth box of the first frame, [x, y, w, h].

    Returns:
        (deta_flag, out_flag_first): `deta_flag` is 0 when the training loss
        dropped below 1e-4 (actor considered reliable), else 1;
        `out_flag_first` is the out-of-view flag getbatch_actor reports for
        the ground-truth crop.
    """
    batch_num = 64
    maxiter = 10
    actor = actor.cuda()
    actor.train()
    init_optimizer = torch.optim.Adam(actor.parameters(), lr=0.0001)
    loss_func = torch.nn.MSELoss()
    # Out-of-view flag for the ground-truth crop itself.
    _, _, out_flag_first = getbatch_actor(np.array(image),
                                          np.array(gt).reshape([1, 4]))
    # 640 uniform samples around gt, constrained in overlap and scale.
    actor_samples = np.round(gen_samples(
        SampleGenerator('uniform', (image.shape[1], image.shape[0]), 0.3, 1.5, None),
        gt, 640, [0.6, 1], [0.9, 1.1]))
    idx = np.random.permutation(actor_samples.shape[0])
    batch_img_g, batch_img_l, _ = getbatch_actor(np.array(image), actor_samples)
    # Regression targets: offset of each sample from gt.
    batch_distance = cal_distance(actor_samples,
                                  np.tile(gt, [actor_samples.shape[0], 1]))
    batch_distance = np.array(batch_distance).astype(np.float32)
    # Extend the permutation until it covers batch_num * maxiter draws.
    while len(idx) < batch_num * maxiter:
        idx = np.concatenate([idx, np.random.permutation(actor_samples.shape[0])])
    pointer = 0
    for iter in range(maxiter):
        next = pointer + batch_num
        cur_idx = idx[pointer:next]
        pointer = next
        feat = actor(batch_img_l[cur_idx], batch_img_g[cur_idx])
        loss = loss_func(feat, torch.FloatTensor(batch_distance[cur_idx]).cuda())
        del feat
        actor.zero_grad()
        loss.backward()
        init_optimizer.step()
        if loss.item() < 0.0001:
            deta_flag = 0
            # BUGFIX: the early exit used to return only `deta_flag` while the
            # normal exit returns a 2-tuple; callers that unpack two values
            # (e.g. `deta_flag, out_flag_first = init_actor(...)`) crashed.
            return deta_flag, out_flag_first
    deta_flag = 1
    return deta_flag, out_flag_first
def init_actor(self, image, gt):
    """Warm-start self.actor on the first frame (method variant).

    Seeds the RNGs for reproducibility, then fits the actor with an MSE
    loss to regress the offset between sampled boxes and `gt`.

    Args:
        image: first frame (array-like, H x W x C; `image.shape` is read).
        gt: ground-truth box [x, y, w, h].

    Returns:
        deta_flag: 0 when the loss dropped below 1e-4, else 1.
    """
    np.random.seed(123)
    torch.manual_seed(456)
    torch.cuda.manual_seed(789)
    batch_num = 64
    maxiter = 3
    self.actor.train()
    init_optimizer = torch.optim.Adam(self.actor.parameters(), lr=0.0001)
    loss_func = torch.nn.MSELoss()
    # NOTE(review): out_flag_first is computed but never returned or stored,
    # yet update() reads self.out_flag_first elsewhere in this file — it may
    # be meant to be saved on self. Left unchanged pending confirmation.
    _, _, out_flag_first = getbatch_actor(np.array(image),
                                          np.array(gt).reshape([1, 4]))
    # 192 gaussian samples around gt with overlap in [0.6, 1].
    actor_samples = np.round(gen_samples(
        SampleGenerator('gaussian', (image.shape[1], image.shape[0]), 0.2, 1.1, None),
        gt, 192, [0.6, 1], None))
    idx = np.random.permutation(actor_samples.shape[0])
    batch_img_g, batch_img_l, _ = getbatch_actor(np.array(image), actor_samples)
    batch_distance = cal_distance(actor_samples,
                                  np.tile(gt, [actor_samples.shape[0], 1]))
    batch_distance = np.array(batch_distance).astype(np.float32)
    # Extend the permutation until it covers batch_num * maxiter draws.
    while len(idx) < batch_num * maxiter:
        idx = np.concatenate([idx, np.random.permutation(actor_samples.shape[0])])
    pointer = 0
    for iter in range(maxiter):
        next = pointer + batch_num
        cur_idx = idx[pointer:next]
        pointer = next
        feat = self.actor(batch_img_l[cur_idx], batch_img_g[cur_idx])
        loss = loss_func(feat, torch.FloatTensor(batch_distance[cur_idx]).cuda())
        self.actor.zero_grad()
        loss.backward()
        init_optimizer.step()
        # Removed an unreachable `if False:` debug-print branch.
        if loss.item() < 0.0001:
            deta_flag = 0
            return deta_flag
    deta_flag = 1
    return deta_flag
def update(self, image):
    """Track one frame: pick a template via the policy network, run the
    SiamFC tracker with it, then iteratively refine the box with the actor.

    Args:
        image: current frame (array-like, H x W x C; also passed to
            self.tracker.update).

    Returns:
        The refined bounding box [x, y, w, h] for this frame.
    """
    # Resize the frame to the 255x255 search size and go HWC -> CHW.
    np_img = np.array(cv2.resize(image, (255, 255),
                                 interpolation=cv2.INTER_AREA)).transpose(2, 0, 1)
    # One copy of the frame per stored template.
    np_imgs = []
    for i in range(T_N):
        np_imgs.append(np_img)
    with torch.no_grad():
        # Response maps of every stored template against the current frame.
        responses = self.siam(torch.Tensor(self.templates).permute(0, 3, 1, 2).float().cuda(),
                              torch.Tensor(np_imgs).float().cuda())
        # Template-selection policy scores over the template pool.
        action = self.pi(responses.permute(1, 0, 2, 3).cuda()).cpu().detach().numpy()
    action_id = np.argmax(action)
    # print(action_id)
    # Use the best-scoring template only if it beats the original template
    # (index 0) by a 10% margin; otherwise keep the original.
    if action[0][action_id] * 0.9 > action[0][0]:
        template = self.templates[action_id]
    else:
        template = self.templates[0]
    with torch.no_grad():
        siam_box = self.tracker.update(image, template)
    # Convert [x1, y1, x2, y2] -> [x, y, w, h].
    siam_box = np.round([siam_box[0], siam_box[1],
                         siam_box[2] - siam_box[0], siam_box[3] - siam_box[1]])
    bbox = siam_box
    # Refine 5 times with the actor. Note each iteration crops from `bbox`
    # but applies the predicted offset to the original `siam_box` —
    # assumed intentional, TODO confirm.
    for i in range(5):
        img_g, img_l, out_flag = getbatch_actor(np.array(image),
                                                np.array(bbox).reshape([1, 4]))
        with torch.no_grad():
            deta_pos = self.actor(img_l, img_g)
        deta_pos = deta_pos.data.clone().cpu().numpy()
        # Suppress implausible scale changes.
        if deta_pos[:, 2] > 0.2 or deta_pos[:, 2] < -0.2:
            deta_pos[:, 2] = 0
        # Also freeze scale when the actor was flagged unreliable at init, or
        # the target is out of view now but was not on the first frame.
        if self.deta_flag or (out_flag and not self.out_flag_first):
            deta_pos[:, 2] = 0
        pos_ = np.round(move_crop_tracking(np.array(siam_box), deta_pos,
                                           (image.shape[1], image.shape[0]),
                                           self.rate))
        bbox = pos_
    result = bbox
    return result
def init_actor(actor, image, gt):
    """Warm-start the actor on the first frame (PIL-image variant).

    Unlike the other init_actor variants in this file, this one feeds the
    actor a local sample crop plus a fixed 225x225 resize of the whole
    frame as global context.

    Args:
        actor: actor network; moved to GPU and trained in place.
        image: first frame as a PIL image (`image.size`, `image.resize`).
        gt: ground-truth box [x, y, w, h].

    Returns:
        deta_flag: 0 when the loss dropped below 1e-4, else 1.
    """
    # Fixed seeds for reproducible initialization.
    np.random.seed(123)
    torch.manual_seed(456)
    torch.cuda.manual_seed(789)
    batch_num = 64
    maxiter = 80
    actor = actor.cuda()
    actor.train()
    init_optimizer = torch.optim.Adam(actor.parameters(), lr=0.0001)
    loss_func = torch.nn.MSELoss()
    # 1500 uniform samples around gt, constrained in overlap and scale.
    actor_samples = np.round(
        gen_samples(SampleGenerator('uniform', image.size, 0.3, 1.5, None),
                    gt, 1500, [0.6, 1], [0.9, 1.1]))
    idx = np.random.permutation(actor_samples.shape[0])
    # NOTE(review): getbatch_actor's return is used as a single batch here,
    # while other variants unpack three values — presumably this module's
    # helper has a different signature; confirm against its definition.
    batch_img = getbatch_actor(np.array(image), actor_samples)
    # Regression targets: offset of each sample from gt.
    batch_distance = cal_distance(actor_samples,
                                  np.tile(gt, [actor_samples.shape[0], 1]))
    batch_distance = np.array(batch_distance).astype(np.float32)
    # Extend the permutation until it covers batch_num * maxiter draws.
    while (len(idx) < batch_num * maxiter):
        idx = np.concatenate([idx, np.random.permutation(actor_samples.shape[0])])
    pointer = 0
    # Global context: the whole frame resized to 225x225.
    torch_image = loader(image.resize((225, 225), Image.ANTIALIAS)).unsqueeze(0).cuda()
    for iter in range(maxiter):
        next = pointer + batch_num
        cur_idx = idx[pointer:next]
        pointer = next
        feat = actor(batch_img[cur_idx], torch_image.repeat(batch_num, 1, 1, 1))
        loss = loss_func(feat,
                         Variable(torch.FloatTensor(batch_distance[cur_idx])).cuda())
        actor.zero_grad()
        loss.backward()
        init_optimizer.step()
        if opts['show_train']:
            print("Iter %d, Loss %.10f" % (iter, loss.item()))
        # Early exit once the actor fits the offsets well.
        if loss.item() < 0.0001:
            deta_flag = 0
            return deta_flag
    deta_flag = 1
    return deta_flag
def run_tracking(img_list,
                 init_bbox,
                 gt=None,
                 savefig_dir='',
                 display=False,
                 siamfc_path="../models/siamfc_pretrained.pth",
                 policy_path="../models/template_policy/11200_template_policy.pth",
                 gpu_id=0):
    """Run SiamFC + template-selection policy + actor refinement on a sequence.

    Args:
        img_list: list of frame file paths.
        init_bbox: first-frame box [x, y, w, h].
        gt: optional per-frame ground-truth boxes (used for display/logging).
        savefig_dir: directory for saved figures (only when savefig is on).
        display: show a live matplotlib view.
        siamfc_path: SiamFC checkpoint path.
        policy_path: NOTE(review) — accepted but unused; the policy
            checkpoint path is hard-coded below. Kept for interface
            compatibility.
        gpu_id: GPU id for the SiamFC tracker.

    Returns:
        (result, fps): per-frame boxes [x, y, w, h] and average frames/sec.
    """
    rate = init_bbox[2] / init_bbox[3]  # initial aspect ratio, kept fixed
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result[0] = target_bbox

    # Actor: load only the weights whose keys match the current model.
    actor = Actor()
    pretrained_act_dict = torch.load("../models/Double_agent/95600_DA_actor.pth")
    actor_dict = actor.state_dict()
    pretrained_act_dict = {k: v for k, v in pretrained_act_dict.items()
                           if k in actor_dict}
    actor_dict.update(pretrained_act_dict)
    actor.load_state_dict(actor_dict)

    siamfc = SiamFCTracker(model_path=siamfc_path, gpu_id=gpu_id)
    siamEmbed = SiameseNet(BaselineEmbeddingNet())

    # Template-selection policy.
    T_N = opts['T_N']
    pi = T_Policy(T_N)
    weights_init(pi)
    pretrained_pi_dict = torch.load(
        '../models/template_policy/95600_template_policy.pth')
    pi_dict = pi.state_dict()
    pretrained_pi_dict = {k: v for k, v in pretrained_pi_dict.items()
                          if k in pi_dict}
    pi_dict.update(pretrained_pi_dict)
    pi.load_state_dict(pi_dict)

    if opts['use_gpu']:
        actor = actor.cuda()
        siamEmbed = siamEmbed.cuda()
        pi = pi.cuda()

    image = cv2.cvtColor(cv2.imread(img_list[0]), cv2.COLOR_BGR2RGB)

    # First-frame initialization: actor warm-start and template pool.
    deta_flag, out_flag_first = init_actor(actor, image, target_bbox)
    template = siamfc.init(image, target_bbox)
    templates = []
    for _ in range(T_N):
        templates.append(template)

    spf_total = 0

    # Display
    savefig = 0
    if display or savefig:
        dpi = 80.0
        figsize = (image.shape[1] / dpi, image.shape[0] / dpi)
        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image)
        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]), gt[0, 2], gt[0, 3],
                                    linewidth=3, edgecolor="#00ff00",
                                    zorder=1, fill=False)
            ax.add_patch(gt_rect)
        rect = plt.Rectangle(tuple(result[0, :2]), result[0, 2], result[0, 3],
                             linewidth=3, edgecolor="#ff0000",
                             zorder=1, fill=False)
        ax.add_patch(rect)
        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    # Blur measure of the first-frame crop; decides whether later frames
    # get their own blur check.
    imageVar_first = cv2.Laplacian(
        crop_image_blur(np.array(image), target_bbox), cv2.CV_64F).var()

    for i in range(1, len(img_list)):
        tic = time.time()
        # Load image
        image = cv2.cvtColor(cv2.imread(img_list[i]), cv2.COLOR_BGR2RGB)
        np_img = np.array(
            cv2.resize(image, (255, 255),
                       interpolation=cv2.INTER_AREA)).transpose(2, 0, 1)
        np_imgs = []
        # BUGFIX: this inner loop previously reused `i` as its variable,
        # clobbering the frame index so result[i], gt[i] and the i % 10
        # template refresh below all used T_N - 1 instead of the frame number.
        for _ in range(T_N):
            np_imgs.append(np_img)

        if imageVar_first > 200:
            imageVar = cv2.Laplacian(
                crop_image_blur(np.array(image), target_bbox), cv2.CV_64F).var()
        else:
            imageVar = 200

        # Response maps of the template pool against this frame.
        if opts['use_gpu']:
            responses = siamEmbed(
                torch.Tensor(templates).permute(0, 3, 1, 2).float().cuda(),
                torch.Tensor(np_imgs).float().cuda())
        else:
            responses = siamEmbed(
                torch.Tensor(templates).permute(0, 3, 1, 2).float(),
                torch.Tensor(np_imgs).float())

        if opts['use_gpu']:
            pi_input = torch.Tensor(responses.cpu()).permute(1, 0, 2, 3).cuda()
            action = pi(pi_input).cpu().detach().numpy()
        else:
            pi_input = torch.Tensor(responses).permute(1, 0, 2, 3)
            action = pi(pi_input).numpy()
        action_id = np.argmax(action)
        template = templates[action_id]

        # NOTE(review): the tracker is run with templates[0], not the policy's
        # chosen `template` — looks inconsistent with the selection above;
        # left unchanged pending confirmation.
        siam_box = siamfc.update(image, templates[0])
        # [x1, y1, x2, y2] -> [x, y, w, h]
        siam_box = np.round([siam_box[0], siam_box[1],
                             siam_box[2] - siam_box[0],
                             siam_box[3] - siam_box[1]])
        print(siam_box)

        # Estimate target bbox: actor refinement of the SiamFC box.
        img_g, img_l, out_flag = getbatch_actor(
            np.array(image), np.array(siam_box).reshape([1, 4]))
        deta_pos = actor(img_l, img_g)
        deta_pos = deta_pos.data.clone().cpu().numpy()
        # Suppress implausible scale changes, and freeze scale when the actor
        # was unreliable at init or the target newly left the view.
        if deta_pos[:, 2] > 0.05 or deta_pos[:, 2] < -0.05:
            deta_pos[:, 2] = 0
        if deta_flag or (out_flag and not out_flag_first):
            deta_pos[:, 2] = 0
        pos_ = np.round(move_crop_tracking(np.array(siam_box), deta_pos,
                                           (image.shape[1], image.shape[0]),
                                           rate))

        if imageVar > 100:  # only accept the new box on sharp frames
            target_bbox = pos_
        result[i] = target_bbox

        # Refresh the template pool every 10 frames (keep slot 0, drop slot 1).
        if i % 10 == 0:
            template = siamfc.init(image, pos_)
            templates.append(template)
            templates.pop(1)

        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)
            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])
            rect.set_xy(result[i, :2])
            rect.set_width(result[i, 2])
            rect.set_height(result[i, 3])
            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '%04d.jpg' % (i)),
                            dpi=dpi)
        if display:
            if gt is None:
                print("Frame %d/%d, Time %.3f" % (i, len(img_list), spf))
            else:
                if opts['show_train']:
                    print("Frame %d/%d, Overlap %.3f, Time %.3f, box (%d,%d,%d,%d), var %d" %
                          (i, len(img_list), overlap_ratio(gt[i], result[i])[0],
                           spf, target_bbox[0], target_bbox[1],
                           target_bbox[2], target_bbox[3], imageVar))

    fps = len(img_list) / spf_total
    return result, fps
def run_tracking(img_list, init_bbox, gt=None, savefig_dir='', display=False):
    """MDNet-based tracking with actor proposals and online model updates.

    Per frame: the actor proposes a refined box; if the critic (fc6 output)
    scores it positively on a sharp frame it is accepted, otherwise the
    tracker falls back to candidate sampling, online fine-tuning of the
    critic, and bounding-box regression.

    Args:
        img_list: list of frame file paths.
        init_bbox: first-frame box [x, y, w, h].
        gt: optional per-frame ground-truth boxes (display/logging only).
        savefig_dir: directory for saved figures (only when savefig is on).
        display: show a live matplotlib view.

    Returns:
        (result, result_bb, fps): raw boxes, bbox-regressed boxes, and
        average frames/sec.
    """
    # Fixed seeds for reproducible runs.
    np.random.seed(123)
    torch.manual_seed(456)
    torch.cuda.manual_seed(789)
    rate = init_bbox[2] / init_bbox[3]  # initial aspect ratio, kept fixed
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox
    success = 1

    # Init model (checkpoint loading is handled elsewhere for these two).
    actor = Actor()
    model = MDNet()
    if opts['use_gpu']:
        model = model.cuda()
        actor = actor.cuda()
    model.set_learnable_params(opts['fc_layers'])

    criterion = BinaryLoss()
    init_optimizer = set_optimizer(model, opts['lr_init'])
    update_optimizer = set_optimizer(model, opts['lr_update'])

    image = Image.open(img_list[0]).convert('RGB')

    # Train the bounding-box regressor on first-frame samples.
    bbreg_examples = gen_samples(
        SampleGenerator('uniform', image.size, 0.3, 1.5, 1.1), target_bbox,
        opts['n_bbreg'], opts['overlap_bbreg'], opts['scale_bbreg'])
    bbreg_feats = forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)

    # Initial positive/negative training sets for the critic.
    pos_examples = gen_samples(
        SampleGenerator('gaussian', image.size, 0.1, 1.2), target_bbox,
        opts['n_pos_init'], opts['overlap_pos_init'])
    neg_examples = np.concatenate([
        gen_samples(SampleGenerator('uniform', image.size, 1, 2, 1.1),
                    target_bbox, opts['n_neg_init'] // 2,
                    opts['overlap_neg_init']),
        gen_samples(SampleGenerator('whole', image.size, 0, 1.2, 1.1),
                    target_bbox, opts['n_neg_init'] // 2,
                    opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)
    pos_feats = forward_samples(model, image, pos_examples)
    neg_feats = forward_samples(model, image, neg_examples)
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])

    # Actor warm-start on the first frame.
    deta_flag, out_flag_first = init_actor(actor, image, target_bbox)

    # Sample generators for the tracking loop and online updates.
    init_generator = SampleGenerator('gaussian', image.size, opts['trans_f'],
                                     1, valid=False)
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans_f'],
                                       opts['scale_f'], valid=False)
    pos_generator = SampleGenerator('gaussian', image.size, 0.1, 1.2)
    neg_generator = SampleGenerator('uniform', image.size, 1.5, 1.2)

    pos_feats_all = [pos_feats[:opts['n_pos_update']]]
    neg_feats_all = [neg_feats[:opts['n_neg_update']]]
    data_frame = [0]

    pos_score = forward_samples(model, image,
                                np.array(init_bbox).reshape([1, 4]),
                                out_layer='fc6')
    # Rolling buffers of recent frames used for online updates.
    img_learn = [image]
    pos_learn = [init_bbox]
    score_pos = [pos_score.cpu().numpy()[0][1]]
    frame_learn = [0]
    pf_frame = []
    update_lenth = 10
    spf_total = 0

    # Display
    savefig = 0
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)
        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image)
        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]), gt[0, 2], gt[0, 3],
                                    linewidth=3, edgecolor="#00ff00",
                                    zorder=1, fill=False)
            ax.add_patch(gt_rect)
        rect = plt.Rectangle(tuple(result_bb[0, :2]), result_bb[0, 2],
                             result_bb[0, 3], linewidth=3,
                             edgecolor="#ff0000", zorder=1, fill=False)
        ax.add_patch(rect)
        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    detetion = 0
    imageVar_first = cv2.Laplacian(
        crop_image_blur(np.array(image), target_bbox), cv2.CV_64F).var()

    for i in range(1, len(img_list)):
        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        if imageVar_first > 200:
            imageVar = cv2.Laplacian(
                crop_image_blur(np.array(image), target_bbox),
                cv2.CV_64F).var()
        else:
            imageVar = 200

        # Estimate target bbox via the actor.
        img_g, img_l, out_flag = getbatch_actor(
            np.array(image), np.array(target_bbox).reshape([1, 4]))
        deta_pos = actor(img_l, img_g)
        deta_pos = deta_pos.data.clone().cpu().numpy()
        # Freeze the scale component when the change is implausible, the
        # actor was unreliable at init, or the previous frame failed.
        if deta_pos[:, 2] > 0.05 or deta_pos[:, 2] < -0.05:
            deta_pos[:, 2] = 0
        if deta_flag or (out_flag and not out_flag_first):
            deta_pos[:, 2] = 0
        if len(pf_frame) and i == (pf_frame[-1] + 1):
            deta_pos[:, 2] = 0
        pos_ = np.round(move_crop_tracking(target_bbox, deta_pos,
                                           (image.size[1], image.size[0]),
                                           rate))

        # Score the proposal with the critic.
        r = forward_samples(model, image, np.array(pos_).reshape([1, 4]),
                            out_layer='fc6')
        r = r.cpu().numpy()

        if r[0][1] > 0 and imageVar > 100:
            # Proposal accepted: record it and push to the learn buffers.
            target_bbox = pos_
            target_score = r[0][1]
            bbreg_bbox = pos_
            success = 1
            fin_score = r[0][1]
            img_learn.append(image)
            pos_learn.append(target_bbox)
            score_pos.append(fin_score)
            frame_learn.append(i)
            while len(img_learn) > update_lenth * 2:
                del img_learn[0]
                del pos_learn[0]
                del score_pos[0]
                del frame_learn[0]
            result[i] = target_bbox
            result_bb[i] = bbreg_bbox
        else:
            # Proposal rejected: count it and fall back to sampling.
            detetion += 1
            if len(pf_frame) == 0:
                pf_frame = [i]
            else:
                pf_frame.append(i)

            # Online update of the critic from recently collected frames.
            if (len(frame_learn) == update_lenth * 2
                    and data_frame[-1] not in frame_learn) or data_frame[-1] == 0:
                for num in range(max(0, len(img_learn) - update_lenth),
                                 len(img_learn)):
                    if frame_learn[num] not in data_frame:
                        gt_ = pos_learn[num]
                        image_ = img_learn[num]
                        pos_examples = np.round(
                            gen_samples(pos_generator, gt_,
                                        opts['n_pos_update'],
                                        opts['overlap_pos_update']))
                        neg_examples = np.round(
                            gen_samples(neg_generator, gt_,
                                        opts['n_neg_update'],
                                        opts['overlap_neg_update']))
                        pos_feats_ = forward_samples(model, image_,
                                                     pos_examples)
                        neg_feats_ = forward_samples(model, image_,
                                                     neg_examples)
                        pos_feats_all.append(pos_feats_)
                        neg_feats_all.append(neg_feats_)
                        data_frame.append(frame_learn[num])
                        if len(pos_feats_all) > 10:
                            del pos_feats_all[0]
                            del neg_feats_all[0]
                            del data_frame[0]
                    else:
                        pos_feats_ = pos_feats_all[data_frame.index(
                            frame_learn[num])]
                        neg_feats_ = neg_feats_all[data_frame.index(
                            frame_learn[num])]
                    if num == max(0, len(img_learn) - update_lenth):
                        pos_feats = pos_feats_
                        neg_feats = neg_feats_
                    else:
                        pos_feats = torch.cat([pos_feats, pos_feats_], 0)
                        neg_feats = torch.cat([neg_feats, neg_feats_], 0)
                train(model, criterion, update_optimizer, pos_feats,
                      neg_feats, opts['maxiter_update'])

            # Candidate sampling; widen the search after a failure.
            if success:
                sample_generator.set_trans_f(opts['trans_f'])
            else:
                sample_generator.set_trans_f(opts['trans_f_expand'])
            if imageVar < 100:
                samples = gen_samples(init_generator, target_bbox,
                                      opts['n_samples'])
            else:
                samples = gen_samples(sample_generator, target_bbox,
                                      opts['n_samples'])

            # Extra samples at the initial scale early on, or when the box
            # area drifted far from the initial one.
            if i < 20 or ((init_bbox[2] * init_bbox[3]) > 1000 and
                          (target_bbox[2] * target_bbox[3] /
                           (init_bbox[2] * init_bbox[3]) > 2.5 or
                           target_bbox[2] * target_bbox[3] /
                           (init_bbox[2] * init_bbox[3]) < 0.4)):
                sample_generator.set_trans_f(opts['trans_f_expand'])
                samples_ = np.round(
                    gen_samples(
                        sample_generator,
                        np.hstack([
                            target_bbox[0:2] + target_bbox[2:4] / 2 -
                            init_bbox[2:4] / 2, init_bbox[2:4]
                        ]), opts['n_samples']))
                samples = np.vstack([samples, samples_])

            sample_scores = forward_samples(model, image, samples,
                                            out_layer='fc6')
            top_scores, top_idx = sample_scores[:, 1].topk(5)
            top_idx = top_idx.cpu().numpy()
            target_score = top_scores.mean()
            target_bbox = samples[top_idx].mean(axis=0)
            success = target_score > opts['success_thr']

            # Bbox regression
            if success:
                bbreg_samples = samples[top_idx]
                bbreg_feats = forward_samples(model, image, bbreg_samples)
                bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
                bbreg_bbox = bbreg_samples.mean(axis=0)
                img_learn.append(image)
                pos_learn.append(target_bbox)
                score_pos.append(target_score)
                frame_learn.append(i)
                while len(img_learn) > 2 * update_lenth:
                    del img_learn[0]
                    del pos_learn[0]
                    del score_pos[0]
                    del frame_learn[0]
            else:
                bbreg_bbox = target_bbox

            # Copy previous result at failure
            if not success:
                target_bbox = result[i - 1]
                bbreg_bbox = result_bb[i - 1]

            # Save result
            result[i] = target_bbox
            result_bb[i] = bbreg_bbox

        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)
            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])
            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])
            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '%04d.jpg' % (i)),
                            dpi=dpi)
        if display:
            # BUGFIX: these were Python-2 print statements (print "…"),
            # a SyntaxError under Python 3; converted to print().
            if gt is None:
                print("Frame %d/%d, Score %.3f, Time %.3f" %
                      (i, len(img_list), target_score, spf))
            else:
                if opts['show_train']:
                    print("Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f, box (%d,%d,%d,%d), var %d" %
                          (i, len(img_list),
                           overlap_ratio(gt[i], result_bb[i])[0], target_score,
                           spf, target_bbox[0], target_bbox[1],
                           target_bbox[2], target_bbox[3], imageVar))

    fps = len(img_list) / spf_total
    return result, result_bb, fps
def update(self, image):
    """Track one frame with the MDNet + actor pipeline (stateful variant).

    Mirrors the standalone run_tracking loop body: the actor proposes a
    refined box; it is accepted if the critic (fc6) scores it positively on
    a sharp frame, otherwise candidate sampling, online critic updates and
    bbox regression take over.

    Args:
        image: current frame as a PIL image.

    Returns:
        self.target_bbox: the box [x, y, w, h] chosen for this frame.
    """
    self.frame += 1
    update_lenth = 10
    # BUGFIX: `success` was read on the rejection path before ever being
    # assigned in this method (it was a loop-carried variable in the
    # standalone run_tracking) -> NameError. Initialize it here.
    # NOTE(review): the standalone version persists success across frames;
    # if that behavior is wanted, store it on self instead.
    success = 1
    np_image = np.array(image)

    # Blur check: on blurry first frames, skip the per-frame measure.
    if self.imageVar_first > 200:
        imageVar = cv2.Laplacian(crop_image_blur(np_image, self.target_bbox),
                                 cv2.CV_64F).var()
    else:
        imageVar = 200

    # NOTE(review): getbatch_actor is unpacked to a single value here (as in
    # the PIL init_actor variant) — confirm this matches the helper's
    # signature used by this class.
    img_l = getbatch_actor(np_image, self.target_bbox.reshape([1, 4]))
    torch_image = loader(image.resize(
        (225, 225), Image.ANTIALIAS)).unsqueeze(0).cuda()
    deta_pos = self.actor(img_l, torch_image)
    deta_pos = deta_pos.data.clone().cpu().numpy()
    # Freeze the scale component when the actor is unreliable, the change is
    # implausible, or the previous frame already failed.
    if self.deta_flag:
        deta_pos[:, 2] = 0
    if deta_pos[:, 2] > 0.05 or deta_pos[:, 2] < -0.05:
        deta_pos[:, 2] = 0
    if len(self.pf_frame) and self.frame == (self.pf_frame[-1] + 1):
        deta_pos[:, 2] = 0

    pos_ = np.round(move_crop(self.target_bbox, deta_pos,
                              (image.size[1], image.size[0]), self.rate))
    r = forward_samples(self.model, image, np.array(pos_).reshape([1, 4]),
                        out_layer='fc6')
    r = r.cpu().numpy()

    if r[0][1] > 0 and imageVar > 100:
        # Actor proposal accepted: record it and push to the learn buffers.
        self.target_bbox = pos_
        target_score = r[0][1]
        bbreg_bbox = pos_
        success = 1
        fin_score = r[0][1]
        self.img_learn.append(image)
        self.pos_learn.append(self.target_bbox)
        self.score_pos.append(fin_score)
        self.frame_learn.append(self.frame)
        while len(self.img_learn) > update_lenth * 2:
            del self.img_learn[0]
            del self.pos_learn[0]
            del self.score_pos[0]
            del self.frame_learn[0]
        self.result[self.frame] = self.target_bbox
        self.result_bb[self.frame] = bbreg_bbox
    else:
        # Actor proposal rejected: fall back to candidate sampling.
        self.detetion += 1
        if len(self.pf_frame) == 0:
            self.pf_frame = [self.frame]
        else:
            self.pf_frame.append(self.frame)

        # Online update of the critic from recently collected frames.
        if (len(self.frame_learn) == update_lenth * 2
                and self.data_frame[-1] not in self.frame_learn) \
                or self.data_frame[-1] == 0:
            for num in range(max(0, len(self.img_learn) - update_lenth),
                             len(self.img_learn)):
                if self.frame_learn[num] not in self.data_frame:
                    gt_ = self.pos_learn[num]
                    image_ = self.img_learn[num]
                    pos_examples = np.round(
                        gen_samples(self.pos_generator, gt_,
                                    opts['n_pos_update'],
                                    opts['overlap_pos_update']))
                    neg_examples = np.round(
                        gen_samples(self.neg_generator, gt_,
                                    opts['n_neg_update'],
                                    opts['overlap_neg_update']))
                    pos_feats_ = forward_samples(self.model, image_,
                                                 pos_examples)
                    neg_feats_ = forward_samples(self.model, image_,
                                                 neg_examples)
                    self.pos_feats_all.append(pos_feats_)
                    self.neg_feats_all.append(neg_feats_)
                    self.data_frame.append(self.frame_learn[num])
                    if len(self.pos_feats_all) > 10:
                        del self.pos_feats_all[0]
                        del self.neg_feats_all[0]
                        del self.data_frame[0]
                else:
                    pos_feats_ = self.pos_feats_all[self.data_frame.index(
                        self.frame_learn[num])]
                    neg_feats_ = self.neg_feats_all[self.data_frame.index(
                        self.frame_learn[num])]
                if num == max(0, len(self.img_learn) - update_lenth):
                    pos_feats = pos_feats_
                    neg_feats = neg_feats_
                else:
                    pos_feats = torch.cat([pos_feats, pos_feats_], 0)
                    neg_feats = torch.cat([neg_feats, neg_feats_], 0)
            train(self.model, self.criterion, self.update_optimizer,
                  pos_feats, neg_feats, opts['maxiter_update'])

        # Candidate sampling; widen the search after a failure.
        if success:
            self.sample_generator.set_trans_f(opts['trans_f'])
        else:
            self.sample_generator.set_trans_f(opts['trans_f_expand'])
        if imageVar < 100:
            samples = gen_samples(self.init_generator, self.target_bbox,
                                  opts['n_samples'])
        else:
            samples = gen_samples(self.sample_generator, self.target_bbox,
                                  opts['n_samples'])

        # BUGFIX: this condition and the appends below used the undefined
        # loop variable `i` (copied from the standalone run_tracking);
        # use self.frame instead.
        if self.frame < 20 or ((self.init_bbox[2] * self.init_bbox[3]) > 1000
                               and (self.target_bbox[2] * self.target_bbox[3] /
                                    (self.init_bbox[2] * self.init_bbox[3]) > 2.5
                                    or self.target_bbox[2] * self.target_bbox[3] /
                                    (self.init_bbox[2] * self.init_bbox[3]) < 0.4)):
            self.sample_generator.set_trans_f(opts['trans_f_expand'])
            samples_ = np.round(
                gen_samples(
                    self.sample_generator,
                    np.hstack([
                        self.target_bbox[0:2] + self.target_bbox[2:4] / 2 -
                        self.init_bbox[2:4] / 2, self.init_bbox[2:4]
                    ]), opts['n_samples']))
            samples = np.vstack([samples, samples_])

        sample_scores = forward_samples(self.model, image, samples,
                                        out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu().numpy()
        target_score = top_scores.mean()
        self.target_bbox = samples[top_idx].mean(axis=0)
        success = target_score > opts['success_thr']

        # Bbox regression
        if success:
            bbreg_samples = samples[top_idx]
            bbreg_feats = forward_samples(self.model, image, bbreg_samples)
            bbreg_samples = self.bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
            self.img_learn.append(image)
            self.pos_learn.append(self.target_bbox)
            # BUGFIX: was self.target_score, which is never assigned
            # anywhere visible -> AttributeError; use the local score.
            self.score_pos.append(target_score)
            self.frame_learn.append(self.frame)
            while len(self.img_learn) > 2 * update_lenth:
                del self.img_learn[0]
                del self.pos_learn[0]
                del self.score_pos[0]
                del self.frame_learn[0]
        else:
            bbreg_bbox = self.target_bbox

        # Copy previous result at failure
        if not success:
            # BUGFIX: a bare local `target_bbox` was written here but the
            # save below read it unconditionally -> NameError on the
            # success path; write through self.target_bbox instead.
            self.target_bbox = self.result[self.frame - 1]
            bbreg_bbox = self.result_bb[self.frame - 1]

        # Save result
        self.result[self.frame] = self.target_bbox
        self.result_bb[self.frame] = bbreg_bbox
    return self.target_bbox
def run_tracking(img_list,
                 init_bbox,
                 gt=None,
                 savefig_dir='',
                 display=False,
                 siamfc_path="../models/siamfc_pretrained.pth",
                 gpu_id=0):
    """Run SiamFC + template policy + actor refinement (embedding-net variant).

    Args:
        img_list: list of frame file paths.
        init_bbox: first-frame box [x, y, w, h].
        gt: optional per-frame ground-truth boxes. NOTE(review): the periodic
            template refresh below reads gt, so gt is effectively required
            when the sequence is longer than INTERVRAL frames.
        savefig_dir: unused here; kept for interface compatibility.
        display: show a live matplotlib view.
        siamfc_path: checkpoint used for both the embedding net and tracker.
        gpu_id: GPU id for the SiamFC tracker.

    Returns:
        (result, fps): per-frame boxes [x, y, w, h] and average frames/sec.
    """
    rate = init_bbox[2] / init_bbox[3]  # initial aspect ratio, kept fixed
    target_bbox = np.array(init_bbox)

    # Siamese embedding net, loaded from the SiamFC checkpoint.
    siam = SiameseNet(BaselineEmbeddingNet())
    weights_init(siam)
    pretrained_siam = torch.load(siamfc_path)
    siam_dict = siam.state_dict()
    pretrained_siam = {k: v for k, v in pretrained_siam.items()
                       if k in siam_dict}
    siam_dict.update(pretrained_siam)
    siam.load_state_dict(siam_dict)

    # Template-selection policy.
    pi = T_Policy(T_N)
    pretrained_pi_dict = torch.load(
        '../models/template_policy/95600_template_policy.pth')
    pi_dict = pi.state_dict()
    pretrained_pi_dict = {k: v for k, v in pretrained_pi_dict.items()
                          if k in pi_dict}
    pi_dict.update(pretrained_pi_dict)
    pi.load_state_dict(pi_dict)

    # Actor, loaded from the double-agent checkpoint.
    actor = Actor()
    pretrained_act_dict = torch.load(
        "../models/Double_agent/95600_DA_actor.pth")
    actor_dict = actor.state_dict()
    pretrained_act_dict = {k: v for k, v in pretrained_act_dict.items()
                           if k in actor_dict}
    actor_dict.update(pretrained_act_dict)
    actor.load_state_dict(actor_dict)

    tracker = SiamFCTracker(model_path=siamfc_path, gpu_id=gpu_id)

    # BUGFIX: `policy` used to be assigned only inside the use_gpu branch,
    # raising NameError on CPU-only runs.
    policy = pi
    if opts['use_gpu']:
        siam = siam.cuda()
        policy = pi.cuda()

    image = cv2.cvtColor(cv2.imread(img_list[0]), cv2.COLOR_BGR2RGB)
    result = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    spf_total = 0

    if display:
        dpi = 80.0
        figsize = (image.shape[1] / dpi, image.shape[0] / dpi)
        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image)
        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]), gt[0, 2], gt[0, 3],
                                    linewidth=3, edgecolor="#00ff00",
                                    zorder=1, fill=False)
            ax.add_patch(gt_rect)
        rect = plt.Rectangle(tuple(result[0, :2]), result[0, 2], result[0, 3],
                             linewidth=3, edgecolor="#ff0000",
                             zorder=1, fill=False)
        siam_rect = plt.Rectangle(tuple(result[0, :2]), result[0, 2],
                                  result[0, 3], linewidth=3,
                                  edgecolor="#0000ff", zorder=1, fill=False)
        ax.add_patch(rect)
        ax.add_patch(siam_rect)
        if display:
            plt.pause(.01)
            plt.draw()

    # First-frame initialization: actor warm-start and template pool.
    deta_flag, out_flag_first = init_actor(actor, image, target_bbox)
    template = tracker.init(image, init_bbox)
    templates = []
    for _ in range(T_N):
        templates.append(template)

    # BUGFIX: iterate over the image list; the previous range(1, len(gt))
    # crashed immediately when gt is None (its default).
    for frame in range(1, len(img_list)):
        tic = time.time()
        cv2_img = cv2.cvtColor(cv2.imread(img_list[frame]),
                               cv2.COLOR_BGR2RGB)
        np_img = np.array(
            cv2.resize(cv2_img, (255, 255),
                       interpolation=cv2.INTER_AREA)).transpose(2, 0, 1)
        np_imgs = []
        for _ in range(T_N):
            np_imgs.append(np_img)

        # Policy picks the template whose response map scores highest.
        with torch.no_grad():
            responses = siam(
                torch.Tensor(templates).permute(0, 3, 1, 2).float().cuda(),
                torch.Tensor(np_imgs).float().cuda())
            action = policy(responses.permute(
                1, 0, 2, 3).cuda()).cpu().detach().numpy()
        action_id = np.argmax(action)
        print(action_id)
        template = templates[action_id]

        with torch.no_grad():
            siam_box = tracker.update(cv2_img, template)
        # [x1, y1, x2, y2] -> [x, y, w, h]
        siam_box = np.round([siam_box[0], siam_box[1],
                             siam_box[2] - siam_box[0],
                             siam_box[3] - siam_box[1]])

        # BUGFIX: refine on the current frame; this previously used `image`
        # (the first frame) both for the actor crop and the frame size.
        img_g, img_l, out_flag = getbatch_actor(
            np.array(cv2_img), np.array(siam_box).reshape([1, 4]))
        with torch.no_grad():
            deta_pos = actor(img_l, img_g)
        deta_pos = deta_pos.data.clone().cpu().numpy()
        # Suppress implausible scale changes, and freeze scale when the actor
        # was unreliable at init or the target newly left the view.
        if deta_pos[:, 2] > 0.05 or deta_pos[:, 2] < -0.05:
            deta_pos[:, 2] = 0
        if deta_flag or (out_flag and not out_flag_first):
            deta_pos[:, 2] = 0
        pos_ = np.round(move_crop_tracking(np.array(siam_box), deta_pos,
                                           (cv2_img.shape[1],
                                            cv2_img.shape[0]), rate))
        result[frame] = pos_

        spf = time.time() - tic
        spf_total += spf

        if display:
            im.set_data(cv2_img)
            if gt is not None:
                gt_rect.set_xy(gt[frame, :2])
                gt_rect.set_width(gt[frame, 2])
                gt_rect.set_height(gt[frame, 3])
            rect.set_xy(result[frame, :2])
            rect.set_width(result[frame, 2])
            rect.set_height(result[frame, 3])
            siam_rect.set_xy(siam_box[:2])
            siam_rect.set_width(siam_box[2])
            siam_rect.set_height(siam_box[3])
            if display:
                plt.pause(.01)
                plt.draw()

        if frame % INTERVRAL == 0:
            # NOTE(review): refreshing the template from gt[frame] leaks
            # ground truth into tracking and requires gt to be provided;
            # the commented alternative used pos_/siam_box. Confirm intent.
            template = tracker.init(cv2_img, gt[frame])
            # template = tracker.init(cv2_img, pos_* 0.5+ siam_box*0.5)
            templates.append(template)
            templates.pop(1)

    fps = len(img_list) / spf_total
    return result, fps