def __init__(self, img_list, gt, opts):
    """Cache the image list, ground truth and batching options, and build
    per-sequence positive/negative sample generators sized to the first frame.

    Args:
        img_list: sequence of image file paths for one video.
        gt: ground-truth bounding boxes for the sequence.
        opts: option dict with batching, cropping and sampling parameters.
    """
    self.img_list = np.asarray(img_list)
    self.gt = gt

    # Required batching / sampling options (KeyError if missing).
    self.batch_frames = opts['batch_frames']
    self.batch_pos = opts['batch_pos']
    self.batch_neg = opts['batch_neg']
    self.overlap_pos = opts['overlap_pos']
    self.overlap_neg = opts['overlap_neg']
    self.crop_size = opts['img_size']
    self.padding = opts['padding']

    # Optional augmentation switches with conservative defaults.
    self.flip = opts.get('flip', False)
    self.rotate = opts.get('rotate', 0)
    self.blur = opts.get('blur', 0)

    # Shuffled traversal order over frames; pointer tracks the next batch.
    self.index = np.random.permutation(len(self.img_list))
    self.pointer = 0

    # The first frame's size parameterizes both sample generators.
    first_frame = Image.open(self.img_list[0]).convert('RGB')
    frame_size = first_frame.size
    self.pos_generator = SampleGenerator('uniform', frame_size,
                                         opts['trans_pos'],
                                         opts['scale_pos'])
    self.neg_generator = SampleGenerator('uniform', frame_size,
                                         opts['trans_neg'],
                                         opts['scale_neg'])
def init_actor(actor, image, gt):
    """Pre-train the actor network on the first frame of a sequence.

    Draws candidate boxes around the ground-truth box, then regresses the
    actor's output toward the (sample -> gt) displacement with an MSE loss.
    Stops early once the loss drops below 1e-4.

    Args:
        actor: actor network; moved to GPU and trained in place.
        image: first frame as a PIL image.
        gt: ground-truth bounding box of the target.

    Returns:
        int: 0 if training converged early (loss < 1e-4), 1 otherwise.
    """
    # Fixed seeds so first-frame initialization is reproducible.
    np.random.seed(123)
    torch.manual_seed(456)
    torch.cuda.manual_seed(789)
    batch_num = 64
    maxiter = 80
    actor = actor.cuda()
    actor.train()
    init_optimizer = torch.optim.Adam(actor.parameters(), lr=0.0001)
    loss_func = torch.nn.MSELoss()
    # Candidate boxes around gt, plus their precomputed regression targets.
    actor_samples = np.round(
        gen_samples(SampleGenerator('uniform', image.size, 0.3, 1.5, None),
                    gt, 1500, [0.6, 1], [0.9, 1.1]))
    idx = np.random.permutation(actor_samples.shape[0])
    batch_img = getbatch_actor(np.array(image), actor_samples)
    batch_distance = cal_distance(actor_samples,
                                  np.tile(gt, [actor_samples.shape[0], 1]))
    batch_distance = np.array(batch_distance).astype(np.float32)
    # Extend the shuffled index list until it covers maxiter full batches.
    while len(idx) < batch_num * maxiter:
        idx = np.concatenate(
            [idx, np.random.permutation(actor_samples.shape[0])])
    pointer = 0
    # Image.LANCZOS is the modern name for the removed Image.ANTIALIAS alias
    # (same resampling filter; ANTIALIAS was dropped in Pillow 10).
    torch_image = loader(image.resize(
        (225, 225), Image.LANCZOS)).unsqueeze(0).cuda()
    # 'step'/'end' renamed from the original 'iter'/'next', which shadowed
    # Python builtins.
    for step in range(maxiter):
        end = pointer + batch_num
        cur_idx = idx[pointer:end]
        pointer = end
        feat = actor(batch_img[cur_idx],
                     torch_image.repeat(batch_num, 1, 1, 1))
        loss = loss_func(
            feat,
            Variable(torch.FloatTensor(batch_distance[cur_idx])).cuda())
        actor.zero_grad()
        loss.backward()
        init_optimizer.step()
        if opts['show_train']:
            print("Iter %d, Loss %.10f" % (step, loss.item()))
        if loss.item() < 0.0001:
            # Converged early.
            return 0
    return 1
# --- First-frame tracker initialization (script-level fragment) ---
# Relies on globals defined elsewhere: gt_rect, imagefile, opts.
target_bbox = np.array(gt_rect)
model = MDNet(opts['model_path'])
if opts['use_gpu']:
    model = model.cuda()

# Init criterion and optimizer: only the layers named in 'ft_layers' are
# made learnable; separate optimizers for initial and online fine-tuning.
criterion = BCELoss()
model.set_learnable_params(opts['ft_layers'])
init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])

# Load first image
image = Image.open(imagefile).convert('RGB')

# Draw pos/neg samples around the initial box. Negatives are half from a
# uniform generator near the target and half drawn over the whole frame.
pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                               opts['scale_pos'])(target_bbox,
                                                  opts['n_pos_init'],
                                                  opts['overlap_pos_init'])
neg_examples = np.concatenate([
    SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                    opts['scale_neg_init'])(target_bbox,
                                            int(opts['n_neg_init'] * 0.5),
                                            opts['overlap_neg_init']),
    SampleGenerator('whole', image.size)(target_bbox,
                                         int(opts['n_neg_init'] * 0.5),
                                         opts['overlap_neg_init'])
])
neg_examples = np.random.permutation(neg_examples)

# Extract pos/neg features
pos_feats = forward_samples(model, image, pos_examples)
def run_mdnet(img_list, init_bbox, gt=None, savefig_dir='', display=False):
    """Run the MDNet tracker over a sequence of frames.

    Args:
        img_list: list of frame image paths.
        init_bbox: initial target box (x, y, w, h) on the first frame.
        gt: optional ground-truth boxes per frame, for IoU reporting.
        savefig_dir: directory for per-frame visualization images ('' = off).
        display: whether to show a live matplotlib window.

    Returns:
        (result, result_bb, fps): raw tracked boxes, regressed boxes, and
        average frames-per-second over the sequence.
    """
    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox
    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1

    # Init model
    model = MDNet(opts['model_path'])
    if opts['use_gpu']:
        model = model.cuda()

    # Init criterion and optimizer
    criterion = BCELoss()
    model.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'],
                                     opts['lr_mult'])

    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')

    # Draw pos/neg samples around the initial box.
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])
    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features
    pos_feats = forward_samples(model, image, pos_examples)
    neg_feats = forward_samples(model, image, neg_examples)

    # Initial training on the first frame.
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()

    # Train bbox regressor on first-frame samples.
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update: these three are *generators*,
    # not the generated sample data itself.
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                    opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])

    # Init pos/neg feature banks used by the online updates.
    # NOTE(review): 'overlap_neg_init' is used here rather than
    # 'overlap_neg_update' — looks intentional in this codebase but verify.
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats]
    neg_feats_all = [neg_feats]

    spf_total = time.time() - tic

    # Display setup (live window and/or per-frame snapshots).
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)
        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')
        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]), gt[0, 2], gt[0, 3],
                                    linewidth=3, edgecolor="#00ff00",
                                    zorder=1, fill=False)
            ax.add_patch(gt_rect)
        rect = plt.Rectangle(tuple(result_bb[0, :2]), result_bb[0, 2],
                             result_bb[0, 3], linewidth=3,
                             edgecolor="#ff0000", zorder=1, fill=False)
        ax.add_patch(rect)
        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    # Main loop
    for i in range(1, len(img_list)):
        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        # Estimate target bbox for this frame; the result is carried
        # forward as the search center of the next iteration.
        samples = sample_generator(target_bbox, opts['n_samples'])
        # forward_samples scores all candidates with one network pass.
        sample_scores = forward_samples(model, image, samples,
                                        out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            # Average the top-scoring boxes into one estimate.
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0

        # Expand search area at failure: sampling spread adapts online
        # to how well the tracker is doing.
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        # Bbox regression: refines using only the current frame's top
        # boxes (previous frames are not considered here).
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox

        # Data collect: on success, grow the pos/neg feature banks
        # (bounded FIFO of the most recent frames).
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]
            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short-term update: on failure, retrain with the pos/neg samples
        # accumulated over the most recent frames.
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.cat(pos_feats_all[-nframes:], 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])
        # Long-term update: periodic retrain with the full history.
        elif i % opts['long_interval'] == 0:
            pos_data = torch.cat(pos_feats_all, 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)
            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])
            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])
            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir,
                                         '{:04d}.jpg'.format(i)),
                            dpi=dpi)

        if gt is None:
            print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'.format(
                i, len(img_list), target_score, spf))
        else:
            overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
            print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'.
                  format(i, len(img_list), overlap[i], target_score, spf))

    if gt is not None:
        print('meanIOU: {:.3f}'.format(overlap.mean()))
    fps = len(img_list) / spf_total
    return result, result_bb, fps
def run_mdnet(img_list,
              init_bbox,
              gt=None,
              savefig_dir='',
              display=False,
              model_path='models/model001.pth'):
    """Run the MDNet tracker with hill-climbing box refinement and an
    'everywhere' grid re-detection fallback when the score collapses.

    Args:
        img_list: list of frame image paths.
        init_bbox: initial target box (x, y, w, h) on the first frame.
        gt: optional ground-truth boxes per frame, for IoU reporting.
        savefig_dir: directory for per-frame visualization images ('' = off).
        display: whether to show a live matplotlib window.
        model_path: one of the two supported checkpoint paths (asserted).

    Returns:
        (result, result_bb, fps, overlap).
        NOTE(review): 'overlap' is only assigned when gt is not None, so
        calling this with gt=None raises NameError at the return — confirm
        callers always pass gt.
    """
    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox
    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1

    # Init model: checkpoint path selects between two model classes.
    opts['model_path'] = model_path
    print('********')
    print('model:', opts['model_path'])
    print('********')
    assert (model_path == 'models/model000.pth'
            or model_path == 'models/model001.pth')
    if model_path == 'models/model000.pth':
        model = MDNet0(opts['model_path'])
    else:
        model = MDNet1(opts['model_path'])
    if opts['use_gpu']:
        model = model.cuda()

    # Init criterion and optimizer
    criterion = BCELoss()
    model.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'],
                                     opts['lr_mult'])

    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')

    # Draw pos/neg samples
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])
    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features (debug prints left in by the author).
    pos_feats = forward_samples(model, image, pos_examples)
    print(pos_feats)
    neg_feats = forward_samples(model, image, neg_examples)
    print(neg_feats)

    # Initial training
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()

    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                    opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])

    # Init pos/neg features for update
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats]
    neg_feats_all = [neg_feats]

    spf_total = time.time() - tic

    # Display setup
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)
        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')
        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]), gt[0, 2], gt[0, 3],
                                    linewidth=3, edgecolor="#00ff00",
                                    zorder=1, fill=False)
            ax.add_patch(gt_rect)
        rect = plt.Rectangle(tuple(result_bb[0, :2]), result_bb[0, 2],
                             result_bb[0, 3], linewidth=3,
                             edgecolor="#ff0000", zorder=1, fill=False)
        ax.add_patch(rect)
        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    # Main loop
    for i in range(1, len(img_list)):
        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        # Estimate target bbox
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(model, image, samples,
                                        out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)

        # For each of the top-5 samples, maximize its score with a
        # hill-climbing search over 8 one-pixel box perturbations.
        for j in range(5):
            sample_ = samples[top_idx[j]]
            last_top_score = None
            # hill-climbing search
            # NOTE(review): the loop only breaks on a strict score
            # *decrease*; a plateau of equal scores could loop forever —
            # confirm this terminates in practice.
            while True:
                # 8 neighbors: move each box edge in/out by one pixel
                # (boxes are [x, y, w, h]).
                sample_left_p = [
                    sample_[0] + 1, sample_[1], sample_[2] - 1, sample_[3]
                ]
                sample_left_n = [
                    sample_[0] - 1, sample_[1], sample_[2] + 1, sample_[3]
                ]
                sample_up_p = [
                    sample_[0], sample_[1] + 1, sample_[2], sample_[3] - 1
                ]
                sample_up_n = [
                    sample_[0], sample_[1] - 1, sample_[2], sample_[3] + 1
                ]
                sample_right_p = [
                    sample_[0], sample_[1], sample_[2] + 1, sample_[3]
                ]
                sample_right_n = [
                    sample_[0], sample_[1], sample_[2] - 1, sample_[3]
                ]
                sample_bottom_p = [
                    sample_[0], sample_[1], sample_[2], sample_[3] + 1
                ]
                sample_bottom_n = [
                    sample_[0], sample_[1], sample_[2], sample_[3] - 1
                ]
                all_samples = [
                    sample_left_p, sample_left_n, sample_up_p, sample_up_n,
                    sample_right_p, sample_right_n, sample_bottom_p,
                    sample_bottom_n
                ]
                hillClimbingSS = forward_samples(model,
                                                 image,
                                                 np.array(all_samples),
                                                 out_layer='fc6')
                top_score, top_index = hillClimbingSS[:, 1].topk(1)
                top_score_float = top_score.cpu().numpy()[0]
                # End of hill climbing: this is THE BEST!
                if last_top_score != None:
                    if top_score_float < last_top_score:
                        break
                sample_ = all_samples[top_index]
                samples[top_idx[j]] = all_samples[top_index]
                last_top_score = top_score_float

        # Re-score after refinement.
        sample_scores = forward_samples(model, image, samples,
                                        out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)

        # Snapshot of the refined samples, restored if re-detection fails.
        sampleStore = []
        for j in range(len(samples)):
            temp = []
            for k in range(4):
                temp.append(samples[j][k])
            sampleStore.append(temp)

        # If the mean top score is negative (apparent loss of target),
        # search a 32x32 grid around the previous frame's box.
        target_score = top_scores.mean()
        if target_score < 0:
            last_left = result[i - 1][0]
            last_top = result[i - 1][1]
            cnt = 0
            # Two passes: a coarse grid (divisor 32) then a finer one (16).
            rl = [32, 16]
            for _ in range(len(rl)):
                everywhere_sample = []
                # Grid of candidate boxes near the last box, all using the
                # mean width/height of the current sample set.
                meanWidth = 0.0
                meanHeight = 0.0
                for j in range(len(samples)):
                    meanWidth += samples[j][2]
                    meanHeight += samples[j][3]
                meanWidth /= len(samples)
                meanHeight /= len(samples)
                # NOTE(review): width/height are computed but never used.
                width = image.size[0]
                height = image.size[1]
                for j in range(32):
                    for k in range(32):
                        jk = [
                            last_left + (31 - 2 * j) * meanWidth / rl[_],
                            last_top + (31 - 2 * k) * meanHeight / rl[_],
                            meanWidth, meanHeight
                        ]
                        everywhere_sample.append(jk)
                everywhere_scores = forward_samples(
                    model,
                    image,
                    np.array(everywhere_sample),
                    out_layer='fc6')
                everywhere_top_scores, everywhere_top_idx = \
                    everywhere_scores[:, 1].topk(5)

                # Hill-climb the top-5 grid candidates, same scheme as above.
                for j in range(5):
                    sample_ = everywhere_sample[everywhere_top_idx[j]]
                    last_top_score = None
                    # hill-climbing search
                    while True:
                        sample_left_p = [
                            sample_[0] + 1, sample_[1], sample_[2] - 1,
                            sample_[3]
                        ]
                        sample_left_n = [
                            sample_[0] - 1, sample_[1], sample_[2] + 1,
                            sample_[3]
                        ]
                        sample_up_p = [
                            sample_[0], sample_[1] + 1, sample_[2],
                            sample_[3] - 1
                        ]
                        sample_up_n = [
                            sample_[0], sample_[1] - 1, sample_[2],
                            sample_[3] + 1
                        ]
                        sample_right_p = [
                            sample_[0], sample_[1], sample_[2] + 1, sample_[3]
                        ]
                        sample_right_n = [
                            sample_[0], sample_[1], sample_[2] - 1, sample_[3]
                        ]
                        sample_bottom_p = [
                            sample_[0], sample_[1], sample_[2], sample_[3] + 1
                        ]
                        sample_bottom_n = [
                            sample_[0], sample_[1], sample_[2], sample_[3] - 1
                        ]
                        all_samples = [
                            sample_left_p, sample_left_n, sample_up_p,
                            sample_up_n, sample_right_p, sample_right_n,
                            sample_bottom_p, sample_bottom_n
                        ]
                        hillClimbingSS = forward_samples(
                            model,
                            image,
                            np.array(all_samples),
                            out_layer='fc6')
                        top_score, top_index = hillClimbingSS[:, 1].topk(1)
                        top_score_float = top_score.cpu().numpy()[0]
                        # End of hill climbing: this is THE BEST!
                        if last_top_score != None:
                            if top_score_float < last_top_score:
                                break
                        sample_ = all_samples[top_index]
                        everywhere_sample[
                            everywhere_top_idx[j]] = all_samples[top_index]
                        last_top_score = top_score_float

                everywhere_scores = forward_samples(
                    model,
                    image,
                    np.array(everywhere_sample),
                    out_layer='fc6')
                everywhere_top_scores, everywhere_top_idx = \
                    everywhere_scores[:, 1].topk(5)

                # Merge 'samples' with the refined grid candidates and
                # re-score the combined set.
                everywhere_top5 = []
                for j in range(5):
                    everywhere_top5.append(
                        everywhere_sample[everywhere_top_idx[j]])
                samples = np.concatenate((samples, np.array(everywhere_top5)))
                sample_scores = forward_samples(model, image, samples,
                                                out_layer='fc6')
                top_scores, top_idx = sample_scores[:, 1].topk(5)
                if top_scores.mean() > 0:
                    # Re-detection recovered a positive score; stop early.
                    break
                cnt += 1
                # Both passes failed -> recover the original samples.
                if cnt == 2:
                    samples = np.array(sampleStore)
                    sample_scores = forward_samples(model, image, samples,
                                                    out_layer='fc6')
                    top_scores, top_idx = sample_scores[:, 1].topk(5)

        # Finally re-score and pick the target estimate.
        sample_scores = forward_samples(model, image, samples,
                                        out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0

        # Expand search area at failure
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        # Bbox regression
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox

        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]
            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.cat(pos_feats_all[-nframes:], 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])
        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.cat(pos_feats_all, 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)
            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])
            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])
            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                # NOTE(review): model_path[14] picks the digit out of
                # 'models/model00X.pth' — fragile if paths change.
                fig.savefig(os.path.join(
                    savefig_dir,
                    ('M' + model_path[14] + 'T3_' +
                     '{:04d}.jpg'.format(i))),
                            dpi=dpi)

        if gt is None:
            print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'.format(
                i, len(img_list), target_score, spf))
        else:
            overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
            print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'.
                  format(i, len(img_list), overlap[i], target_score, spf))

    if gt is not None:
        print('meanIOU: {:.3f}'.format(overlap.mean()))
    fps = len(img_list) / spf_total
    plt.close('all')
    return result, result_bb, fps, overlap
def initialize(self, image_file, box):
    """Initialize the tracker on the first frame.

    Draws pos/neg/bbreg samples around `box`, extracts RoI-aligned features
    from (optionally jittered) scene crops plus extra randomized crops,
    trains the model with OWM (train_owm) and fits the bbox regressor.

    Args:
        image_file: path of the first frame.
        box: initial target bounding box (x, y, w, h).
    """
    self.frame_idx = 0
    # Load first image
    cur_image = Image.open(image_file).convert("RGB")
    cur_image = np.asarray(cur_image)
    self.target_bbox = np.array(box)

    # Draw pos/neg samples (image size is passed as (width, height)).
    ishape = cur_image.shape
    pos_examples = gen_samples(
        SampleGenerator("gaussian", (ishape[1], ishape[0]), 0.1, 1.2),
        self.target_bbox,
        opts["n_pos_init"],
        opts["overlap_pos_init"],
    )
    neg_examples = gen_samples(
        SampleGenerator("uniform", (ishape[1], ishape[0]), 1, 2, 1.1),
        self.target_bbox,
        opts["n_neg_init"],
        opts["overlap_neg_init"],
    )
    neg_examples = np.random.permutation(neg_examples)
    cur_bbreg_examples = gen_samples(
        SampleGenerator("uniform", (ishape[1], ishape[0]), 0.3, 1.5, 1.1),
        self.target_bbox,
        opts["n_bbreg"],
        opts["overlap_bbreg"],
        opts["scale_bbreg"],
    )

    # Scene box: padded bounding box of all negative samples.
    padded_x1 = (neg_examples[:, 0] -
                 neg_examples[:, 2] * (opts["padding"] - 1.0) / 2.0).min()
    padded_y1 = (neg_examples[:, 1] -
                 neg_examples[:, 3] * (opts["padding"] - 1.0) / 2.0).min()
    padded_x2 = (neg_examples[:, 0] +
                 neg_examples[:, 2] * (opts["padding"] + 1.0) / 2.0).max()
    padded_y2 = (neg_examples[:, 1] +
                 neg_examples[:, 3] * (opts["padding"] + 1.0) / 2.0).max()
    padded_scene_box = np.reshape(
        np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1,
                    padded_y2 - padded_y1)),
        (1, 4),
    )

    scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
    if opts["jitter"]:
        # horizontal shift
        jittered_scene_box_horizon = np.copy(padded_scene_box)
        jittered_scene_box_horizon[0, 0] -= 4.0
        jitter_scale_horizon = 1.0
        # vertical shift
        jittered_scene_box_vertical = np.copy(padded_scene_box)
        jittered_scene_box_vertical[0, 1] -= 4.0
        jitter_scale_vertical = 1.0
        # scale reduction (one step)
        jittered_scene_box_reduce1 = np.copy(padded_scene_box)
        jitter_scale_reduce1 = 1.1**(-1)
        # scale enlargement (one step)
        jittered_scene_box_enlarge1 = np.copy(padded_scene_box)
        jitter_scale_enlarge1 = 1.1**(1)
        # scale reduction (two steps)
        jittered_scene_box_reduce2 = np.copy(padded_scene_box)
        jitter_scale_reduce2 = 1.1**(-2)
        # scale enlargement (two steps)
        jittered_scene_box_enlarge2 = np.copy(padded_scene_box)
        jitter_scale_enlarge2 = 1.1**(2)

        scene_boxes = np.concatenate(
            [
                scene_boxes,
                jittered_scene_box_horizon,
                jittered_scene_box_vertical,
                jittered_scene_box_reduce1,
                jittered_scene_box_enlarge1,
                jittered_scene_box_reduce2,
                jittered_scene_box_enlarge2,
            ],
            axis=0,
        )
        jitter_scale = [
            1.0,
            jitter_scale_horizon,
            jitter_scale_vertical,
            jitter_scale_reduce1,
            jitter_scale_enlarge1,
            jitter_scale_reduce2,
            jitter_scale_enlarge2,
        ]
    else:
        jitter_scale = [1.0]

    self.model.eval()
    # For each (jittered) scene crop: run conv features once, then pool
    # RoI-aligned features for pos/neg/bbreg samples.
    for bidx in range(0, scene_boxes.shape[0]):
        crop_img_size = (scene_boxes[bidx, 2:4] * (
            (opts["img_size"], opts["img_size"]) / self.target_bbox[2:4])
                         ).astype("int64") * jitter_scale[bidx]
        cropped_image, cur_image_var = self.img_crop_model.crop_image(
            cur_image, np.reshape(scene_boxes[bidx], (1, 4)), crop_img_size)
        # Zero-center pixel values around 128.
        cropped_image = cropped_image - 128.0

        feat_map = self.model(cropped_image, out_layer="conv3")

        rel_target_bbox = np.copy(self.target_bbox)
        rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]

        # Positive RoIs, shifted into scene-crop coordinates.
        batch_num = np.zeros((pos_examples.shape[0], 1))
        cur_pos_rois = np.copy(pos_examples)
        cur_pos_rois[:, 0:2] -= np.repeat(
            np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
            cur_pos_rois.shape[0],
            axis=0,
        )
        scaled_obj_size = float(opts["img_size"]) * jitter_scale[bidx]
        cur_pos_rois = samples2maskroi(
            cur_pos_rois,
            self.model.receptive_field,
            (scaled_obj_size, scaled_obj_size),
            self.target_bbox[2:4],
            opts["padding"],
        )
        cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1)
        cur_pos_rois = Variable(
            torch.from_numpy(cur_pos_rois.astype("float32"))).cuda()
        cur_pos_feats = self.model.roi_align_model(feat_map, cur_pos_rois)
        cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0),
                                           -1).data.clone()

        # Negative RoIs.
        batch_num = np.zeros((neg_examples.shape[0], 1))
        cur_neg_rois = np.copy(neg_examples)
        cur_neg_rois[:, 0:2] -= np.repeat(
            np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
            cur_neg_rois.shape[0],
            axis=0,
        )
        cur_neg_rois = samples2maskroi(
            cur_neg_rois,
            self.model.receptive_field,
            (scaled_obj_size, scaled_obj_size),
            self.target_bbox[2:4],
            opts["padding"],
        )
        cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1)
        cur_neg_rois = Variable(
            torch.from_numpy(cur_neg_rois.astype("float32"))).cuda()
        cur_neg_feats = self.model.roi_align_model(feat_map, cur_neg_rois)
        cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0),
                                           -1).data.clone()

        # bbreg rois
        batch_num = np.zeros((cur_bbreg_examples.shape[0], 1))
        cur_bbreg_rois = np.copy(cur_bbreg_examples)
        cur_bbreg_rois[:, 0:2] -= np.repeat(
            np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
            cur_bbreg_rois.shape[0],
            axis=0,
        )
        scaled_obj_size = float(opts["img_size"]) * jitter_scale[bidx]
        cur_bbreg_rois = samples2maskroi(
            cur_bbreg_rois,
            self.model.receptive_field,
            (scaled_obj_size, scaled_obj_size),
            self.target_bbox[2:4],
            opts["padding"],
        )
        cur_bbreg_rois = np.concatenate((batch_num, cur_bbreg_rois), axis=1)
        cur_bbreg_rois = Variable(
            torch.from_numpy(cur_bbreg_rois.astype("float32"))).cuda()
        cur_bbreg_feats = self.model.roi_align_model(
            feat_map, cur_bbreg_rois)
        cur_bbreg_feats = cur_bbreg_feats.view(cur_bbreg_feats.size(0),
                                               -1).data.clone()

        self.feat_dim = cur_pos_feats.size(-1)

        # Accumulate features across the jittered scene crops.
        if bidx == 0:
            pos_feats = cur_pos_feats
            neg_feats = cur_neg_feats
            # bbreg feature
            bbreg_feats = cur_bbreg_feats
            bbreg_examples = cur_bbreg_examples
        else:
            pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
            neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)
            # bbreg feature
            bbreg_feats = torch.cat((bbreg_feats, cur_bbreg_feats), dim=0)
            bbreg_examples = np.concatenate(
                (bbreg_examples, cur_bbreg_examples), axis=0)

    # Randomly subsample each feature set down to its configured cap.
    if pos_feats.size(0) > opts["n_pos_init"]:
        pos_idx = np.asarray(range(pos_feats.size(0)))
        np.random.shuffle(pos_idx)
        pos_feats = pos_feats[pos_idx[0:opts["n_pos_init"]], :]
    if neg_feats.size(0) > opts["n_neg_init"]:
        neg_idx = np.asarray(range(neg_feats.size(0)))
        np.random.shuffle(neg_idx)
        neg_feats = neg_feats[neg_idx[0:opts["n_neg_init"]], :]
    # bbreg
    if bbreg_feats.size(0) > opts["n_bbreg"]:
        bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
        np.random.shuffle(bbreg_idx)
        bbreg_feats = bbreg_feats[bbreg_idx[0:opts["n_bbreg"]], :]
        bbreg_examples = bbreg_examples[bbreg_idx[0:opts["n_bbreg"]], :]

    # Extra randomized crops around the target (shift + scale jitter),
    # each contributing a few more pos/neg/bbreg samples.
    extra_obj_size = np.array((opts["img_size"], opts["img_size"]))
    extra_crop_img_size = extra_obj_size * (opts["padding"] + 0.6)
    replicateNum = 100
    for iidx in range(replicateNum):
        extra_target_bbox = np.copy(self.target_bbox)

        extra_scene_box = np.copy(extra_target_bbox)
        extra_scene_box_center = extra_scene_box[
            0:2] + extra_scene_box[2:4] / 2.0
        extra_scene_box_size = extra_scene_box[2:4] * (opts["padding"] + 0.6)
        extra_scene_box[
            0:2] = extra_scene_box_center - extra_scene_box_size / 2.0
        extra_scene_box[2:4] = extra_scene_box_size

        # Random shift (clipped to +-4 px) and scale (1.1^[-2, 2]).
        extra_shift_offset = np.clip(2.0 * np.random.randn(2), -4, 4)
        cur_extra_scale = 1.1**np.clip(np.random.randn(1), -2, 2)

        extra_scene_box[0] += extra_shift_offset[0]
        extra_scene_box[1] += extra_shift_offset[1]
        extra_scene_box[2:4] *= cur_extra_scale[0]

        scaled_obj_size = float(opts["img_size"]) / cur_extra_scale[0]

        cur_extra_cropped_image, _ = self.img_crop_model.crop_image(
            cur_image, np.reshape(extra_scene_box, (1, 4)),
            extra_crop_img_size)
        cur_extra_cropped_image = cur_extra_cropped_image.detach()

        # NOTE(review): these sample counts are float divisions
        # (e.g. n_pos_init / 100) — presumably gen_samples accepts a
        # float count; confirm against its implementation.
        cur_extra_pos_examples = gen_samples(
            SampleGenerator("gaussian", (ishape[1], ishape[0]), 0.1, 1.2),
            extra_target_bbox,
            opts["n_pos_init"] / replicateNum,
            opts["overlap_pos_init"],
        )
        cur_extra_neg_examples = gen_samples(
            SampleGenerator("uniform", (ishape[1], ishape[0]), 0.3, 2, 1.1),
            extra_target_bbox,
            opts["n_neg_init"] / replicateNum / 4,
            opts["overlap_neg_init"],
        )
        # bbreg sample
        cur_extra_bbreg_examples = gen_samples(
            SampleGenerator("uniform", (ishape[1], ishape[0]), 0.3, 1.5, 1.1),
            extra_target_bbox,
            opts["n_bbreg"] / replicateNum / 4,
            opts["overlap_bbreg"],
            opts["scale_bbreg"],
        )

        # RoIs carry the crop index (iidx) as their batch id.
        batch_num = iidx * np.ones((cur_extra_pos_examples.shape[0], 1))
        cur_extra_pos_rois = np.copy(cur_extra_pos_examples)
        cur_extra_pos_rois[:, 0:2] -= np.repeat(
            np.reshape(extra_scene_box[0:2], (1, 2)),
            cur_extra_pos_rois.shape[0],
            axis=0,
        )
        cur_extra_pos_rois = samples2maskroi(
            cur_extra_pos_rois,
            self.model.receptive_field,
            (scaled_obj_size, scaled_obj_size),
            extra_target_bbox[2:4],
            opts["padding"],
        )
        cur_extra_pos_rois = np.concatenate(
            (batch_num, cur_extra_pos_rois), axis=1)

        batch_num = iidx * np.ones((cur_extra_neg_examples.shape[0], 1))
        cur_extra_neg_rois = np.copy(cur_extra_neg_examples)
        cur_extra_neg_rois[:, 0:2] -= np.repeat(
            np.reshape(extra_scene_box[0:2], (1, 2)),
            cur_extra_neg_rois.shape[0],
            axis=0,
        )
        cur_extra_neg_rois = samples2maskroi(
            cur_extra_neg_rois,
            self.model.receptive_field,
            (scaled_obj_size, scaled_obj_size),
            extra_target_bbox[2:4],
            opts["padding"],
        )
        cur_extra_neg_rois = np.concatenate(
            (batch_num, cur_extra_neg_rois), axis=1)

        # bbreg rois
        batch_num = iidx * np.ones((cur_extra_bbreg_examples.shape[0], 1))
        cur_extra_bbreg_rois = np.copy(cur_extra_bbreg_examples)
        cur_extra_bbreg_rois[:, 0:2] -= np.repeat(
            np.reshape(extra_scene_box[0:2], (1, 2)),
            cur_extra_bbreg_rois.shape[0],
            axis=0,
        )
        cur_extra_bbreg_rois = samples2maskroi(
            cur_extra_bbreg_rois,
            self.model.receptive_field,
            (scaled_obj_size, scaled_obj_size),
            extra_target_bbox[2:4],
            opts["padding"],
        )
        cur_extra_bbreg_rois = np.concatenate(
            (batch_num, cur_extra_bbreg_rois), axis=1)

        # Accumulate images/rois across replicates.
        if iidx == 0:
            extra_cropped_image = cur_extra_cropped_image
            extra_pos_rois = np.copy(cur_extra_pos_rois)
            extra_neg_rois = np.copy(cur_extra_neg_rois)
            # bbreg rois
            extra_bbreg_rois = np.copy(cur_extra_bbreg_rois)
            extra_bbreg_examples = np.copy(cur_extra_bbreg_examples)
        else:
            extra_cropped_image = torch.cat(
                (extra_cropped_image, cur_extra_cropped_image), dim=0)
            extra_pos_rois = np.concatenate(
                (extra_pos_rois, np.copy(cur_extra_pos_rois)), axis=0)
            extra_neg_rois = np.concatenate(
                (extra_neg_rois, np.copy(cur_extra_neg_rois)), axis=0)
            # bbreg rois
            extra_bbreg_rois = np.concatenate(
                (extra_bbreg_rois, np.copy(cur_extra_bbreg_rois)), axis=0)
            extra_bbreg_examples = np.concatenate(
                (extra_bbreg_examples, np.copy(cur_extra_bbreg_examples)),
                axis=0)

    extra_pos_rois = Variable(
        torch.from_numpy(extra_pos_rois.astype("float32"))).cuda()
    extra_neg_rois = Variable(
        torch.from_numpy(extra_neg_rois.astype("float32"))).cuda()
    # bbreg rois
    extra_bbreg_rois = Variable(
        torch.from_numpy(extra_bbreg_rois.astype("float32"))).cuda()

    extra_cropped_image -= 128.0

    # One forward pass over all replicate crops, then RoI-align each set.
    extra_feat_maps = self.model(extra_cropped_image, out_layer="conv3")
    ishape = cur_image.shape

    extra_pos_feats = self.model.roi_align_model(extra_feat_maps,
                                                 extra_pos_rois)
    extra_pos_feats = extra_pos_feats.view(extra_pos_feats.size(0),
                                           -1).data.clone()
    extra_neg_feats = self.model.roi_align_model(extra_feat_maps,
                                                 extra_neg_rois)
    extra_neg_feats = extra_neg_feats.view(extra_neg_feats.size(0),
                                           -1).data.clone()
    # bbreg feat
    extra_bbreg_feats = self.model.roi_align_model(extra_feat_maps,
                                                   extra_bbreg_rois)
    extra_bbreg_feats = extra_bbreg_feats.view(extra_bbreg_feats.size(0),
                                               -1).data.clone()

    # concatenate extra features to original_features
    pos_feats = torch.cat((pos_feats, extra_pos_feats), dim=0)
    neg_feats = torch.cat((neg_feats, extra_neg_feats), dim=0)
    # concatenate extra bbreg feats to original_bbreg_feats
    bbreg_feats = torch.cat((bbreg_feats, extra_bbreg_feats), dim=0)
    bbreg_examples = np.concatenate((bbreg_examples, extra_bbreg_examples),
                                    axis=0)

    torch.cuda.empty_cache()
    self.model.zero_grad()

    # OWM state: projection matrices P4-P6 and weight buffers W4-W6.
    # NOTE(review): Variable(..., volatile=True) is from pre-0.4 PyTorch
    # and is a no-op / error on modern versions — confirm the pinned
    # torch version for this project.
    self.P4 = torch.autograd.Variable(torch.eye(512 * 3 * 3 + 1).type(
        self.dtype),
                                      volatile=True)
    self.P5 = (torch.autograd.Variable(torch.eye(512 + 1).type(self.dtype),
                                       volatile=True) * 10)
    self.P6 = torch.autograd.Variable(torch.eye(512 + 1).type(self.dtype),
                                      volatile=True)
    self.W4 = torch.autograd.Variable(torch.zeros(512 * 3 * 3 + 1,
                                                  512).type(self.dtype),
                                      volatile=True)
    self.W5 = torch.autograd.Variable(torch.zeros(512 + 1,
                                                  512).type(self.dtype),
                                      volatile=True)
    self.W6 = torch.autograd.Variable(torch.zeros(512 + 1,
                                                  2).type(self.dtype),
                                      volatile=True)
    self.flag_old = 0

    # Initial training
    self.flag_old = train_owm(
        self.model,
        self.criterion,
        self.init_optimizer,
        pos_feats,
        neg_feats,
        opts["maxiter_init"],
        self.P4,
        self.P5,
        self.P6,
        self.W4,
        self.W5,
        self.W6,
        self.flag_old,
    )

    # bbreg train (cap the sample count again after the extra crops).
    if bbreg_feats.size(0) > opts["n_bbreg"]:
        bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
        np.random.shuffle(bbreg_idx)
        bbreg_feats = bbreg_feats[bbreg_idx[0:opts["n_bbreg"]], :]
        bbreg_examples = bbreg_examples[bbreg_idx[0:opts["n_bbreg"]], :]
    self.bbreg = BBRegressor((ishape[1], ishape[0]))
    self.bbreg.train(bbreg_feats, bbreg_examples, self.target_bbox)

    # Seed the online-update feature banks with a random subset.
    if pos_feats.size(0) > opts["n_pos_update"]:
        pos_idx = np.asarray(range(pos_feats.size(0)))
        np.random.shuffle(pos_idx)
        self.pos_feats_all = [
            pos_feats.index_select(
                0,
                torch.from_numpy(pos_idx[0:opts["n_pos_update"]]).cuda())
        ]
    if neg_feats.size(0) > opts["n_neg_update"]:
        neg_idx = np.asarray(range(neg_feats.size(0)))
        np.random.shuffle(neg_idx)
        self.neg_feats_all = [
            neg_feats.index_select(
                0,
                torch.from_numpy(neg_idx[0:opts["n_neg_update"]]).cuda())
        ]

    self.trans_f = opts["trans_f"]
def track(self, image_file):
    """Track the target into the next frame.

    Draws candidate samples around the previous target box, scores them with
    the model (ROI-align features + fc head), updates ``self.target_bbox``,
    optionally refines it with the bounding-box regressor, collects new
    pos/neg training features on success, and runs a short-term (``train``)
    or long-term OWM (``train_owm``) model update.

    Args:
        image_file: path of the frame image to load.

    Returns:
        The bbox-regressed target box (or the raw mean top-5 box on failure),
        in (x, y, w, h) image coordinates.
    """
    self.frame_idx += 1
    # Load image
    cur_image = Image.open(image_file).convert("RGB")
    cur_image = np.asarray(cur_image)

    # Estimate target bbox: sample candidate boxes around the last target.
    ishape = cur_image.shape
    samples = gen_samples(
        SampleGenerator(
            "gaussian",
            (ishape[1], ishape[0]),
            self.trans_f,
            opts["scale_f"],
            valid=True,
        ),
        self.target_bbox,
        opts["n_samples"],
    )

    # Padded scene box: the tightest box (plus padding) that contains every
    # candidate sample, so one crop/feature pass covers all of them.
    padded_x1 = (samples[:, 0] -
                 samples[:, 2] * (opts["padding"] - 1.0) / 2.0).min()
    padded_y1 = (samples[:, 1] -
                 samples[:, 3] * (opts["padding"] - 1.0) / 2.0).min()
    padded_x2 = (samples[:, 0] +
                 samples[:, 2] * (opts["padding"] + 1.0) / 2.0).max()
    padded_y2 = (samples[:, 1] +
                 samples[:, 3] * (opts["padding"] + 1.0) / 2.0).max()
    padded_scene_box = np.asarray(
        (padded_x1, padded_y1, padded_x2 - padded_x1, padded_y2 - padded_y1))

    # Clamp the scene box so it keeps at least one pixel inside the image.
    if padded_scene_box[0] > cur_image.shape[1]:
        padded_scene_box[0] = cur_image.shape[1] - 1
    if padded_scene_box[1] > cur_image.shape[0]:
        padded_scene_box[1] = cur_image.shape[0] - 1
    if padded_scene_box[0] + padded_scene_box[2] < 0:
        padded_scene_box[2] = -padded_scene_box[0] + 1
    if padded_scene_box[1] + padded_scene_box[3] < 0:
        padded_scene_box[3] = -padded_scene_box[1] + 1

    # Crop the scene so the target is rescaled to roughly opts['img_size'].
    crop_img_size = (padded_scene_box[2:4] *
                     ((opts["img_size"], opts["img_size"]) /
                      self.target_bbox[2:4])).astype("int64")
    cropped_image, cur_image_var = self.img_crop_model.crop_image(
        cur_image, np.reshape(padded_scene_box, (1, 4)), crop_img_size)
    cropped_image = cropped_image - 128.0  # zero-center pixel values

    self.model.eval()
    feat_map = self.model(cropped_image, out_layer="conv3")

    # relative target bbox with padded_scene_box
    rel_target_bbox = np.copy(self.target_bbox)
    rel_target_bbox[0:2] -= padded_scene_box[0:2]

    # Extract sample features and get target location.
    # ROI format is (batch_index, x, y, w, h); batch index is always 0 here.
    batch_num = np.zeros((samples.shape[0], 1))
    sample_rois = np.copy(samples)
    sample_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2], (1, 2)),
                                     sample_rois.shape[0],
                                     axis=0)
    sample_rois = samples2maskroi(
        sample_rois,
        self.model.receptive_field,
        (opts["img_size"], opts["img_size"]),
        self.target_bbox[2:4],
        opts["padding"],
    )
    sample_rois = np.concatenate((batch_num, sample_rois), axis=1)
    sample_rois = Variable(torch.from_numpy(
        sample_rois.astype("float32"))).cuda()
    sample_feats = self.model.roi_align_model(feat_map, sample_rois)
    sample_feats = sample_feats.view(sample_feats.size(0), -1).clone()
    sample_scores = self.model(sample_feats, in_layer="fc4")

    # New target estimate: mean of the 5 highest-scoring candidates.
    top_scores, top_idx = sample_scores[:, 1].topk(5)
    top_idx = top_idx.data.cpu().numpy()
    target_score = top_scores.data.mean()
    self.target_bbox = samples[top_idx].mean(axis=0)
    success = target_score > opts["success_thr"]

    # # Expand search area at failure
    if success:
        self.trans_f = opts["trans_f"]
    else:
        self.trans_f = opts["trans_f_expand"]

    # Bbox regression (only refines confident estimates).
    if success:
        bbreg_feats = sample_feats[top_idx, :]
        bbreg_samples = samples[top_idx]
        bbreg_samples = self.bbreg.predict(bbreg_feats.data, bbreg_samples)
        bbreg_bbox = bbreg_samples.mean(axis=0)
    else:
        bbreg_bbox = self.target_bbox

    # Data collect: on success, harvest fresh pos/neg features for updates.
    if success:
        # Draw pos/neg samples
        pos_examples = gen_samples(
            SampleGenerator("gaussian", (ishape[1], ishape[0]), 0.1, 1.2),
            self.target_bbox,
            opts["n_pos_update"],
            opts["overlap_pos_update"],
        )
        neg_examples = gen_samples(
            SampleGenerator("uniform", (ishape[1], ishape[0]), 1.5, 1.2),
            self.target_bbox,
            opts["n_neg_update"],
            opts["overlap_neg_update"],
        )
        # Scene box enclosing all (wider-spread) negative samples.
        padded_x1 = (neg_examples[:, 0] -
                     neg_examples[:, 2] * (opts["padding"] - 1.0) / 2.0).min()
        padded_y1 = (neg_examples[:, 1] -
                     neg_examples[:, 3] * (opts["padding"] - 1.0) / 2.0).min()
        padded_x2 = (neg_examples[:, 0] +
                     neg_examples[:, 2] * (opts["padding"] + 1.0) / 2.0).max()
        padded_y2 = (neg_examples[:, 1] +
                     neg_examples[:, 3] * (opts["padding"] + 1.0) / 2.0).max()
        padded_scene_box = np.reshape(
            np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1,
                        padded_y2 - padded_y1)), (1, 4))

        # Single scene box / single jitter scale; the loop shape allows
        # multi-scale jitter but only one entry is used here.
        scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
        jitter_scale = [1.0]
        for bidx in range(0, scene_boxes.shape[0]):
            crop_img_size = (scene_boxes[bidx, 2:4] *
                             ((opts["img_size"], opts["img_size"]) /
                              self.target_bbox[2:4])
                             ).astype("int64") * jitter_scale[bidx]
            cropped_image, cur_image_var = self.img_crop_model.crop_image(
                cur_image, np.reshape(scene_boxes[bidx], (1, 4)),
                crop_img_size)
            cropped_image = cropped_image - 128.0
            feat_map = self.model(cropped_image, out_layer="conv3")

            rel_target_bbox = np.copy(self.target_bbox)
            rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]

            # Positive ROIs -> features.
            batch_num = np.zeros((pos_examples.shape[0], 1))
            cur_pos_rois = np.copy(pos_examples)
            cur_pos_rois[:, 0:2] -= np.repeat(
                np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                cur_pos_rois.shape[0],
                axis=0,
            )
            scaled_obj_size = float(opts["img_size"]) * jitter_scale[bidx]
            cur_pos_rois = samples2maskroi(
                cur_pos_rois,
                self.model.receptive_field,
                (scaled_obj_size, scaled_obj_size),
                self.target_bbox[2:4],
                opts["padding"],
            )
            cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1)
            cur_pos_rois = Variable(
                torch.from_numpy(cur_pos_rois.astype("float32"))).cuda()
            cur_pos_feats = self.model.roi_align_model(feat_map, cur_pos_rois)
            cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0),
                                               -1).data.clone()

            # Negative ROIs -> features.
            batch_num = np.zeros((neg_examples.shape[0], 1))
            cur_neg_rois = np.copy(neg_examples)
            cur_neg_rois[:, 0:2] -= np.repeat(
                np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                cur_neg_rois.shape[0],
                axis=0,
            )
            cur_neg_rois = samples2maskroi(
                cur_neg_rois,
                self.model.receptive_field,
                (scaled_obj_size, scaled_obj_size),
                self.target_bbox[2:4],
                opts["padding"],
            )
            cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1)
            cur_neg_rois = Variable(
                torch.from_numpy(cur_neg_rois.astype("float32"))).cuda()
            cur_neg_feats = self.model.roi_align_model(feat_map, cur_neg_rois)
            cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0),
                                               -1).data.clone()

            self.feat_dim = cur_pos_feats.size(-1)
            if bidx == 0:
                pos_feats = cur_pos_feats  # index select
                neg_feats = cur_neg_feats
            else:
                pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
                neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)

        # Randomly subsample features down to the per-frame update budget.
        if pos_feats.size(0) > opts["n_pos_update"]:
            pos_idx = np.asarray(range(pos_feats.size(0)))
            np.random.shuffle(pos_idx)
            pos_feats = pos_feats.index_select(
                0, torch.from_numpy(pos_idx[0:opts["n_pos_update"]]).cuda())
        if neg_feats.size(0) > opts["n_neg_update"]:
            neg_idx = np.asarray(range(neg_feats.size(0)))
            np.random.shuffle(neg_idx)
            neg_feats = neg_feats.index_select(
                0, torch.from_numpy(neg_idx[0:opts["n_neg_update"]]).cuda())
        self.pos_feats_all.append(pos_feats)
        self.neg_feats_all.append(neg_feats)
        # Bounded feature memories: long window for pos, short for neg.
        if len(self.pos_feats_all) > opts["n_frames_long"]:
            del self.pos_feats_all[0]
        if len(self.neg_feats_all) > opts["n_frames_short"]:
            del self.neg_feats_all[0]

    # Short term update (on failure): plain `train` on the recent window.
    if not success:
        nframes = min(opts["n_frames_short"], len(self.pos_feats_all))
        pos_data = torch.stack(self.pos_feats_all[-nframes:],
                               0).view(-1, self.feat_dim)
        neg_data = torch.stack(self.neg_feats_all, 0).view(-1, self.feat_dim)
        self.flag_old = train(
            self.model,
            self.criterion,
            self.update_optimizer,
            pos_data,
            neg_data,
            opts["maxiter_update"],
            self.W4,
            self.W5,
            self.W6,
            self.flag_old,
        )
    # Long term update (every `long_interval` frames): OWM training.
    # NOTE(review): this branch also windows with n_frames_short; long-term
    # updates usually use the full positive memory — confirm intended.
    elif self.frame_idx % opts["long_interval"] == 0:
        nframes = min(opts["n_frames_short"], len(self.pos_feats_all))
        pos_data = torch.stack(self.pos_feats_all[-nframes:],
                               0).view(-1, self.feat_dim)
        neg_data = torch.stack(self.neg_feats_all, 0).view(-1, self.feat_dim)
        self.flag_old = train_owm(
            self.model,
            self.criterion,
            self.update_optimizer_owm,
            pos_data,
            neg_data,
            opts["maxiter_update"],
            self.P4,
            self.P5,
            self.P6,
            self.W4,
            self.W5,
            self.W6,
            self.flag_old,
        )
    return bbreg_bbox
def search_track(self, track_num, frame_idx, init_bbox, previous_num,
                 init_conf):
    """Run an MDNet tracklet of up to ``self.TRACKLET_NUM`` frames and link it
    to detections.

    Starting from ``init_bbox`` at ``frame_idx``, a fresh MDNet is trained on
    the first frame and tracked forward.  Each tracked box is matched (IoU >
    0.3) against unmatched detections in the module-level ``frameBBoxList``;
    if enough matches accumulate (``self.IOU_count_th``) the tracklet is
    appended to ``self.GLOBAL_TRACK_LIST[track_num]`` and the search recurses
    from the last matched frame.

    Args:
        track_num: index of the track in ``self.GLOBAL_TRACK_LIST``.
        frame_idx: starting frame number (frames stored as zero-padded .png).
        init_bbox: (x, y, w, h) starting box.
        previous_num: non-positive overlap count with the previous call; that
            many trailing entries of the existing track are removed first.
        init_conf: detection confidence recorded for the starting box.

    Returns:
        None.  Results are accumulated in ``self.GLOBAL_TRACK_LIST``.
    """
    # img_list, init_bbox
    # Init bbox
    # result = np.zeros((len(img_list), 4))
    # result_bb = np.zeros((len(img_list), 4))
    # result[0] = target_bbox
    # result_bb[0] = target_bbox
    # superorange params
    # track_list[i]: (frame idx, matched detection id) or (-1, -1).
    track_list = [(-1, -1)] * (self.TRACKLET_NUM + 1)
    # bbox_list: (frame idx, box, confidence) per frame, starting frame first.
    bbox_list = [(frame_idx, init_bbox, init_conf)]
    IOU_count = 0
    # last_bbox = -1
    # next_frame = -1
    # frameA_path = os.path.join(self.FILE_PATH, str(frame_idx) + "."+ext)
    frameA_path = os.path.join(self.FILE_PATH,
                               str(frame_idx).zfill(5) + ".png")
    target_bbox = np.array(init_bbox)

    # Init model (a fresh per-tracklet MDNet).
    model = MDNet(opts['model_path'])
    if opts['use_gpu']:
        model = model.cuda()

    # Init criterion and optimizer
    criterion = BCELoss()
    model.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'],
                                     opts['lr_mult'])

    tic = time.time()
    # Load first image
    image = Image.open(frameA_path).convert('RGB')

    # Draw pos/neg samples
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])
    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features; bail out if sampling produced nothing.
    if len(pos_examples) == 0 or len(neg_examples) == 0:
        print("!!pos_examples=0 skip!!")
        return
    pos_feats = self.forward_samples(model, image, pos_examples)
    neg_feats = self.forward_samples(model, image, neg_examples)

    # Initial training
    self.train(model, criterion, init_optimizer, pos_feats, neg_feats,
               opts['maxiter_init'])
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()

    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = self.forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                    opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])

    # Init pos/neg features for update
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = self.forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats]
    neg_feats_all = [neg_feats]

    spf_total = time.time() - tic
    # fps = len(img_list) / spf_total
    # return result, result_bb, fps

    # Main loop
    for i in range(0, self.TRACKLET_NUM):
        # next 10 frame
        frameB_idx = frame_idx + i + 1
        # print("frameB_idx="+str(frameB_idx))
        if frameB_idx > self.frame_num:
            break
        else:
            frameB_path = os.path.join(self.FILE_PATH,
                                       str(frameB_idx).zfill(5) + ".png")

        # ------------track by MDNet------------
        # Load image
        image = Image.open(frameB_path).convert('RGB')

        # Estimate target bbox
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = self.forward_samples(model, image, samples,
                                             out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0

        # Expand search area at failure
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        # Bbox regression
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = self.forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        # result[i] = target_bbox
        # result_bb[i] = bbreg_bbox
        # print(target_bbox)

        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = self.forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]

            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = self.forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.cat(pos_feats_all[-nframes:], 0)
            neg_data = torch.cat(neg_feats_all, 0)
            self.train(model, criterion, update_optimizer, pos_data, neg_data,
                       opts['maxiter_update'])
        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.cat(pos_feats_all, 0)
            neg_data = torch.cat(neg_feats_all, 0)
            self.train(model, criterion, update_optimizer, pos_data, neg_data,
                       opts['maxiter_update'])

        torch.cuda.empty_cache()

        # Match the tracked box against unmatched detections in this frame.
        bboxT = bbreg_bbox
        anyIOU = False
        for bbox_id, bboxD in enumerate(frameBBoxList[frameB_idx]):
            if bboxD.match == False:
                IOU_ratio = overlap_ratio(bboxD.getRec(), bboxT)
                print("IOUratio={}".format(IOU_ratio))
                # print(IOU_ratio)
                if IOU_ratio > 0.3:
                    # print("overlap")
                    track_list[i] = (frameB_idx, bbox_id)
                    IOU_count = IOU_count + 1
                    # Prefer the detector's box/confidence when it matches.
                    bbox_list.append(
                        (frameB_idx, bboxD.getRec(), bboxD.confidence))
                    anyIOU = True
                    break
        if not anyIOU:
            # No detection matched: keep the tracker's box, confidence 0.
            bbox_list.append((frameB_idx, bboxT, 0))

    print("bbox_list=====")
    print(bbox_list)
    print("track_list=====")
    print(track_list)
    ##debug show track_list
    # for idx in range(0,10):
    #     print(track_list[idx])
    #     print(bbox_list[idx])

    if IOU_count >= self.IOU_count_th:
        # add track: mark all matched detections as consumed.
        for idx in range(0, self.TRACKLET_NUM + 1):
            bbox_id = track_list[idx][1]
            if bbox_id != -1:
                # print(frame_idx+idx+1)
                # print(bbox_id)
                frameBBoxList[frame_idx + idx + 1][bbox_id].setMatch()

        # Find the last frame that had a detection match; the next call will
        # restart from there (next_track_frame is a negative back-offset).
        next_track_frame = -1
        init_bbox_next = []
        bbox_length = len(bbox_list)
        #!!! track_list length=10, bbox_list length=11
        for ii in range(1, bbox_length):
            idx = bbox_length - ii
            print("{} {} {}".format(ii, idx, track_list[idx][0]))
            if track_list[idx - 1][0] != -1:
                next_track_frame = (-1) * ii  # count from back
                print("idx ={} ii={} next_track_frame={}".format(
                    idx, ii, next_track_frame))
                init_bbox_next = bbox_list[idx]
                break

        # remove overlap range: drop entries re-tracked by this call.
        start_rm = -1
        while start_rm >= previous_num:
            del self.GLOBAL_TRACK_LIST[track_num][-1]  # rm 1 per move
            # GLOBAL_TRACK_LIST[track_num].remove(-1)
            start_rm = start_rm - 1
        if previous_num == 0:
            self.GLOBAL_TRACK_LIST.append(bbox_list)
        else:
            self.GLOBAL_TRACK_LIST[track_num].extend(bbox_list)

        # Recurse from the last matched frame.
        next_start_frame = frame_idx + bbox_length + next_track_frame
        init_bbox_next = bbox_list[bbox_length + next_track_frame][1]
        init_conf_next = bbox_list[bbox_length + next_track_frame][2]
        # print(GLOBAL_TRACK_LIST)
        print("( " + str(track_num) + " " + str(next_start_frame) + " " +
              str(next_track_frame) + ")")
        print("init_bbox_next:" + str(init_bbox_next))
        self.search_track(
            track_num, next_start_frame, init_bbox_next, next_track_frame,
            init_conf_next
        )  # next_start_frame = 310 next_track_frame=-3(will be deleted)
def run_mdnet(img_list,
              init_bbox,
              gt=None,
              savefig_dir='',
              display=False,
              loss_index=1,
              model_path=opts['model_path'],
              seq_name=None):
    """Run the MDNet tracker over a sequence with IoU bookkeeping.

    Trains an MDNet on the first frame (classification head + bbox
    regressor), then tracks frame-by-frame with online short/long-term
    updates.  When the module-level flag ``init_after_loss`` is set, the run
    aborts as soon as the IoU with ground truth drops to 0 (VOT-style
    re-initialization protocol).

    Relies on module-level globals: ``opts``, ``fewer_images``,
    ``sequence_len_limit``, ``init_after_loss``, ``detailed_printing``.

    Args:
        img_list: ordered list of frame image paths.
        init_bbox: (x, y, w, h) ground-truth box of frame 0.
        gt: optional (N, 4) ground-truth boxes; required for IoU/center stats.
        savefig_dir: if non-empty, per-frame visualizations are saved there.
        display: show a live matplotlib view.
        loss_index: forwarded to the initial ``train`` call.
        model_path: weights file for MDNet (defaults to opts['model_path']).
        seq_name: unused here; kept for caller compatibility.

    Returns:
        (result, result_bb, num_images_tracked, spf_total, result_distances,
        result_ious, aborted) where ``aborted`` is True iff tracking was cut
        short by the VOT failure rule.
    """
    #def run_mdnet(k, img_list, init_bbox, gt=None, savefig_dir='', display=False,
    #              loss_index=1, model_path=opts['model_path'], seq_name=None):
    ############################
    # Optionally cap the number of processed frames for faster runs.
    if fewer_images:
        num_images = min(sequence_len_limit, len(img_list))
    else:
        num_images = len(img_list)
    ############################

    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox

    # Init iou and pred_iou
    iou_list = np.zeros((len(img_list), 1))  # shape: [113.1) ### list of ious
    iou_list[0] = 1.0  ### in first frame gt=result_bb (by definition)

    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1

    # Init model
    # model = MDNet(model_path=opts['model_path'],use_gpu=opts['use_gpu'])
    model = MDNet(model_path=model_path, use_gpu=opts['use_gpu'])
    if opts['use_gpu']:
        model = model.cuda()

    print('Init criterion and optimizer')
    criterion = BCELoss()
    model.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'],
                                     opts['lr_mult'])

    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')

    print('Draw pos/neg samples')
    # Draw pos/neg samples
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])
    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    print('Extract pos/neg features')
    # Extract pos/neg features
    # NOTE(review): both branches are identical; the fewer_images branch
    # presumably once used a reduced sample set — confirm before relying.
    if fewer_images:  # shorter run in general, less accurate
        pos_feats = forward_samples(model, image, pos_examples)
        neg_feats = forward_samples(model, image, neg_examples)
    else:
        pos_feats = forward_samples(model, image, pos_examples)
        neg_feats = forward_samples(model, image, neg_examples)

    print('Initial training')
    # Initial training
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'], loss_index=loss_index)  ### iou_pred_list
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()

    print('Train bbox regressor')
    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = forward_samples(
        model, image, bbreg_examples)  # calc features ### shape: [927, 4608] ###
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                    opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])

    print('Init pos/neg features for update')
    # Init pos/neg features for update
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats]
    neg_feats_all = [neg_feats]

    spf_total = time.time() - tic

    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)
        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')
        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)
        #################
        # NOTE(review): result_centers/result_ious are created only inside
        # this display/savefig branch but read unconditionally at the end of
        # the function (and gt is dereferenced here without a None check) —
        # a run with display=False and savefig_dir='' would hit a NameError.
        # Confirm callers always enable display or savefig.
        num_gts = np.minimum(gt.shape[0], num_images)
        # print('num_gts.shape: ', num_gts.shape)
        gt_centers = gt[:num_gts, :2] + gt[:num_gts, 2:] / 2
        result_centers = np.zeros_like(gt[:num_gts, :2])
        result_centers[0] = gt_centers[0]
        result_ious = np.zeros(num_gts, dtype='float64')
        result_ious[0] = 1.
        #################
        rect = plt.Rectangle(tuple(result_bb[0, :2]),
                             result_bb[0, 2],
                             result_bb[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)
        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    print('Main Loop')
    # Main loop
    spf_total = 0  # I don't want to take into account initialization
    for i in tqdm(range(1, num_images)):
        # for i in range(1, len(img_list)):
        #print('Frame: ', i)
        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        #print('Estimate target bbox (in run_mdnet)')
        # Estimate target bbox
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(model, image, samples,
                                        out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0

        # Expand search area at failure
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        #print('Bbox regression (in run_mdnet)')
        # Bbox regression
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox
        # NOTE(review): gt is dereferenced here without a None check.
        iou_list[i] = overlap_ratio(gt[i], result_bb[i])

        ###########################################
        # identify tracking failure and abort when in VOT mode
        IoU = overlap_ratio(result_bb[i], gt[i])[0]
        if (IoU == 0) and init_after_loss:
            print(' * lost track in frame %d since init*' % (i))
            # Per-frame center errors up to (excluding) the lost frame.
            result_distances = scipy.spatial.distance.cdist(
                result_centers[:i], gt_centers[:i],
                metric='euclidean').diagonal()
            num_images_tracked = i - 1  # we don't count frame 0 and current frame (lost track)
            im.set_data(image)
            if gt is not None:
                if i < gt.shape[0]:
                    gt_rect.set_xy(gt[i, :2])
                    gt_rect.set_width(gt[i, 2])
                    gt_rect.set_height(gt[i, 3])
                else:
                    gt_rect.set_xy(np.array([np.nan, np.nan]))
                    gt_rect.set_width(np.nan)
                    gt_rect.set_height(np.nan)
            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])
            plt.pause(.01)
            plt.draw()
            print(
                'Finished identify tracking failure and abort when in VOT mode'
            )
            return result[:i], result_bb[:i], num_images_tracked, spf_total, result_distances, result_ious[:i], True
        ########################################

        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]

            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.cat(pos_feats_all[-nframes:], 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])
        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.cat(pos_feats_all, 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf
        #print('Time: ', spf)

        # Display
        if display or savefig:
            im.set_data(image)
            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])
                #################
                result_ious[i] = overlap_ratio(result_bb[i], gt[i])[0]
                result_centers[i] = result_bb[i, :2] + result_bb[i, 2:] / 2
                #################
            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])
            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir,
                                         '{:04d}.jpg'.format(i)),
                            dpi=dpi)

        ####################################
        if detailed_printing:
            if gt is None:
                print(" Frame %d/%d, Score %.3f, Time %.3f" % \
                      (i, num_images-1, target_score, spf))
            else:
                if i < gt.shape[0]:
                    print(" Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \
                          (i, num_images-1, overlap_ratio(gt[i], result_bb[i])[0], target_score, spf))
                else:
                    print(" Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \
                          (i, num_images-1, overlap_ratio(np.array([np.nan,np.nan,np.nan,np.nan]), result_bb[i])[0], target_score, spf))
        ####################################
        # if gt is None:
        #     print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'
        #           .format(i, len(img_list), target_score, spf))
        # else:
        #     overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
        #     print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'
        #           .format(i, len(img_list), overlap[i], target_score, spf))

    ########################
    plt.close()
    result_distances = scipy.spatial.distance.cdist(
        result_centers, gt_centers, metric='euclidean').diagonal()
    num_images_tracked = num_images - 1  # I don't want to count initialization frame (i.e. frame 0)
    print(' main loop finished, %d frames' % (num_images))
    print('mean IoU: ', iou_list.mean())
    print('Finished run_mdnet()')
    return result, result_bb, num_images_tracked, spf_total, result_distances, result_ious, False
def run_vtaan(img_list, init_bbox, gt=None, savefig_dir='', display=False):
    """Run the VTAAN tracker (MDNet + generator net + guided backprop).

    Trains an MDNet and a feature generator ``NetG`` on the first frame, then
    tracks frame-by-frame with online updates: a short-term update (plain
    samples) on failure, and a long-term update (with the generator) every
    ``opts['long_interval']`` frames.

    Args:
        img_list: ordered list of frame image paths.
        init_bbox: (x, y, w, h) ground-truth box of frame 0.
        gt: optional (N, 4) ground-truth boxes, used only for IoU reporting.
        savefig_dir: if non-empty, per-frame visualizations are saved there.
        display: show a live matplotlib view.

    Returns:
        (result, result_bb, fps): per-frame raw boxes, regressed boxes, and
        overall tracking speed in frames/second (excluding nothing — the
        first-frame setup time is included in ``spf_total``).
    """
    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox

    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1

    # Init model
    model = MDNet(opts['model_path'])
    model_g = NetG()
    if opts['use_gpu']:
        model = model.cuda()
        model_g = model_g.cuda()
    GBP = guided_backprop.GuidedBackprop(model, 1)

    # Init criterion and optimizer
    criterion = BCELoss()
    criterion_g = torch.nn.MSELoss(reduction='sum')
    model.set_learnable_params(opts['ft_layers'])
    model_g.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'],
                                     opts['lr_mult'])

    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')

    # Draw pos/neg samples
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])
    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features; imgids track which frame each sample came from.
    pos_feats = forward_samples(model, image, pos_examples)
    neg_feats = forward_samples(model, image, neg_examples)
    pos_imgids = np.array([[0]] * pos_feats.size(0))
    neg_imgids = np.array([[0]] * neg_feats.size(0))
    feat_dim = pos_feats.size(-1)

    # Initial training (no generator on the first pass — model_g slot is None).
    train(model, None, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'], pos_imgids, pos_examples, neg_imgids,
          neg_examples, img_list, GBP)
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()
    # Pretrain the generator on the initial positive features.
    g_pretrain(model, model_g, criterion_g, pos_feats)
    torch.cuda.empty_cache()

    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                    opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])

    # Init pos/neg features for update (truncated to the per-frame budgets).
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats[:opts['n_pos_update']]]
    neg_feats_all = [neg_feats[:opts['n_neg_update']]]
    pos_examples_all = [pos_examples[:opts['n_pos_update']]]
    neg_examples_all = [neg_examples[:opts['n_neg_update']]]
    pos_imgids_all = [pos_imgids[:opts['n_pos_update']]]
    neg_imgids_all = [neg_imgids[:opts['n_neg_update']]]

    spf_total = time.time() - tic

    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)
        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')
        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)
        rect = plt.Rectangle(tuple(result_bb[0, :2]),
                             result_bb[0, 2],
                             result_bb[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)
        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    # Main loop
    for i in range(1, len(img_list)):
        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        # Estimate target bbox: mean of the 5 best-scoring candidates.
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(model, image, samples,
                                        out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0

        # Expand search area at failure
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        # Bbox regression
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox

        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]
                del pos_examples_all[0]
                del pos_imgids_all[0]

            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            pos_examples_all.append(pos_examples)
            neg_examples_all.append(neg_examples)
            pos_imgids_all.append(np.array([[i]] * pos_feats.size(0)))
            neg_imgids_all.append(np.array([[i]] * neg_feats.size(0)))
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]
                del neg_examples_all[0]
                del neg_imgids_all[0]

        # Short term update (on failure): recent positive window only.
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.stack(pos_feats_all[-nframes:],
                                   0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)
            pos_examples_data = torch.from_numpy(
                np.stack(pos_examples_all[-nframes:], 0)).view(-1, 4).numpy()
            neg_examples_data = torch.from_numpy(np.stack(neg_examples_all,
                                                          0)).view(-1,
                                                                   4).numpy()
            pos_imgids_data = torch.from_numpy(
                np.stack(pos_imgids_all[-nframes:], 0)).view(-1, 1).numpy()
            neg_imgids_data = torch.from_numpy(np.stack(neg_imgids_all,
                                                        0)).view(-1,
                                                                 1).numpy()
            train(model, None, criterion, update_optimizer, pos_data,
                  neg_data, opts['maxiter_update'], pos_imgids_data,
                  pos_examples_data, neg_imgids_data, neg_examples_data,
                  img_list, GBP)
        # Long term update (every long_interval frames): full memory, with
        # the generator network engaged.
        elif i % opts['long_interval'] == 0:
            # FIX: these two calls previously used `t.stack`, but no `t`
            # alias exists in this module (everywhere else uses `torch.`),
            # which raised NameError on every long-term update.
            pos_data = torch.stack(pos_feats_all, 0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)
            pos_examples_data = torch.from_numpy(np.stack(pos_examples_all,
                                                          0)).view(-1,
                                                                   4).numpy()
            neg_examples_data = torch.from_numpy(np.stack(neg_examples_all,
                                                          0)).view(-1,
                                                                   4).numpy()
            pos_imgids_data = torch.from_numpy(np.stack(pos_imgids_all,
                                                        0)).view(-1,
                                                                 1).numpy()
            neg_imgids_data = torch.from_numpy(np.stack(neg_imgids_all,
                                                        0)).view(-1,
                                                                 1).numpy()
            # train(model, model_g, criterion, update_optimizer, pos_data, neg_data, opts['maxiter_update'],
            #       pos_imgids_data, pos_examples_data, neg_imgids_data, neg_examples_data, img_list, GBP)
            train(model, model_g, criterion, update_optimizer, pos_data,
                  neg_data, opts['maxiter_update'], None, None, None, None,
                  img_list, GBP)

        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)
            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])
            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])
            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir,
                                         '{:04d}.jpg'.format(i)),
                            dpi=dpi)

        if gt is None:
            print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'.format(
                i + 1, len(img_list), target_score, spf))
        else:
            overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
            print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'.
                  format(i + 1, len(img_list), overlap[i], target_score, spf))

    if gt is not None:
        print('meanIOU: {:.3f}'.format(overlap.mean()))
    fps = len(img_list) / spf_total
    return result, result_bb, fps
def init(self, image, init_bbox):
    """First-frame initialization.

    Trains the bounding-box regressor, fine-tunes the classifier on
    positive/negative samples drawn around ``init_bbox`` (x, y, w, h),
    trains the actor network, and seeds the bookkeeping lists used by the
    later online updates.
    """
    # Aspect ratio (w / h) of the initial box, reused when moving the crop.
    self.rate = init_bbox[2] / init_bbox[3]
    self.target_bbox = np.array(init_bbox)
    self.init_bbox = np.array(init_bbox)
    # Frame-0 result is the ground-truth box itself.
    self.result.append(self.target_bbox)
    self.result_bb.append(self.target_bbox)
    image = np.asarray(image)

    # Init model
    # Train the bounding-box regressor on samples around the target.
    bbreg_examples = gen_samples(
        SampleGenerator('uniform', image.shape, 0.3, 1.5, 1.1),
        self.target_bbox, opts['n_bbreg'], opts['overlap_bbreg'],
        opts['scale_bbreg'])
    bbreg_feats = forward_samples(self.model, image, bbreg_examples)
    # NOTE(review): `image` is an ndarray at this point, so `image.size` is
    # the total element count, not the PIL (width, height) pair — confirm
    # what BBRegressor expects.
    self.bbreg = BBRegressor(image.size)
    self.bbreg.train(bbreg_feats, bbreg_examples, self.target_bbox)

    # Positive / negative samples for the initial classifier fine-tuning:
    # positives close to the target, negatives both nearby and whole-image.
    pos_examples = gen_samples(
        SampleGenerator('gaussian', image.shape, 0.1, 1.2),
        self.target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])
    neg_examples = np.concatenate([
        gen_samples(SampleGenerator('uniform', image.shape, 1, 2, 1.1),
                    self.target_bbox, opts['n_neg_init'] // 2,
                    opts['overlap_neg_init']),
        gen_samples(SampleGenerator('whole', image.shape, 0, 1.2, 1.1),
                    self.target_bbox, opts['n_neg_init'] // 2,
                    opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)
    pos_feats = forward_samples(self.model, image, pos_examples)
    neg_feats = forward_samples(self.model, image, neg_examples)
    train(self.model, self.criterion, self.init_optimizer, pos_feats,
          neg_feats, opts['maxiter_init'])

    # Train the actor on the first frame; the returned flag marks whether
    # the actor converged (it disables the scale channel later if not).
    # NOTE(review): init_actor calls image.size / image.resize as if `image`
    # were a PIL image, but an ndarray is passed here — verify upstream.
    self.deta_flag = init_actor(self.actor, image, self.target_bbox)

    # Samplers used during tracking: init_generator for blurred frames,
    # sample_generator for normal redetection, pos/neg generators for the
    # online-update training sets.
    self.init_generator = SampleGenerator('gaussian', image.shape,
                                          opts['trans_f'], 1, valid=False)
    self.sample_generator = SampleGenerator('gaussian', image.shape,
                                            opts['trans_f'],
                                            opts['scale_f'], valid=False)
    self.pos_generator = SampleGenerator('gaussian', image.shape, 0.1, 1.2)
    self.neg_generator = SampleGenerator('uniform', image.shape, 1.5, 1.2)

    # Feature memory for the online updates (truncated to the update sizes).
    self.pos_feats_all = [pos_feats[:opts['n_pos_update']]]
    self.neg_feats_all = [neg_feats[:opts['n_neg_update']]]

    # Classifier score of the ground-truth box on the freshly trained model.
    pos_score = forward_samples(self.model, image,
                                np.array(init_bbox).reshape([1, 4]),
                                out_layer='fc6')
    # Sliding learning window: frames, boxes, scores and frame indices.
    self.img_learn = [image]
    self.pos_learn = [init_bbox]
    self.score_pos = [pos_score.cpu().numpy()[0][1]]
    self.frame_learn = [0]
    self.pf_frame = []
    # Sharpness (variance of the Laplacian) of the first-frame crop; used
    # later to decide whether a blur check is meaningful at all.
    self.imageVar_first = cv2.Laplacian(
        crop_image_blur(np.array(image), self.target_bbox),
        cv2.CV_64F).var()
class ACTTracker(Tracker):
    """'Actor-Critic' tracker (ACT).

    An actor network predicts the inter-frame motion of the target from a
    cropped patch; an MDNet-style classifier (the "critic") verifies the
    prediction. When verification fails, the tracker falls back to sampled
    redetection with online fine-tuning of the classifier.
    """

    def __init__(self, net_path=None):
        # BUG FIX: the original called super().init(...), which is the
        # got10k *tracking-init* hook, not the base-class constructor.
        super().__init__(name='ACTTracker', is_deterministic=True)
        # Fixed seeds keep runs deterministic (matches is_deterministic).
        np.random.seed(123)
        torch.manual_seed(456)
        torch.cuda.manual_seed(789)
        # net_path is currently unused; kept for interface compatibility.
        self.model = MDNet()        # verification/classification network
        self.actor = Actor()        # motion-prediction network
        self.result = []            # per-frame raw bboxes
        self.result_bb = []         # per-frame bbox-regressed bboxes
        # Tracking-state flag carried across frames.
        # BUG FIX: update() previously read an unbound local `success`;
        # this attribute is the persistent state it was meant to use.
        self.success = 1
        if opts['use_gpu']:
            self.model = self.model.cuda()
            self.actor = self.actor.cuda()
        self.model.set_learnable_params(opts['ft_layers'])
        self.criterion = BinaryLoss()
        self.init_optimizer = set_optimizer(self.model, opts['lr_init'])
        self.update_optimizer = set_optimizer(self.model, opts['lr_update'])
        self.detetion = 0           # redetection counter (name kept as-is)
        self.frame = 0              # current frame index

    def init(self, image, init_bbox):
        """First-frame initialization: train the bbox regressor, the
        classifier and the actor around ``init_bbox`` (x, y, w, h)."""
        # Aspect ratio (w / h) of the initial box, reused by move_crop.
        self.rate = init_bbox[2] / init_bbox[3]
        self.target_bbox = np.array(init_bbox)
        self.init_bbox = np.array(init_bbox)
        self.result.append(self.target_bbox)
        self.result_bb.append(self.target_bbox)
        image = np.asarray(image)

        # Train the bounding-box regressor.
        bbreg_examples = gen_samples(
            SampleGenerator('uniform', image.shape, 0.3, 1.5, 1.1),
            self.target_bbox, opts['n_bbreg'], opts['overlap_bbreg'],
            opts['scale_bbreg'])
        bbreg_feats = forward_samples(self.model, image, bbreg_examples)
        # NOTE(review): `image` is an ndarray here, so image.size is the
        # element count, not the PIL (w, h) pair — confirm BBRegressor input.
        self.bbreg = BBRegressor(image.size)
        self.bbreg.train(bbreg_feats, bbreg_examples, self.target_bbox)

        # Initial positive / negative training sets for the classifier.
        pos_examples = gen_samples(
            SampleGenerator('gaussian', image.shape, 0.1, 1.2),
            self.target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])
        neg_examples = np.concatenate([
            gen_samples(SampleGenerator('uniform', image.shape, 1, 2, 1.1),
                        self.target_bbox, opts['n_neg_init'] // 2,
                        opts['overlap_neg_init']),
            gen_samples(SampleGenerator('whole', image.shape, 0, 1.2, 1.1),
                        self.target_bbox, opts['n_neg_init'] // 2,
                        opts['overlap_neg_init'])
        ])
        neg_examples = np.random.permutation(neg_examples)
        pos_feats = forward_samples(self.model, image, pos_examples)
        neg_feats = forward_samples(self.model, image, neg_examples)
        train(self.model, self.criterion, self.init_optimizer, pos_feats,
              neg_feats, opts['maxiter_init'])

        # Train the actor on the first frame; deta_flag disables the scale
        # channel later if the actor did not converge.
        self.deta_flag = init_actor(self.actor, image, self.target_bbox)

        # Samplers used by update().
        self.init_generator = SampleGenerator('gaussian', image.shape,
                                              opts['trans_f'], 1,
                                              valid=False)
        self.sample_generator = SampleGenerator('gaussian', image.shape,
                                                opts['trans_f'],
                                                opts['scale_f'],
                                                valid=False)
        self.pos_generator = SampleGenerator('gaussian', image.shape,
                                             0.1, 1.2)
        self.neg_generator = SampleGenerator('uniform', image.shape,
                                             1.5, 1.2)

        # Feature memory for online updates.
        self.pos_feats_all = [pos_feats[:opts['n_pos_update']]]
        self.neg_feats_all = [neg_feats[:opts['n_neg_update']]]

        # Classifier score of the ground-truth box.
        pos_score = forward_samples(self.model, image,
                                    np.array(init_bbox).reshape([1, 4]),
                                    out_layer='fc6')
        # Sliding learning window (frames, boxes, scores, frame indices).
        self.img_learn = [image]
        self.pos_learn = [init_bbox]
        self.score_pos = [pos_score.cpu().numpy()[0][1]]
        self.frame_learn = [0]
        # BUG FIX: update() reads self.data_frame but it was never
        # initialized. Frame 0's features are stored in pos_feats_all above,
        # so the list of already-learned frame indices starts as [0].
        self.data_frame = [0]
        self.pf_frame = []
        # Sharpness of the first-frame crop; blur checks are only meaningful
        # when the first frame itself was sharp.
        self.imageVar_first = cv2.Laplacian(
            crop_image_blur(np.array(image), self.target_bbox),
            cv2.CV_64F).var()

    def update(self, image):
        """Track the target into the next frame and return its bbox.

        The actor proposes a motion delta; if the classifier scores the
        moved box positively (and the frame is not blurred) the proposal is
        accepted directly. Otherwise the tracker re-detects from sampled
        candidates, optionally fine-tuning the classifier on the recent
        learning window first.
        """
        self.frame += 1
        update_lenth = 10  # half-size of the sliding learning window
        np_image = np.array(image)

        # Blur measure (variance of Laplacian); skip if frame 1 was blurry.
        if self.imageVar_first > 200:
            imageVar = cv2.Laplacian(
                crop_image_blur(np_image, self.target_bbox),
                cv2.CV_64F).var()
        else:
            imageVar = 200

        # Actor proposes a motion delta for the previous bbox.
        img_l = getbatch_actor(np_image, self.target_bbox.reshape([1, 4]))
        torch_image = loader(image.resize(
            (225, 225), Image.ANTIALIAS)).unsqueeze(0).cuda()
        deta_pos = self.actor(img_l, torch_image)
        deta_pos = deta_pos.data.clone().cpu().numpy()
        # Suppress the scale component when the actor is unreliable, the
        # predicted scale change is implausible, or we just redetected.
        if self.deta_flag:
            deta_pos[:, 2] = 0
        if deta_pos[:, 2] > 0.05 or deta_pos[:, 2] < -0.05:
            deta_pos[:, 2] = 0
        if len(self.pf_frame) and self.frame == (self.pf_frame[-1] + 1):
            deta_pos[:, 2] = 0

        pos_ = np.round(
            move_crop(self.target_bbox, deta_pos,
                      (image.size[1], image.size[0]), self.rate))
        # Classifier verifies the actor's proposal.
        r = forward_samples(self.model, image,
                            np.array(pos_).reshape([1, 4]), out_layer='fc6')
        r = r.cpu().numpy()

        if r[0][1] > 0 and imageVar > 100:
            # --- Actor prediction accepted ---
            self.target_bbox = pos_
            target_score = r[0][1]
            bbreg_bbox = pos_
            # BUG FIX: was a bare local `success`; use the persistent flag.
            self.success = 1
            fin_score = r[0][1]
            self.img_learn.append(image)
            self.pos_learn.append(self.target_bbox)
            self.score_pos.append(fin_score)
            self.frame_learn.append(self.frame)
            while len(self.img_learn) > update_lenth * 2:
                del self.img_learn[0]
                del self.pos_learn[0]
                del self.score_pos[0]
                del self.frame_learn[0]
            # BUG FIX: self.result/result_bb are lists of length self.frame,
            # so indexing [self.frame] raised IndexError; append instead.
            self.result.append(self.target_bbox)
            self.result_bb.append(bbreg_bbox)
        else:
            # --- Redetection path ---
            self.detetion += 1
            if len(self.pf_frame) == 0:
                self.pf_frame = [self.frame]
            else:
                self.pf_frame.append(self.frame)

            # Fine-tune the classifier on the recent learning window when it
            # contains frames not yet learned from.
            if (len(self.frame_learn) == update_lenth * 2
                    and self.data_frame[-1] not in self.frame_learn) \
                    or self.data_frame[-1] == 0:
                for num in range(max(0, len(self.img_learn) - update_lenth),
                                 len(self.img_learn)):
                    if self.frame_learn[num] not in self.data_frame:
                        # New frame: extract and cache its features.
                        gt_ = self.pos_learn[num]
                        image_ = self.img_learn[num]
                        pos_examples = np.round(
                            gen_samples(self.pos_generator, gt_,
                                        opts['n_pos_update'],
                                        opts['overlap_pos_update']))
                        neg_examples = np.round(
                            gen_samples(self.neg_generator, gt_,
                                        opts['n_neg_update'],
                                        opts['overlap_neg_update']))
                        pos_feats_ = forward_samples(self.model, image_,
                                                     pos_examples)
                        neg_feats_ = forward_samples(self.model, image_,
                                                     neg_examples)
                        self.pos_feats_all.append(pos_feats_)
                        self.neg_feats_all.append(neg_feats_)
                        self.data_frame.append(self.frame_learn[num])
                        # Keep only the 10 most recent frames' features.
                        if len(self.pos_feats_all) > 10:
                            del self.pos_feats_all[0]
                            del self.neg_feats_all[0]
                            del self.data_frame[0]
                    else:
                        # Already cached: reuse the stored features.
                        pos_feats_ = self.pos_feats_all[
                            self.data_frame.index(self.frame_learn[num])]
                        neg_feats_ = self.neg_feats_all[
                            self.data_frame.index(self.frame_learn[num])]
                    if num == max(0, len(self.img_learn) - update_lenth):
                        pos_feats = pos_feats_
                        neg_feats = neg_feats_
                    else:
                        pos_feats = torch.cat([pos_feats, pos_feats_], 0)
                        neg_feats = torch.cat([neg_feats, neg_feats_], 0)
                train(self.model, self.criterion, self.update_optimizer,
                      pos_feats, neg_feats, opts['maxiter_update'])

            # Sample candidates; widen the search after a failure.
            # BUG FIX: `success` was an unbound local; use self.success.
            if self.success:
                self.sample_generator.set_trans_f(opts['trans_f'])
            else:
                self.sample_generator.set_trans_f(opts['trans_f_expand'])
            if imageVar < 100:
                samples = gen_samples(self.init_generator, self.target_bbox,
                                      opts['n_samples'])
            else:
                samples = gen_samples(self.sample_generator,
                                      self.target_bbox, opts['n_samples'])
            # Early frames or drastic scale drift: add extra samples at the
            # initial scale around the current center.
            # BUG FIX: the frame index `i` was undefined in this method;
            # the tracker's frame counter is self.frame.
            if self.frame < 20 or (
                    (self.init_bbox[2] * self.init_bbox[3]) > 1000 and
                    (self.target_bbox[2] * self.target_bbox[3] /
                     (self.init_bbox[2] * self.init_bbox[3]) > 2.5 or
                     self.target_bbox[2] * self.target_bbox[3] /
                     (self.init_bbox[2] * self.init_bbox[3]) < 0.4)):
                self.sample_generator.set_trans_f(opts['trans_f_expand'])
                samples_ = np.round(
                    gen_samples(
                        self.sample_generator,
                        np.hstack([
                            self.target_bbox[0:2] +
                            self.target_bbox[2:4] / 2 -
                            self.init_bbox[2:4] / 2,
                            self.init_bbox[2:4]
                        ]), opts['n_samples']))
                samples = np.vstack([samples, samples_])

            # Score candidates; target is the mean of the top-5 boxes.
            sample_scores = forward_samples(self.model, image, samples,
                                            out_layer='fc6')
            top_scores, top_idx = sample_scores[:, 1].topk(5)
            top_idx = top_idx.cpu().numpy()
            target_score = top_scores.mean()
            self.target_bbox = samples[top_idx].mean(axis=0)
            self.success = target_score > opts['success_thr']

            # Bbox regression on success; also grow the learning window.
            if self.success:
                bbreg_samples = samples[top_idx]
                bbreg_feats = forward_samples(self.model, image,
                                              bbreg_samples)
                bbreg_samples = self.bbreg.predict(bbreg_feats,
                                                   bbreg_samples)
                bbreg_bbox = bbreg_samples.mean(axis=0)
                self.img_learn.append(image)
                self.pos_learn.append(self.target_bbox)
                # BUG FIX: was self.target_score (never defined); the score
                # just computed above is the local target_score.
                self.score_pos.append(target_score)
                # BUG FIX: `i` was undefined; use self.frame.
                self.frame_learn.append(self.frame)
                while len(self.img_learn) > 2 * update_lenth:
                    del self.img_learn[0]
                    del self.pos_learn[0]
                    del self.score_pos[0]
                    del self.frame_learn[0]
            else:
                bbreg_bbox = self.target_bbox

            # Copy previous result at failure.
            if not self.success:
                # BUG FIX: was assigned to a local `target_bbox`, which the
                # stored result and return value never saw.
                self.target_bbox = self.result[self.frame - 1]
                bbreg_bbox = self.result_bb[self.frame - 1]

            # Save result (append — lists, see BUG FIX above).
            self.result.append(self.target_bbox)
            self.result_bb.append(bbreg_bbox)

        return self.target_bbox