def gen_samples(generator, bbox, n, img_size, overlap_range=None, scale_range=None):
    """Draw up to ``n`` samples from ``generator``, optionally filtered.

    With neither range given, this is a plain ``generator(bbox, n, img_size)``
    call. Otherwise candidates are drawn in rounds, oversampling the number
    still missing by a factor that doubles each round (2, 4, 8, 16), and only
    candidates that satisfy every given range are kept.

    Args:
        generator: callable ``generator(bbox, count, img_size)`` returning a 2-D
            array of candidates whose columns from index 2 onward are sizes.
        bbox: reference box; ``bbox[2:]`` is used as its size.
        n: number of samples requested.
        img_size: forwarded unchanged to ``generator``.
        overlap_range: optional ``(low, high)`` inclusive bounds on
            ``overlap_ratio(candidates, bbox)``.
        scale_range: optional ``(low, high)`` inclusive bounds on the ratio of
            candidate area to reference area.

    Returns:
        The accepted samples. When filtering is active this may hold fewer
        than ``n`` rows if the oversampling cap is reached, and is ``None``
        when ``n <= 0`` (no round is ever run).
    """
    # Fast path: nothing to filter on.
    if overlap_range is None and scale_range is None:
        return generator(bbox, n, img_size)

    collected = None
    missing = n
    oversample = 2
    # The oversampling factor is capped below 32 (it used to be 16).
    while missing > 0 and oversample < 32:
        candidates = generator(bbox, missing * oversample, img_size)
        keep = np.ones(len(candidates), dtype=bool)

        if overlap_range is not None:
            overlaps = overlap_ratio(candidates, bbox)
            keep &= (overlaps >= overlap_range[0]) & (overlaps <= overlap_range[1])

        if scale_range is not None:
            rel_area = np.prod(candidates[:, 2:], axis=1) / np.prod(bbox[2:])
            keep &= (rel_area >= scale_range[0]) & (rel_area <= scale_range[1])

        # Keep the survivors, but never more than are still missing.
        accepted = candidates[keep, :][:missing]

        if collected is None:
            collected = accepted
        else:
            collected = np.concatenate([collected, accepted])

        missing = n - len(collected)
        oversample *= 2

    return collected
def train(self, X, bbox, gt):
    """Fit ``self.model`` on the samples that lie within the configured ranges.

    Args:
        X: tensor of per-sample features; moved to CPU and converted to NumPy.
        bbox: 2-D array of sample boxes, one row per row of ``X``; columns
            from index 2 onward are treated as sizes.
        gt: ground-truth box, either 1-D or with a leading axis of length 1+;
            only row 0 is used for the area ratio.

    Samples are kept when their ``overlap_ratio`` with ``gt`` lies within
    ``self.overlap_range`` and their area relative to ``gt`` lies within
    ``self.scale_range`` (both bounds inclusive). Regression targets come
    from ``self.get_examples``.
    """
    feats = X.cpu().numpy()
    boxes = np.copy(bbox)
    target = np.copy(gt)
    # Promote a single ground-truth box to a one-row 2-D array.
    if target.ndim == 1:
        target = target[None, :]

    overlaps = overlap_ratio(boxes, target)
    rel_area = np.prod(boxes[:, 2:], axis=1) / np.prod(target[0, 2:])

    overlap_ok = (overlaps >= self.overlap_range[0]) & (overlaps <= self.overlap_range[1])
    scale_ok = (rel_area >= self.scale_range[0]) & (rel_area <= self.scale_range[1])
    keep = overlap_ok & scale_ok

    feats = feats[keep]
    boxes = boxes[keep]

    targets = self.get_examples(boxes, target)
    self.model.fit(feats, targets)
def predict(self, X, bbox):
    """Refine boxes with the fitted regressor and clip them to the image.

    Args:
        X: tensor of per-box features; moved to CPU and converted to NumPy
            before being passed to ``self.model.predict``.
        bbox: 2-D array of boxes, one row per row of ``X``. Columns 0-1 are
            used as the box origin and columns 2-3 as its size. It is not
            modified.

    Returns:
        A new array, same shape as ``bbox``, holding the refined boxes.
        Rows whose refinement falls outside ``self.overlap_range`` or
        ``self.scale_range`` (measured against the input box) are reset to
        the input box. Origins are clamped to be non-negative and sizes
        are clamped so the box does not extend past ``self.img_size``.
    """
    X = X.cpu().numpy()
    bbox_ = np.copy(bbox)

    Y = self.model.predict(X)

    # Work on box centres: origin -> centre.
    bbox_[:, :2] = bbox_[:, :2] + bbox_[:, 2:] / 2
    # Predicted centre offsets are expressed in units of the box size.
    bbox_[:, :2] = Y[:, :2] * bbox_[:, 2:] + bbox_[:, :2]
    # Predicted size changes are log-scale factors.
    bbox_[:, 2:] = np.exp(Y[:, 2:]) * bbox_[:, 2:]
    # Back from centre to origin, using the refined size.
    bbox_[:, :2] = bbox_[:, :2] - bbox_[:, 2:] / 2

    # Reject refinements that drift too far from the input box.
    r = overlap_ratio(bbox, bbox_)
    s = np.prod(bbox[:, 2:], axis=1) / np.prod(bbox_[:, 2:], axis=1)
    accepted = (
        (r >= self.overlap_range[0]) & (r <= self.overlap_range[1])
        & (s >= self.scale_range[0]) & (s <= self.scale_range[1])
    )
    rejected = np.logical_not(accepted)
    bbox_[rejected] = bbox[rejected]

    # Clip to the image. The size limit must be measured from the refined
    # (and already clamped) origin; measuring it from the input origin lets
    # a box whose origin moved still cross the image border.
    bbox_[:, :2] = np.maximum(bbox_[:, :2], 0)
    bbox_[:, 2:] = np.minimum(bbox_[:, 2:], self.img_size - bbox_[:, :2])
    return bbox_
pre.append(1) if frame == (num_frames - 1): #last frame gt.append(0) else: gt.append(1) if reset: gt_Polygon = ground_truth[frame] if gt_Polygon[2] * gt_Polygon[3] != 0: tracker0.init(im, tuple(gt_Polygon)) template_gt = np.concatenate( (template_gt, tracker0.model.zf.cpu().data.numpy())) else: template_gt = np.concatenate( (template_gt, np.zeros([1, 256, 7, 7]))) iou = overlap_ratio(np.array(gt_Polygon), np.array(outputs['bbox'])) if iou <= 0: break else: template_gt = np.concatenate( (template_gt, np.zeros([1, 256, 7, 7]))) template_acc = np.concatenate( (template_acc, np.zeros([1, 256, 7, 7]))) template_cur = np.concatenate( (template_cur, np.zeros([1, 256, 7, 7]))) init0.append(0) init.append(frame) pre.append(1) if frame == (num_frames - 1): #last frame gt.append(0) else:
if template_acc is None: template_acc = tracker.model.zf.cpu().data.numpy() else: template_acc = np.concatenate( (template_acc, tracker.model.zf.cpu().data.numpy())) if template_cur is None: template_cur = tracker.model.zf.cpu().data.numpy() else: template_cur = np.concatenate( (template_cur, tracker.model.zf.cpu().data.numpy())) init.append(num) init0.append(num_reset) pre.append(0) gt.append(1) # ---------------- elif overlap_ratio(np.array(init_rect), np.array(img_rect)) > 0.05: num_reset += 1 # execute tracker outputs = tracker.track(img) # ---------------- if template_acc is None: template_acc = tracker.model.zf.cpu().data.numpy() else: template_acc = np.concatenate( (template_acc, tracker.model.zf.cpu().data.numpy())) if template_cur is None: template_cur = tracker.model.zf.cpu().data.numpy() else: template_cur = np.concatenate( (template_cur, outputs['zf_cur'].cpu().data.numpy())) init.append(num)
gt_init = gt[0] x, y, w, h = gt_init[0], gt_init[1], gt_init[2], gt_init[3] cx, cy = x + w // 2, y + h // 2 # tracker init target_pos, target_sz = np.array([cx, cy]), np.array([w, h]) image_file = os.path.join(img_dir, img_list[0]) im = cv2.imread(image_file) # HxWxC tic = time.time() state = SiamRPN_init(im, target_pos, target_sz, net, gt[0]) # init tracker toc += time.time() - tic # tracking for i in range(len(img_list)): image_file = os.path.join(img_dir, img_list[i]) if not image_file: break im = cv2.imread(image_file) # HxWxC tic = time.time() state = SiamRPN_track(state, im) # track toc += time.time() - tic res = cxy_wh_2_rect(state['target_pos'], state['target_sz']) rect_list.append(res.tolist()) overlap.append(overlap_ratio(res, gt[i])) # store results print("fps: {}".format(len(gt) / toc)) res_dict[seqname] = rect_list res_dict[seqname + "time"] = [toc, len(gt), len(gt) / toc] json.dump(res_dict, open("../results/DaSiamRPNAT.json", 'w'), indent=2)
def main(images, init_bbox, ground_truths, opts):
    """Track a target through ``images`` with online-updated MDNet, as a generator.

    The model is loaded from ``opts.model_path``, fine-tuned on the first
    image around ``init_bbox``, and a bounding-box regressor is trained on
    that same image. Every following image is then processed in order and
    one result tuple is yielded per image.

    Args:
        images: sequence whose items are passed to ``Image.open``; item 0 is
            the initialisation frame.
        init_bbox: target box in the first image (presumably
            ``[x, y, w, h]`` -- confirm against ``SampleGenerator``).
        ground_truths: indexable by frame index; ``ground_truths[i]`` is
            compared against the regressed box of frame ``i``.
        opts: options object read by attribute (``use_gpu``, ``model_path``,
            ``ft_layers``, learning rates, sampling parameters, ...).

    Yields:
        ``(init_bbox, bbreg_bbox, overlap, target_score)`` for each frame
        after the first, where ``init_bbox`` is the current raw estimate
        (mean of the top-5 samples), ``bbreg_bbox`` is the regressed
        estimate (equal to the raw one on failure), ``overlap`` is
        ``overlap_ratio(ground_truths[i], bbreg_bbox)[0]`` and
        ``target_score`` is the mean of the top-5 ``fc6`` scores.
    """
    device = ('cuda' if opts.use_gpu else 'cpu')
    model = MDNet(opts.model_path).to(device)
    criterion = BCELoss()

    # Set learnable parameters: only layers whose name starts with one of
    # opts.ft_layers are fine-tuned.
    for k, p in model.params.items():
        p.requires_grad = any([k.startswith(l) for l in opts.ft_layers])

    # Set optimizer states
    def set_optimizer(lr_base, lr_mult, momentum=0.9, w_decay=0.0005):
        # Build one parameter group per trainable parameter so that each can
        # carry its own learning rate (lr_base, scaled by lr_mult[prefix]
        # when the parameter name starts with that prefix).
        param_list = []
        for k, p in filter(lambda kp: kp[1].requires_grad, model.params.items()):
            lr = lr_base
            for l, m in lr_mult.items():
                if k.startswith(l):
                    lr = lr_base * m
            param_list.append({'params': [p], 'lr': lr})
        # NOTE(review): the default `lr` passed here is whatever the loop
        # assigned last; every group sets its own 'lr', so it is presumably
        # never used -- confirm, and consider passing lr_base instead.
        return optim.SGD(param_list, lr=lr, momentum=momentum, weight_decay=w_decay)

    init_optimizer = set_optimizer(opts.lr_init, opts.lr_mult)
    update_optimizer = set_optimizer(opts.lr_update, opts.lr_mult)

    # Load first image
    image = Image.open(images[0]).convert('RGB')

    # Draw pos/neg samples
    pos_examples = SampleGenerator('gaussian', image.size, opts.trans_pos, opts.scale_pos)(
        init_bbox, opts.n_pos_init, opts.overlap_pos_init)
    # Negatives: half from the 'uniform' generator, half from the 'whole'
    # generator, then shuffled together.
    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts.trans_neg_init, opts.scale_neg_init)(
            init_bbox, int(opts.n_neg_init * 0.5), opts.overlap_neg_init),
        SampleGenerator('whole', image.size)(
            init_bbox, int(opts.n_neg_init * 0.5), opts.overlap_neg_init)])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features
    pos_feats = forward_samples(model, image, pos_examples, opts)
    neg_feats = forward_samples(model, image, neg_examples, opts)

    # Initial training
    train(model, criterion, init_optimizer, pos_feats, neg_feats, opts.maxiter_init, opts)
    # The initial optimizer and negatives are not needed again; drop them
    # before asking CUDA to release cached memory.
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()

    # Train bbox Regressor
    bbreg_examples = SampleGenerator('uniform', image.size, opts.trans_bbreg, opts.scale_bbreg, opts.aspect_bbreg)\
        (init_bbox, opts.n_bbreg, opts.overlap_bbreg)
    bbreg_feats = forward_samples(model, image, bbreg_examples, opts)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, init_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update
    sample_generator = SampleGenerator('gaussian', image.size, opts.trans, opts.scale)
    pos_generator = SampleGenerator('gaussian', image.size, opts.trans_pos, opts.scale_pos)
    neg_generator = SampleGenerator('uniform', image.size, opts.trans_neg, opts.scale_neg)

    # Init pos/neg features for update
    neg_examples = neg_generator(init_bbox, opts.n_neg_update, opts.overlap_neg_init)
    neg_feats = forward_samples(model, image, neg_examples, opts)
    # Per-frame feature histories used by the online updates below.
    pos_feats_all = [pos_feats]
    neg_feats_all = [neg_feats]

    # Main loop (frame indices start at 1 so they line up with ground_truths)
    for i, image in enumerate(images[1:], 1):
        image = Image.open(image).convert('RGB')

        # Estimate target bbox: score candidates around the previous
        # estimate and average the five best.
        samples = sample_generator(init_bbox, opts.n_samples)
        sample_scores = forward_samples(model, image, samples, opts, out_layer='fc6')

        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        # From here on `init_bbox` holds the running target estimate, not
        # the first-frame box.
        init_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            init_bbox = init_bbox.mean(axis=0)
        success = target_score > 0

        # Expand search area at failure: grow `trans` by 10% (capped at
        # opts.trans_limit); reset it to opts.trans on success.
        sample_generator.trans = opts.trans if success else min(sample_generator.trans * 1.1, opts.trans_limit)

        # Bbox regression
        if success:
            bbreg_samples = samples[top_idx]
            # Keep a 2-D shape when only one sample was selected.
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples, opts)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = init_bbox

        yield init_bbox, bbreg_bbox, overlap_ratio(ground_truths[i], bbreg_bbox)[0], target_score

        # Data collect: on success, add this frame's features to the
        # histories, dropping the oldest entry once a history exceeds its
        # window (n_frames_long for positives, n_frames_short for negatives).
        # NOTE(review): nesting reconstructed from a whitespace-mangled
        # source -- confirm that negatives are collected only on success.
        if success:
            pos_examples = pos_generator(init_bbox, opts.n_pos_update, opts.overlap_pos_update)
            pos_feats = forward_samples(model, image, pos_examples, opts)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts.n_frames_long:
                del pos_feats_all[0]

            neg_examples = neg_generator(init_bbox, opts.n_neg_update, opts.overlap_neg_update)
            neg_feats = forward_samples(model, image, neg_examples, opts)
            neg_feats_all.append(neg_feats)
            if len(neg_feats_all) > opts.n_frames_short:
                del neg_feats_all[0]

        # Short term update: on failure, retrain on the most recent
        # positives only, together with all stored negatives.
        # TODO: What if the short term update is disabled?
        if not success:
            nframes = min(opts.n_frames_short, len(pos_feats_all))
            pos_data = torch.cat(pos_feats_all[-nframes:], 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data, opts.maxiter_update, opts)

        # Long term update: every opts.long_interval frames, retrain on the
        # whole positive history.
        # TODO: What if the long term update is disabled?
        elif i % opts.long_interval == 0:
            pos_data = torch.cat(pos_feats_all, 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data, opts.maxiter_update, opts)

        torch.cuda.empty_cache()