def init_classifier(self, init_backbone_feat_rgb, init_backbone_feat_d):
    # Get classification features
    x_rgb, x_d = self.get_classification_features(init_backbone_feat_rgb, init_backbone_feat_d)

    # Overwrite some parameters in the classifier. (These are not generally changed)
    self._overwrite_classifier_params(feature_dim=x_rgb.shape[-3])

    # Add the dropout augmentation here, since it requires extraction of the classification features
    if 'dropout' in self.params.augmentation and self.params.get('use_augmentation', True):
        num, prob = self.params.augmentation['dropout']
        self.transforms.extend(self.transforms[:1] * num)
        x_rgb = torch.cat([x_rgb, F.dropout2d(x_rgb[0:1, ...].expand(num, -1, -1, -1), p=prob, training=True)])
        x_d = torch.cat([x_d, F.dropout2d(x_d[0:1, ...].expand(num, -1, -1, -1), p=prob, training=True)])

    # Set feature size and other related sizes
    self.feature_sz = torch.Tensor(list(x_rgb.shape[-2:]))
    ksz = self.net_rgb.classifier.filter_size
    self.kernel_size = torch.Tensor([ksz, ksz] if isinstance(ksz, (int, float)) else ksz)
    self.output_sz = self.feature_sz + (self.kernel_size + 1) % 2

    # Construct output window
    self.output_window = None
    if self.params.get('window_output', False):
        if self.params.get('use_clipped_window', False):
            self.output_window = dcf.hann2d_clipped(
                self.output_sz.long(),
                (self.output_sz * self.params.effective_search_area / self.params.search_area_scale).long(),
                centered=True).to(self.params.device)
        else:
            self.output_window = dcf.hann2d(self.output_sz.long(), centered=True).to(self.params.device)
        self.output_window = self.output_window.squeeze(0)

    # Get target boxes for the different augmentations
    target_boxes = self.init_target_boxes()

    # Set number of iterations
    plot_loss = self.params.debug > 0
    num_iter = self.params.get('net_opt_iter', None)

    # Get target filter by running the discriminative model prediction module
    with torch.no_grad():
        self.target_filter_rgb, _, losses_rgb = self.net_rgb.classifier.get_filter(
            x_rgb, target_boxes, num_iter=num_iter, compute_losses=plot_loss)
        self.target_filter_d, _, losses_d = self.net_d.classifier.get_filter(
            x_d, target_boxes, num_iter=num_iter, compute_losses=plot_loss)

    # Init memory
    if self.params.get('update_classifier', True):
        self.init_memory(TensorList([x_rgb]), TensorList([x_d]))

    if plot_loss:
        if isinstance(losses_rgb, dict):
            losses_rgb = losses_rgb['train']
            losses_d = losses_d['train']
        self.losses_rgb = torch.cat(losses_rgb)
        self.losses_d = torch.cat(losses_d)
        if self.visdom is not None:
            self.visdom.register((self.losses_rgb, torch.arange(self.losses_rgb.numel())),
                                 'lineplot', 3, 'Training Loss_RGB' + self.id_str)
            self.visdom.register((self.losses_d, torch.arange(self.losses_d.numel())),
                                 'lineplot', 3, 'Training Loss_D' + self.id_str)
        elif self.params.debug >= 3:
            plot_graph(self.losses_rgb, 10, title='Training Loss_RGB' + self.id_str)
            plot_graph(self.losses_d, 10, title='Training Loss_D' + self.id_str)
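# --- Illustrative sketch: dropout augmentation on classification features ---
# A minimal, self-contained example (hypothetical tensors, not the tracker's
# own data) of the F.dropout2d augmentation used in init_classifier above: the
# first (untransformed) training sample is replicated `num` times, channel-wise
# dropout zeroes whole feature channels in each replica, and the replicas are
# concatenated back onto the batch as extra augmented samples.
import torch
import torch.nn.functional as F

x = torch.randn(1, 512, 18, 18)          # one feature map: (batch, channels, H, W)
num, prob = 2, 0.2                        # as given by params.augmentation['dropout']
aug = F.dropout2d(x[0:1].expand(num, -1, -1, -1), p=prob, training=True)
x = torch.cat([x, aug])                   # batch grows from 1 to 1 + num samples
assert x.shape[0] == 1 + num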
def init_classifier(self, init_backbone_feat):
    # Get classification features
    x = self.get_classification_features(init_backbone_feat)

    # Add the dropout augmentation here, since it requires extraction of the classification features
    if 'dropout' in self.params.augmentation and getattr(self.params, 'use_augmentation', True):
        num, prob = self.params.augmentation['dropout']
        self.transforms.extend(self.transforms[:1] * num)
        x = torch.cat([x, F.dropout2d(x[0:1, ...].expand(num, -1, -1, -1), p=prob, training=True)])

    # Set feature size and other related sizes
    self.feature_sz = torch.Tensor(list(x.shape[-2:]))
    ksz = self.net.classifier.filter_size
    self.kernel_size = torch.Tensor([ksz, ksz] if isinstance(ksz, (int, float)) else ksz)
    self.output_sz = self.feature_sz + (self.kernel_size + 1) % 2

    # Construct output window
    self.output_window = None
    if getattr(self.params, 'window_output', False):
        if getattr(self.params, 'use_clipped_window', False):
            self.output_window = dcf.hann2d_clipped(
                self.output_sz.long(),
                self.output_sz.long() * self.params.effective_search_area / self.params.search_area_scale,
                centered=False).to(self.params.device)
        else:
            self.output_window = dcf.hann2d(self.output_sz.long(), centered=True).to(self.params.device)
        self.output_window = self.output_window.squeeze(0)

    # Get target boxes for the different augmentations
    target_boxes = self.init_target_boxes()

    # Set number of iterations
    plot_loss = self.params.debug > 0
    num_iter = getattr(self.params, 'net_opt_iter', None)

    # Get target filter by running the discriminative model prediction module
    with torch.no_grad():
        self.target_filter, _, losses = self.net.classifier.get_filter(
            x, target_boxes, num_iter=num_iter, compute_losses=plot_loss)

    # Init memory
    if getattr(self.params, 'update_classifier', True):
        self.init_memory(TensorList([x]))

    if plot_loss:
        if isinstance(losses, dict):
            losses = losses['train']
        self.losses = torch.stack(losses)
        if self.visdom is not None:
            self.visdom.register((self.losses, torch.arange(self.losses.numel())),
                                 'lineplot', 3, 'Training Loss')
        elif self.params.debug >= 3:
            plot_graph(self.losses, 10, title='Training loss')
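# --- Illustrative sketch: the output window ---
# dcf.hann2d is assumed here to behave like the outer product of two 1-D Hann
# windows (the usual DCF formulation); this standalone version only shows the
# idea. Multiplying the score map by such a window suppresses detections far
# from the previous target position, which is why window_output is optional.
import math
import torch

def hann1d(n: int) -> torch.Tensor:
    # Periodic Hann window of length n
    return 0.5 * (1 - torch.cos(2 * math.pi * torch.arange(n) / n))

def hann2d_sketch(sz) -> torch.Tensor:
    h, w = int(sz[0]), int(sz[1])
    return hann1d(h).reshape(-1, 1) * hann1d(w).reshape(1, -1)

scores = torch.rand(19, 19)               # hypothetical score map
window = hann2d_sketch((19, 19))
windowed_scores = scores * window         # center-biased scores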
def setting_adaptive_search_region_using_speed(self, im):
    """Reinitialize the search region scale for the next frame."""
    self.atom.target_scale = 1.0
    search_area = torch.prod(self.atom.target_sz * self.atom.params.search_area_scale).item()
    if search_area > self.atom.params.max_image_sample_size:
        self.atom.target_scale = math.sqrt(search_area / self.atom.params.max_image_sample_size)
    elif search_area < self.atom.params.min_image_sample_size:
        self.atom.target_scale = math.sqrt(search_area / self.atom.params.min_image_sample_size)

    # Target size in base scale
    self.atom.base_target_sz = self.atom.target_sz / self.atom.target_scale

    # Use odd square search area and set sizes
    feat_max_stride = max(self.atom.params.features.stride())
    if getattr(self.atom.params, 'search_area_shape', 'square') == 'square':
        self.atom.img_sample_sz = torch.round(
            torch.sqrt(torch.prod(self.atom.base_target_sz * self.atom.params.search_area_scale))) * torch.ones(2)
    elif self.atom.params.search_area_shape == 'initrect':  # non-square search region
        self.atom.img_sample_sz = torch.round(self.atom.base_target_sz * self.atom.params.search_area_scale)
    else:
        raise ValueError('Unknown search area shape')

    if self.atom.params.feature_size_odd:
        self.atom.img_sample_sz += feat_max_stride - self.atom.img_sample_sz % (2 * feat_max_stride)
    else:
        self.atom.img_sample_sz += feat_max_stride - (self.atom.img_sample_sz + feat_max_stride) % (2 * feat_max_stride)

    # Set sizes
    self.atom.img_support_sz = self.atom.img_sample_sz
    self.atom.feature_sz = self.atom.params.features.size(self.atom.img_sample_sz)
    self.atom.output_sz = self.atom.params.score_upsample_factor * self.atom.img_support_sz  # Interpolated size of the output
    self.atom.iou_img_sample_sz = self.atom.img_sample_sz

    # Setup scale bounds
    im = numpy_to_torch(im)
    self.atom.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
    self.atom.min_scale_factor = torch.max(10 / self.atom.base_target_sz)
    self.atom.max_scale_factor = torch.min(self.atom.image_sz / self.atom.base_target_sz)

    self.atom.output_window = None
    if getattr(self.params, 'window_output', False):
        if getattr(self.params, 'use_clipped_window', False):
            self.atom.output_window = dcf.hann2d_clipped(
                self.atom.output_sz.long(),
                self.atom.output_sz.long() * self.params.effective_search_area / self.params.search_area_scale,
                centered=False).to(self.params.device)
        else:
            self.atom.output_window = dcf.hann2d(self.atom.output_sz.long(), centered=False).to(self.params.device)
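# --- Illustrative sketch: clamping the search-area scale ---
# Worked example (made-up numbers) of the target_scale computation above: the
# raw search area (target size times search_area_scale, in squared pixels) is
# clamped between min_image_sample_size and max_image_sample_size by rescaling
# with the square root of the ratio, so the resampled crop keeps a fixed size.
import math
import torch

target_sz = torch.Tensor([120., 80.])     # hypothetical target height/width
search_area_scale = 5
max_image_sample_size = 288 * 288
search_area = torch.prod(target_sz * search_area_scale).item()  # 600*400 = 240000
target_scale = 1.0
if search_area > max_image_sample_size:
    target_scale = math.sqrt(search_area / max_image_sample_size)  # ~1.70
base_target_sz = target_sz / target_scale  # target size inside the rescaled crop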
def initialize(self, image, info: dict) -> dict:
    state = info['init_bbox']

    # Initialize some stuff
    self.frame_num = 1
    if not hasattr(self.params, 'device'):
        self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

    # Initialize features
    self.initialize_features()

    # MetricNet: load and warm up the metric network
    self.metric_model = model_load(self.params.metric_model_path)
    with torch.no_grad():
        tmp = torch.rand(5, 3, 107, 107).type(torch.FloatTensor).cuda()
        tmp = self.metric_model(tmp)

    self.target_metric_feature = get_target_feature(self.metric_model, np.array(state), np.array(image))
    pos_generator = SampleGenerator('gaussian', np.array([image.shape[1], image.shape[0]]), 0.1, 1.3)
    gt_pos_examples = pos_generator(np.array(state).astype(int), 20, [0.7, 1])
    # Relax the IoU threshold until at least one positive example is sampled
    gt_iou = 0.7
    while gt_pos_examples.shape[0] == 0:
        gt_iou = gt_iou - 0.1
        gt_pos_examples = pos_generator(np.array(state).astype(int), 20, [gt_iou, 1])
    print('gt-iou:', gt_iou)
    with torch.no_grad():
        gt_pos_features0 = get_anchor_feature(self.metric_model, np.array(image), gt_pos_examples)
        gt_pos_features = gt_pos_features0.cpu().detach().numpy()
    # target_metric_feature = self.target_metric_feature.repeat(gt_pos_features.shape[0], 1)
    # pos_all = torch.norm(gt_pos_features0 - target_metric_feature, 2, dim=1).view(-1)
    # self.similar = pos_all.mean() * self.params.sim_rate
    # print('similarThresh', self.similar)
    self.clf = lof_fit(gt_pos_features, k=5)
    self.lof_thresh = 0
    self.target_features_all = []
    self.target_features_all.append(self.target_metric_feature)

    # Check if image is color
    self.params.features.set_is_color(image.shape[2] == 3)

    # Get feature specific params
    self.fparams = self.params.features.get_fparams('feature_params')

    tic = time.time()

    # Get position and size
    self.pos = torch.Tensor([state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
    self.target_sz = torch.Tensor([state[3], state[2]])

    # Set search area
    self.target_scale = 1.0
    search_area = torch.prod(self.target_sz * self.params.search_area_scale).item()
    if search_area > self.params.max_image_sample_size:
        self.target_scale = math.sqrt(search_area / self.params.max_image_sample_size)
    elif search_area < self.params.min_image_sample_size:
        self.target_scale = math.sqrt(search_area / self.params.min_image_sample_size)

    # Check if IoUNet is used
    self.use_iou_net = getattr(self.params, 'use_iou_net', True)

    # Target size in base scale
    self.base_target_sz = self.target_sz / self.target_scale

    # Use odd square search area and set sizes
    feat_max_stride = max(self.params.features.stride())
    if getattr(self.params, 'search_area_shape', 'square') == 'square':
        self.img_sample_sz = torch.round(
            torch.sqrt(torch.prod(self.base_target_sz * self.params.search_area_scale))) * torch.ones(2)
    elif self.params.search_area_shape == 'initrect':
        self.img_sample_sz = torch.round(self.base_target_sz * self.params.search_area_scale)
    else:
        raise ValueError('Unknown search area shape')

    if self.params.feature_size_odd:
        self.img_sample_sz += feat_max_stride - self.img_sample_sz % (2 * feat_max_stride)
    else:
        self.img_sample_sz += feat_max_stride - (self.img_sample_sz + feat_max_stride) % (2 * feat_max_stride)

    # Set sizes
    self.img_support_sz = self.img_sample_sz
    self.feature_sz = self.params.features.size(self.img_sample_sz)
    self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
    self.kernel_size = self.fparams.attribute('kernel_size')
    self.iou_img_sample_sz = self.img_sample_sz

    # Optimization options
    self.params.precond_learning_rate = self.fparams.attribute('learning_rate')
    if self.params.CG_forgetting_rate is None or max(self.params.precond_learning_rate) >= 1:
        self.params.direction_forget_factor = 0
    else:
        self.params.direction_forget_factor = (
            1 - max(self.params.precond_learning_rate)) ** self.params.CG_forgetting_rate

    self.output_window = None
    if getattr(self.params, 'window_output', False):
        if getattr(self.params, 'use_clipped_window', False):
            self.output_window = dcf.hann2d_clipped(
                self.output_sz.long(),
                self.output_sz.long() * self.params.effective_search_area / self.params.search_area_scale,
                centered=False).to(self.params.device)
        else:
            self.output_window = dcf.hann2d(self.output_sz.long(), centered=False).to(self.params.device)

    # Initialize some learning things
    self.init_learning()

    # Convert image
    im = numpy_to_torch(image)
    self.im = im  # For debugging only

    # Setup scale bounds
    self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
    self.min_scale_factor = torch.max(10 / self.base_target_sz)
    self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

    # Extract and transform sample
    x = self.generate_init_samples(im)

    # Initialize iounet
    if self.use_iou_net:
        self.init_iou_net()

    # Initialize projection matrix
    self.init_projection_matrix(x)

    # Transform to get the training sample
    train_x = self.preprocess_sample(x)

    # Generate label function
    init_y = self.init_label_function(train_x)

    # Init memory
    self.init_memory(train_x)

    # Init optimizer and do initial optimization
    self.init_optimization(train_x, init_y)

    self.pos_iounet = self.pos.clone()

    out = {'time': time.time() - tic}
    return out
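# --- Illustrative sketch: LOF-based sample verification ---
# lof_fit above is a project-specific helper; as a rough stand-in, this sketch
# uses scikit-learn's LocalOutlierFactor (novelty=True) fitted on the initial
# positive metric features, so later candidate features can be scored as
# inliers/outliers before being trusted for model updates. Feature shapes and
# the threshold here are hypothetical.
import numpy as np
from sklearn.neighbors import LocalOutlierFactor

gt_pos_features = np.random.rand(20, 1024)        # features of initial positives
clf = LocalOutlierFactor(n_neighbors=5, novelty=True)
clf.fit(gt_pos_features)

candidate = np.random.rand(1, 1024)               # feature of a tracked candidate
lof_score = -clf.score_samples(candidate)[0]      # larger => more outlier-like
is_reliable = lof_score < 1.5                     # hypothetical threshold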
def initialize(self, image, info: dict) -> dict:
    state = info['init_bbox']

    # Initialize some stuff
    self.frame_num = 1
    if not self.params.has('device'):
        self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

    # Initialize features
    self.initialize_features()

    # Check if image is color
    self.params.features.set_is_color(image.shape[2] == 3)

    # Get feature specific params
    self.fparams = self.params.features.get_fparams('feature_params')

    tic = time.time()

    # Get position and size
    self.pos = torch.Tensor([state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
    self.target_sz = torch.Tensor([state[3], state[2]])

    # Set search area
    self.target_scale = 1.0
    search_area = torch.prod(self.target_sz * self.params.search_area_scale).item()
    if search_area > self.params.max_image_sample_size:
        self.target_scale = math.sqrt(search_area / self.params.max_image_sample_size)
    elif search_area < self.params.min_image_sample_size:
        self.target_scale = math.sqrt(search_area / self.params.min_image_sample_size)

    # Check if IoUNet is used
    self.use_iou_net = self.params.get('use_iou_net', True)

    # Target size in base scale
    self.base_target_sz = self.target_sz / self.target_scale

    # Use odd square search area and set sizes
    feat_max_stride = max(self.params.features.stride())
    if self.params.get('search_area_shape', 'square') == 'square':
        self.img_sample_sz = torch.round(
            torch.sqrt(torch.prod(self.base_target_sz * self.params.search_area_scale))) * torch.ones(2)
    elif self.params.search_area_shape == 'initrect':
        self.img_sample_sz = torch.round(self.base_target_sz * self.params.search_area_scale)
    else:
        raise ValueError('Unknown search area shape')

    if self.params.feature_size_odd:
        self.img_sample_sz += feat_max_stride - self.img_sample_sz % (2 * feat_max_stride)
    else:
        self.img_sample_sz += feat_max_stride - (self.img_sample_sz + feat_max_stride) % (2 * feat_max_stride)

    # Set sizes
    self.img_support_sz = self.img_sample_sz
    self.feature_sz = self.params.features.size(self.img_sample_sz)
    self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
    self.kernel_size = self.fparams.attribute('kernel_size')
    self.iou_img_sample_sz = self.img_sample_sz

    # Optimization options
    self.params.precond_learning_rate = self.fparams.attribute('learning_rate')
    if self.params.CG_forgetting_rate is None or max(self.params.precond_learning_rate) >= 1:
        self.params.direction_forget_factor = 0
    else:
        self.params.direction_forget_factor = (
            1 - max(self.params.precond_learning_rate)) ** self.params.CG_forgetting_rate

    self.output_window = None
    if self.params.get('window_output', False):
        if self.params.get('use_clipped_window', False):
            self.output_window = dcf.hann2d_clipped(
                self.output_sz.long(),
                self.output_sz.long() * self.params.effective_search_area / self.params.search_area_scale,
                centered=False).to(self.params.device)
        else:
            self.output_window = dcf.hann2d(self.output_sz.long(), centered=False).to(self.params.device)

    # Initialize some learning things
    self.init_learning()

    # Convert image
    im = numpy_to_torch(image)
    self.im = im  # For debugging only

    # Setup scale bounds
    self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
    self.min_scale_factor = torch.max(10 / self.base_target_sz)
    self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

    # Extract and transform sample
    x = self.generate_init_samples(im)

    # Initialize iounet
    if self.use_iou_net:
        self.init_iou_net()

    # Initialize projection matrix
    self.init_projection_matrix(x)

    # Transform to get the training sample
    train_x = self.preprocess_sample(x)

    # Generate label function
    init_y = self.init_label_function(train_x)

    # Init memory
    self.init_memory(train_x)

    # Init optimizer and do initial optimization
    self.init_optimization(train_x, init_y)

    self.pos_iounet = self.pos.clone()

    out = {'time': time.time() - tic}
    return out
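# --- Illustrative sketch: the conjugate-gradient forgetting factor ---
# Worked example (made-up values) of the direction_forget_factor formula above:
# with learning rate lr and forgetting rate r it is (1 - lr) ** r, and it is
# forced to 0 when lr >= 1, since then nothing of the old search direction
# should survive between optimization rounds.
lr = 0.0075                                # hypothetical precond learning rate
CG_forgetting_rate = 75
direction_forget_factor = 0 if lr >= 1 else (1 - lr) ** CG_forgetting_rate
print(round(direction_forget_factor, 3))   # ~0.569: old directions decay slowly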
def initialize(self, image1, image2, state, *args, **kwargs):
    # Initialize some stuff
    self.frame_num = 1
    if not hasattr(self.params, 'device'):
        self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

    # Initialize features
    self.initialize_features()

    # Check if images are color
    self.params.features.set_is_color(image1.shape[2] == 3)
    self.params.features.set_is_color(image2.shape[2] == 3)

    # Get feature specific params
    self.fparams = self.params.features.get_fparams('feature_params')

    self.time = 0
    tic = time.time()

    # Get position and size
    self.pos = torch.Tensor([state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
    self.target_sz = torch.Tensor([state[3], state[2]])

    # Set search area
    search_area = torch.prod(self.target_sz * self.params.search_area_scale).item()
    self.target_scale = math.sqrt(search_area) / self.params.image_sample_size

    # Check if IoUNet is used
    self.use_iou_net = getattr(self.params, 'use_iou_net', True)

    # Target size in base scale
    self.base_target_sz = self.target_sz / self.target_scale

    # Set sizes
    self.img_sample_sz = torch.Tensor([self.params.image_sample_size, self.params.image_sample_size])
    self.img_support_sz = self.img_sample_sz
    self.feature_sz = self.params.features.size(self.img_sample_sz)
    if getattr(self.params, 'score_upsample_factor', None) is None:
        self.output_sz = self.feature_sz[0]
    else:
        self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
    self.kernel_size = self.fparams.attribute('kernel_size')
    self.iou_img_sample_sz = self.img_sample_sz
    self.params.score_fusion_strategy = getattr(self.params, 'score_fusion_strategy', 'default')

    self.output_window = None
    if getattr(self.params, 'window_output', False):
        if getattr(self.params, 'use_clipped_window', False):
            self.output_window = dcf.hann2d_clipped(
                self.output_sz.long(),
                self.output_sz.long() * self.params.effective_search_area / self.params.search_area_scale,
                centered=False).to(self.params.device)
        else:
            self.output_window = dcf.hann2d(self.output_sz.long(), centered=True).to(self.params.device)
        self.output_window = self.output_window.squeeze(0)

    # Convert images
    im1 = numpy_to_torch(image1)
    im2 = numpy_to_torch(image2)

    # Setup bounds
    self.image_sz = torch.Tensor([im1.shape[2], im1.shape[3]])
    self.min_scale_factor = torch.max(10 / self.base_target_sz)
    self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

    # Extract samples from both modalities and concatenate along the channel dimension
    x1 = self.generate_init_samples(im1)
    x2 = self.generate_init_samples(im2)
    x = TensorList([torch.cat((v, i), 1) for v, i in zip(x1, x2)])

    self.init_classifier(x)

    if self.use_iou_net:
        self.init_iou_net()

    # Init memory
    # self.init_memory(x)

    self.time += time.time() - tic
def init_classifier(self, init_backbone_feat):
    # Get classification features
    x = self.get_classification_features(init_backbone_feat)

    # Overwrite some parameters in the classifier. (These are not generally changed)
    self._overwrite_classifier_params(feature_dim=x.shape[-3])

    # Add the dropout augmentation here, since it requires extraction of the classification features
    if 'dropout' in self.params.augmentation and self.params.get('use_augmentation', True):
        num, prob = self.params.augmentation['dropout']
        self.transforms.extend(self.transforms[:1] * num)
        x = torch.cat([x, F.dropout2d(x[0:1, ...].expand(num, -1, -1, -1), p=prob, training=True)])

    # Set feature size and other related sizes
    self.feature_sz = torch.Tensor(list(x.shape[-2:]))
    ksz = self.net.classifier.filter_size
    self.kernel_size = torch.Tensor([ksz, ksz] if isinstance(ksz, (int, float)) else ksz)
    self.output_sz = self.feature_sz + (self.kernel_size + 1) % 2

    # Construct output window
    self.output_window = None
    if self.params.get('window_output', False):
        if self.params.get('use_clipped_window', False):
            self.output_window = dcf.hann2d_clipped(
                self.output_sz.long(),
                (self.output_sz * self.params.effective_search_area / self.params.search_area_scale).long(),
                centered=True).to(self.params.device)
        else:
            self.output_window = dcf.hann2d(self.output_sz.long(), centered=True).to(self.params.device)
        self.output_window = self.output_window.squeeze(0)

    # Get target boxes for the different augmentations
    target_boxes = self.init_target_boxes()

    # Set number of iterations
    plot_loss = self.params.debug > 0
    num_iter = self.params.get('net_opt_iter', None)

    # Gaussian mask used as the positional encoding in the Transformer
    self.transformer_label = prutils.gaussian_label_function(
        target_boxes.cpu().view(-1, 4), 0.1, self.net.classifier.filter_size,
        self.feature_sz, self.img_sample_sz, end_pad_if_even=False)
    self.transformer_label = self.transformer_label.unsqueeze(1).cuda()
    self.x_clf = x
    self.transformer_memory, _ = self.net.classifier.transformer.encoder(self.x_clf.unsqueeze(1), pos=None)
    # Decode each training sample against the encoded memory
    for i in range(x.shape[0]):
        _, cur_encoded_feat = self.net.classifier.transformer.decoder(
            x[i, ...].unsqueeze(0).unsqueeze(0),
            memory=self.transformer_memory,
            pos=self.transformer_label,
            query_pos=None)
        if i == 0:
            encoded_feat = cur_encoded_feat
        else:
            encoded_feat = torch.cat((encoded_feat, cur_encoded_feat), 0)
    x = encoded_feat.contiguous()

    # Get target filter by running the discriminative model prediction module
    with torch.no_grad():
        self.target_filter, _, losses = self.net.classifier.get_filter(
            x, target_boxes, num_iter=num_iter, compute_losses=plot_loss)

    # Init memory
    if self.params.get('update_classifier', True):
        self.init_memory(TensorList([x]))
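# --- Illustrative sketch: the Gaussian label used as the Transformer mask ---
# prutils.gaussian_label_function is part of the tracking framework; this
# standalone sketch (simplified, with hypothetical sigma and sizes) shows the
# kind of map it produces: a 2-D Gaussian centered on the target location in
# feature-map coordinates, used above as a soft positional mask.
import torch

def gaussian_label_sketch(center, sigma, sz):
    # center: (row, col) in feature cells; sz: (H, W) of the label map
    ky = torch.arange(sz[0]).float() - center[0]
    kx = torch.arange(sz[1]).float() - center[1]
    return torch.exp(-0.5 * (ky.reshape(-1, 1) ** 2 + kx.reshape(1, -1) ** 2) / sigma ** 2)

label = gaussian_label_sketch(center=(9.0, 9.0), sigma=1.8, sz=(18, 18))
assert label.argmax() == 9 * 18 + 9        # peak sits on the target center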
def initialize(self, image, state, gt, *args, **kwargs):
    if len(gt) == 8:
        ww = gt[2] - gt[0]
        hh = gt[7] - gt[1]
    else:
        ww = gt[2]
        hh = gt[3]

    # Initialize some stuff
    self.frame_num = 1
    if not hasattr(self.params, 'device'):
        self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

    # Select a shallower, higher-resolution layer for small targets
    if ww < 25 and hh < 25:
        self.feature_sz = TensorList([torch.Tensor([28., 28.])])
        self.output_layer = TensorList(['layer2'])
    else:
        self.feature_sz = TensorList([torch.Tensor([14., 14.])])
        self.output_layer = TensorList(['layer3'])

    # Initialize features
    self.initialize_features(self.output_layer)

    # Check if image is color
    self.params.features.set_is_color(image.shape[2] == 3)

    # Get feature specific params
    self.fparams = self.params.features.get_fparams('feature_params')

    self.time = 0
    tic = time.time()

    # Get position and size; shrink the target box slightly for large targets
    self.pos = torch.Tensor([state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
    if state[3] > 50 or state[2] > 50:
        self.target_sz = torch.Tensor([state[3] - state[3] / 8, state[2] - state[2] / 4])
    else:
        self.target_sz = torch.Tensor([state[3], state[2]])

    # Set search area
    self.target_scale = 1.0
    search_area = torch.prod(self.target_sz * self.params.search_area_scale).item()
    if search_area > self.params.max_image_sample_size:
        self.target_scale = math.sqrt(search_area / self.params.max_image_sample_size)
    elif search_area < self.params.min_image_sample_size:
        self.target_scale = math.sqrt(search_area / self.params.min_image_sample_size)

    # Check if IoUNet is used
    self.use_iou_net = getattr(self.params, 'use_iou_net', True)

    # Target size in base scale
    self.base_target_sz = self.target_sz / self.target_scale

    # Use odd square search area and set sizes
    feat_max_stride = max(self.params.features.stride())
    if getattr(self.params, 'search_area_shape', 'square') == 'square':
        self.img_sample_sz = torch.round(
            torch.sqrt(torch.prod(self.base_target_sz * self.params.search_area_scale))) * torch.ones(2)
    elif self.params.search_area_shape == 'initrect':
        self.img_sample_sz = torch.round(self.base_target_sz * self.params.search_area_scale)
    else:
        raise ValueError('Unknown search area shape')

    if self.params.feature_size_odd:
        self.img_sample_sz += feat_max_stride - self.img_sample_sz % (2 * feat_max_stride)
    else:
        self.img_sample_sz += feat_max_stride - (self.img_sample_sz + feat_max_stride) % (2 * feat_max_stride)

    # Set sizes
    self.img_support_sz = self.img_sample_sz
    self.feature_sz = self.params.features.size(self.img_sample_sz)
    self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
    self.kernel_size = self.fparams.attribute('kernel_size')
    self.iou_img_sample_sz = self.img_sample_sz

    # Optimization options
    self.params.precond_learning_rate = self.fparams.attribute('learning_rate')
    if self.params.CG_forgetting_rate is None or max(self.params.precond_learning_rate) >= 1:
        self.params.direction_forget_factor = 0
    else:
        self.params.direction_forget_factor = (
            1 - max(self.params.precond_learning_rate)) ** self.params.CG_forgetting_rate

    self.output_window = None
    if getattr(self.params, 'window_output', False):
        if getattr(self.params, 'use_clipped_window', False):
            self.output_window = dcf.hann2d_clipped(
                self.output_sz.long(),
                self.output_sz.long() * self.params.effective_search_area / self.params.search_area_scale,
                centered=False).to(self.params.device)
        else:
            self.output_window = dcf.hann2d(self.output_sz.long(), centered=False).to(self.params.device)

    # Initialize some learning things
    self.init_learning()

    # Convert image
    im = numpy_to_torch(image)
    self.im = im  # For debugging only

    # Setup scale bounds
    self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
    self.min_scale_factor = torch.max(10 / self.base_target_sz)
    self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

    # Extract and transform sample
    x = self.generate_init_samples(im)

    # Initialize iounet
    if self.use_iou_net:
        self.init_iou_net()

    # Initialize projection matrix
    self.init_projection_matrix(x)

    # Transform to get the training sample
    train_x = self.preprocess_sample(x)

    # Generate label function
    init_y = self.init_label_function(train_x)

    # Init memory
    self.init_memory(train_x)

    # Init optimizer and do initial optimization
    self.init_optimization(train_x, init_y)

    self.pos_iounet = self.pos.clone()

    self.time += time.time() - tic

    self.pool1 = torch.nn.AdaptiveMaxPool2d((1, 224))
    self.pool2 = torch.nn.AdaptiveMaxPool2d((224, 1))
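# --- Illustrative sketch: rounding the sample size to the feature stride ---
# Worked example (made-up numbers) of the img_sample_sz alignment above: in the
# feature_size_odd branch, the crop size is adjusted so that (sz / stride)
# comes out odd, which keeps a single well-defined center cell in the feature
# map.
import torch

feat_max_stride = 16
img_sample_sz = torch.Tensor([250., 250.])
img_sample_sz += feat_max_stride - img_sample_sz % (2 * feat_max_stride)
print(img_sample_sz)                        # tensor([240., 240.]) -> 240/16 = 15 cells (odd)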
def init_classifier_and_regressor(self, init_backbone_feat):
    # Get classification features
    x = self.net.get_backbone_clf_feat(init_backbone_feat)
    train_feat_18_cls = self.get_classification_features(init_backbone_feat)
    with torch.no_grad():
        train_feat_18 = self.net.pyramid_first_conv(x=None, x_backbone=x)
        train_feat_36 = self.net.pyramid_36(train_feat_18, init_backbone_feat['layer2'])
        train_feat_72 = self.net.pyramid_72(train_feat_36, init_backbone_feat['layer1'])
        train_feat_72_cls = self.net.classifier_72.extract_classification_feat(
            train_feat_72.view(-1, *train_feat_72.shape[-3:]))
        train_feat_72_reg = self.net.regressor_72.extract_regression_feat(
            feat_36=train_feat_36.view(-1, *train_feat_36.shape[-3:]),
            feat_72=train_feat_72.view(-1, *train_feat_72.shape[-3:]))

    # Add the dropout augmentation here, since it requires extraction of the classification features
    if 'dropout' in self.params.augmentation and getattr(self.params, 'use_augmentation', True):
        num, prob = self.params.augmentation['dropout']
        self.transforms.extend(self.transforms[:1] * num)
        train_feat_18_cls = torch.cat([train_feat_18_cls, F.dropout2d(
            train_feat_18_cls[0:1, ...].expand(num, -1, -1, -1), p=prob, training=True)])
        train_feat_72_cls = torch.cat([train_feat_72_cls, F.dropout2d(
            train_feat_72_cls[0:1, ...].expand(num, -1, -1, -1), p=prob, training=True)])
        train_feat_72_reg = torch.cat([train_feat_72_reg, F.dropout2d(
            train_feat_72_reg[0:1, ...].expand(num, -1, -1, -1), p=prob, training=True)])

    # Get target boxes for the different augmentations
    target_boxes = self.init_target_boxes()

    # Set number of iterations
    num_iter = getattr(self.params, 'net_opt_iter', None)
    num_iter_72 = getattr(self.params, 'net_opt_iter_72', None)
    reg_num_iter = getattr(self.params, 'reg_net_opt_iter', None)

    # Get target filters by running the discriminative model prediction module
    with torch.no_grad():
        # Extract target_filter_72, target_filter_18 and target_reg_filter_72 using the Clf and Reg model generators
        self.target_filter_72, target_filters, losses = self.net.classifier_72.get_filter(
            train_feat_72_cls, target_boxes, num_iter=num_iter_72)
        self.target_filter_18, _, _ = self.net.classifier_18.get_filter(
            train_feat_18_cls, target_boxes, num_iter=num_iter)
        # Generate init_reg_filter from the target sample, then optimize it on the training samples
        target_feat_36 = train_feat_36.view(-1, *train_feat_36.shape[-3:])[0].unsqueeze(0)
        target_feat_72 = train_feat_72.view(-1, *train_feat_72.shape[-3:])[0].unsqueeze(0)
        target_bb = target_boxes[0].unsqueeze(0).clone()
        init_reg_filter = self.net.regressor_72.generate_init_filter(target_feat_36, target_feat_72, target_bb)
        if reg_num_iter > 0:
            self.target_reg_filter_72, _, reg_losses = self.net.regressor_72.generate_filter_optimizer(
                init_reg_filter, train_feat_72_reg, target_boxes.view(-1, 4).clone(), num_iter=reg_num_iter)
        else:
            self.target_reg_filter_72 = init_reg_filter

    # Keep the initial Clf and Reg filters; during tracking they are merged with the optimized ones
    self.init_target_filter_72 = self.target_filter_72
    self.init_target_filter_18 = self.target_filter_18
    self.init_reg_filter = init_reg_filter

    # Set feature size and other related sizes
    self.feature_sz_18 = torch.Tensor(list(x.shape[-2:]))
    ksz_18 = self.net.classifier_18.filter_size
    self.kernel_size_18 = torch.Tensor([ksz_18, ksz_18] if isinstance(ksz_18, (int, float)) else ksz_18)
    self.output_sz_18 = self.feature_sz_18 + (self.kernel_size_18 + 1) % 2

    self.feature_sz_72 = torch.Tensor(list(train_feat_72.shape[-2:]))
    ksz_72 = self.net.classifier_72.filter_size
    self.kernel_size_72 = torch.Tensor([ksz_72, ksz_72] if isinstance(ksz_72, (int, float)) else ksz_72)
    self.output_sz_72 = self.feature_sz_72 + (self.kernel_size_72 + 1) % 2
    self.output_sz = torch.Tensor([72, 72])

    # Construct output window
    self.output_window = None
    if getattr(self.params, 'window_output', False):
        if getattr(self.params, 'use_clipped_window', False):
            self.output_window = dcf.hann2d_clipped(
                self.output_sz.long(),
                self.output_sz.long() * self.params.effective_search_area / self.params.search_area_scale,
                centered=False).to(self.params.device)
        else:
            self.output_window = dcf.hann2d(self.output_sz.long(), centered=True).to(self.params.device)
        self.output_window = self.output_window.squeeze(0)

    # Init memory
    if getattr(self.params, 'update_classifier_and_regressor', True):
        self.init_memory(TensorList([train_feat_72_cls]),
                         TensorList([train_feat_18_cls]),
                         TensorList([train_feat_72_reg]))
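# --- Illustrative sketch: applying a learned target filter ---
# The get_filter calls above produce a correlation filter; at tracking time a
# DCF-style classifier scores the search region by cross-correlating that
# filter with the test features. This standalone sketch (hypothetical shapes)
# shows the operation with F.conv2d, and also demonstrates the
# output_sz = feature_sz + (kernel_size + 1) % 2 bookkeeping used above.
import torch
import torch.nn.functional as F

feat = torch.randn(1, 256, 18, 18)         # test-frame classification features
target_filter = torch.randn(1, 256, 4, 4)  # learned filter (one output channel)
ksz = target_filter.shape[-1]
scores = F.conv2d(feat, target_filter, padding=ksz // 2)
# For an even 4x4 kernel, padding=2 gives a 19x19 map: 18 + (4 + 1) % 2 = 19
assert scores.shape[-2:] == (19, 19)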