def _generate_label_density(self, target_bb):
    """ Builds the gaussian label density centered at target_bb and post-processes it
    (thresholding, optional normalization, shrinking)
    args:
        target_bb - target bounding box (num_images, 4)

    returns:
        torch.Tensor - Tensor of shape (num_images, label_sz, label_sz) containing the label for each sample
    """
    cfg = self.label_density_params

    # Label resolution: the feature size scaled by the (optional) interpolation factor
    label_sz = cfg['feature_sz'] * cfg.get('interp_factor', 1)

    density = prutils.gaussian_label_function(
        target_bb.view(-1, 4),
        cfg['sigma_factor'],
        cfg['kernel_sz'],
        label_sz,
        self.output_sz,
        end_pad_if_even=cfg.get('end_pad_if_even', True),
        density=True,
        uni_bias=cfg.get('uni_weight', 0.0),
    )

    # Zero out every entry that does not exceed the threshold
    above_threshold = density > cfg.get('threshold', 0.0)
    density *= above_threshold.float()

    if cfg.get('normalize', False):
        mass = density.sum(dim=(-2, -1))
        has_mass = mass > 0.01

        # Maps with enough mass are divided by their sum ...
        density[has_mass, :, :] /= mass[has_mass].view(-1, 1, 1)

        # ... all remaining maps are replaced by a uniform map
        num_cells = density.shape[-2] * density.shape[-1]
        density[~has_mass, :, :] = 1.0 / num_cells

    # Scale the whole label by (1 - shrink)
    density *= 1.0 - cfg.get('shrink', 0.0)
    return density
def _generate_label_function(self, target_bb, sigma, kernel, feature, output_sz, end_pad_if_even,
                             target_absent=None):
    """ Generates the gaussian label function centered at target_bb and zeroes the labels of absent targets
    args:
        target_bb - target bounding boxes; reshaped to (-1, 4) before use
        sigma, kernel, feature, output_sz - forwarded unchanged, in this order, as the positional arguments
                                            of prutils.gaussian_label_function that follow the boxes
        end_pad_if_even - forwarded to prutils.gaussian_label_function as a keyword argument
        target_absent - optional mask tensor, reshaped to (-1, 1, 1); a 1/True entry zeroes the
                        corresponding label. May be a bool tensor or any numeric 0/1 tensor.

    returns:
        The label produced by prutils.gaussian_label_function, multiplied in place by
        (1 - target_absent) when a mask is given
    """
    gauss_label = prutils.gaussian_label_function(target_bb.view(-1, 4), sigma, kernel, feature, output_sz,
                                                  end_pad_if_even=end_pad_if_even)

    if target_absent is not None:
        # Cast to float BEFORE subtracting: `1 - mask` raises a RuntimeError for torch.bool tensors.
        present = (1.0 - target_absent.float()).view(-1, 1, 1)
        gauss_label *= present

    return gauss_label
def _generate_label_function(self, target_bb, is_distractor=None):
    """ Generates the gaussian label function centered at target_bb and zeroes the labels of distractors
    args:
        target_bb - target bounding boxes; reshaped to (-1, 4) before use
        is_distractor - optional mask tensor, reshaped to (-1, 1, 1); a 1/True entry zeroes the
                        corresponding label. May be a bool tensor or any numeric 0/1 tensor.

    returns:
        The label produced by prutils.gaussian_label_function (configured from
        self.label_function_params and self.output_sz), multiplied in place by (1 - is_distractor)
        when a mask is given
    """
    gauss_label = prutils.gaussian_label_function(
        target_bb.view(-1, 4),
        self.label_function_params['sigma_factor'],
        self.label_function_params['kernel_sz'],
        self.label_function_params['feature_sz'],
        self.output_sz,
        end_pad_if_even=self.label_function_params.get('end_pad_if_even', True))

    if is_distractor is not None:
        # Cast to float BEFORE subtracting: `1 - mask` raises a RuntimeError for torch.bool tensors.
        keep = (1.0 - is_distractor.float()).view(-1, 1, 1)
        gauss_label *= keep

    return gauss_label
def _generate_label_function(self, target_bb):
    """ Creates the gaussian label function centered at target_bb, configured by self.label_function_params
    args:
        target_bb - target bounding box (num_images, 4)

    returns:
        torch.Tensor - Tensor of shape (num_images, label_sz, label_sz) containing the label for each sample
    """
    params = self.label_function_params

    # One box per row
    boxes = target_bb.view(-1, 4)

    return prutils.gaussian_label_function(
        boxes,
        params['sigma_factor'],
        params['kernel_sz'],
        params['feature_sz'],
        self.output_sz,
        end_pad_if_even=params.get('end_pad_if_even', True),
    )
def init_classifier(self, init_backbone_feat):
    """ Initializes the classifier from the backbone features of the initial sample(s)

    Extracts the classification features (optionally adding dropout-augmented copies of the first
    sample), derives the size attributes and the output window, builds the initial transformer
    label/memory, decodes the features with the transformer and computes the target filter.

    args:
        init_backbone_feat - backbone features; passed to self.get_classification_features

    Attributes written: feature_sz, kernel_size, output_sz, output_window, transformer_label, x_clf,
    transformer_memory, target_filter. Also extends self.transforms when dropout augmentation is
    used, and calls self.init_memory when params 'update_classifier' is enabled (default True).
    """
    # Get classification features
    x = self.get_classification_features(init_backbone_feat)

    # Overwrite some parameters in the classifier. (These are not generally changed)
    self._overwrite_classifier_params(feature_dim=x.shape[-3])

    # Add the dropout augmentation here, since it requires extraction of the classification features
    if 'dropout' in self.params.augmentation and self.params.get('use_augmentation', True):
        num, prob = self.params.augmentation['dropout']
        # The dropout samples are copies of the first sample, so the first transform is repeated for them
        self.transforms.extend(self.transforms[:1] * num)
        x = torch.cat([x, F.dropout2d(x[0:1, ...].expand(num, -1, -1, -1), p=prob, training=True)])

    # Set feature size and other related sizes
    self.feature_sz = torch.Tensor(list(x.shape[-2:]))
    ksz = self.net.classifier.filter_size
    self.kernel_size = torch.Tensor([ksz, ksz] if isinstance(ksz, (int, float)) else ksz)
    # One extra output cell in every dimension where the kernel size is even
    self.output_sz = self.feature_sz + (self.kernel_size + 1) % 2

    # Construct output window
    self.output_window = None
    if self.params.get('window_output', False):
        if self.params.get('use_clipped_window', False):
            clipped_sz = (self.output_sz * self.params.effective_search_area / self.params.search_area_scale).long()
            self.output_window = dcf.hann2d_clipped(self.output_sz.long(), clipped_sz,
                                                    centered=True).to(self.params.device)
        else:
            self.output_window = dcf.hann2d(self.output_sz.long(), centered=True).to(self.params.device)
        self.output_window = self.output_window.squeeze(0)

    # Get target boxes for the different augmentations
    target_boxes = self.init_target_boxes()

    # Set number of iterations
    plot_loss = self.params.debug > 0
    num_iter = self.params.get('net_opt_iter', None)

    # Gaussian label of the initial target boxes; handed to the transformer decoder as `pos`
    self.transformer_label = prutils.gaussian_label_function(
        target_boxes.cpu().view(-1, 4), 0.1, self.net.classifier.filter_size,
        self.feature_sz, self.img_sample_sz, end_pad_if_even=False)
    # Keep the label on the same device as the features instead of assuming CUDA is available
    self.transformer_label = self.transformer_label.unsqueeze(1).to(x.device)

    # Encode all initial samples into the transformer memory
    self.x_clf = x
    self.transformer_memory, _ = self.net.classifier.transformer.encoder(self.x_clf.unsqueeze(1), pos=None)

    # Decode every sample separately against the memory, then concatenate once along dim 0
    decoded_feats = []
    for sample in x:
        _, cur_encoded_feat = self.net.classifier.transformer.decoder(
            sample.unsqueeze(0).unsqueeze(0), memory=self.transformer_memory,
            pos=self.transformer_label, query_pos=None)
        decoded_feats.append(cur_encoded_feat)
    x = torch.cat(decoded_feats, 0).contiguous()

    # Get target filter by running the discriminative model prediction module
    with torch.no_grad():
        self.target_filter, _, _ = self.net.classifier.get_filter(
            x, target_boxes, num_iter=num_iter, compute_losses=plot_loss)

    # Init memory
    if self.params.get('update_classifier', True):
        self.init_memory(TensorList([x]))
def track(self, image, info: dict = None) -> dict:
    """ Processes one new frame: localizes the target, then updates the classifier and transformer memory

    args:
        image - the new frame; converted with numpy_to_torch
        info - not read by this method

    returns:
        dict - {'target_bbox': box}, where box is a list of four values built from self.pos and
               self.target_sz (presumably [x, y, w, h] - TODO confirm), or [-1, -1, -1, -1] when the
               target was not found and params 'output_not_found_box' is set
    """
    self.debug_info = {}

    self.frame_num += 1
    self.debug_info['frame_num'] = self.frame_num

    # Convert image
    im = numpy_to_torch(image)

    # ------- LOCALIZATION ------- #

    # Extract backbone features
    backbone_feat, sample_coords, _ = self.extract_backbone_features(
        im, self.get_centered_sample_pos(), self.target_scale * self.params.scale_factors, self.img_sample_sz)

    # Extract classification features and decode them with the transformer
    x_clf = self.get_classification_features(backbone_feat)
    _, test_x = self.transformer_decoder(x_clf)

    # Location of sample
    sample_pos, sample_scales = self.get_sample_location(sample_coords)

    # Compute classification scores
    scores_raw = self.classify_target(test_x)

    # Localize the target
    translation_vec, scale_ind, s, flag = self.localize_target(scores_raw, sample_pos, sample_scales)
    new_pos = sample_pos[scale_ind, :] + translation_vec

    # Update position and scale
    if flag != 'not_found':
        if self.params.get('use_iou_net', True):
            update_scale_flag = self.params.get('update_scale_when_uncertain', True) or flag != 'uncertain'
            if self.params.get('use_classifier', True):
                self.update_state(new_pos)
            self.refine_target_box(backbone_feat, sample_pos[scale_ind, :], sample_scales[scale_ind],
                                   scale_ind, update_scale_flag)
        elif self.params.get('use_classifier', True):
            self.update_state(new_pos, sample_scales[scale_ind])

    # ------- UPDATE ------- #

    update_flag = flag not in ['not_found', 'uncertain']
    hard_negative = (flag == 'hard_negative')
    learning_rate = self.params.get('hard_negative_learning_rate', None) if hard_negative else None

    # NOTE(review): 'update_classifier' defaults to False here, while init_classifier reads the same
    # parameter with default True - confirm which default is intended.
    if update_flag and self.params.get('update_classifier', False):
        # Get train sample
        train_x = test_x[scale_ind:scale_ind + 1, ...]

        # Create target_box and label for spatial sample
        target_box = self.get_iounet_box(self.pos, self.target_sz, sample_pos[scale_ind, :],
                                         sample_scales[scale_ind])

        # Update the classifier model
        self.update_classifier(train_x, target_box, learning_rate, s[scale_ind, ...])

        # Update Transformer memory, only on every params.transformer_skipping-th frame
        if (self.frame_num - 1) % self.params.transformer_skipping == 0:
            cur_tf_label = prutils.gaussian_label_function(
                target_box.cpu().view(-1, 4), 0.1, self.net.classifier.filter_size,
                self.feature_sz, self.img_sample_sz, end_pad_if_even=False)
            # Follow the device of the stored labels instead of assuming CUDA is available
            cur_tf_label = cur_tf_label.unsqueeze(1).to(self.transformer_label.device)

            # The new sample is prepended; once the memory is full the last entry is dropped
            if self.x_clf.shape[0] < self.params.transformer_memory_size:
                kept_labels, kept_feats = self.transformer_label, self.x_clf
            else:
                kept_labels, kept_feats = self.transformer_label[:-1, ...], self.x_clf[:-1, ...]
            self.transformer_label = torch.cat([cur_tf_label, kept_labels], dim=0)
            self.x_clf = torch.cat([x_clf, kept_feats], dim=0)

            # Re-encode the updated sample set
            self.transformer_memory, _ = self.net.classifier.transformer.encoder(
                self.x_clf.unsqueeze(1), pos=None)

    # Set the pos of the tracker to iounet pos
    if self.params.get('use_iou_net', True) and flag != 'not_found' and hasattr(self, 'pos_iounet'):
        self.pos = self.pos_iounet.clone()

    score_map = s[scale_ind, ...]
    max_score = torch.max(score_map).item()

    # Visualize and set debug info
    self.search_area_box = torch.cat((sample_coords[scale_ind, [1, 0]],
                                      sample_coords[scale_ind, [3, 2]] - sample_coords[scale_ind, [1, 0]] - 1))
    self.debug_info['flag' + self.id_str] = flag
    self.debug_info['max_score' + self.id_str] = max_score
    if self.visdom is not None:
        self.visdom.register(score_map, 'heatmap', 2, 'Score Map' + self.id_str)
        self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
    elif self.params.debug >= 2:
        show_tensor(score_map, 5, title='Max score = {:.2f}'.format(max_score))

    # Compute output bounding box: pos and target_sz are re-ordered with index [1, 0], and half of
    # (size - 1) is subtracted from the position
    new_state = torch.cat((self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2, self.target_sz[[1, 0]]))

    if self.params.get('output_not_found_box', False) and flag == 'not_found':
        output_state = [-1, -1, -1, -1]
    else:
        output_state = new_state.tolist()

    out = {'target_bbox': output_state}
    return out