Пример #1
0
    def init_classifier(self, init_backbone_feat_rgb, init_backbone_feat_d):
        """Set up the RGB and depth target classifiers from the first-frame features.

        Extracts the classification features of both modalities, optionally appends
        dropout-augmented copies of the first sample, derives the feature, kernel
        and output sizes, builds the optional output window, computes one target
        filter per modality and (optionally) initializes the sample memory.

        args:
            init_backbone_feat_rgb: Backbone features of the initial RGB samples.
            init_backbone_feat_d: Backbone features of the initial depth samples.
        """
        params = self.params

        # Classification features for both modalities
        feat_rgb, feat_d = self.get_classification_features(init_backbone_feat_rgb, init_backbone_feat_d)

        # The classifier is configured from the channel dimension of the RGB features
        self._overwrite_classifier_params(feature_dim=feat_rgb.shape[-3])

        # Dropout augmentation needs the extracted features, hence it is applied here
        if 'dropout' in params.augmentation and params.get('use_augmentation', True):
            n_drop, drop_prob = params.augmentation['dropout']
            self.transforms.extend(self.transforms[:1] * n_drop)
            augmented = []
            # RGB is processed before depth so the random stream is consumed in a fixed order
            for feat in (feat_rgb, feat_d):
                dropped = F.dropout2d(feat[0:1, ...].expand(n_drop, -1, -1, -1), p=drop_prob, training=True)
                augmented.append(torch.cat([feat, dropped]))
            feat_rgb, feat_d = augmented

        # Spatial sizes of the features, the filter kernel and the score map
        self.feature_sz = torch.Tensor(list(feat_rgb.shape[-2:]))
        filter_size = self.net_rgb.classifier.filter_size
        if isinstance(filter_size, (int, float)):
            filter_size = [filter_size, filter_size]
        self.kernel_size = torch.Tensor(filter_size)
        # An even kernel size adds one to the output size along that dimension
        self.output_sz = self.feature_sz + (self.kernel_size + 1) % 2

        # Optional window applied to the output scores
        self.output_window = None
        if params.get('window_output', False):
            window_sz = self.output_sz.long()
            if params.get('use_clipped_window', False):
                effective_sz = (self.output_sz * params.effective_search_area / params.search_area_scale).long()
                window = dcf.hann2d_clipped(window_sz, effective_sz, centered=True)
            else:
                window = dcf.hann2d(window_sz, centered=True)
            self.output_window = window.to(params.device).squeeze(0)

        # Target boxes for the different augmentations
        target_boxes = self.init_target_boxes()

        # Losses are only computed when debugging is enabled
        plot_loss = params.debug > 0
        num_iter = params.get('net_opt_iter', None)

        # Run the model prediction module of each modality to obtain the target filters
        with torch.no_grad():
            self.target_filter_rgb, _, losses_rgb = self.net_rgb.classifier.get_filter(
                feat_rgb, target_boxes, num_iter=num_iter, compute_losses=plot_loss)
            self.target_filter_d, _, losses_d = self.net_d.classifier.get_filter(
                feat_d, target_boxes, num_iter=num_iter, compute_losses=plot_loss)

        # Sample memory is only needed when the classifier is updated online
        if params.get('update_classifier', True):
            self.init_memory(TensorList([feat_rgb]), TensorList([feat_d]))

        if not plot_loss:
            return

        if isinstance(losses_rgb, dict):
            losses_rgb, losses_d = losses_rgb['train'], losses_d['train']
        self.losses_rgb = torch.cat(losses_rgb)
        self.losses_d = torch.cat(losses_d)

        curves = ((self.losses_rgb, 'Training Loss_RGB'), (self.losses_d, 'Training Loss_D'))
        if self.visdom is not None:
            for curve, label in curves:
                self.visdom.register((curve, torch.arange(curve.numel())), 'lineplot', 3, label + self.id_str)
        elif params.debug >= 3:
            for curve, label in curves:
                plot_graph(curve, 10, title=label + self.id_str)
Пример #2
0
    def init_classifier(self, init_backbone_feat):
        """Initialize the target classifier from the first-frame backbone features.

        Extracts the classification features, optionally appends dropout-augmented
        copies of the first sample, derives the feature, kernel and output sizes,
        builds the optional output window, computes the initial target filter and
        (optionally) initializes the sample memory.

        args:
            init_backbone_feat: Backbone features of the initial training samples.
        """
        # Get classification features
        x = self.get_classification_features(init_backbone_feat)

        # Add the dropout augmentation here, since it requires extraction of the classification features
        if 'dropout' in self.params.augmentation and getattr(self.params, 'use_augmentation', True):
            num, prob = self.params.augmentation['dropout']
            # One extra transform entry per dropout sample, each a copy of the first transform
            self.transforms.extend(self.transforms[:1]*num)
            x = torch.cat([x, F.dropout2d(x[0:1,...].expand(num,-1,-1,-1), p=prob, training=True)])

        # Set feature size and other related sizes
        self.feature_sz = torch.Tensor(list(x.shape[-2:]))

        ksz = self.net.classifier.filter_size
        self.kernel_size = torch.Tensor([ksz, ksz] if isinstance(ksz, (int, float)) else ksz)
        # An even kernel size adds one to the output size along that dimension
        self.output_sz = self.feature_sz + (self.kernel_size + 1)%2

        # Construct output window
        self.output_window = None
        if getattr(self.params, 'window_output', False):
            if getattr(self.params, 'use_clipped_window', False):
                # Scale in floating point and cast once: multiplying a long tensor by a
                # Python float promotes to a float tensor, so the previous expression
                # handed a (possibly fractional) float size to hann2d_clipped.
                effective_sz = (self.output_sz*self.params.effective_search_area / self.params.search_area_scale).long()
                # centered=True matches the plain Hann branch below, so both windows
                # share the same layout.
                self.output_window = dcf.hann2d_clipped(self.output_sz.long(), effective_sz, centered=True).to(self.params.device)
            else:
                self.output_window = dcf.hann2d(self.output_sz.long(), centered=True).to(self.params.device)
            self.output_window = self.output_window.squeeze(0)

        # Get target boxes for the different augmentations
        target_boxes = self.init_target_boxes()

        # Set number of iterations; losses are only computed when debugging
        plot_loss = self.params.debug > 0
        num_iter = getattr(self.params, 'net_opt_iter', None)

        # Get target filter by running the discriminative model prediction module
        with torch.no_grad():
            self.target_filter, _, losses = self.net.classifier.get_filter(x, target_boxes, num_iter=num_iter,
                                                                           compute_losses=plot_loss)

        # Init memory
        if getattr(self.params, 'update_classifier', True):
            self.init_memory(TensorList([x]))

        if plot_loss:
            if isinstance(losses, dict):
                losses = losses['train']
            self.losses = torch.stack(losses)
            if self.visdom is not None:
                self.visdom.register((self.losses, torch.arange(self.losses.numel())), 'lineplot', 3, 'Training Loss')
            elif self.params.debug >= 3:
                plot_graph(self.losses, 10, title='Training loss')
Пример #3
0
 def __init__(self, training_samples: TensorList, y: TensorList,
              filter_reg: torch.Tensor, sample_weights: TensorList,
              response_activation, size):
     """Store the problem data and precompute two pooling layers and a Hann window.

     args:
         training_samples: Training samples, stored as given.
         y: Stored as given (presumably the label functions; verify against caller).
         filter_reg: Filter regularization, stored as given.
         sample_weights: Sample weights, stored as given.
         response_activation: Activation applied to the responses, stored as given.
         size: Output size. Must support indexing and ``.long()``
             (presumably a 2-element tensor; verify against caller).
     """
     self.training_samples = training_samples
     self.y = y
     self.filter_reg = filter_reg
     self.sample_weights = sample_weights
     self.response_activation = response_activation
     self.size = size
     # Max-pooling down to a single row / a single column.
     # NOTE(review): both layers use size[0]; confirm size[1] is not intended for
     # one of them when the output is not square.
     self.pool6 = torch.nn.AdaptiveMaxPool2d((1, self.size[0]))
     self.pool7 = torch.nn.AdaptiveMaxPool2d((self.size[0], 1))
     self.output_sz = self.size
     # Prefer the GPU but fall back to the CPU, so that construction does not fail
     # on hosts without CUDA.
     self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
     self.output_window = dcf.hann2d(self.output_sz.long(),
                                     centered=False).to(self.device)
Пример #4
0
    def setting_adaptive_search_region_using_speed(self, im):
        """Reinitialize the search region sizes of the wrapped ATOM tracker for the next frame.

        Resets ``self.atom.target_scale`` and recomputes, on ``self.atom``, the base
        target size, image sample size, feature size, output size, scale bounds and
        the optional output window.

        args:
            im: Current image; it is converted with ``numpy_to_torch`` and only its
                spatial size is used.
        """
        self.atom.target_scale = 1.0
        search_area = torch.prod(self.atom.target_sz * self.atom.params.search_area_scale).item()

        # Rescale the target so that the search area stays within the configured limits
        if search_area > self.atom.params.max_image_sample_size:
            self.atom.target_scale = math.sqrt(search_area / self.atom.params.max_image_sample_size)
        elif search_area < self.atom.params.min_image_sample_size:
            self.atom.target_scale = math.sqrt(search_area / self.atom.params.min_image_sample_size)

        # Target size in base scale
        self.atom.base_target_sz = self.atom.target_sz / self.atom.target_scale

        # Use odd square search area and set sizes
        feat_max_stride = max(self.atom.params.features.stride())
        if getattr(self.atom.params, 'search_area_shape', 'square') == 'square':
            self.atom.img_sample_sz = torch.round(
                torch.sqrt(torch.prod(self.atom.base_target_sz * self.atom.params.search_area_scale))) * torch.ones(2)
        elif self.atom.params.search_area_shape == 'initrect':  # non-square search area
            self.atom.img_sample_sz = torch.round(self.atom.base_target_sz * self.atom.params.search_area_scale)
        else:
            raise ValueError('Unknown search area shape')
        # Adjust the sample size relative to multiples of twice the largest feature stride
        if self.atom.params.feature_size_odd:
            self.atom.img_sample_sz += feat_max_stride - self.atom.img_sample_sz % (2 * feat_max_stride)
        else:
            self.atom.img_sample_sz += feat_max_stride - (self.atom.img_sample_sz + feat_max_stride) % (
                        2 * feat_max_stride)

        # Set sizes
        self.atom.img_support_sz = self.atom.img_sample_sz
        self.atom.feature_sz = self.atom.params.features.size(self.atom.img_sample_sz)
        self.atom.output_sz = self.atom.params.score_upsample_factor * self.atom.img_support_sz  # Interpolated size of the output
        self.atom.iou_img_sample_sz = self.atom.img_sample_sz
        # Setup scale bounds
        im = numpy_to_torch(im)
        self.atom.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
        self.atom.min_scale_factor = torch.max(10 / self.atom.base_target_sz)
        self.atom.max_scale_factor = torch.min(self.atom.image_sz / self.atom.base_target_sz)

        # NOTE(review): the window settings are read from self.params while everything
        # above uses self.atom.params -- confirm that this is intended.
        self.atom.output_window = None
        if getattr(self.params, 'window_output', False):
            if getattr(self.params, 'use_clipped_window', False):
                # Scale in floating point and cast once: a long tensor multiplied by a
                # Python float is promoted to a float tensor, so the previous expression
                # passed a (possibly fractional) float size to hann2d_clipped.
                effective_sz = (self.atom.output_sz * self.params.effective_search_area
                                / self.params.search_area_scale).long()
                self.atom.output_window = dcf.hann2d_clipped(self.atom.output_sz.long(),
                                                             effective_sz,
                                                             centered=False).to(self.params.device)
            else:
                self.atom.output_window = dcf.hann2d(self.atom.output_sz.long(), centered=False).to(self.params.device)
Пример #5
0
    def init_learning(self):
        """Set up the feature window, filter regularization and activation functions.

        Sets ``self.feature_window``, ``self.filter_reg``, ``self.projection_activation``
        and ``self.response_activation``. The two activations are read from
        ``self.params.projection_activation`` and ``self.params.response_activation``
        (default ``'none'``); each is either a name (``'none'``, ``'relu'``, ``'elu'``)
        or a ``(name, parameter)`` tuple such as ``('mlu', 0.05)``.

        raises:
            ValueError: If an activation name is unknown, or ``'mlu'`` is given
                without its parameter.
        """

        def make_activation(spec):
            """Build the activation callable for a name or a (name, parameter) tuple."""
            if isinstance(spec, tuple):
                name, act_param = spec
            else:
                name, act_param = spec, None

            if name == 'none':
                return lambda x: x
            if name == 'relu':
                return torch.nn.ReLU(inplace=True)
            if name == 'elu':
                return torch.nn.ELU(inplace=True)
            if name == 'mlu':
                if act_param is None:
                    raise ValueError(
                        "The 'mlu' activation requires a parameter, e.g. ('mlu', 0.05)")
                # act_param is local to this call, so the projection and the response
                # activation can no longer overwrite each other's parameter.
                return lambda x: F.elu(F.leaky_relu(x, 1 / act_param), act_param)
            raise ValueError('Unknown activation')

        # Get window function
        self.feature_window = TensorList(
            [dcf.hann2d(sz).to(self.params.device) for sz in self.feature_sz])

        # Filter regularization
        self.filter_reg = self.fparams.attribute('filter_reg')

        # Activation function after the projection matrix (phi_1 in the paper)
        self.projection_activation = make_activation(
            getattr(self.params, 'projection_activation', 'none'))

        # Activation function after the output scores (phi_2 in the paper)
        self.response_activation = make_activation(
            getattr(self.params, 'response_activation', 'none'))
Пример #6
0
    def initialize(self, image, info: dict) -> dict:
        """Initialize the tracker on the first frame.

        Loads the metric network, extracts the target's metric feature and fits an
        outlier model on features of sampled positive boxes, then sets up the
        search area, sizes, output window, training samples, memory and optimizer.

        args:
            image: First frame. Indexed as ``image.shape[0..2]`` and converted with
                ``np.array`` / ``numpy_to_torch`` (presumably an H x W x C array).
            info: Dict holding the initial box under ``'init_bbox'``
                (presumably ``[x, y, w, h]``; verify against caller).

        returns:
            Dict with the key ``'time'``: seconds spent since the timer was started,
            which happens after the metric-network setup.
        """
        state = info['init_bbox']

        # Initialize some stuff
        self.frame_num = 1
        if not hasattr(self.params, 'device'):
            self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

        # Initialize features
        self.initialize_features()

        # Metric network
        self.metric_model = model_load(self.params.metric_model_path)
        with torch.no_grad():
            # Warm-up: one forward pass on random input, the result is discarded.
            # NOTE(review): this always moves the input to CUDA, independent of
            # self.params.device.
            tmp = np.random.rand(5, 3, 107, 107)
            tmp = torch.Tensor(tmp)
            tmp = (Variable(tmp)).type(torch.FloatTensor).cuda()
            tmp = self.metric_model(tmp)
            # Metric feature of the initial target
            self.target_metric_feature = get_target_feature(
                self.metric_model, np.array(state), np.array(image))

        # Sample positive boxes around the initial box; if none is produced, relax
        # the lower IoU bound in steps of 0.1 until some are.
        # astype(int) replaces astype(np.int): np.int was only an alias of the
        # builtin int and has been removed from NumPy.
        pos_generator = SampleGenerator(
            'gaussian', np.array([image.shape[1], image.shape[0]]), 0.1, 1.3)
        gt_pos_examples = pos_generator(
            np.array(state).astype(int), 20, [0.7, 1])
        gt_iou = 0.7
        while gt_pos_examples.shape[0] == 0:
            gt_iou = gt_iou - 0.1
            gt_pos_examples = pos_generator(
                np.array(state).astype(int), 20, [gt_iou, 1])
        print('gt-iou:', gt_iou)

        # Fit the outlier model on the metric features of the positive boxes
        with torch.no_grad():
            gt_pos_features0 = get_anchor_feature(self.metric_model,
                                                  np.array(image),
                                                  gt_pos_examples)
            gt_pos_features = gt_pos_features0.cpu().detach().numpy()
        self.clf = lof_fit(gt_pos_features, k=5)
        self.lof_thresh = 0
        self.target_features_all = []
        self.target_features_all.append(self.target_metric_feature)

        # Check if image is color
        self.params.features.set_is_color(image.shape[2] == 3)

        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        tic = time.time()

        # Get position and size (stored with the second box coordinate first)
        self.pos = torch.Tensor(
            [state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
        self.target_sz = torch.Tensor([state[3], state[2]])

        # Set search area; rescale the target so the area stays within the limits
        self.target_scale = 1.0
        search_area = torch.prod(self.target_sz *
                                 self.params.search_area_scale).item()
        if search_area > self.params.max_image_sample_size:
            self.target_scale = math.sqrt(search_area /
                                          self.params.max_image_sample_size)
        elif search_area < self.params.min_image_sample_size:
            self.target_scale = math.sqrt(search_area /
                                          self.params.min_image_sample_size)

        # Check if IoUNet is used
        self.use_iou_net = getattr(self.params, 'use_iou_net', True)

        # Target size in base scale
        self.base_target_sz = self.target_sz / self.target_scale

        # Use odd square search area and set sizes
        feat_max_stride = max(self.params.features.stride())
        if getattr(self.params, 'search_area_shape', 'square') == 'square':
            self.img_sample_sz = torch.round(
                torch.sqrt(
                    torch.prod(self.base_target_sz *
                               self.params.search_area_scale))) * torch.ones(2)
        elif self.params.search_area_shape == 'initrect':
            self.img_sample_sz = torch.round(self.base_target_sz *
                                             self.params.search_area_scale)
        else:
            raise ValueError('Unknown search area shape')
        # Adjust the sample size relative to multiples of twice the largest feature stride
        if self.params.feature_size_odd:
            self.img_sample_sz += feat_max_stride - self.img_sample_sz % (
                2 * feat_max_stride)
        else:
            self.img_sample_sz += feat_max_stride - (
                self.img_sample_sz + feat_max_stride) % (2 * feat_max_stride)

        # Set sizes
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
        self.kernel_size = self.fparams.attribute('kernel_size')

        self.iou_img_sample_sz = self.img_sample_sz

        # Optimization options
        self.params.precond_learning_rate = self.fparams.attribute(
            'learning_rate')
        if self.params.CG_forgetting_rate is None or max(
                self.params.precond_learning_rate) >= 1:
            self.params.direction_forget_factor = 0
        else:
            self.params.direction_forget_factor = (
                1 - max(self.params.precond_learning_rate)
            )**self.params.CG_forgetting_rate

        self.output_window = None
        if getattr(self.params, 'window_output', False):
            if getattr(self.params, 'use_clipped_window', False):
                # Scale in floating point and cast once: a long tensor multiplied by
                # a Python float is promoted to a float tensor, so the previous
                # expression passed a (possibly fractional) float size.
                effective_sz = (self.output_sz *
                                self.params.effective_search_area /
                                self.params.search_area_scale).long()
                self.output_window = dcf.hann2d_clipped(
                    self.output_sz.long(),
                    effective_sz,
                    centered=False).to(self.params.device)
            else:
                self.output_window = dcf.hann2d(self.output_sz.long(),
                                                centered=False).to(
                                                    self.params.device)

        # Initialize some learning things
        self.init_learning()

        # Convert image
        im = numpy_to_torch(image)
        self.im = im  # For debugging only

        # Setup scale bounds
        self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract and transform sample
        x = self.generate_init_samples(im)

        # Initialize iounet
        if self.use_iou_net:
            self.init_iou_net()

        # Initialize projection matrix
        self.init_projection_matrix(x)

        # Transform to get the training sample
        train_x = self.preprocess_sample(x)

        # Generate label function
        init_y = self.init_label_function(train_x)

        # Init memory
        self.init_memory(train_x)

        # Init optimizer and do initial optimization
        self.init_optimization(train_x, init_y)

        self.pos_iounet = self.pos.clone()

        out = {'time': time.time() - tic}
        return out
Пример #7
0
    def initialize(self, image, info: dict) -> dict:
        """Initialize the tracker on the first frame.

        Sets up the features, the target position and size, the search area and
        derived sizes, the optimization options, the optional output window, the
        training samples, the sample memory and the optimizer.

        args:
            image: First frame. Indexed as ``image.shape[2]`` and converted with
                ``numpy_to_torch`` (presumably an H x W x C array).
            info: Dict holding the initial box under ``'init_bbox'``
                (presumably ``[x, y, w, h]``; verify against caller).

        returns:
            Dict with the key ``'time'``: seconds spent since the timer was started,
            which happens after the feature setup.
        """
        state = info['init_bbox']

        # Initialize some stuff
        self.frame_num = 1
        if not self.params.has('device'):
            self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

        # Initialize features
        self.initialize_features()

        # Check if image is color
        self.params.features.set_is_color(image.shape[2] == 3)

        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        tic = time.time()

        # Get position and size (stored with the second box coordinate first)
        self.pos = torch.Tensor(
            [state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
        self.target_sz = torch.Tensor([state[3], state[2]])

        # Set search area; rescale the target so the area stays within the limits
        self.target_scale = 1.0
        search_area = torch.prod(self.target_sz *
                                 self.params.search_area_scale).item()
        if search_area > self.params.max_image_sample_size:
            self.target_scale = math.sqrt(search_area /
                                          self.params.max_image_sample_size)
        elif search_area < self.params.min_image_sample_size:
            self.target_scale = math.sqrt(search_area /
                                          self.params.min_image_sample_size)

        # Check if IoUNet is used
        self.use_iou_net = self.params.get('use_iou_net', True)

        # Target size in base scale
        self.base_target_sz = self.target_sz / self.target_scale

        # Use odd square search area and set sizes
        feat_max_stride = max(self.params.features.stride())
        if self.params.get('search_area_shape', 'square') == 'square':
            self.img_sample_sz = torch.round(
                torch.sqrt(
                    torch.prod(self.base_target_sz *
                               self.params.search_area_scale))) * torch.ones(2)
        elif self.params.search_area_shape == 'initrect':
            self.img_sample_sz = torch.round(self.base_target_sz *
                                             self.params.search_area_scale)
        else:
            raise ValueError('Unknown search area shape')
        # Adjust the sample size relative to multiples of twice the largest feature stride
        if self.params.feature_size_odd:
            self.img_sample_sz += feat_max_stride - self.img_sample_sz % (
                2 * feat_max_stride)
        else:
            self.img_sample_sz += feat_max_stride - (
                self.img_sample_sz + feat_max_stride) % (2 * feat_max_stride)

        # Set sizes
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
        self.kernel_size = self.fparams.attribute('kernel_size')

        self.iou_img_sample_sz = self.img_sample_sz

        # Optimization options
        self.params.precond_learning_rate = self.fparams.attribute(
            'learning_rate')
        if self.params.CG_forgetting_rate is None or max(
                self.params.precond_learning_rate) >= 1:
            self.params.direction_forget_factor = 0
        else:
            self.params.direction_forget_factor = (
                1 - max(self.params.precond_learning_rate)
            )**self.params.CG_forgetting_rate

        self.output_window = None
        if self.params.get('window_output', False):
            if self.params.get('use_clipped_window', False):
                # Scale in floating point and cast once: a long tensor multiplied by
                # a Python float is promoted to a float tensor, so the previous
                # expression passed a (possibly fractional) float size.
                effective_sz = (self.output_sz *
                                self.params.effective_search_area /
                                self.params.search_area_scale).long()
                self.output_window = dcf.hann2d_clipped(
                    self.output_sz.long(),
                    effective_sz,
                    centered=False).to(self.params.device)
            else:
                self.output_window = dcf.hann2d(self.output_sz.long(),
                                                centered=False).to(
                                                    self.params.device)

        # Initialize some learning things
        self.init_learning()

        # Convert image
        im = numpy_to_torch(image)
        self.im = im  # For debugging only

        # Setup scale bounds
        self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract and transform sample
        x = self.generate_init_samples(im)

        # Initialize iounet
        if self.use_iou_net:
            self.init_iou_net()

        # Initialize projection matrix
        self.init_projection_matrix(x)

        # Transform to get the training sample
        train_x = self.preprocess_sample(x)

        # Generate label function
        init_y = self.init_label_function(train_x)

        # Init memory
        self.init_memory(train_x)

        # Init optimizer and do initial optimization
        self.init_optimization(train_x, init_y)

        self.pos_iounet = self.pos.clone()

        out = {'time': time.time() - tic}
        return out
Пример #8
0
    def initialize(self, image, info: dict) -> dict:

        initSeed = 1
        torch.manual_seed(initSeed)
        torch.cuda.manual_seed(initSeed)
        torch.cuda.manual_seed_all(initSeed)
        np.random.seed(initSeed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        os.environ['PYTHONHASHSEED'] = str(initSeed)
        state = info['init_bbox']

        # Initialize some stuff
        self.frame_num = 1
        if not hasattr(self.params, 'device'):
            self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

        # Initialize features
        self.initialize_features()

        # metricnet
        self.metric_model = model_load(self.params.metric_model_path)
        # warmup start
        with torch.no_grad():
            tmp = np.random.rand(5, 3, 107, 107)
            tmp = torch.Tensor(tmp)
            tmp = (Variable(tmp)).type(torch.FloatTensor).cuda()
            tmp = self.metric_model(tmp)
            # warmup end
            self.target_metric_feature = get_target_feature(
                self.metric_model, np.array(state), np.array(image))
        pos_generator = SampleGenerator(
            'gaussian', np.array([image.shape[1], image.shape[0]]), 0.1, 1.3)
        gt_pos_examples = pos_generator(
            np.array(state).astype(np.int), 20, [0.7, 1])
        gt_iou = 0.7
        while gt_pos_examples.shape[0] == 0:
            gt_iou = gt_iou - 0.1
            gt_pos_examples = pos_generator(
                np.array(state).astype(np.int), 20, [gt_iou, 1])
        # print('gt-iou:', gt_iou)
        # self.gt_pos_features = get_anchor_feature(self.metric_model, np.array(image), gt_pos_examples).cpu().detach().numpy()
        with torch.no_grad():
            gt_pos_features0 = get_anchor_feature(self.metric_model,
                                                  np.array(image),
                                                  gt_pos_examples)
            gt_pos_features = gt_pos_features0.cpu().detach().numpy()
            target_metric_feature = self.target_metric_feature.repeat(
                gt_pos_features.shape[0], 1)
            pos_all = torch.norm(gt_pos_features0 - target_metric_feature,
                                 2,
                                 dim=1).view(-1)
            self.similar = pos_all.mean() * self.params.sim_rate
            print('similarThresh', self.similar)
        self.target_features_all = []
        self.target_features_all.append(self.target_metric_feature)
        self.clf = lof_fit(gt_pos_features, k=5)

        # Chack if image is color
        self.params.features.set_is_color(image.shape[2] == 3)

        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        # Get position and size
        self.pos = torch.Tensor(
            [state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
        self.target_sz = torch.Tensor([state[3], state[2]])

        # Set search area
        self.target_scale = 1.0
        search_area = torch.prod(self.target_sz *
                                 self.params.search_area_scale).item()
        if search_area > self.params.max_image_sample_size:
            self.target_scale = math.sqrt(search_area /
                                          self.params.max_image_sample_size)
        elif search_area < self.params.min_image_sample_size:
            self.target_scale = math.sqrt(search_area /
                                          self.params.min_image_sample_size)

        # Target size in base scale
        self.base_target_sz = self.target_sz / self.target_scale

        # Use odd square search area and set sizes
        feat_max_stride = max(self.params.features.stride())
        self.img_sample_sz = torch.round(
            torch.sqrt(
                torch.prod(self.base_target_sz *
                           self.params.search_area_scale))) * torch.ones(2)
        self.img_sample_sz += feat_max_stride - self.img_sample_sz % (
            2 * feat_max_stride)

        # Set other sizes (corresponds to ECO code)
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        self.filter_sz = self.feature_sz + (self.feature_sz + 1) % 2
        self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
        self.compressed_dim = self.fparams.attribute('compressed_dim')

        # Number of filters
        self.num_filters = len(self.filter_sz)

        # Get window function
        self.window = TensorList(
            [dcf.hann2d(sz).to(self.params.device) for sz in self.feature_sz])

        # Get interpolation function
        self.interp_fs = TensorList([
            dcf.get_interp_fourier(sz, self.params.interpolation_method,
                                   self.params.interpolation_bicubic_a,
                                   self.params.interpolation_centering,
                                   self.params.interpolation_windowing,
                                   self.params.device) for sz in self.filter_sz
        ])

        # Get regularization filter
        self.reg_filter = TensorList([
            dcf.get_reg_filter(self.img_support_sz, self.base_target_sz,
                               fparams).to(self.params.device)
            for fparams in self.fparams
        ])
        self.reg_energy = self.reg_filter.view(-1) @ self.reg_filter.view(-1)

        # Get label function
        output_sigma_factor = self.fparams.attribute('output_sigma_factor')
        sigma = (self.filter_sz / self.img_support_sz) * torch.sqrt(
            self.base_target_sz.prod()) * output_sigma_factor
        self.yf = TensorList([
            dcf.label_function(sz, sig).to(self.params.device)
            for sz, sig in zip(self.filter_sz, sigma)
        ])

        # Optimization options
        self.params.precond_learning_rate = self.fparams.attribute(
            'learning_rate')
        if self.params.CG_forgetting_rate is None or max(
                self.params.precond_learning_rate) >= 1:
            self.params.direction_forget_factor = 0
        else:
            self.params.direction_forget_factor = (
                1 - max(self.params.precond_learning_rate)
            )**self.params.CG_forgetting_rate

        # Convert image
        im = numpy_to_torch(image)

        # Setup bounds
        self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract and transform sample
        x = self.generate_init_samples(im)

        # Initialize projection matrix
        x_mat = TensorList(
            [e.permute(1, 0, 2, 3).reshape(e.shape[1], -1).clone() for e in x])
        x_mat -= x_mat.mean(dim=1, keepdim=True)
        cov_x = x_mat @ x_mat.t()
        self.projection_matrix = TensorList([
            torch.svd(C)[0][:, :cdim].clone()
            for C, cdim in zip(cov_x, self.compressed_dim)
        ])

        # Transform to get the training sample
        train_xf = self.preprocess_sample(x)

        # Shift the samples back
        if 'shift' in self.params.augmentation:
            for xf in train_xf:
                if xf.shape[0] == 1:
                    continue
                for i, shift in enumerate(self.params.augmentation['shift']):
                    shift_samp = 2 * math.pi * torch.Tensor(
                        shift) / self.img_support_sz
                    xf[1 + i:2 + i, ...] = fourier.shift_fs(xf[1 + i:2 + i,
                                                               ...],
                                                            shift=shift_samp)

        # Shift sample
        shift_samp = 2 * math.pi * (self.pos - self.pos.round()) / (
            self.target_scale * self.img_support_sz)
        train_xf = fourier.shift_fs(train_xf, shift=shift_samp)

        # Initialize first-frame training samples
        num_init_samples = train_xf.size(0)
        self.init_sample_weights = TensorList(
            [xf.new_ones(1) / xf.shape[0] for xf in train_xf])
        self.init_training_samples = train_xf.permute(2, 3, 0, 1, 4)

        # Sample counters and weights
        self.num_stored_samples = num_init_samples
        self.previous_replace_ind = [None] * len(self.num_stored_samples)
        self.sample_weights = TensorList(
            [xf.new_zeros(self.params.sample_memory_size) for xf in train_xf])
        for sw, init_sw, num in zip(self.sample_weights,
                                    self.init_sample_weights,
                                    num_init_samples):
            sw[:num] = init_sw

        # Initialize memory
        self.training_samples = TensorList([
            xf.new_zeros(xf.shape[2], xf.shape[3],
                         self.params.sample_memory_size, cdim, 2)
            for xf, cdim in zip(train_xf, self.compressed_dim)
        ])

        # Initialize filter
        self.filter = TensorList([
            xf.new_zeros(1, cdim, xf.shape[2], xf.shape[3], 2)
            for xf, cdim in zip(train_xf, self.compressed_dim)
        ])

        # Do joint optimization
        self.joint_problem = FactorizedConvProblem(self.init_training_samples,
                                                   self.yf, self.reg_filter,
                                                   self.projection_matrix,
                                                   self.params,
                                                   self.init_sample_weights)
        joint_var = self.filter.concat(self.projection_matrix)
        self.joint_optimizer = GaussNewtonCG(self.joint_problem,
                                             joint_var,
                                             debug=(self.params.debug >= 1),
                                             visdom=self.visdom)

        if self.params.update_projection_matrix:
            self.joint_optimizer.run(
                self.params.init_CG_iter // self.params.init_GN_iter,
                self.params.init_GN_iter)

        # Re-project samples with the new projection matrix
        compressed_samples = complex.mtimes(self.init_training_samples,
                                            self.projection_matrix)
        for train_samp, init_samp in zip(self.training_samples,
                                         compressed_samples):
            train_samp[:, :, :init_samp.shape[2], :, :] = init_samp

        # Initialize optimizer
        self.filter_optimizer = FilterOptim(self.params, self.reg_energy)
        self.filter_optimizer.register(self.filter, self.training_samples,
                                       self.yf, self.sample_weights,
                                       self.reg_filter)
        self.filter_optimizer.sample_energy = self.joint_problem.sample_energy
        self.filter_optimizer.residuals = self.joint_optimizer.residuals.clone(
        )

        if not self.params.update_projection_matrix:
            self.filter_optimizer.run(self.params.init_CG_iter)

        # Post optimization
        self.filter_optimizer.run(self.params.post_init_CG_iter)

        self.symmetrize_filter()

        # metricnet_lof
        self.current_target_metric_feature = []
        self.train_xf = []
        # self.iou=[]
        # self.lof_thresh=3.5

        self.lof_thresh = self.params.lof_rate
Пример #9
0
    def initialize(self, image1, image2, state, *args, **kwargs):
        """Set up the tracker from the first pair of frames.

        Both images are passed through ``generate_init_samples``; the two
        resulting feature lists are concatenated element-wise along
        dimension 1 and handed to ``init_classifier``. The IoU network is
        initialized afterwards when ``use_iou_net`` is enabled.

        Args:
            image1: First input image; ``image1.shape[2] == 3`` is reported
                to the feature extractor as "is color".
            image2: Second input image, treated the same way as ``image1``.
            state: Indexable with entries ``state[0]`` .. ``state[3]``;
                presumably an (x, y, w, h) box -- TODO confirm with caller.
            *args: Unused.
            **kwargs: Unused.
        """
        # Basic bookkeeping; the device is only derived when not preset
        self.frame_num = 1
        if not hasattr(self.params, 'device'):
            if self.params.use_gpu:
                self.params.device = 'cuda'
            else:
                self.params.device = 'cpu'

        # Build the feature extractor
        self.initialize_features()

        # Report the color status of each image in turn (image2 is last)
        for frame in (image1, image2):
            self.params.features.set_is_color(frame.shape[2] == 3)

        # Per-feature parameters
        self.fparams = self.params.features.get_fparams('feature_params')

        # Start timing the initialization
        self.time = 0
        start = time.time()

        # Target center and size, stored with the state[1]/state[3] axis first
        center_0 = state[1] + (state[3] - 1) / 2
        center_1 = state[0] + (state[2] - 1) / 2
        self.pos = torch.Tensor([center_0, center_1])
        self.target_sz = torch.Tensor([state[3], state[2]])

        # Scale relating the search area to the fixed image sample size
        scaled_target = self.target_sz * self.params.search_area_scale
        search_area = torch.prod(scaled_target).item()
        self.target_scale = math.sqrt(search_area) / self.params.image_sample_size

        # IoU-Net is used unless explicitly disabled in the parameters
        self.use_iou_net = getattr(self.params, 'use_iou_net', True)

        # Target size expressed in the base scale
        self.base_target_sz = self.target_sz / self.target_scale

        # Sample, support, feature and output sizes
        sample_side = self.params.image_sample_size
        self.img_sample_sz = torch.Tensor([sample_side, sample_side])
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        upsample_factor = getattr(self.params, 'score_upsample_factor', None)
        if upsample_factor is not None:
            # Interpolated size of the output
            self.output_sz = upsample_factor * self.img_support_sz
        else:
            self.output_sz = self.feature_sz[0]
        self.kernel_size = self.fparams.attribute('kernel_size')

        self.iou_img_sample_sz = self.img_sample_sz

        # Default the fusion strategy when the parameters do not define one
        self.params.score_fusion_strategy = getattr(
            self.params, 'score_fusion_strategy', 'default')

        # Optional window applied to the output scores
        self.output_window = None
        if getattr(self.params, 'window_output', False):
            window_sz = self.output_sz.long()
            if getattr(self.params, 'use_clipped_window', False):
                effective_sz = (self.output_sz.long() *
                                self.params.effective_search_area /
                                self.params.search_area_scale)
                window = dcf.hann2d_clipped(window_sz, effective_sz,
                                            centered=False)
            else:
                window = dcf.hann2d(window_sz, centered=True)
            self.output_window = window.to(self.params.device).squeeze(0)

        # Convert both images to torch tensors
        frame_1 = numpy_to_torch(image1)
        frame_2 = numpy_to_torch(image2)

        # Scale bounds are derived from the first image only
        self.image_sz = torch.Tensor([frame_1.shape[2], frame_1.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract initial samples per image and fuse them along dimension 1
        feats_1 = self.generate_init_samples(frame_1)
        feats_2 = self.generate_init_samples(frame_2)
        fused = TensorList([
            torch.cat((feat_a, feat_b), 1)
            for feat_a, feat_b in zip(feats_1, feats_2)
        ])

        self.init_classifier(fused)

        if self.use_iou_net:
            self.init_iou_net()

        self.time += time.time() - start
Пример #10
0
    def initialize(self, image, state, *args, **kwargs):
        """Initialize the tracker on the first frame and learn the filter.

        Derives all size attributes from ``state``, builds windows,
        interpolation and regularization filters and the label function,
        extracts the initial samples, initializes the projection matrix from
        an SVD of the feature covariance, and runs the joint and filter
        optimizers.

        Args:
            image: Input image; ``image.shape[2] == 3`` is reported to the
                feature extractor as "is color".
            state: Indexable with entries ``state[0]`` .. ``state[3]``;
                presumably an (x, y, w, h) box -- TODO confirm with caller.
            *args: Unused.
            **kwargs: Unused.
        """
        # Basic bookkeeping; the device is only derived when not preset
        self.frame_num = 1
        if not hasattr(self.params, 'device'):
            if self.params.use_gpu:
                self.params.device = 'cuda'
            else:
                self.params.device = 'cpu'

        # Build the feature extractor
        self.initialize_features()

        # Check if image is color
        is_color = image.shape[2] == 3
        self.params.features.set_is_color(is_color)

        # Per-feature parameters
        self.fparams = self.params.features.get_fparams('feature_params')

        # Target center and size, stored with the state[1]/state[3] axis first
        center_0 = state[1] + (state[3] - 1) / 2
        center_1 = state[0] + (state[2] - 1) / 2
        self.pos = torch.Tensor([center_0, center_1])
        self.target_sz = torch.Tensor([state[3], state[2]])

        # Rescale only when the search area leaves the allowed range
        self.target_scale = 1.0
        scaled_target = self.target_sz * self.params.search_area_scale
        search_area = torch.prod(scaled_target).item()
        if search_area > self.params.max_image_sample_size:
            self.target_scale = math.sqrt(
                search_area / self.params.max_image_sample_size)
        elif search_area < self.params.min_image_sample_size:
            self.target_scale = math.sqrt(
                search_area / self.params.min_image_sample_size)

        # Target size expressed in the base scale
        self.base_target_sz = self.target_sz / self.target_scale

        # Square search area, then adjusted in place relative to the
        # largest feature stride
        max_stride = max(self.params.features.stride())
        base_area = torch.prod(
            self.base_target_sz * self.params.search_area_scale)
        side = torch.round(torch.sqrt(base_area))
        self.img_sample_sz = side * torch.ones(2)
        self.img_sample_sz += max_stride - self.img_sample_sz % (2 * max_stride)

        # Remaining sizes (corresponds to ECO code)
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        self.filter_sz = self.feature_sz + (self.feature_sz + 1) % 2
        # Interpolated size of the output
        self.output_sz = self.params.score_upsample_factor * self.img_support_sz
        self.compressed_dim = self.fparams.attribute('compressed_dim')

        # One filter per entry of filter_sz
        self.num_filters = len(self.filter_sz)

        # Hann window per feature size
        windows = []
        for sz in self.feature_sz:
            windows.append(dcf.hann2d(sz).to(self.params.device))
        self.window = TensorList(windows)

        # Fourier interpolation function per filter size
        interp_filters = []
        for sz in self.filter_sz:
            interp_filters.append(
                dcf.get_interp_fourier(sz, self.params.interpolation_method,
                                       self.params.interpolation_bicubic_a,
                                       self.params.interpolation_centering,
                                       self.params.interpolation_windowing,
                                       self.params.device))
        self.interp_fs = TensorList(interp_filters)

        # Regularization filter per feature and its energy
        reg_filters = []
        for feat_params in self.fparams:
            reg = dcf.get_reg_filter(self.img_support_sz, self.base_target_sz,
                                     feat_params)
            reg_filters.append(reg.to(self.params.device))
        self.reg_filter = TensorList(reg_filters)
        self.reg_energy = self.reg_filter.view(-1) @ self.reg_filter.view(-1)

        # Label function
        output_sigma_factor = self.fparams.attribute('output_sigma_factor')
        target_extent = torch.sqrt(self.base_target_sz.prod())
        sigma = ((self.filter_sz / self.img_support_sz) * target_extent *
                 output_sigma_factor)
        labels = []
        for sz, sig in zip(self.filter_sz, sigma):
            labels.append(dcf.label_function(sz, sig).to(self.params.device))
        self.yf = TensorList(labels)

        # Optimization options: the forget factor is zero when no forgetting
        # rate is configured or the largest learning rate is >= 1
        self.params.precond_learning_rate = self.fparams.attribute(
            'learning_rate')
        forgetting_rate = self.params.CG_forgetting_rate
        if forgetting_rate is not None and not (
                max(self.params.precond_learning_rate) >= 1):
            self.params.direction_forget_factor = (
                1 - max(self.params.precond_learning_rate)
            ) ** self.params.CG_forgetting_rate
        else:
            self.params.direction_forget_factor = 0

        # Convert image
        frame = numpy_to_torch(image)

        # Scale bounds
        self.image_sz = torch.Tensor([frame.shape[2], frame.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract and transform the initial samples
        init_feats = self.generate_init_samples(frame)

        # Projection matrix: leading left singular vectors of the covariance
        # of the mean-centered, channel-major feature matrix
        feat_mat = TensorList([
            feat.permute(1, 0, 2, 3).reshape(feat.shape[1], -1).clone()
            for feat in init_feats
        ])
        feat_mat -= feat_mat.mean(dim=1, keepdim=True)
        feat_cov = feat_mat @ feat_mat.t()
        bases = []
        for cov, cdim in zip(feat_cov, self.compressed_dim):
            left_vecs = torch.svd(cov)[0]
            bases.append(left_vecs[:, :cdim].clone())
        self.projection_matrix = TensorList(bases)

        # Transform to get the training sample
        train_xf = self.preprocess_sample(init_feats)

        # Shift the augmented samples back (in place); sample i + 1 is
        # paired with the i-th configured shift
        if 'shift' in self.params.augmentation:
            for xf in train_xf:
                if xf.shape[0] != 1:
                    shifts = self.params.augmentation['shift']
                    for idx, shift in enumerate(shifts, start=1):
                        back_shift = (2 * math.pi * torch.Tensor(shift) /
                                      self.img_support_sz)
                        xf[idx:idx + 1, ...] = fourier.shift_fs(
                            xf[idx:idx + 1, ...], shift=back_shift)

        # Compensate for the sub-pixel part of the target position
        subpixel_shift = 2 * math.pi * (self.pos - self.pos.round()) / (
            self.target_scale * self.img_support_sz)
        train_xf = fourier.shift_fs(train_xf, shift=subpixel_shift)

        # First-frame training samples, uniformly weighted
        num_init_samples = train_xf.size(0)
        self.init_sample_weights = TensorList(
            [xf.new_ones(1) / xf.shape[0] for xf in train_xf])
        self.init_training_samples = train_xf.permute(2, 3, 0, 1, 4)

        # Sample counters and weights
        self.num_stored_samples = num_init_samples
        self.previous_replace_ind = [
            None for _ in range(len(self.num_stored_samples))
        ]
        self.sample_weights = TensorList(
            [xf.new_zeros(self.params.sample_memory_size) for xf in train_xf])
        for weights, init_weights, count in zip(self.sample_weights,
                                                self.init_sample_weights,
                                                num_init_samples):
            weights[:count] = init_weights

        # Sample memory, zero-initialized
        memory = []
        for xf, cdim in zip(train_xf, self.compressed_dim):
            memory.append(
                xf.new_zeros(xf.shape[2], xf.shape[3],
                             self.params.sample_memory_size, cdim, 2))
        self.training_samples = TensorList(memory)

        # Filter, zero-initialized
        filters = []
        for xf, cdim in zip(train_xf, self.compressed_dim):
            filters.append(xf.new_zeros(1, cdim, xf.shape[2], xf.shape[3], 2))
        self.filter = TensorList(filters)

        # Joint optimization of filter and projection matrix
        self.joint_problem = FactorizedConvProblem(self.init_training_samples,
                                                   self.yf, self.reg_filter,
                                                   self.projection_matrix,
                                                   self.params,
                                                   self.init_sample_weights)
        joint_var = self.filter.concat(self.projection_matrix)
        self.joint_optimizer = GaussNewtonCG(self.joint_problem,
                                             joint_var,
                                             debug=(self.params.debug >= 3))

        if self.params.update_projection_matrix:
            self.joint_optimizer.run(
                self.params.init_CG_iter // self.params.init_GN_iter,
                self.params.init_GN_iter)

        # Re-project the initial samples with the current projection matrix
        # and store them at the front of the memory
        compressed_samples = complex.mtimes(self.init_training_samples,
                                            self.projection_matrix)
        for stored, projected in zip(self.training_samples,
                                     compressed_samples):
            stored[:, :, :projected.shape[2], :, :] = projected

        # Filter optimizer, seeded from the joint problem/optimizer
        self.filter_optimizer = FilterOptim(self.params, self.reg_energy)
        self.filter_optimizer.register(self.filter, self.training_samples,
                                       self.yf, self.sample_weights,
                                       self.reg_filter)
        self.filter_optimizer.sample_energy = self.joint_problem.sample_energy
        self.filter_optimizer.residuals = (
            self.joint_optimizer.residuals.clone())

        # Without joint optimization the initial iterations are run here
        if not self.params.update_projection_matrix:
            self.filter_optimizer.run(self.params.init_CG_iter)

        # Post optimization
        self.filter_optimizer.run(self.params.post_init_CG_iter)

        self.symmetrize_filter()
Пример #11
0
    def init_classifier(self, init_backbone_feat):
        """Initialize the target classifier from first-frame backbone features.

        Extracts classification features, optionally appends dropout-augmented
        copies of the first sample, sets the size attributes and the output
        window, runs the features through the classifier's transformer
        encoder/decoder, predicts the target filter and, when
        ``update_classifier`` is enabled, initializes the sample memory.

        Args:
            init_backbone_feat: Backbone features passed on to
                ``get_classification_features``.
        """
        # Get classification features
        x = self.get_classification_features(init_backbone_feat)

        # Overwrite some parameters in the classifier. (These are not generally changed)
        self._overwrite_classifier_params(feature_dim=x.shape[-3])

        # Add the dropout augmentation here, since it requires extraction of the classification features
        if 'dropout' in self.params.augmentation and self.params.get(
                'use_augmentation', True):
            num, prob = self.params.augmentation['dropout']
            self.transforms.extend(self.transforms[:1] * num)
            x = torch.cat([
                x,
                F.dropout2d(x[0:1, ...].expand(num, -1, -1, -1),
                            p=prob,
                            training=True)
            ])

        # Set feature size and other related sizes
        self.feature_sz = torch.Tensor(list(x.shape[-2:]))
        ksz = self.net.classifier.filter_size
        self.kernel_size = torch.Tensor(
            [ksz, ksz] if isinstance(ksz, (int, float)) else ksz)
        self.output_sz = self.feature_sz + (self.kernel_size + 1) % 2

        # Construct output window
        self.output_window = None
        if self.params.get('window_output', False):
            if self.params.get('use_clipped_window', False):
                self.output_window = dcf.hann2d_clipped(
                    self.output_sz.long(),
                    (self.output_sz * self.params.effective_search_area /
                     self.params.search_area_scale).long(),
                    centered=True).to(self.params.device)
            else:
                self.output_window = dcf.hann2d(self.output_sz.long(),
                                                centered=True).to(
                                                    self.params.device)
            self.output_window = self.output_window.squeeze(0)

        # Get target boxes for the different augmentations
        target_boxes = self.init_target_boxes()

        # Set number of iterations
        plot_loss = self.params.debug > 0
        num_iter = self.params.get('net_opt_iter', None)

        # mask in Transformer
        self.transformer_label = prutils.gaussian_label_function(
            target_boxes.cpu().view(-1, 4),
            0.1,
            self.net.classifier.filter_size,
            self.feature_sz,
            self.img_sample_sz,
            end_pad_if_even=False)

        # Keep the label on the same device as the features it is combined
        # with, instead of hard-coding the default CUDA device.
        self.transformer_label = self.transformer_label.unsqueeze(1).to(
            x.device)
        self.x_clf = x

        self.transformer_memory, _ = self.net.classifier.transformer.encoder(
            self.x_clf.unsqueeze(1), pos=None)

        # Decode every sample against the shared memory; collect the results
        # and concatenate once rather than growing a tensor inside the loop.
        decoder = self.net.classifier.transformer.decoder
        encoded_samples = []
        for sample in x:
            _, cur_encoded_feat = decoder(
                sample.unsqueeze(0).unsqueeze(0),
                memory=self.transformer_memory,
                pos=self.transformer_label,
                query_pos=None)
            encoded_samples.append(cur_encoded_feat)
        x = torch.cat(encoded_samples, 0).contiguous()

        # Get target filter by running the discriminative model prediction module
        with torch.no_grad():
            self.target_filter, _, losses = self.net.classifier.get_filter(
                x, target_boxes, num_iter=num_iter, compute_losses=plot_loss)

        # Init memory
        if self.params.get('update_classifier', True):
            self.init_memory(TensorList([x]))
        '''
Пример #12
0
    def initialize(self, image, state, gt, *args, **kwargs):
        """Initialize the tracker on the first frame.

        Picks the backbone output layer from the extent given by ``gt``,
        derives all size attributes from ``state``, builds the optional
        output window, extracts the initial samples and runs the initial
        optimization. The elapsed time is accumulated in ``self.time``.

        Args:
            image: Input image; ``image.shape[2] == 3`` is reported to the
                feature extractor as "is color".
            state: Indexable with entries ``state[0]`` .. ``state[3]``;
                presumably an (x, y, w, h) box -- TODO confirm with caller.
            gt: Ground-truth annotation. With 8 entries the extent is taken
                as ``gt[2] - gt[0]`` and ``gt[7] - gt[1]`` (presumably
                polygon corner coordinates -- TODO confirm); otherwise
                ``gt[2]`` and ``gt[3]`` are used directly.
            *args: Unused.
            **kwargs: Unused.

        Raises:
            ValueError: If ``params.search_area_shape`` is neither
                ``'square'`` nor ``'initrect'``.
        """
        # Extent of the annotated target
        if len(gt) == 8:
            ww = gt[2] - gt[0]
            hh = gt[7] - gt[1]
        else:
            ww = gt[2]
            hh = gt[3]

        # Initialize some stuff
        self.frame_num = 1
        if not hasattr(self.params, 'device'):
            self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

        # Targets below 25 in both extents use 'layer2', all others 'layer3'.
        # NOTE(review): feature_sz set here is overwritten under "Set sizes"
        # below; kept in case initialize_features reads it -- TODO confirm.
        if ww < 25 and hh < 25:
            self.feature_sz = TensorList([torch.Tensor([28., 28.])])
            self.output_layer = TensorList(['layer2'])
        else:
            self.feature_sz = TensorList([torch.Tensor([14., 14.])])
            self.output_layer = TensorList(['layer3'])

        # Initialize features
        self.initialize_features(self.output_layer)

        # Check if image is color
        self.params.features.set_is_color(image.shape[2] == 3)

        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        self.time = 0
        tic = time.time()

        # Get position and size. When either extent exceeds 50 the target
        # size is shrunk: state[3] by 1/8 and state[2] by 1/4.
        self.pos = torch.Tensor(
            [state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
        if state[3] > 50 or state[2] > 50:
            self.target_sz = torch.Tensor(
                [state[3] - state[3] / 8, state[2] - state[2] / 4])
        else:
            self.target_sz = torch.Tensor([state[3], state[2]])

        # Set search area; rescale only when it leaves the allowed range
        self.target_scale = 1.0
        search_area = torch.prod(self.target_sz *
                                 self.params.search_area_scale).item()
        if search_area > self.params.max_image_sample_size:
            self.target_scale = math.sqrt(search_area /
                                          self.params.max_image_sample_size)
        elif search_area < self.params.min_image_sample_size:
            self.target_scale = math.sqrt(search_area /
                                          self.params.min_image_sample_size)

        # Check if IoUNet is used
        self.use_iou_net = getattr(self.params, 'use_iou_net', True)

        # Target size in base scale
        self.base_target_sz = self.target_sz / self.target_scale

        # Use odd square search area and set sizes
        feat_max_stride = max(self.params.features.stride())
        if getattr(self.params, 'search_area_shape', 'square') == 'square':
            self.img_sample_sz = torch.round(
                torch.sqrt(
                    torch.prod(self.base_target_sz *
                               self.params.search_area_scale))) * torch.ones(2)
        elif self.params.search_area_shape == 'initrect':
            self.img_sample_sz = torch.round(self.base_target_sz *
                                             self.params.search_area_scale)
        else:
            raise ValueError('Unknown search area shape')
        # In-place adjustment of the sample size relative to the stride
        if self.params.feature_size_odd:
            self.img_sample_sz += feat_max_stride - self.img_sample_sz % (
                2 * feat_max_stride)
        else:
            self.img_sample_sz += feat_max_stride - (
                self.img_sample_sz + feat_max_stride) % (2 * feat_max_stride)

        # Set sizes
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
        self.kernel_size = self.fparams.attribute('kernel_size')

        self.iou_img_sample_sz = self.img_sample_sz

        # Optimization options: the forget factor is zero when no forgetting
        # rate is configured or the largest learning rate is >= 1
        self.params.precond_learning_rate = self.fparams.attribute(
            'learning_rate')
        if self.params.CG_forgetting_rate is None or max(
                self.params.precond_learning_rate) >= 1:
            self.params.direction_forget_factor = 0
        else:
            self.params.direction_forget_factor = (
                1 - max(self.params.precond_learning_rate)
            )**self.params.CG_forgetting_rate

        # Optional window applied to the output scores
        self.output_window = None
        if getattr(self.params, 'window_output', False):
            if getattr(self.params, 'use_clipped_window', False):
                self.output_window = dcf.hann2d_clipped(
                    self.output_sz.long(),
                    self.output_sz.long() * self.params.effective_search_area /
                    self.params.search_area_scale,
                    centered=False).to(self.params.device)
            else:
                self.output_window = dcf.hann2d(self.output_sz.long(),
                                                centered=False).to(
                                                    self.params.device)

        # Initialize some learning things
        self.init_learning()

        # Convert image
        im = numpy_to_torch(image)
        self.im = im  # For debugging only

        # Setup scale bounds
        self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract and transform sample
        x = self.generate_init_samples(im)

        # Initialize iounet
        if self.use_iou_net:
            self.init_iou_net()

        # Initialize projection matrix
        self.init_projection_matrix(x)

        # Transform to get the training sample
        train_x = self.preprocess_sample(x)

        # Generate label function
        init_y = self.init_label_function(train_x)

        # Init memory
        self.init_memory(train_x)

        # Init optimizer and do initial optimization
        self.init_optimization(train_x, init_y)

        self.pos_iounet = self.pos.clone()

        self.time += time.time() - tic

        # Max-pooling modules with output sizes (1, 224) and (224, 1);
        # created after the timing measurement ends
        self.pool1 = torch.nn.AdaptiveMaxPool2d((1, 224))
        self.pool2 = torch.nn.AdaptiveMaxPool2d((224, 1))
Пример #13
0
    def init_classifier_and_regressor(self, init_backbone_feat):
        """Build the initial classification and regression filters.

        Computes the pyramid features from the backbone features, optionally
        appends dropout-augmented copies of the first sample, predicts the
        two classifier filters (``classifier_72`` / ``classifier_18``) and the
        regression filter (``regressor_72``), sets the size attributes and the
        output window and, when ``update_classifier_and_regressor`` is
        enabled, initializes the sample memory.

        Args:
            init_backbone_feat: Backbone features; indexed with ``'layer1'``
                and ``'layer2'`` and also passed to
                ``get_backbone_clf_feat`` / ``get_classification_features``.
        """
        # Get classification features
        x = self.net.get_backbone_clf_feat(init_backbone_feat)
        train_feat_18_cls = self.get_classification_features(init_backbone_feat)

        # Pyramid features; each level is built from the previous one plus
        # a backbone layer
        with torch.no_grad():
            train_feat_18 = self.net.pyramid_first_conv(x=None, x_backbone=x)
            train_feat_36 = self.net.pyramid_36(train_feat_18, init_backbone_feat['layer2'])
            train_feat_72 = self.net.pyramid_72(train_feat_36, init_backbone_feat['layer1'])

            # Leading dimensions are flattened before feature extraction
            train_feat_72_cls = self.net.classifier_72.extract_classification_feat(train_feat_72.
                                                                                   view(-1, *train_feat_72.shape[-3:]))
            train_feat_72_reg = self.net.regressor_72.extract_regression_feat(
                                                            feat_36=train_feat_36.view(-1, *train_feat_36.shape[-3:]),
                                                            feat_72=train_feat_72.view(-1, *train_feat_72.shape[-3:]))

        # Add the dropout augmentation here, since it requires extraction of the classification features
        if 'dropout' in self.params.augmentation and getattr(self.params, 'use_augmentation', True):
            num, prob = self.params.augmentation['dropout']
            self.transforms.extend(self.transforms[:1]*num)
            train_feat_18_cls = torch.cat([train_feat_18_cls,
                                           F.dropout2d(train_feat_18_cls[0:1, ...].
                                                       expand(num, -1, -1, -1), p=prob, training=True)])
            train_feat_72_cls = torch.cat([train_feat_72_cls,
                                           F.dropout2d(train_feat_72_cls[0:1, ...].
                                                       expand(num, -1, -1, -1), p=prob,training=True)])
            train_feat_72_reg = torch.cat([train_feat_72_reg,
                                           F.dropout2d(train_feat_72_reg[0:1, ...].
                                                       expand(num, -1, -1, -1), p=prob,training=True)])

        # Get target boxes for the different augmentations
        target_boxes = self.init_target_boxes()

        # Set number of iterations
        num_iter = getattr(self.params, 'net_opt_iter', None)
        num_iter_72 = getattr(self.params, 'net_opt_iter_72', None)
        reg_num_iter = getattr(self.params, 'reg_net_opt_iter', None)

        # Get target filter by running the discriminative model prediction module
        with torch.no_grad():
            # extract target_filter_72, target_filter_18 and target_reg_filter_72 using Clf and Reg model generators.
            # NOTE(review): target_filters and losses are not used in the
            # visible part of this method.
            self.target_filter_72, target_filters, losses = self.net.classifier_72.get_filter(train_feat_72_cls,
                                                                                              target_boxes,
                                                                                              num_iter=num_iter_72)
            self.target_filter_18, _, _ = self.net.classifier_18.get_filter(train_feat_18_cls,
                                                                            target_boxes,
                                                                            num_iter=num_iter)

            # get init_reg_filter using target sample and optimize filters using training samples
            target_feat_36 = train_feat_36.view(-1, *train_feat_36.shape[-3:])[0].unsqueeze(0)
            target_feat_72 = train_feat_72.view(-1, *train_feat_72.shape[-3:])[0].unsqueeze(0)
            target_bb = target_boxes[0].unsqueeze(0).clone()
            init_reg_filter = self.net.regressor_72.generate_init_filter(target_feat_36, target_feat_72, target_bb)

            # NOTE(review): reg_num_iter defaults to None when the parameter
            # is absent, and `None > 0` raises TypeError on Python 3 --
            # confirm that 'reg_net_opt_iter' is always configured.
            if reg_num_iter > 0:
                self.target_reg_filter_72, _, reg_losses = self.net.regressor_72.generate_filter_optimizer(
                    init_reg_filter, train_feat_72_reg, target_boxes.view(-1, 4).clone(), num_iter=reg_num_iter)
            else:
                self.target_reg_filter_72 = init_reg_filter

            # get initial Clf and Reg model used in tracking process, which merge the initial model and the optimized model.
            self.init_target_filter_72 = self.target_filter_72
            self.init_target_filter_18 = self.target_filter_18
            self.init_reg_filter = init_reg_filter

        # Set feature size and other related sizes
        self.feature_sz_18 = torch.Tensor(list(x.shape[-2:]))
        ksz_18 = self.net.classifier_18.filter_size
        self.kernel_size_18 = torch.Tensor([ksz_18, ksz_18] if isinstance(ksz_18, (int, float)) else ksz_18)
        self.output_sz_18 = self.feature_sz_18 + (self.kernel_size_18 + 1) % 2

        self.feature_sz_72 = torch.Tensor(list(train_feat_72.shape[-2:]))
        ksz_72 = self.net.classifier_72.filter_size
        self.kernel_size_72 = torch.Tensor([ksz_72, ksz_72] if isinstance(ksz_72, (int, float)) else ksz_72)
        self.output_sz_72 = self.feature_sz_72 + (self.kernel_size_72 + 1) % 2
        # The overall output size is fixed to 72x72 rather than derived
        # from output_sz_72
        self.output_sz = torch.Tensor([72, 72])

        # Construct output window
        self.output_window = None
        if getattr(self.params, 'window_output', False):
            if getattr(self.params, 'use_clipped_window', False):
                self.output_window = dcf.hann2d_clipped(
                    self.output_sz.long(),
                    self.output_sz.long() * self.params.effective_search_area / self.params.search_area_scale,
                    centered=False).to(self.params.device)
            else:
                self.output_window = dcf.hann2d(self.output_sz.long(), centered=True).to(self.params.device)
            self.output_window = self.output_window.squeeze(0)

        # Init memory
        if getattr(self.params, 'update_classifier_and_regressor', True):
            self.init_memory(TensorList([train_feat_72_cls]),
                             TensorList([train_feat_18_cls]), TensorList([train_feat_72_reg]))