Example #1
    def init_classifier(self, init_backbone_feat_rgb, init_backbone_feat_d):
        # Get classification features
        x_rgb, x_d = self.get_classification_features(init_backbone_feat_rgb, init_backbone_feat_d)

        # Overwrite some parameters in the classifier. (These are not generally changed)
        self._overwrite_classifier_params(feature_dim=x_rgb.shape[-3])

        # Add the dropout augmentation here, since it requires extraction of the classification features
        if 'dropout' in self.params.augmentation and self.params.get('use_augmentation', True):
            num, prob = self.params.augmentation['dropout']
            self.transforms.extend(self.transforms[:1]*num)
            x_rgb = torch.cat([x_rgb, F.dropout2d(x_rgb[0:1,...].expand(num,-1,-1,-1), p=prob, training=True)])
            x_d = torch.cat([x_d, F.dropout2d(x_d[0:1,...].expand(num,-1,-1,-1), p=prob, training=True)])

        # Set feature size and other related sizes
        self.feature_sz = torch.Tensor(list(x_rgb.shape[-2:]))
        ksz = self.net_rgb.classifier.filter_size
        self.kernel_size = torch.Tensor([ksz, ksz] if isinstance(ksz, (int, float)) else ksz)
        self.output_sz = self.feature_sz + (self.kernel_size + 1)%2

        # Construct output window
        self.output_window = None
        if self.params.get('window_output', False):
            if self.params.get('use_clipped_window', False):
                self.output_window = dcf.hann2d_clipped(self.output_sz.long(), (self.output_sz*self.params.effective_search_area / self.params.search_area_scale).long(), centered=True).to(self.params.device)
            else:
                self.output_window = dcf.hann2d(self.output_sz.long(), centered=True).to(self.params.device)
            self.output_window = self.output_window.squeeze(0)

        # Get target boxes for the different augmentations
        target_boxes = self.init_target_boxes()

        # Set number of iterations
        plot_loss = self.params.debug > 0
        num_iter = self.params.get('net_opt_iter', None)

        # Get target filter by running the discriminative model prediction module
        with torch.no_grad():
            self.target_filter_rgb, _, losses_rgb = self.net_rgb.classifier.get_filter(x_rgb, target_boxes, num_iter=num_iter,
                                                                           compute_losses=plot_loss)
            self.target_filter_d, _, losses_d = self.net_d.classifier.get_filter(x_d, target_boxes, num_iter=num_iter,
                                                                           compute_losses=plot_loss)

        # Init memory
        if self.params.get('update_classifier', True):
            self.init_memory(TensorList([x_rgb]), TensorList([x_d]))

        if plot_loss:
            if isinstance(losses_rgb, dict):
                losses_rgb = losses_rgb['train']
                losses_d = losses_d['train']
            self.losses_rgb = torch.cat(losses_rgb)
            self.losses_d = torch.cat(losses_d)
            if self.visdom is not None:
                self.visdom.register((self.losses_rgb, torch.arange(self.losses_rgb.numel())), 'lineplot', 3, 'Training Loss_RGB' + self.id_str)
                self.visdom.register((self.losses_d, torch.arange(self.losses_d.numel())), 'lineplot', 3, 'Training Loss_D' + self.id_str)
            elif self.params.debug >= 3:
                plot_graph(self.losses_rgb, 10, title='Training Loss_RGB' + self.id_str)
                plot_graph(self.losses_d, 10, title='Training Loss_D' + self.id_str)
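A note on the example above: the line output_sz = feature_sz + (kernel_size + 1) % 2 pads the score map by one cell when the filter size is even, so the map stays odd-sized with a well-defined center. A minimal standalone sketch of that behavior (PyTorch only; the sizes are illustrative):

import torch

feature_sz = torch.Tensor([18., 18.])
for ksz in (3, 4):
    kernel_size = torch.Tensor([ksz, ksz])
    output_sz = feature_sz + (kernel_size + 1) % 2
    # ksz=3 -> [18., 18.] (unchanged); ksz=4 -> [19., 19.] (made odd)
    print(ksz, output_sz.tolist())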
Example #2
    def init_classifier(self, init_backbone_feat):
        # Get classification features
        x = self.get_classification_features(init_backbone_feat)

        # Add the dropout augmentation here, since it requires extraction of the classification features
        if 'dropout' in self.params.augmentation and getattr(self.params, 'use_augmentation', True):
            num, prob = self.params.augmentation['dropout']
            self.transforms.extend(self.transforms[:1]*num)
            x = torch.cat([x, F.dropout2d(x[0:1,...].expand(num,-1,-1,-1), p=prob, training=True)])

        # Set feature size and other related sizes
        # e.g. an 18x18 feature map
        self.feature_sz = torch.Tensor(list(x.shape[-2:]))

        ksz = self.net.classifier.filter_size
        self.kernel_size = torch.Tensor([ksz, ksz] if isinstance(ksz, (int, float)) else ksz)
        self.output_sz = self.feature_sz + (self.kernel_size + 1)%2

        # Construct output window
        self.output_window = None
        if getattr(self.params, 'window_output', False):
            if getattr(self.params, 'use_clipped_window', False):
                self.output_window = dcf.hann2d_clipped(self.output_sz.long(), self.output_sz.long()*self.params.effective_search_area / self.params.search_area_scale, centered=False).to(self.params.device)
            else:
                self.output_window = dcf.hann2d(self.output_sz.long(), centered=True).to(self.params.device)
            self.output_window = self.output_window.squeeze(0)

        # Get target boxes for the different augmentations
        target_boxes = self.init_target_boxes()

        # Set number of iterations
        plot_loss = self.params.debug > 0
        num_iter = getattr(self.params, 'net_opt_iter', None)

        # Get target filter by running the discriminative model prediction module
        with torch.no_grad():
            self.target_filter, _, losses = self.net.classifier.get_filter(x, target_boxes, num_iter=num_iter,
                                                                           compute_losses=plot_loss)

        # Init memory
        if getattr(self.params, 'update_classifier', True):
            self.init_memory(TensorList([x]))

        if plot_loss:
            if isinstance(losses, dict):
                losses = losses['train']
            self.losses = torch.stack(losses)
            if self.visdom is not None:
                self.visdom.register((self.losses, torch.arange(self.losses.numel())), 'lineplot', 3, 'Training Loss')
            elif self.params.debug >= 3:
                plot_graph(self.losses, 10, title='Training Loss')
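For context, a hedged sketch of how an output window like the one constructed above is typically consumed at localization time (the real pytracking localization logic is more involved; the function name here is illustrative):

import torch

def window_scores(scores, output_window):
    # Multiply the raw score map by the window so peaks far from the
    # previous target position are suppressed, then take the argmax.
    s = scores * output_window
    idx = int(torch.argmax(s.view(-1)))
    return idx // s.shape[-1], idx % s.shape[-1]  # (row, col)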
Example #3
    def setting_adaptive_search_region_using_speed(self, im):
        """ reinitialze search region scale for next frame """
        self.atom.target_scale = 1.0
        search_area = torch.prod(self.atom.target_sz * self.atom.params.search_area_scale).item()

        if search_area > self.atom.params.max_image_sample_size:
            self.atom.target_scale = math.sqrt(search_area / self.atom.params.max_image_sample_size)
        elif search_area < self.atom.params.min_image_sample_size:
            self.atom.target_scale = math.sqrt(search_area / self.atom.params.min_image_sample_size)

        # Target size in base scale
        self.atom.base_target_sz = self.atom.target_sz / self.atom.target_scale

        # Use odd square search area and set sizes
        feat_max_stride = max(self.atom.params.features.stride())
        if getattr(self.atom.params, 'search_area_shape', 'square') == 'square':
            self.atom.img_sample_sz = torch.round(
                torch.sqrt(torch.prod(self.atom.base_target_sz * self.atom.params.search_area_scale))) * torch.ones(2)
        elif self.atom.params.search_area_shape == 'initrect':  # non-square: use the initial rectangle shape
            self.atom.img_sample_sz = torch.round(self.atom.base_target_sz * self.atom.params.search_area_scale)
        else:
            raise ValueError('Unknown search area shape')
        if self.atom.params.feature_size_odd:
            self.atom.img_sample_sz += feat_max_stride - self.atom.img_sample_sz % (2 * feat_max_stride)
        else:
            self.atom.img_sample_sz += feat_max_stride - (self.atom.img_sample_sz + feat_max_stride) % (
                        2 * feat_max_stride)

        # Set sizes
        self.atom.img_support_sz = self.atom.img_sample_sz
        self.atom.feature_sz = self.atom.params.features.size(self.atom.img_sample_sz)
        self.atom.output_sz = self.atom.params.score_upsample_factor * self.atom.img_support_sz  # Interpolated size of the output
        self.atom.iou_img_sample_sz = self.atom.img_sample_sz
        # Setup scale bounds
        im = numpy_to_torch(im)
        self.atom.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
        self.atom.min_scale_factor = torch.max(10 / self.atom.base_target_sz)
        self.atom.max_scale_factor = torch.min(self.atom.image_sz / self.atom.base_target_sz)

        self.atom.output_window = None
        if getattr(self.params, 'window_output', False):
            if getattr(self.params, 'use_clipped_window', False):
                self.atom.output_window = dcf.hann2d_clipped(self.atom.output_sz.long(),
                                                             self.atom.output_sz.long() * self.params.effective_search_area / self.params.search_area_scale,
                                                             centered=False).to(self.params.device)
            else:
                self.atom.output_window = dcf.hann2d(self.atom.output_sz.long(), centered=False).to(self.params.device)
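dcf.hann2d and dcf.hann2d_clipped above come from pytracking's dcf module. As a rough self-contained approximation of the unclipped case (an assumption; the library version differs in centering and padding details):

import torch

def hann2d_sketch(sz):
    # Separable 2-D Hann window: outer product of two 1-D Hann windows.
    wy = torch.hann_window(int(sz[0]), periodic=False)
    wx = torch.hann_window(int(sz[1]), periodic=False)
    return wy.view(-1, 1) * wx.view(1, -1)

win = hann2d_sketch(torch.tensor([19, 19]))  # peak at the center, ~0 at the borders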
Example #4
    def initialize(self, image, info: dict) -> dict:

        state = info['init_bbox']

        # Initialize some stuff
        self.frame_num = 1
        if not hasattr(self.params, 'device'):
            self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

        # Initialize features
        self.initialize_features()

        # MetricNet: load the metric model and warm it up with a dummy batch
        self.metric_model = model_load(self.params.metric_model_path)
        with torch.no_grad():
            tmp = torch.rand(5, 3, 107, 107, dtype=torch.float32).cuda()
            self.metric_model(tmp)
            self.target_metric_feature = get_target_feature(
                self.metric_model, np.array(state), np.array(image))
        pos_generator = SampleGenerator(
            'gaussian', np.array([image.shape[1], image.shape[0]]), 0.1, 1.3)
        gt_pos_examples = pos_generator(
            np.array(state).astype(int), 20, [0.7, 1])
        gt_iou = 0.7
        while gt_pos_examples.shape[0] == 0:
            gt_iou = gt_iou - 0.1
            gt_pos_examples = pos_generator(
                np.array(state).astype(int), 20, [gt_iou, 1])
        print('gt-iou:', gt_iou)
        with torch.no_grad():
            gt_pos_features0 = get_anchor_feature(self.metric_model,
                                                  np.array(image),
                                                  gt_pos_examples)
            gt_pos_features = gt_pos_features0.cpu().detach().numpy()
            # target_metric_feature = self.target_metric_feature.repeat(gt_pos_features.shape[0], 1)
            # pos_all = torch.norm(gt_pos_features0 - target_metric_feature, 2, dim=1).view(-1)
            # self.similar=pos_all.mean()*self.params.sim_rate
            # print('similarThresh',self.similar)
        self.clf = lof_fit(gt_pos_features, k=5)
        self.lof_thresh = 0
        self.target_features_all = []
        self.target_features_all.append(self.target_metric_feature)
        # Check if image is color
        self.params.features.set_is_color(image.shape[2] == 3)

        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        tic = time.time()

        # Get position and size
        self.pos = torch.Tensor(
            [state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
        self.target_sz = torch.Tensor([state[3], state[2]])

        # Set search area
        self.target_scale = 1.0
        search_area = torch.prod(self.target_sz *
                                 self.params.search_area_scale).item()
        if search_area > self.params.max_image_sample_size:
            self.target_scale = math.sqrt(search_area /
                                          self.params.max_image_sample_size)
        elif search_area < self.params.min_image_sample_size:
            self.target_scale = math.sqrt(search_area /
                                          self.params.min_image_sample_size)

        # Check if IoUNet is used
        self.use_iou_net = getattr(self.params, 'use_iou_net', True)

        # Target size in base scale
        self.base_target_sz = self.target_sz / self.target_scale

        # Use odd square search area and set sizes
        feat_max_stride = max(self.params.features.stride())
        if getattr(self.params, 'search_area_shape', 'square') == 'square':
            self.img_sample_sz = torch.round(
                torch.sqrt(
                    torch.prod(self.base_target_sz *
                               self.params.search_area_scale))) * torch.ones(2)
        elif self.params.search_area_shape == 'initrect':
            self.img_sample_sz = torch.round(self.base_target_sz *
                                             self.params.search_area_scale)
        else:
            raise ValueError('Unknown search area shape')
        if self.params.feature_size_odd:
            self.img_sample_sz += feat_max_stride - self.img_sample_sz % (
                2 * feat_max_stride)
        else:
            self.img_sample_sz += feat_max_stride - (
                self.img_sample_sz + feat_max_stride) % (2 * feat_max_stride)

        # Set sizes
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
        self.kernel_size = self.fparams.attribute('kernel_size')

        self.iou_img_sample_sz = self.img_sample_sz

        # Optimization options
        self.params.precond_learning_rate = self.fparams.attribute(
            'learning_rate')
        if self.params.CG_forgetting_rate is None or max(
                self.params.precond_learning_rate) >= 1:
            self.params.direction_forget_factor = 0
        else:
            self.params.direction_forget_factor = (
                1 - max(self.params.precond_learning_rate)
            )**self.params.CG_forgetting_rate

        self.output_window = None
        if getattr(self.params, 'window_output', False):
            if getattr(self.params, 'use_clipped_window', False):
                self.output_window = dcf.hann2d_clipped(
                    self.output_sz.long(),
                    self.output_sz.long() * self.params.effective_search_area /
                    self.params.search_area_scale,
                    centered=False).to(self.params.device)
            else:
                self.output_window = dcf.hann2d(self.output_sz.long(),
                                                centered=False).to(
                                                    self.params.device)

        # Initialize some learning things
        self.init_learning()

        # Convert image
        im = numpy_to_torch(image)
        self.im = im  # For debugging only

        # Setup scale bounds
        self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract and transform sample
        x = self.generate_init_samples(im)

        # Initialize iounet
        if self.use_iou_net:
            self.init_iou_net()

        # Initialize projection matrix
        self.init_projection_matrix(x)

        # Transform to get the training sample
        train_x = self.preprocess_sample(x)

        # Generate label function
        init_y = self.init_label_function(train_x)

        # Init memory
        self.init_memory(train_x)

        # Init optimizer and do initial optimization
        self.init_optimization(train_x, init_y)

        self.pos_iounet = self.pos.clone()

        out = {'time': time.time() - tic}
        return out
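lof_fit above is project-specific. A comparable setup with scikit-learn's LocalOutlierFactor, shown only as an analogy and not as the project's code (array shapes are illustrative):

import numpy as np
from sklearn.neighbors import LocalOutlierFactor

gt_pos_features = np.random.rand(20, 1024)    # stand-in for the metric features
candidate_features = np.random.rand(5, 1024)  # features of tracking candidates

clf = LocalOutlierFactor(n_neighbors=5, novelty=True)
clf.fit(gt_pos_features)
scores = clf.decision_function(candidate_features)  # higher = more inlier-like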
Example #5
    def initialize(self, image, info: dict) -> dict:
        state = info['init_bbox']

        # Initialize some stuff
        self.frame_num = 1
        if not self.params.has('device'):
            self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

        # Initialize features
        self.initialize_features()

        # Check if image is color
        self.params.features.set_is_color(image.shape[2] == 3)

        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        tic = time.time()

        # Get position and size
        self.pos = torch.Tensor(
            [state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
        self.target_sz = torch.Tensor([state[3], state[2]])

        # Set search area
        self.target_scale = 1.0
        search_area = torch.prod(self.target_sz *
                                 self.params.search_area_scale).item()
        if search_area > self.params.max_image_sample_size:
            self.target_scale = math.sqrt(search_area /
                                          self.params.max_image_sample_size)
        elif search_area < self.params.min_image_sample_size:
            self.target_scale = math.sqrt(search_area /
                                          self.params.min_image_sample_size)

        # Check if IoUNet is used
        self.use_iou_net = self.params.get('use_iou_net', True)

        # Target size in base scale
        self.base_target_sz = self.target_sz / self.target_scale

        # Use odd square search area and set sizes
        feat_max_stride = max(self.params.features.stride())
        if self.params.get('search_area_shape', 'square') == 'square':
            self.img_sample_sz = torch.round(
                torch.sqrt(
                    torch.prod(self.base_target_sz *
                               self.params.search_area_scale))) * torch.ones(2)
        elif self.params.search_area_shape == 'initrect':
            self.img_sample_sz = torch.round(self.base_target_sz *
                                             self.params.search_area_scale)
        else:
            raise ValueError('Unknown search area shape')
        if self.params.feature_size_odd:
            self.img_sample_sz += feat_max_stride - self.img_sample_sz % (
                2 * feat_max_stride)
        else:
            self.img_sample_sz += feat_max_stride - (
                self.img_sample_sz + feat_max_stride) % (2 * feat_max_stride)

        # Set sizes
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
        self.kernel_size = self.fparams.attribute('kernel_size')

        self.iou_img_sample_sz = self.img_sample_sz

        # Optimization options
        self.params.precond_learning_rate = self.fparams.attribute(
            'learning_rate')
        if self.params.CG_forgetting_rate is None or max(
                self.params.precond_learning_rate) >= 1:
            self.params.direction_forget_factor = 0
        else:
            self.params.direction_forget_factor = (
                1 - max(self.params.precond_learning_rate)
            )**self.params.CG_forgetting_rate

        self.output_window = None
        if self.params.get('window_output', False):
            if self.params.get('use_clipped_window', False):
                self.output_window = dcf.hann2d_clipped(
                    self.output_sz.long(),
                    self.output_sz.long() * self.params.effective_search_area /
                    self.params.search_area_scale,
                    centered=False).to(self.params.device)
            else:
                self.output_window = dcf.hann2d(self.output_sz.long(),
                                                centered=False).to(
                                                    self.params.device)

        # Initialize some learning things
        self.init_learning()

        # Convert image
        im = numpy_to_torch(image)
        self.im = im  # For debugging only

        # Setup scale bounds
        self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract and transform sample
        x = self.generate_init_samples(im)

        # Initialize iounet
        if self.use_iou_net:
            self.init_iou_net()

        # Initialize projection matrix
        self.init_projection_matrix(x)

        # Transform to get the training sample
        train_x = self.preprocess_sample(x)

        # Generate label function
        init_y = self.init_label_function(train_x)

        # Init memory
        self.init_memory(train_x)

        # Init optimizer and do initial optimization
        self.init_optimization(train_x, init_y)

        self.pos_iounet = self.pos.clone()

        out = {'time': time.time() - tic}
        return out
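A worked instance of the direction_forget_factor formula used above, with illustrative values: for a learning rate of 0.01 and a CG forgetting rate of 75,

lr, cg_forgetting_rate = 0.01, 75
direction_forget_factor = (1 - lr) ** cg_forgetting_rate  # 0.99 ** 75 ≈ 0.47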
Example #6
    def initialize(self, image1, image2, state, *args, **kwargs):

        # Initialize some stuff
        self.frame_num = 1
        if not hasattr(self.params, 'device'):
            self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

        # Initialize features
        self.initialize_features()

        # Check if the images are color (the second call overwrites the first;
        # both modalities are assumed to share the same color mode)
        self.params.features.set_is_color(image1.shape[2] == 3)
        self.params.features.set_is_color(image2.shape[2] == 3)
        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        self.time = 0
        tic = time.time()

        # Get position and size
        self.pos = torch.Tensor([state[1] + (state[3] - 1)/2, state[0] + (state[2] - 1)/2])
        self.target_sz = torch.Tensor([state[3], state[2]])

        # Set search area
        search_area = torch.prod(self.target_sz * self.params.search_area_scale).item()
        self.target_scale = math.sqrt(search_area) / self.params.image_sample_size

        # Check if IoUNet is used
        self.use_iou_net = getattr(self.params, 'use_iou_net', True)

        # Target size in base scale
        self.base_target_sz = self.target_sz / self.target_scale

        # Set sizes
        self.img_sample_sz = torch.Tensor([self.params.image_sample_size, self.params.image_sample_size])
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        if getattr(self.params, 'score_upsample_factor', None) is None:
            self.output_sz = self.feature_sz[0]
        else:
            self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
        self.kernel_size = self.fparams.attribute('kernel_size')

        self.iou_img_sample_sz = self.img_sample_sz

        self.params.score_fusion_strategy = getattr(self.params, 'score_fusion_strategy', 'default')
        self.output_window = None
        if getattr(self.params, 'window_output', False):
            if getattr(self.params, 'use_clipped_window', False):
                self.output_window = dcf.hann2d_clipped(self.output_sz.long(), self.output_sz.long()*self.params.effective_search_area / self.params.search_area_scale, centered=False).to(self.params.device)
            else:
                self.output_window = dcf.hann2d(self.output_sz.long(), centered=True).to(self.params.device)

            self.output_window = self.output_window.squeeze(0)
        # Convert image
        im1 = numpy_to_torch(image1)
        im2 = numpy_to_torch(image2)
        #self.im = im

        # Setup bounds
        self.image_sz = torch.Tensor([im1.shape[2], im1.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract and transform sample
        x1 = self.generate_init_samples(im1)
        x2 = self.generate_init_samples(im2)

        x = TensorList([torch.cat((v, i), 1) for v, i in zip(x1, x2)])

        self.init_classifier(x)

        if self.use_iou_net:
            self.init_iou_net()

        # Init memory
        # self.init_memory(x)

        self.time += time.time() - tic
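The TensorList comprehension above fuses the two modalities by concatenating their feature maps along the channel dimension. In isolation (shapes are illustrative assumptions):

import torch

v = torch.rand(15, 512, 18, 18)   # e.g. visible-image features
i = torch.rand(15, 512, 18, 18)   # e.g. infrared-image features
fused = torch.cat((v, i), dim=1)  # -> shape (15, 1024, 18, 18)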
Example #7
    def init_classifier(self, init_backbone_feat):
        # Get classification features
        x = self.get_classification_features(init_backbone_feat)

        # Overwrite some parameters in the classifier. (These are not generally changed)
        self._overwrite_classifier_params(feature_dim=x.shape[-3])

        # Add the dropout augmentation here, since it requires extraction of the classification features
        if 'dropout' in self.params.augmentation and self.params.get(
                'use_augmentation', True):
            num, prob = self.params.augmentation['dropout']
            self.transforms.extend(self.transforms[:1] * num)
            x = torch.cat([
                x,
                F.dropout2d(x[0:1, ...].expand(num, -1, -1, -1),
                            p=prob,
                            training=True)
            ])

        # Set feature size and other related sizes
        self.feature_sz = torch.Tensor(list(x.shape[-2:]))
        ksz = self.net.classifier.filter_size
        self.kernel_size = torch.Tensor(
            [ksz, ksz] if isinstance(ksz, (int, float)) else ksz)
        self.output_sz = self.feature_sz + (self.kernel_size + 1) % 2

        # Construct output window
        self.output_window = None
        if self.params.get('window_output', False):
            if self.params.get('use_clipped_window', False):
                self.output_window = dcf.hann2d_clipped(
                    self.output_sz.long(),
                    (self.output_sz * self.params.effective_search_area /
                     self.params.search_area_scale).long(),
                    centered=True).to(self.params.device)
            else:
                self.output_window = dcf.hann2d(self.output_sz.long(),
                                                centered=True).to(
                                                    self.params.device)
            self.output_window = self.output_window.squeeze(0)

        # Get target boxes for the different augmentations
        target_boxes = self.init_target_boxes()

        # Set number of iterations
        plot_loss = self.params.debug > 0
        num_iter = self.params.get('net_opt_iter', None)

        # mask in Transformer
        self.transformer_label = prutils.gaussian_label_function(
            target_boxes.cpu().view(-1, 4),
            0.1,
            self.net.classifier.filter_size,
            self.feature_sz,
            self.img_sample_sz,
            end_pad_if_even=False)

        self.transformer_label = self.transformer_label.unsqueeze(1).cuda()
        self.x_clf = x

        self.transformer_memory, _ = self.net.classifier.transformer.encoder(
            self.x_clf.unsqueeze(1), pos=None)

        for i in range(x.shape[0]):
            _, cur_encoded_feat = self.net.classifier.transformer.decoder(
                x[i, ...].unsqueeze(0).unsqueeze(0),
                memory=self.transformer_memory,
                pos=self.transformer_label,
                query_pos=None)
            if i == 0:
                encoded_feat = cur_encoded_feat
            else:
                encoded_feat = torch.cat((encoded_feat, cur_encoded_feat), 0)
        x = encoded_feat.contiguous()

        # Get target filter by running the discriminative model prediction module
        with torch.no_grad():
            self.target_filter, _, losses = self.net.classifier.get_filter(
                x, target_boxes, num_iter=num_iter, compute_losses=plot_loss)

        # Init memory
        if self.params.get('update_classifier', True):
            self.init_memory(TensorList([x]))
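The decoder loop above grows encoded_feat by repeated torch.cat calls. A sketch of an equivalent, more idiomatic refactor that collects the per-sample outputs in a list and concatenates once (same decoder call as in the snippet):

encoded = []
for i in range(x.shape[0]):
    _, feat = self.net.classifier.transformer.decoder(
        x[i:i + 1].unsqueeze(0), memory=self.transformer_memory,
        pos=self.transformer_label, query_pos=None)
    encoded.append(feat)
x = torch.cat(encoded, dim=0).contiguous()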
Example #8
    def initialize(self, image, state, gt, *args, **kwargs):
        if len(gt) == 8:
            ww = gt[2] - gt[0]
            hh = gt[7] - gt[1]
        else:
            ww = gt[2]
            hh = gt[3]
        # Initialize some stuff
        self.frame_num = 1
        if not hasattr(self.params, 'device'):
            self.params.device = 'cuda' if self.params.use_gpu else 'cpu'
        if ww < 25 and hh < 25:
            self.feature_sz = TensorList([torch.Tensor([28., 28.])])
            self.output_layer = TensorList(['layer2'])
        else:
            self.feature_sz = TensorList([torch.Tensor([14., 14.])])
            self.output_layer = TensorList(['layer3'])

        # Initialize features
        self.initialize_features(self.output_layer)

        # Check if image is color
        self.params.features.set_is_color(image.shape[2] == 3)

        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        self.time = 0
        tic = time.time()

        # Get position and size
        self.pos = torch.Tensor(
            [state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
        # Shrink large boxes slightly (height by 1/8, width by 1/4)
        if state[3] > 50 or state[2] > 50:
            self.target_sz = torch.Tensor(
                [state[3] - state[3] / 8, state[2] - state[2] / 4])
        else:
            self.target_sz = torch.Tensor([state[3], state[2]])
        # Set search area
        self.target_scale = 1.0
        search_area = torch.prod(self.target_sz *
                                 self.params.search_area_scale).item()
        if search_area > self.params.max_image_sample_size:
            self.target_scale = math.sqrt(search_area /
                                          self.params.max_image_sample_size)
        elif search_area < self.params.min_image_sample_size:
            self.target_scale = math.sqrt(search_area /
                                          self.params.min_image_sample_size)

        # Check if IoUNet is used
        self.use_iou_net = getattr(self.params, 'use_iou_net', True)

        # Target size in base scale
        self.base_target_sz = self.target_sz / self.target_scale

        # Use odd square search area and set sizes
        feat_max_stride = max(self.params.features.stride())
        if getattr(self.params, 'search_area_shape', 'square') == 'square':
            self.img_sample_sz = torch.round(
                torch.sqrt(
                    torch.prod(self.base_target_sz *
                               self.params.search_area_scale))) * torch.ones(2)
        elif self.params.search_area_shape == 'initrect':
            self.img_sample_sz = torch.round(self.base_target_sz *
                                             self.params.search_area_scale)
        else:
            raise ValueError('Unknown search area shape')
        if self.params.feature_size_odd:
            self.img_sample_sz += feat_max_stride - self.img_sample_sz % (
                2 * feat_max_stride)
        else:
            self.img_sample_sz += feat_max_stride - (
                self.img_sample_sz + feat_max_stride) % (2 * feat_max_stride)

        # Set sizes
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
        self.kernel_size = self.fparams.attribute('kernel_size')

        self.iou_img_sample_sz = self.img_sample_sz

        # Optimization options
        self.params.precond_learning_rate = self.fparams.attribute(
            'learning_rate')
        if self.params.CG_forgetting_rate is None or max(
                self.params.precond_learning_rate) >= 1:
            self.params.direction_forget_factor = 0
        else:
            self.params.direction_forget_factor = (
                1 - max(self.params.precond_learning_rate)
            )**self.params.CG_forgetting_rate

        self.output_window = None
        if getattr(self.params, 'window_output', False):
            if getattr(self.params, 'use_clipped_window', False):
                self.output_window = dcf.hann2d_clipped(
                    self.output_sz.long(),
                    self.output_sz.long() * self.params.effective_search_area /
                    self.params.search_area_scale,
                    centered=False).to(self.params.device)
            else:
                self.output_window = dcf.hann2d(self.output_sz.long(),
                                                centered=False).to(
                                                    self.params.device)

        # Initialize some learning things
        self.init_learning()

        # Convert image
        im = numpy_to_torch(image)
        self.im = im  # For debugging only

        # Setup scale bounds
        self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract and transform sample
        x = self.generate_init_samples(im)

        # Initialize iounet
        if self.use_iou_net:
            self.init_iou_net()

        # Initialize projection matrix
        self.init_projection_matrix(x)

        # Transform to get the training sample
        train_x = self.preprocess_sample(x)

        # Generate label function
        init_y = self.init_label_function(train_x)

        # Init memory
        self.init_memory(train_x)

        # Init optimizer and do initial optimization
        self.init_optimization(train_x, init_y)

        self.pos_iounet = self.pos.clone()

        self.time += time.time() - tic
        self.pool1 = torch.nn.AdaptiveMaxPool2d((1, 224))
        self.pool2 = torch.nn.AdaptiveMaxPool2d((224, 1))
Example #9
    def init_classifier_and_regressor(self, init_backbone_feat):
        # Get classification features
        x = self.net.get_backbone_clf_feat(init_backbone_feat)
        train_feat_18_cls = self.get_classification_features(init_backbone_feat)

        with torch.no_grad():
            train_feat_18 = self.net.pyramid_first_conv(x=None, x_backbone=x)
            train_feat_36 = self.net.pyramid_36(train_feat_18, init_backbone_feat['layer2'])
            train_feat_72 = self.net.pyramid_72(train_feat_36, init_backbone_feat['layer1'])

            train_feat_72_cls = self.net.classifier_72.extract_classification_feat(
                train_feat_72.view(-1, *train_feat_72.shape[-3:]))
            train_feat_72_reg = self.net.regressor_72.extract_regression_feat(
                feat_36=train_feat_36.view(-1, *train_feat_36.shape[-3:]),
                feat_72=train_feat_72.view(-1, *train_feat_72.shape[-3:]))

        # Add the dropout augmentation here, since it requires extraction of the classification features
        if 'dropout' in self.params.augmentation and getattr(self.params, 'use_augmentation', True):
            num, prob = self.params.augmentation['dropout']
            self.transforms.extend(self.transforms[:1]*num)
            train_feat_18_cls = torch.cat([
                train_feat_18_cls,
                F.dropout2d(train_feat_18_cls[0:1, ...].expand(num, -1, -1, -1),
                            p=prob, training=True)])
            train_feat_72_cls = torch.cat([
                train_feat_72_cls,
                F.dropout2d(train_feat_72_cls[0:1, ...].expand(num, -1, -1, -1),
                            p=prob, training=True)])
            train_feat_72_reg = torch.cat([
                train_feat_72_reg,
                F.dropout2d(train_feat_72_reg[0:1, ...].expand(num, -1, -1, -1),
                            p=prob, training=True)])

        # Get target boxes for the different augmentations
        target_boxes = self.init_target_boxes()

        # Set number of iterations
        num_iter = getattr(self.params, 'net_opt_iter', None)
        num_iter_72 = getattr(self.params, 'net_opt_iter_72', None)
        reg_num_iter = getattr(self.params, 'reg_net_opt_iter', None)

        # Get target filter by running the discriminative model prediction module
        with torch.no_grad():
            # extract target_filter_72, target_filter_18 and target_reg_filter_72 using Clf and Reg model generators.
            self.target_filter_72, target_filters, losses = self.net.classifier_72.get_filter(train_feat_72_cls,
                                                                                              target_boxes,
                                                                                              num_iter=num_iter_72)
            self.target_filter_18, _, _ = self.net.classifier_18.get_filter(train_feat_18_cls,
                                                                            target_boxes,
                                                                            num_iter=num_iter)

            # get init_reg_filter using target sample and optimize filters using training samples
            target_feat_36 = train_feat_36.view(-1, *train_feat_36.shape[-3:])[0].unsqueeze(0)
            target_feat_72 = train_feat_72.view(-1, *train_feat_72.shape[-3:])[0].unsqueeze(0)
            target_bb = target_boxes[0].unsqueeze(0).clone()
            init_reg_filter = self.net.regressor_72.generate_init_filter(target_feat_36, target_feat_72, target_bb)

            if reg_num_iter is not None and reg_num_iter > 0:
                self.target_reg_filter_72, _, reg_losses = self.net.regressor_72.generate_filter_optimizer(
                    init_reg_filter, train_feat_72_reg, target_boxes.view(-1, 4).clone(), num_iter=reg_num_iter)
            else:
                self.target_reg_filter_72 = init_reg_filter

            # Keep the initial Clf and Reg models used during tracking, where the initial and optimized models are merged.
            self.init_target_filter_72 = self.target_filter_72
            self.init_target_filter_18 = self.target_filter_18
            self.init_reg_filter = init_reg_filter

        # Set feature size and other related sizes
        self.feature_sz_18 = torch.Tensor(list(x.shape[-2:]))
        ksz_18 = self.net.classifier_18.filter_size
        self.kernel_size_18 = torch.Tensor([ksz_18, ksz_18] if isinstance(ksz_18, (int, float)) else ksz_18)
        self.output_sz_18 = self.feature_sz_18 + (self.kernel_size_18 + 1) % 2

        self.feature_sz_72 = torch.Tensor(list(train_feat_72.shape[-2:]))
        ksz_72 = self.net.classifier_72.filter_size
        self.kernel_size_72 = torch.Tensor([ksz_72, ksz_72] if isinstance(ksz_72, (int, float)) else ksz_72)
        self.output_sz_72 = self.feature_sz_72 + (self.kernel_size_72 + 1) % 2
        self.output_sz = torch.Tensor([72, 72])

        # Construct output window
        self.output_window = None
        if getattr(self.params, 'window_output', False):
            if getattr(self.params, 'use_clipped_window', False):
                self.output_window = dcf.hann2d_clipped(
                    self.output_sz.long(),
                    self.output_sz.long() * self.params.effective_search_area / self.params.search_area_scale,
                    centered=False).to(self.params.device)
            else:
                self.output_window = dcf.hann2d(self.output_sz.long(), centered=True).to(self.params.device)
            self.output_window = self.output_window.squeeze(0)

        # Init memory
        if getattr(self.params, 'update_classifier_and_regressor', True):
            self.init_memory(TensorList([train_feat_72_cls]),
                             TensorList([train_feat_18_cls]), TensorList([train_feat_72_reg]))