示例#1
0
    def init_projection_matrix(self, x):
        """Initialise the per-feature projection (compression) matrices.

        Reads ``use_projection_matrix`` (defaulting to ``True`` and written
        back onto ``self.params``) and ``proj_init_method`` (default
        ``'pca'``), then sets ``self.compressed_dim`` and
        ``self.projection_matrix``.

        Args:
            x: Collection of sample tensors, one per feature block. Each
                entry is permuted over four axes and ``shape[1]`` is used as
                the channel dimension (presumably a TensorList of
                (N, C, H, W) tensors -- confirm against the caller).

        Raises:
            ValueError: If projection is enabled and ``proj_init_method`` is
                neither ``'pca'`` nor ``'randn'``.
        """
        # Persist the default so later code can read the flag unconditionally.
        self.params.use_projection_matrix = getattr(self.params,
                                                    'use_projection_matrix',
                                                    True)

        if self.params.use_projection_matrix:
            self.compressed_dim = self.fparams.attribute(
                'compressed_dim', None)

            proj_init_method = getattr(self.params, 'proj_init_method', 'pca')
            if proj_init_method == 'pca':
                # Flatten every block to (channels, everything else) and
                # centre each channel before forming the channel covariance.
                x_mat = TensorList([
                    e.permute(1, 0, 2, 3).reshape(e.shape[1], -1).clone()
                    for e in x
                ])
                x_mat -= x_mat.mean(dim=1, keepdim=True)
                cov_x = x_mat @ x_mat.t()
                # Keep the first ``cdim`` left singular vectors, stored as a
                # (cdim, channels, 1, 1) tensor. Blocks with no compressed
                # dimension get ``None``.
                self.projection_matrix = TensorList([
                    None if cdim is None else torch.svd(C)[0]
                    [:, :cdim].t().unsqueeze(-1).unsqueeze(-1).clone()
                    for C, cdim in zip(cov_x, self.compressed_dim)
                ])
            elif proj_init_method == 'randn':
                # Gaussian initialisation with std 1 / sqrt(channels).
                self.projection_matrix = TensorList([
                    None if cdim is None else ex.new_zeros(
                        cdim, ex.shape[1], 1, 1).normal_(
                            0, 1 / math.sqrt(ex.shape[1]))
                    for ex, cdim in zip(x, self.compressed_dim)
                ])
            else:
                # Fail fast: silently skipping would leave
                # ``self.projection_matrix`` unset or stale.
                raise ValueError(
                    "Unknown proj_init_method: {!r} (expected 'pca' or "
                    "'randn')".format(proj_init_method))
        else:
            # No compression: record the sizes along axis 1 of the inputs
            # and use ``None`` placeholders for the projection matrices.
            self.compressed_dim = x.size(1)
            self.projection_matrix = TensorList([None] * len(x))
示例#2
0
    def initialize(self, image, info: dict) -> dict:
        """Set up the tracker state from the first frame and its initial box.

        The method seeds the random generators, loads the metric network,
        derives a similarity threshold and a LOF model from positive samples
        drawn around the initial box, and then builds the correlation-filter
        state (sizes, window, interpolation, regularisation and label
        functions, projection matrix, sample memory, filter) and runs the
        initial optimisation.

        Args:
            image: First frame. ``image.shape[0]``, ``[1]`` and ``[2]`` are
                read, and ``image.shape[2] == 3`` marks the frame as colour
                (presumably an H x W x C numpy array -- confirm with caller).
            info: Must contain ``'init_bbox'``, a box indexed 0..3
                (presumably ``[x, y, w, h]`` -- confirm with caller).

        Returns:
            Nothing is returned explicitly, so callers receive ``None``
            despite the ``-> dict`` annotation.
        """

        # Seed every RNG used below so initialisation is repeatable.
        init_seed = 1
        torch.manual_seed(init_seed)
        torch.cuda.manual_seed(init_seed)
        torch.cuda.manual_seed_all(init_seed)
        np.random.seed(init_seed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        os.environ['PYTHONHASHSEED'] = str(init_seed)
        state = info['init_bbox']

        # Initialize some stuff
        self.frame_num = 1
        if not hasattr(self.params, 'device'):
            self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

        # Initialize features
        self.initialize_features()

        # Metric network
        self.metric_model = model_load(self.params.metric_model_path)
        with torch.no_grad():
            # Warm-up: one forward pass on a random batch; the output is
            # discarded. NOTE(review): this path is hard-wired to CUDA
            # regardless of ``self.params.device``.
            tmp = np.random.rand(5, 3, 107, 107)
            tmp = torch.Tensor(tmp)
            tmp = (Variable(tmp)).type(torch.FloatTensor).cuda()
            tmp = self.metric_model(tmp)
            # Warm-up done; compute the metric feature of the initial target.
            self.target_metric_feature = get_target_feature(
                self.metric_model, np.array(state), np.array(image))

        # Draw positive samples around the initial box, relaxing the lower
        # overlap bound in steps of 0.1 until at least one sample is returned.
        pos_generator = SampleGenerator(
            'gaussian', np.array([image.shape[1], image.shape[0]]), 0.1, 1.3)
        # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the
        # type it aliased, so the cast is unchanged.
        gt_pos_examples = pos_generator(
            np.array(state).astype(int), 20, [0.7, 1])
        gt_iou = 0.7
        while gt_pos_examples.shape[0] == 0:
            gt_iou = gt_iou - 0.1
            gt_pos_examples = pos_generator(
                np.array(state).astype(int), 20, [gt_iou, 1])

        with torch.no_grad():
            gt_pos_features0 = get_anchor_feature(self.metric_model,
                                                  np.array(image),
                                                  gt_pos_examples)
            gt_pos_features = gt_pos_features0.cpu().detach().numpy()
            target_metric_feature = self.target_metric_feature.repeat(
                gt_pos_features.shape[0], 1)
            # Similarity threshold: mean L2 distance between the positive
            # samples and the target feature, scaled by ``sim_rate``.
            pos_all = torch.norm(gt_pos_features0 - target_metric_feature,
                                 2,
                                 dim=1).view(-1)
            self.similar = pos_all.mean() * self.params.sim_rate
            print('similarThresh', self.similar)
        self.target_features_all = []
        self.target_features_all.append(self.target_metric_feature)
        self.clf = lof_fit(gt_pos_features, k=5)

        # Check if image is color
        self.params.features.set_is_color(image.shape[2] == 3)

        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        # Get position and size; both are stored with indices (1, 0) and
        # (3, 2) of the box swapped relative to the input order.
        self.pos = torch.Tensor(
            [state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
        self.target_sz = torch.Tensor([state[3], state[2]])

        # Set search area: rescale the target so that the search area stays
        # within [min_image_sample_size, max_image_sample_size].
        self.target_scale = 1.0
        search_area = torch.prod(self.target_sz *
                                 self.params.search_area_scale).item()
        if search_area > self.params.max_image_sample_size:
            self.target_scale = math.sqrt(search_area /
                                          self.params.max_image_sample_size)
        elif search_area < self.params.min_image_sample_size:
            self.target_scale = math.sqrt(search_area /
                                          self.params.min_image_sample_size)

        # Target size in base scale
        self.base_target_sz = self.target_sz / self.target_scale

        # Use odd square search area and set sizes
        feat_max_stride = max(self.params.features.stride())
        self.img_sample_sz = torch.round(
            torch.sqrt(
                torch.prod(self.base_target_sz *
                           self.params.search_area_scale))) * torch.ones(2)
        self.img_sample_sz += feat_max_stride - self.img_sample_sz % (
            2 * feat_max_stride)

        # Set other sizes (corresponds to ECO code)
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        self.filter_sz = self.feature_sz + (self.feature_sz + 1) % 2
        self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
        self.compressed_dim = self.fparams.attribute('compressed_dim')

        # Number of filters
        self.num_filters = len(self.filter_sz)

        # Get window function
        self.window = TensorList(
            [dcf.hann2d(sz).to(self.params.device) for sz in self.feature_sz])

        # Get interpolation function
        self.interp_fs = TensorList([
            dcf.get_interp_fourier(sz, self.params.interpolation_method,
                                   self.params.interpolation_bicubic_a,
                                   self.params.interpolation_centering,
                                   self.params.interpolation_windowing,
                                   self.params.device) for sz in self.filter_sz
        ])

        # Get regularization filter
        self.reg_filter = TensorList([
            dcf.get_reg_filter(self.img_support_sz, self.base_target_sz,
                               fparams).to(self.params.device)
            for fparams in self.fparams
        ])
        self.reg_energy = self.reg_filter.view(-1) @ self.reg_filter.view(-1)

        # Get label function
        output_sigma_factor = self.fparams.attribute('output_sigma_factor')
        sigma = (self.filter_sz / self.img_support_sz) * torch.sqrt(
            self.base_target_sz.prod()) * output_sigma_factor
        self.yf = TensorList([
            dcf.label_function(sz, sig).to(self.params.device)
            for sz, sig in zip(self.filter_sz, sigma)
        ])

        # Optimization options
        self.params.precond_learning_rate = self.fparams.attribute(
            'learning_rate')
        if self.params.CG_forgetting_rate is None or max(
                self.params.precond_learning_rate) >= 1:
            self.params.direction_forget_factor = 0
        else:
            self.params.direction_forget_factor = (
                1 - max(self.params.precond_learning_rate)
            )**self.params.CG_forgetting_rate

        # Convert image
        im = numpy_to_torch(image)

        # Setup bounds
        self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract and transform sample
        x = self.generate_init_samples(im)

        # Initialize projection matrix from the leading left singular
        # vectors of the centred channel covariance of each feature block.
        x_mat = TensorList(
            [e.permute(1, 0, 2, 3).reshape(e.shape[1], -1).clone() for e in x])
        x_mat -= x_mat.mean(dim=1, keepdim=True)
        cov_x = x_mat @ x_mat.t()
        self.projection_matrix = TensorList([
            torch.svd(C)[0][:, :cdim].clone()
            for C, cdim in zip(cov_x, self.compressed_dim)
        ])

        # Transform to get the training sample
        train_xf = self.preprocess_sample(x)

        # Shift the samples back
        if 'shift' in self.params.augmentation:
            for xf in train_xf:
                if xf.shape[0] == 1:
                    continue
                # Sample 0 is left untouched; shift ``i`` is applied to
                # sample ``1 + i``.
                for i, shift in enumerate(self.params.augmentation['shift']):
                    shift_samp = 2 * math.pi * torch.Tensor(
                        shift) / self.img_support_sz
                    xf[1 + i:2 + i, ...] = fourier.shift_fs(xf[1 + i:2 + i,
                                                               ...],
                                                            shift=shift_samp)

        # Shift sample by the sub-pixel part of the position
        shift_samp = 2 * math.pi * (self.pos - self.pos.round()) / (
            self.target_scale * self.img_support_sz)
        train_xf = fourier.shift_fs(train_xf, shift=shift_samp)

        # Initialize first-frame training samples
        num_init_samples = train_xf.size(0)
        self.init_sample_weights = TensorList(
            [xf.new_ones(1) / xf.shape[0] for xf in train_xf])
        self.init_training_samples = train_xf.permute(2, 3, 0, 1, 4)

        # Sample counters and weights
        self.num_stored_samples = num_init_samples
        self.previous_replace_ind = [None] * len(self.num_stored_samples)
        self.sample_weights = TensorList(
            [xf.new_zeros(self.params.sample_memory_size) for xf in train_xf])
        for sw, init_sw, num in zip(self.sample_weights,
                                    self.init_sample_weights,
                                    num_init_samples):
            sw[:num] = init_sw

        # Initialize memory
        self.training_samples = TensorList([
            xf.new_zeros(xf.shape[2], xf.shape[3],
                         self.params.sample_memory_size, cdim, 2)
            for xf, cdim in zip(train_xf, self.compressed_dim)
        ])

        # Initialize filter
        self.filter = TensorList([
            xf.new_zeros(1, cdim, xf.shape[2], xf.shape[3], 2)
            for xf, cdim in zip(train_xf, self.compressed_dim)
        ])

        # Do joint optimization of filter and projection matrix
        self.joint_problem = FactorizedConvProblem(self.init_training_samples,
                                                   self.yf, self.reg_filter,
                                                   self.projection_matrix,
                                                   self.params,
                                                   self.init_sample_weights)
        joint_var = self.filter.concat(self.projection_matrix)
        self.joint_optimizer = GaussNewtonCG(self.joint_problem,
                                             joint_var,
                                             debug=(self.params.debug >= 1),
                                             visdom=self.visdom)

        if self.params.update_projection_matrix:
            self.joint_optimizer.run(
                self.params.init_CG_iter // self.params.init_GN_iter,
                self.params.init_GN_iter)

        # Re-project samples with the new projection matrix
        compressed_samples = complex.mtimes(self.init_training_samples,
                                            self.projection_matrix)
        for train_samp, init_samp in zip(self.training_samples,
                                         compressed_samples):
            train_samp[:, :, :init_samp.shape[2], :, :] = init_samp

        # Initialize optimizer, seeding it with the joint optimizer's state
        self.filter_optimizer = FilterOptim(self.params, self.reg_energy)
        self.filter_optimizer.register(self.filter, self.training_samples,
                                       self.yf, self.sample_weights,
                                       self.reg_filter)
        self.filter_optimizer.sample_energy = self.joint_problem.sample_energy
        self.filter_optimizer.residuals = self.joint_optimizer.residuals.clone(
        )

        # When the joint optimisation was skipped, spend the initial CG
        # iterations on the filter alone.
        if not self.params.update_projection_matrix:
            self.filter_optimizer.run(self.params.init_CG_iter)

        # Post optimization
        self.filter_optimizer.run(self.params.post_init_CG_iter)

        self.symmetrize_filter()

        # Metric-network / LOF bookkeeping
        self.current_target_metric_feature = []
        self.train_xf = []

        self.lof_thresh = self.params.lof_rate
示例#3
0
    def initialize(self, image, state, *args, **kwargs):
        """Build the tracker state from the first frame and its initial box.

        Sets up sizes, window / interpolation / regularisation / label
        functions, the projection matrix, the sample memory and the filter,
        then runs the initial optimisation and symmetrises the filter.

        Args:
            image: First frame; ``image.shape[2] == 3`` marks it as colour
                (presumably an H x W x C numpy array -- confirm with caller).
            state: Initial box, indexed 0..3 (presumably ``[x, y, w, h]`` --
                confirm with caller).
            *args: Accepted but not used.
            **kwargs: Accepted but not used.
        """
        # Frame counter and default device.
        self.frame_num = 1
        if not hasattr(self.params, 'device'):
            if self.params.use_gpu:
                self.params.device = 'cuda'
            else:
                self.params.device = 'cpu'

        # Feature extractor set-up.
        self.initialize_features()

        # Tell the features whether the frame has three channels.
        is_color = image.shape[2] == 3
        self.params.features.set_is_color(is_color)

        # Per-feature parameters.
        self.fparams = self.params.features.get_fparams('feature_params')

        # Centre position and target size, stored with the box entries
        # swapped pairwise (index 1 before 0, index 3 before 2).
        b0, b1, b2, b3 = state[0], state[1], state[2], state[3]
        self.pos = torch.Tensor([b1 + (b3 - 1) / 2, b0 + (b2 - 1) / 2])
        self.target_sz = torch.Tensor([b3, b2])

        # Pick the scale that keeps the search area inside its size limits.
        self.target_scale = 1.0
        scaled_target = self.target_sz * self.params.search_area_scale
        search_area = torch.prod(scaled_target).item()
        if search_area > self.params.max_image_sample_size:
            area_limit = self.params.max_image_sample_size
        elif search_area < self.params.min_image_sample_size:
            area_limit = self.params.min_image_sample_size
        else:
            area_limit = None
        if area_limit is not None:
            self.target_scale = math.sqrt(search_area / area_limit)

        # Target size expressed in the base scale.
        self.base_target_sz = self.target_sz / self.target_scale

        # Square sample size, then adjusted in place using the largest
        # feature stride.
        max_stride = max(self.params.features.stride())
        base_area = torch.prod(self.base_target_sz *
                               self.params.search_area_scale)
        side = torch.round(torch.sqrt(base_area))
        self.img_sample_sz = side * torch.ones(2)
        self.img_sample_sz += max_stride - self.img_sample_sz % (2 * max_stride)

        # Derived sizes (mirrors the ECO code).
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        self.filter_sz = self.feature_sz + (self.feature_sz + 1) % 2
        # Interpolated size of the output.
        self.output_sz = (self.params.score_upsample_factor *
                          self.img_support_sz)
        self.compressed_dim = self.fparams.attribute('compressed_dim')

        # One filter per entry of ``filter_sz``.
        self.num_filters = len(self.filter_sz)

        # Window function for every feature size.
        self.window = TensorList([
            dcf.hann2d(feat_sz).to(self.params.device)
            for feat_sz in self.feature_sz
        ])

        # Fourier interpolation function for every filter size.
        self.interp_fs = TensorList([
            dcf.get_interp_fourier(
                filt_sz,
                self.params.interpolation_method,
                self.params.interpolation_bicubic_a,
                self.params.interpolation_centering,
                self.params.interpolation_windowing,
                self.params.device,
            )
            for filt_sz in self.filter_sz
        ])

        # Regularisation filter per feature and its energy (dot product of
        # the flattened filter with itself).
        self.reg_filter = TensorList([
            dcf.get_reg_filter(self.img_support_sz, self.base_target_sz,
                               feat_params).to(self.params.device)
            for feat_params in self.fparams
        ])
        flat_reg = self.reg_filter.view(-1)
        self.reg_energy = flat_reg @ flat_reg

        # Label function for every filter size.
        sigma_factor = self.fparams.attribute('output_sigma_factor')
        size_ratio = self.filter_sz / self.img_support_sz
        sigma = (size_ratio * torch.sqrt(self.base_target_sz.prod()) *
                 sigma_factor)
        self.yf = TensorList([
            dcf.label_function(filt_sz, filt_sigma).to(self.params.device)
            for filt_sz, filt_sigma in zip(self.filter_sz, sigma)
        ])

        # Optimisation options: the forget factor stays 0 unless a forgetting
        # rate is configured and the largest learning rate is below 1.
        self.params.precond_learning_rate = self.fparams.attribute(
            'learning_rate')
        forget_factor = 0
        if self.params.CG_forgetting_rate is not None:
            peak_lr = max(self.params.precond_learning_rate)
            if peak_lr >= 1:
                forget_factor = 0
            else:
                forget_factor = (1 - peak_lr)**self.params.CG_forgetting_rate
        self.params.direction_forget_factor = forget_factor

        # Frame as a torch tensor.
        frame = numpy_to_torch(image)

        # Image size and scale bounds.
        self.image_sz = torch.Tensor([frame.shape[2], frame.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Initial (augmented) samples.
        init_samples = self.generate_init_samples(frame)

        # Projection matrix: leading left singular vectors of the centred
        # channel covariance of each feature block.
        flat_samples = TensorList([
            block.permute(1, 0, 2, 3).reshape(block.shape[1], -1).clone()
            for block in init_samples
        ])
        flat_samples -= flat_samples.mean(dim=1, keepdim=True)
        channel_cov = flat_samples @ flat_samples.t()
        self.projection_matrix = TensorList([
            torch.svd(cov)[0][:, :keep_dim].clone()
            for cov, keep_dim in zip(channel_cov, self.compressed_dim)
        ])

        # Training samples in the transformed domain.
        train_xf = self.preprocess_sample(init_samples)

        # Undo the 'shift' augmentation: shift number k acts on sample k,
        # counting from 1, so sample 0 is never touched.
        if 'shift' in self.params.augmentation:
            for block_xf in train_xf:
                if block_xf.shape[0] != 1:
                    shifts = self.params.augmentation['shift']
                    for idx, offset in enumerate(shifts, start=1):
                        phase = (2 * math.pi * torch.Tensor(offset) /
                                 self.img_support_sz)
                        block_xf[idx:idx + 1, ...] = fourier.shift_fs(
                            block_xf[idx:idx + 1, ...], shift=phase)

        # Compensate for the sub-pixel part of the position.
        subpixel = self.pos - self.pos.round()
        phase = 2 * math.pi * subpixel / (self.target_scale *
                                          self.img_support_sz)
        train_xf = fourier.shift_fs(train_xf, shift=phase)

        # First-frame training samples and their uniform weights.
        num_init_samples = train_xf.size(0)
        self.init_sample_weights = TensorList([
            block_xf.new_ones(1) / block_xf.shape[0] for block_xf in train_xf
        ])
        self.init_training_samples = train_xf.permute(2, 3, 0, 1, 4)

        # Sample counters and weights.
        self.num_stored_samples = num_init_samples
        self.previous_replace_ind = [None] * len(self.num_stored_samples)
        self.sample_weights = TensorList([
            block_xf.new_zeros(self.params.sample_memory_size)
            for block_xf in train_xf
        ])
        for weights, init_weights, count in zip(self.sample_weights,
                                                self.init_sample_weights,
                                                num_init_samples):
            weights[:count] = init_weights

        # Zero-initialised sample memory.
        self.training_samples = TensorList([
            block_xf.new_zeros(block_xf.shape[2], block_xf.shape[3],
                               self.params.sample_memory_size, keep_dim, 2)
            for block_xf, keep_dim in zip(train_xf, self.compressed_dim)
        ])

        # Zero-initialised filter.
        self.filter = TensorList([
            block_xf.new_zeros(1, keep_dim, block_xf.shape[2],
                               block_xf.shape[3], 2)
            for block_xf, keep_dim in zip(train_xf, self.compressed_dim)
        ])

        # Joint optimisation of filter and projection matrix.
        self.joint_problem = FactorizedConvProblem(
            self.init_training_samples,
            self.yf,
            self.reg_filter,
            self.projection_matrix,
            self.params,
            self.init_sample_weights,
        )
        joint_var = self.filter.concat(self.projection_matrix)
        verbose = self.params.debug >= 3
        self.joint_optimizer = GaussNewtonCG(self.joint_problem,
                                             joint_var,
                                             debug=verbose)

        if self.params.update_projection_matrix:
            cg_per_gn = self.params.init_CG_iter // self.params.init_GN_iter
            self.joint_optimizer.run(cg_per_gn, self.params.init_GN_iter)

        # Store the initial samples, compressed with the current projection
        # matrix, at the front of the memory.
        projected = complex.mtimes(self.init_training_samples,
                                   self.projection_matrix)
        for memory, compressed in zip(self.training_samples, projected):
            filled = compressed.shape[2]
            memory[:, :, :filled, :, :] = compressed

        # Filter optimizer, seeded with the joint optimizer's state.
        self.filter_optimizer = FilterOptim(self.params, self.reg_energy)
        self.filter_optimizer.register(
            self.filter,
            self.training_samples,
            self.yf,
            self.sample_weights,
            self.reg_filter,
        )
        self.filter_optimizer.sample_energy = self.joint_problem.sample_energy
        self.filter_optimizer.residuals = (
            self.joint_optimizer.residuals.clone())

        # Without the joint run, spend the initial CG iterations here.
        if not self.params.update_projection_matrix:
            self.filter_optimizer.run(self.params.init_CG_iter)

        # Post-initialisation iterations.
        self.filter_optimizer.run(self.params.post_init_CG_iter)

        self.symmetrize_filter()