Example #1
    def get_new_train_data(self, labels, nums_to_merge, size_penalty,
                           lambda_part):
        u_feas, feature_avg, label_to_images, part_feas = self.generate_average_feature(
            labels)

        dists = self.calculate_distance(u_feas)
        if self.part_num > 1 and lambda_part > 1e-6:
            part_feas = np.swapaxes(part_feas, 0, 1)
            part_dists = []
            for i in range(self.part_num):
                part_fea = part_feas[i]
                dist = self.calculate_distance(part_fea)
                part_dists.append(dist)
            part_dists = torch.stack(part_dists)
            part_dists = torch.mean(part_dists, dim=0)
            dists = (1 - lambda_part) * dists + lambda_part * part_dists
        idx1, idx2 = self.select_merge_data(u_feas, labels, label_to_images,
                                            size_penalty, dists)

        new_train_data, labels = self.generate_new_train_data(
            idx1, idx2, labels, nums_to_merge)

        num_train_ids = len(np.unique(np.array(labels)))

        # change the criterion classifier
        self.criterion = ExLoss(self.embeding_fea_size, num_train_ids,
                                t=10).cuda()
        # new_classifier = fc_avg.astype(np.float32)
        # self.criterion.V = torch.from_numpy(new_classifier).cuda()

        return labels, new_train_data
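The helper calculate_distance is defined elsewhere in the class. A minimal sketch of what it plausibly computes, assuming it returns the dense pairwise Euclidean distance matrix as a torch tensor (only the name and call site come from the snippet; the body below is an assumption):

import torch

def calculate_distance(u_feas):
    # u_feas: [m, d] per-sample features (numpy array or torch tensor).
    # Returns an [m, m] pairwise Euclidean distance matrix.
    feats = torch.as_tensor(u_feas, dtype=torch.float32)
    sq = feats.pow(2).sum(dim=1, keepdim=True)   # [m, 1] squared norms
    dists = sq + sq.t() - 2 * feats @ feats.t()  # ||a||^2 + ||b||^2 - 2 a.b
    return dists.clamp(min=0).sqrt()             # clamp guards against rounding below zero

Returning a torch tensor is consistent with the torch.stack / torch.mean calls applied to the part distances above.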
Example #2
    def get_new_train_data(self, labels, nums_to_merge, size_penalty):
        # initializing useful indices
        self.label_to_images = {}
        for idx, l in enumerate(labels):
            self.label_to_images[l] = self.label_to_images.get(l, []) + [idx]
        self.sort_image_by_label = list(
            itertools.chain.from_iterable([
                self.label_to_images[key]
                for key in sorted(self.label_to_images.keys())
            ]))

        # calculate features, feature_avg, fc_avg for future use
        u_feas, feature_avg, fc_avg = self.generate_average_feature(labels)

        dists = self.calculate_distance(u_feas)

        idx1, idx2 = self.select_merge_data(u_feas, labels,
                                            self.label_to_images, size_penalty,
                                            dists)

        new_train_data, labels = self.generate_new_train_data(
            idx1, idx2, labels, nums_to_merge)

        num_train_ids = len(np.unique(np.array(labels)))

        # change the criterion classifier
        self.criterion = ExLoss(self.embeding_fea_size, num_train_ids,
                                t=10).cuda()
        new_classifier = fc_avg.astype(np.float32)
        self.criterion.V = torch.from_numpy(new_classifier).cuda()

        return labels, new_train_data
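select_merge_data is also defined outside this snippet. A hedged sketch of a size-penalized candidate selection, assuming the BUC-style scheme where the distance between two samples is inflated by the sizes of their current clusters (the exact penalty form is an assumption):

import numpy as np
import torch

def select_merge_data(u_feas, labels, label_to_images, size_penalty, dists):
    # Inflate each pairwise distance by size_penalty * (|C_i| + |C_j|)
    # so that large clusters are less likely to keep absorbing samples.
    labels = np.asarray(labels)
    sizes = torch.tensor([len(label_to_images[l]) for l in labels],
                         dtype=torch.float32)
    dists = dists + size_penalty * (sizes.view(-1, 1) + sizes.view(1, -1))
    # Mask pairs that already share a cluster.
    same = torch.from_numpy(labels[:, None] == labels[None, :])
    dists = dists.masked_fill(same, float('inf'))
    # Candidate pairs in order of ascending penalized distance.
    order = torch.argsort(dists.flatten())
    idx1 = (order // dists.size(1)).numpy()
    idx2 = (order % dists.size(1)).numpy()
    return idx1, idx2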
Example #3
    def get_new_train_data_v2(self, labels, nums_to_merge, step, penalty):
        self.label_to_images = {}
        for idx, l in enumerate(labels):
            self.label_to_images[l] = self.label_to_images.get(l, []) + [idx]
        self.sort_image_by_label = list(
            itertools.chain.from_iterable([
                self.label_to_images[key]
                for key in sorted(self.label_to_images.keys())
            ]))

        u_feas, feature_avg, fc_avg = self.generate_average_feature_v2(
            labels)  # shapes: [m, 2048], [c, 2048], [c, 1024]
        # sort samples for a class-based ranking
        # u_feas_sorted, labels_sorted = self.feat_sort(u_feas, labels)
        labels = np.array(labels, np.int64)
        u_feas_sorted = u_feas[self.sort_image_by_label]
        labels_sorted = labels[self.sort_image_by_label]
        np.savetxt('labels_{:d}'.format(step), labels_sorted)

        dist = self.calculate_distance(u_feas_sorted)
        # the full pairwise distance matrix for all samples is now available
        # linkage calculation with sorted labels
        linkages, penalized_linkages = self.linkage_calculation(
            dist, labels_sorted, penalty)
        idx1, idx2 = self.select_merge_data_v2(u_feas_sorted, labels_sorted,
                                               linkages)
        new_train_data = self.generate_new_train_data_v2(
            idx1, idx2, nums_to_merge)
        num_train_ids = len(self.label_to_images)

        # change the criterion classifier
        self.criterion = ExLoss(self.embeding_fea_size, num_train_ids,
                                t=10).cuda()
        new_classifier = fc_avg.astype(np.float32)
        self.criterion.V = torch.from_numpy(new_classifier).cuda()
Example #4
    def get_new_train_data_dbc(self, labels, nums_to_merge, penalty):
        self.label_to_images = {}
        for idx, l in enumerate(labels):
            self.label_to_images[l] = self.label_to_images.get(l, []) + [idx]
        self.sort_image_by_label = list(
            itertools.chain.from_iterable([self.label_to_images[key] for key in sorted(self.label_to_images.keys())]))

        u_feas, feature_avg, fc_avg = self.generate_average_feature_v2(labels)
        labels = np.array(labels, np.int64)
        u_feas_sorted = u_feas[self.sort_image_by_label]
        labels_sorted = labels[self.sort_image_by_label]

        dist = self.calculate_distance(u_feas_sorted)
        linkages, penalized_linkages = self.linkage_calculation(dist, labels_sorted, penalty)
        if penalty > 0:
            idx1, idx2 = self.select_merge_data_v2(u_feas_sorted, labels_sorted, penalized_linkages)
        else:
            idx1, idx2 = self.select_merge_data_v2(u_feas_sorted, labels_sorted, linkages)
        new_train_data, labels = self.generate_new_train_data_dbc(idx1, idx2, nums_to_merge)
        num_train_ids = len(self.label_to_images)

        self.criterion = ExLoss(self.embeding_fea_size, num_train_ids, t=10).cuda()
        # new_classifier = fc_avg.astype(np.float32)
        # self.criterion.V = torch.from_numpy(new_classifier).cuda()
        return labels, new_train_data
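generate_new_train_data_dbc is not shown. As a worked sketch of the relabeling step it has to perform, the candidate pairs (idx1, idx2) can be folded into cluster merges with union-find bookkeeping until the requested number of merges is reached (this helper is illustrative, not the repository's implementation):

import numpy as np

def merge_clusters(labels, idx1, idx2, nums_to_merge):
    labels = np.asarray(labels).copy()
    parent = {l: l for l in np.unique(labels)}

    def find(l):
        # Find the root label, compressing the path as we go.
        while parent[l] != l:
            parent[l] = parent[parent[l]]
            l = parent[l]
        return l

    merged = 0
    for a, b in zip(idx1, idx2):
        ra, rb = find(labels[a]), find(labels[b])
        if ra != rb:
            parent[max(ra, rb)] = min(ra, rb)  # keep the smaller label as root
            merged += 1
            if merged >= nums_to_merge:
                break
    # Relabel every sample with its root, then re-index to 0..c-1.
    roots = np.array([find(l) for l in labels])
    _, new_labels = np.unique(roots, return_inverse=True)
    return new_labels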
Example #5
    def __init__(self, model_name, batch_size, num_classes, dataset, u_data, save_path, embeding_fea_size=1024,
                 dropout=0.5, max_frames=900, initial_steps=20, step_size=16):

        self.model_name = model_name
        self.num_classes = num_classes
        self.data_dir = dataset.images_dir
        self.is_video = dataset.is_video
        self.save_path = save_path

        self.dataset = dataset
        self.u_data = u_data
        self.u_label = np.array([label for _, label, _, _ in u_data])
        self.label_to_images = {}
        self.sort_image_by_label = []

        self.dataloader_params = {}
        self.dataloader_params['height'] = 256
        self.dataloader_params['width'] = 128
        self.dataloader_params['batch_size'] = batch_size
        self.dataloader_params['workers'] = 6

        self.batch_size = batch_size
        self.data_height = 256
        self.data_width = 128
        self.data_workers = 6

        self.initial_steps = initial_steps
        self.step_size = step_size

        # batch size for eval mode. Default is 1.
        self.dropout = dropout
        self.max_frames = max_frames
        self.embeding_fea_size = embeding_fea_size

        if self.is_video:
            self.eval_bs = 1
            self.fixed_layer = True
            self.frames_per_video = 16
            self.later_steps = 5
        else:
            self.eval_bs = 128
            self.fixed_layer = False
            self.frames_per_video = 1
            self.later_steps = 2

        if self.model_name == 'avg_pool':
            model = models.create(self.model_name, dropout=self.dropout,
                                  embeding_fea_size=self.embeding_fea_size,
                                  fixed_layer=self.fixed_layer)
        else:
            model = models.create(self.model_name, embed_dim=self.embeding_fea_size,
                                  dropout=self.dropout,
                                  fix_part_layers=self.fixed_layer)
        self.model = nn.DataParallel(model).cuda()

        self.criterion = ExLoss(self.embeding_fea_size, self.num_classes, t=10).cuda()
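ExLoss is imported from the repository. Below is a simplified, hypothetical sketch of an exclusive/exemplar-style criterion with a lookup table V (the attribute the other snippets overwrite with fc_avg) and temperature t; the real implementation also updates V through a custom autograd function, which this sketch omits:

import torch
import torch.nn as nn
import torch.nn.functional as F

class ExLossSketch(nn.Module):
    def __init__(self, feat_dim, num_classes, t=10):
        super().__init__()
        self.t = t
        # One centroid-like row per cluster; snippets above overwrite
        # this table via `self.criterion.V = ...`.
        self.register_buffer('V', torch.zeros(num_classes, feat_dim))

    def forward(self, feats, targets):
        feats = F.normalize(feats)
        logits = self.t * feats @ F.normalize(self.V).t()
        return F.cross_entropy(logits, targets), logits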
Example #6
    def get_new_train_data(self, labels, nums_to_merge, size_penalty):
        u_feas, fcs, label_to_images, fc_avg = self.generate_average_feature(labels)
        
        dists = self.calculate_distance(u_feas)
        
        idx1, idx2 = self.select_merge_data(u_feas, labels, label_to_images, size_penalty, dists)
        
        new_train_data, labels = self.generate_new_train_data(idx1, idx2, labels, nums_to_merge, fcs)
        
        num_train_ids = len(np.unique(np.array(labels)))

        self.criterion = ExLoss(self.embeding_fea_size, num_train_ids, t=10).cuda()

        return labels, new_train_data
Example #7
    def softmax_train(self, train_data, unselected_data, step, epochs, step_size, init_lr, dropout, loss):

        """ create model and dataloader """
        model = models.create(self.model_name, dropout=self.dropout, num_classes=self.num_classes,
                              embeding_fea_size=self.embeding_fea_size, classifier=loss, fixed_layer=self.fixed_layer)

        model = nn.DataParallel(model).cuda()

        # the base parameters for the backbone (e.g. ResNet50)
        base_param_ids = set(map(id, model.module.CNN.base.parameters())) 
        base_params_need_for_grad = filter(lambda p: p.requires_grad, model.module.CNN.base.parameters()) 
        new_params = [p for p in model.parameters() if id(p) not in base_param_ids]

        # set the learning rate for backbone to be 0.1 times
        param_groups = [
            {'params': base_params_need_for_grad, 'lr_mult': 0.1},
            {'params': new_params, 'lr_mult': 1.0}]


        exclusive_criterion = ExLoss(self.embeding_fea_size, len(unselected_data), t=10).cuda()

        optimizer = torch.optim.SGD(param_groups, lr=init_lr, momentum=self.train_momentum, weight_decay=5e-4, nesterov=True)

        # change the learning rate by step
        def adjust_lr(epoch, step_size):
            use_unselected_data = True
            lr = init_lr / (10 ** (epoch // step_size))
            for g in optimizer.param_groups:
                g['lr'] = lr * g.get('lr_mult', 1)
            if epoch >= step_size:
                use_unselected_data = False
                # print("Epoch {}, CE loss, current lr {}".format(epoch, lr))
            return use_unselected_data


        s_dataloader = self.get_dataloader(train_data, training=True, is_ulabeled=False)
        u_dataloader = self.get_dataloader(unselected_data, training=True, is_ulabeled=True)


        """ main training process """
        trainer = Trainer(model, exclusive_criterion, fixed_layer=self.fixed_layer, lamda=self.lamda)
        for epoch in range(epochs):
            use_unselected_data = adjust_lr(epoch, step_size)
            trainer.train(epoch, s_dataloader, u_dataloader, optimizer, use_unselected_data, print_freq=len(s_dataloader)//2)

        ckpt_file = osp.join(self.save_path, "step_{}.ckpt".format(step))
        torch.save(model.state_dict(), ckpt_file)
        self.model = model
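The schedule in adjust_lr divides the learning rate by 10 every step_size epochs and stops feeding the unselected data once the first decay point is passed. A quick check of the decay arithmetic:

init_lr, step_size = 0.1, 16
for epoch in (0, 15, 16, 31, 32):
    print(epoch, init_lr / (10 ** (epoch // step_size)))
# -> 0.1, 0.1, 0.01, 0.01, 0.001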
Example #8
    def get_new_train_data(self, labels, nums_to_merge, size_penalty):
        u_feas, feature_avg, label_to_images, fc_avg = self.generate_average_feature(labels)
        
        dists = self.calculate_distance(u_feas)
        
        idx1, idx2 = self.select_merge_data(u_feas, labels, label_to_images, size_penalty, dists)
        
        new_train_data, labels = self.generate_new_train_data(idx1, idx2, labels, nums_to_merge)
        
        num_train_ids = len(np.unique(np.array(labels)))

        # change the criterion classifier
        self.criterion = ExLoss(self.embeding_fea_size, num_train_ids, t=10).cuda()
        new_classifier = fc_avg.astype(np.float32)
        self.criterion.V = torch.from_numpy(new_classifier).cuda()

        return labels, new_train_data
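generate_average_feature is defined elsewhere and, per the call sites, also returns classifier-average weights (fc_avg). A minimal sketch of just the per-cluster feature averaging and the label_to_images map, assuming the per-sample features have already been extracted (the helper name below is hypothetical):

import numpy as np

def cluster_average_features(u_feas, labels):
    # u_feas: [m, d] per-sample features; labels: length-m cluster ids.
    label_to_images = {}
    for idx, l in enumerate(labels):
        label_to_images.setdefault(l, []).append(idx)
    feature_avg = np.stack([np.asarray(u_feas)[idxs].mean(axis=0)
                            for _, idxs in sorted(label_to_images.items())])
    return feature_avg, label_to_images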
Example #9
    def __init__(self,
                 model_name,
                 batch_size,
                 num_classes,
                 dataset,
                 u_data,
                 save_path,
                 embeding_fea_size=1024,
                 dropout=0.5,
                 max_frames=900,
                 initial_steps=20,
                 step_size=16):

        self.model_name = model_name  # model_name = 'avg_pool'
        self.num_classes = num_classes  # number of training classes
        self.data_dir = dataset.images_dir  # data_dir = dataset_all.image_dir
        self.is_video = dataset.is_video  # is_video = dataset_all.is_video
        self.save_path = save_path  # save_path = os.path.join(working_dir, 'logs') = D:/GitHub/BUC/logs

        self.dataset = dataset  # 'market1501'
        self.u_data = u_data  # u_data is the return value of change_to_unlabel(dataset_all)
        self.u_label = np.array([label for _, label, _, _ in u_data])
        # _ is a placeholder for unused fields; the labels in u_data become u_label
        self.dataloader_params = {}
        self.dataloader_params['height'] = 256
        self.dataloader_params['width'] = 128
        self.dataloader_params['batch_size'] = batch_size
        self.dataloader_params['workers'] = 6

        self.batch_size = batch_size  # batch_size = 16
        self.data_height = 256
        self.data_width = 128
        self.data_workers = 6

        self.initial_steps = initial_steps  # initial_steps = 20
        self.step_size = step_size  # step_size = 16

        # batch size for eval mode. Default is 1.
        self.dropout = dropout  # dropout = 0.5
        self.max_frames = max_frames  # max_frames = 900
        self.embeding_fea_size = embeding_fea_size  # embedding_fea_size = 1024

        if self.is_video:
            self.eval_bs = 1
            self.fixed_layer = True
            self.frames_per_video = 16
            self.later_steps = 5
        else:
            self.eval_bs = 64  # eval_bs: batch size for evaluation
            self.fixed_layer = False
            self.frames_per_video = 1  # still images
            self.later_steps = 2  # number of later training steps

        model = models.create(
            self.model_name,
            dropout=self.dropout,  # model_name = 'avg_pool', dropout = 0.5
            embeding_fea_size=self.embeding_fea_size,
            fixed_layer=self.fixed_layer
        )  # embedding_fea_size = 1024  # fixed_layer = False
        self.model = nn.DataParallel(model).cuda()

        self.criterion = ExLoss(self.embeding_fea_size, self.num_classes,
                                t=10).cuda()
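The u_label extraction above unpacks four-tuples and keeps only the label field; a tiny runnable illustration with hypothetical sample tuples:

import numpy as np

u_data = [('img_0.jpg', 0, 0, 0), ('img_1.jpg', 1, 0, 0)]  # hypothetical (path, label, cam, frame) tuples
u_label = np.array([label for _, label, _, _ in u_data])
print(u_label)  # [0 1]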