Example #1
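A training hook: every TRAIN.eval_step iterations it extracts object-centric features with the A/B/C networks over the clustering split, groups them with GPU k-means into pseudo labels, and refits and saves the one-vs-rest classifier.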
    def after_step(self, current_step):
        if current_step % self.engine.config.TRAIN.eval_step == 0 and current_step != 0:
            self.engine.logger.info('Start clustering the features')
            frame_num = self.engine.config.DATASET.train.clip_length
            frame_step = self.engine.config.DATASET.train.clip_step
            feature_record = []
            for video_name in self.engine.cluster_dataset_keys:
                dataset = self.engine.cluster_dataset_dict[video_name]
                data_loader = DataLoader(dataset=dataset, batch_size=1, shuffle=False, num_workers=1)
                for test_input, anno, meta in data_loader:
                    # the loader yields [N, C, D, H, W] clips; split along D into frames
                    future = test_input[:, :, 2, :, :].cuda()  # t+1 frame
                    current = test_input[:, :, 1, :, :].cuda() # t frame
                    past = test_input[:, :, 0, :, :].cuda()    # t-1 frame
                    bboxs = get_batch_dets(self.engine.Detector, current)
                    for index, bbox in enumerate(bboxs):
                        if bbox.numel() == 0:
                            # no detections in this frame: fall back to a single dummy box
                            bbox = bbox.new_zeros([1, 4])
                        current_object, _ = multi_obj_grid_crop(current[index], bbox)
                        future_object, _ = multi_obj_grid_crop(future[index], bbox)
                        future2current = torch.stack([future_object, current_object], dim=1)

                        past_object, _ = multi_obj_grid_crop(past[index], bbox)
                        current2past = torch.stack([current_object, past_object], dim=1)

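                        # A and C take summed frame gradients (motion); B takes the raw current object (appearance)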
                        _, _, A_input = frame_gradient(future2current)
                        A_input = A_input.sum(1)
                        _, _, C_input = frame_gradient(current2past)
                        C_input = C_input.sum(1)
                        A_feature, _, _ = self.engine.A(A_input)
                        B_feature, _, _ = self.engine.B(current_object)
                        C_feature, _, _ = self.engine.C(C_input)
                        
                        A_flatten_feature = A_feature.flatten(start_dim=1)
                        B_flatten_feature = B_feature.flatten(start_dim=1)
                        C_flatten_feature = C_feature.flatten(start_dim=1)
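                        # concatenate the three flattened embeddings into one descriptor per object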
                        ABC_feature = torch.cat([A_flatten_feature, B_flatten_feature, C_flatten_feature], dim=1).detach()
                        # split the batch into one feature vector per object
                        ABC_feature_s = torch.chunk(ABC_feature, ABC_feature.size(0), dim=0)
                        for abc_f in ABC_feature_s:
                            feature_record.append(abc_f.squeeze(0).cpu().numpy())
                self.engine.logger.info(f'Finished video: {video_name}')
            self.engine.logger.info(f'Finished feature extraction; number of samples: {len(feature_record)}')
            device = torch.device('cuda:0')
            cluster_input = torch.from_numpy(np.array(feature_record))
            time = mmcv.Timer()
            # run k-means over all object descriptors; the centers feed the predict step below
            cluster_ids_x, cluster_centers = kmeans(X=cluster_input, num_clusters=self.engine.config.TRAIN.cluster.k, distance='euclidean', device=device)
            pusedo_labels = kmeans_predict(cluster_input, cluster_centers, 'euclidean', device=device).detach().cpu().numpy()
            print(f'The cluster time is {time.since_start() / 60} min')

            pusedo_dataset = os.path.join(self.engine.config.TRAIN.pusedo_data_path, 'pusedo')
            os.makedirs(pusedo_dataset, exist_ok=True)
            
            np.savez_compressed(os.path.join(pusedo_dataset, f'{self.engine.config.DATASET.name}_dummy.npz'), data=cluster_input, label=pusedo_labels)
            print(f'The save time is {time.since_last_check() / 60} min')
            # fit the one-vs-rest classifier (built elsewhere, e.g. OneVsRestClassifier(LinearSVC())) on the pseudo labels
            self.engine.ovr_model = self.engine.ovr_model.fit(cluster_input, pusedo_labels)
            print(f'The train ovr: {time.since_last_check() / 60} min')
            joblib.dump(self.engine.ovr_model, self.engine.ovr_model_path)
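
For reference, a minimal standalone sketch of the kmeans / kmeans_predict helpers used above, assuming they come from the kmeans_pytorch package (the sizes here are illustrative only):

import torch
from kmeans_pytorch import kmeans, kmeans_predict

# stand-in for the extracted ABC feature matrix
features = torch.randn(1000, 3072)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# fit: returns the cluster id of every sample and the k cluster centers
cluster_ids, cluster_centers = kmeans(X=features, num_clusters=10, distance='euclidean', device=device)

# predict: assign samples to the nearest center, yielding pseudo labels
pseudo_labels = kmeans_predict(features, cluster_centers, 'euclidean', device=device)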
Example #2
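One training step of the object-centric pipeline: a clip is split into its t-1, t and t+1 frames, objects are detected in the current frame, and the three autoencoders A (future motion), B (appearance) and C (past motion) are optimized jointly on the cropped objects.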
    def train(self, current_step):
        # PyTorch layout: [N, C, D, H, W]
        # initialize
        start = time.time()
        self.set_requires_grad(self.A, True)
        self.set_requires_grad(self.B, True)
        self.set_requires_grad(self.C, True)
        self.set_requires_grad(self.Detector, False)
        self.A.train()
        self.B.train()
        self.C.train()
        self.Detector.eval()
        writer = self.kwargs['writer_dict']['writer']
        global_steps = self.kwargs['writer_dict']['global_steps_{}'.format(self.kwargs['model_type'])]

        # get the data
        data, anno, meta = next(self._train_loader_iter)  # the core dataloader step
        self.data_time.update(time.time() - start)

        # split the clip along D into its frames; here D = 3 and is fixed
        future = data[:, :, -1, :, :].cuda() # t+1 frame
        current = data[:, :, 1, :, :].cuda() # t frame
        past = data[:, :, 0, :, :].cuda()    # t-1 frame

        bboxs = get_batch_dets(self.Detector, current)
        # train on the cropped objects instead of whole frames
        for index, bbox in enumerate(bboxs):
            if bbox.numel() == 0:
                bbox = bbox.new_zeros([1, 4])
            # crop the detected objects out of each frame
            input_currentObject_B, _ = multi_obj_grid_crop(current[index], bbox)
            future_object, _ = multi_obj_grid_crop(future[index], bbox)
            current2future = torch.stack([input_currentObject_B, future_object], dim=1)
            past_object, _ = multi_obj_grid_crop(past[index], bbox)
            past2current = torch.stack([past_object, input_currentObject_B], dim=1)

            _, _, input_objectGradient_A = frame_gradient(current2future)
            input_objectGradient_A = input_objectGradient_A.sum(1)
            _, _, input_objectGradient_C = frame_gradient(past2current)
            input_objectGradient_C = input_objectGradient_C.sum(1)
            # ================= True process: start =================
            _, output_recGradient_A, original_A = self.A(input_objectGradient_A)
            _, output_recObject_B, original_B = self.B(input_currentObject_B)
            _, output_recGradient_C, original_C = self.C(input_objectGradient_C)
            loss_A = self.ALoss(output_recGradient_A, original_A)
            loss_B = self.BLoss(output_recObject_B, original_B)
            loss_C = self.CLoss(output_recGradient_C, original_C)

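            # weighted sum of the three reconstruction losses; the weights come from the loss_lamada config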
            loss_all = self.loss_lamada['ALoss'] * loss_A + self.loss_lamada['BLoss'] * loss_B + self.loss_lamada['CLoss'] * loss_C
            self.optimizer_ABC.zero_grad()
            loss_all.backward()
            self.optimizer_ABC.step()
            # record
            self.loss_meter_ABC.update(loss_all.detach())
            if self.config.TRAIN.general.scheduler.use:
                self.optimizer_ABC_scheduler.step()
            
            # ================= True process: end =================

        self.batch_time.update(time.time() - start)

        if (current_step % self.steps.param['log'] == 0):
            msg = make_info_message(current_step, self.steps.param['max'], self.kwargs['model_type'], self.batch_time, self.config.TRAIN.batch_size, self.data_time, [self.loss_meter_ABC])
            self.logger.info(msg)
        writer.add_scalar('Train_loss_ABC', self.loss_meter_ABC.val, global_steps)

        if (current_step % self.steps.param['vis'] == 0):
            vis_objects = OrderedDict({
                'train_input_objectGradient_A': input_objectGradient_A.detach(),
                'train_input_currentObject_B': input_currentObject_B.detach(),
                'train_input_objectGradient_C': input_objectGradient_C.detach(),
                'train_output_recGradient_A': output_recGradient_A.detach(),
                'train_output_recObject_B': output_recObject_B.detach(),
                'train_output_recGradient_C': output_recGradient_C.detach()
            })
            tensorboard_vis_images(vis_objects, writer, global_steps, self.normalize.param['train'])
        global_steps += 1
        

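        # stash everything the checkpoint saver expects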
        self.saved_stuff['step'] = global_steps
        self.saved_stuff['loss'] = self.loss_meter_ABC.val
        self.saved_stuff['A'] = self.A
        self.saved_stuff['B'] = self.B
        self.saved_stuff['C'] = self.C
        self.saved_stuff['optimizer_ABC'] = self.optimizer_ABC

        self.kwargs['writer_dict']['global_steps_{}'.format(self.kwargs['model_type'])] = global_steps
Example #3
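The evaluation loop: for every test video, per-object features are scored with the saved one-vs-rest model, aggregated into per-frame anomaly scores, and the AUC of the ROC curve is computed and logged.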
    def evaluate(self, current_step):
        '''
        Evaluate the model on the test data.
        The metric may change (e.g. accuracy, mAP, ...), or an official
        evaluation routine can be called here instead.
        '''
        # freeze all networks and switch them to eval mode
        self.engine.set_all(False)
        frame_num = self.engine.config.DATASET.test_clip_length
        tb_writer = self.engine.kwargs['writer_dict']['writer']
        global_steps = self.engine.kwargs['writer_dict']['global_steps_{}'.format(self.engine.kwargs['model_type'])]
        score_records = []
        total = 0
        # pick one random video (and, below, one random frame) to visualize
        random_video_sn = torch.randint(0, len(self.engine.test_dataset_keys), (1,))
        # load the OVR model trained by the cluster hook once, instead of per frame
        self.engine.ovr_model = joblib.load(self.engine.ovr_model_path)
        for sn, video_name in enumerate(self.engine.test_dataset_keys):
            dataset = self.engine.test_dataset_dict[video_name]
            len_dataset = dataset.pics_len
            test_iters = len_dataset - frame_num + 1
            test_counter = 0
        
            data_loader = DataLoader(dataset=dataset, batch_size=1, shuffle=False, num_workers=1)
            scores = np.empty(shape=(len_dataset,), dtype=np.float32)
            random_frame_sn = torch.randint(0, len_dataset, (1,))
            for frame_sn, (test_input, anno, meta) in enumerate(data_loader):
                feature_record_object = []
                future = test_input[:, :, 2, :, :].cuda()  # t+1 frame
                current = test_input[:, :, 1, :, :].cuda() # t frame
                past = test_input[:, :, 0, :, :].cuda()    # t-1 frame
                bboxs = get_batch_dets(self.engine.Detector, current)
                for index, bbox in enumerate(bboxs):
                    if bbox.numel() == 0:
                        # no detections: fall back to a single dummy box
                        bbox = bbox.new_zeros([1, 4])
                    current_object, _ = multi_obj_grid_crop(current[index], bbox)
                    future_object, _ = multi_obj_grid_crop(future[index], bbox)
                    future2current = torch.stack([future_object, current_object], dim=1)

                    past_object, _ = multi_obj_grid_crop(past[index], bbox)
                    current2past = torch.stack([current_object, past_object], dim=1)

                    _, _, A_input = frame_gradient(future2current)
                    A_input = A_input.sum(1)
                    _, _, C_input = frame_gradient(current2past)
                    C_input = C_input.sum(1)
                    A_feature, temp_a, _ = self.engine.A(A_input)
                    B_feature, temp_b, _ = self.engine.B(current_object)
                    C_feature, temp_c, _ = self.engine.C(C_input)

                    # visualize one random frame of one random video per evaluation run
                    if sn == random_video_sn and frame_sn == random_frame_sn:
                        vis_objects = OrderedDict({
                            'eval_oc_input_a': A_input.detach(),
                            'eval_oc_output_a': temp_a.detach(),
                            'eval_oc_input_b': current_object.detach(),
                            'eval_oc_output_b': temp_b.detach(),
                            'eval_oc_input_c': C_input.detach(),
                            'eval_oc_output_c': temp_c.detach(),
                        })
                        tensorboard_vis_images(vis_objects, tb_writer, global_steps, normalize=self.engine.normalize.param['val'])

                    A_flatten_feature = A_feature.flatten(start_dim=1)
                    B_flatten_feature = B_feature.flatten(start_dim=1)
                    C_flatten_feature = C_feature.flatten(start_dim=1)
                    ABC_feature = torch.cat([A_flatten_feature, B_flatten_feature, C_flatten_feature], dim=1).detach()
                    ABC_feature_s = torch.chunk(ABC_feature, ABC_feature.size(0), dim=0)

                    for abc_f in ABC_feature_s:
                        temp = abc_f.squeeze(0).cpu().numpy()
                        feature_record_object.append(temp)
                
                predict_input = np.array(feature_record_object)
                g_i = self.engine.ovr_model.decision_function(predict_input) # SVM score of each object in the frame
                # oc_score aggregates the per-object scores into a single frame-level score
                frame_score = oc_score(g_i)

                # the clip's score is assigned to its last frame
                scores[test_counter + frame_num - 1] = frame_score

                test_counter += 1
                total += 1
                if test_counter >= test_iters:
                    # pad the leading frames, which never appear as a clip's last frame
                    scores[:frame_num - 1] = scores[frame_num - 1]
                    score_records.append(scores)
                    print(f'finish test video set {video_name}')
                    break
        
        self.engine.pkl_path = save_score_results(self.engine.config, self.engine.logger, verbose=self.engine.verbose, config_name=self.engine.config_name, current_step=current_step, time_stamp=self.engine.kwargs["time_stamp"], score=score_records)
        results = self.engine.evaluate_function(self.engine.pkl_path, self.engine.logger, self.engine.config)
        self.engine.logger.info(results)
        tb_writer.add_text('AUC of ROC curve', f'AUC is {results.auc:.5f}', global_steps)
        return results.auc