def train_single_model(self, train_dir, train_csv, val_dir, val_csv, epochs):
        """Build train/validation loaders, create a model and train it.

        Args:
            train_dir: Forwarded to ``utils.DYDataSet`` for the training set
                (presumably the image directory -- confirm against DYDataSet).
            train_csv: CSV file read with pandas; its ``.values`` array is
                handed to ``utils.DYDataSet`` for the training set.
            val_dir: Same as ``train_dir`` but for the validation set.
            val_csv: Same as ``train_csv`` but for the validation set.
            epochs: Forwarded unchanged to ``utils.train``.

        The created model is stored on ``self.model``; nothing is returned.
        """
        # Both splits are resized to input_size + 42 before the transform
        # produces an input_size result (exact cropping is up to
        # utils.get_transforms).
        resize_to = self.input_size + 42

        train_rows = pd.read_csv(train_csv).values  # plain ndarray
        val_rows = pd.read_csv(val_csv).values

        train_tf = utils.get_transforms(
            mode='train', input_size=self.input_size, resize_size=resize_to)
        train_dataset = utils.DYDataSet(train_dir, train_rows, train_tf)
        # Only the training loader shuffles.
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=4,
            pin_memory=True,
        )

        val_tf = utils.get_transforms(
            mode='valid', input_size=self.input_size, resize_size=resize_to)
        val_dataset = utils.DYDataSet(val_dir, val_rows, val_tf)
        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=4,
            pin_memory=True,
        )

        print('[+] trainning with total %d images' % len(train_dataset))

        self.model = get_model(self.model_name, pretrained=True)
        loss_fn = torch.nn.CrossEntropyLoss().cuda()
        utils.train(
            self.model,
            train_loader,
            val_loader,
            loss_fn,
            checkpoint_file=self.checkpoint_file,
            epochs=epochs,
        )
# Example #2
# 0
def _save_feature_part(all_fts, all_labels, prediction_file_path, part):
    """Concatenate buffered features/labels and save them as one part file.

    Args:
        all_fts: Non-empty list of 2-D CPU feature tensors, one per batch.
        all_labels: Non-empty list of label tensors, one per batch.
        prediction_file_path: Base path; ``'.<part>'`` is appended to it.
        part: Integer part number used as the file-name suffix.
    """
    labels = torch.cat(all_labels, dim=0).numpy().reshape(-1, 1)
    fts = torch.cat(all_fts, dim=0).numpy()

    print(f'[+] features shape: {fts.shape}')

    # Feature columns first, the label as the last column.
    res = np.concatenate((fts, labels), axis=1)
    print(f'[+] save npy shape: {res.shape}')

    fts_file_name = prediction_file_path + '.' + str(part)
    print('[+] writing fts file: %s, part %d ...' %
          (fts_file_name, part))
    np.save(fts_file_name, res)


def extract_features(feature_extractor, data_dir, data_csv, prediction_file_path,
                     batches_per_part=800):
    """Extract crop-averaged features for a dataset and save them in parts.

    Each batch from the loader is unpacked as ``(bs, ncrops, c, h, w)``; the
    crops are flattened into the batch dimension, run through
    ``feature_extractor`` and averaged per sample. Every ``batches_per_part``
    batches the buffered rows (features + label column) are written to
    ``prediction_file_path + '.<part>'`` with ``np.save``; any remainder is
    written as one last part after the loop.

    Reads ``input_size``, ``add_size`` and ``batch_size`` from the
    module-level ``args``.

    Args:
        feature_extractor: Module to run; it is wrapped in
            ``torch.nn.DataParallel``, moved to CUDA and put in eval mode.
        data_dir: Forwarded to ``utils.DYDataSet``.
        data_csv: CSV file read with pandas; its ``.values`` array is
            forwarded to ``utils.DYDataSet``.
        prediction_file_path: Base path of the output part files.
        batches_per_part: Number of batches buffered before a part file is
            written. Defaults to 800.

    Raises:
        ValueError: If ``batches_per_part`` is not positive.
    """
    if batches_per_part <= 0:
        raise ValueError('batches_per_part must be a positive integer')

    print('[+] Using Ten-Crop Extracting strategy')

    transform = utils.get_transforms(
        mode='test', input_size=args.input_size, resize_size=args.input_size+args.add_size)

    data_array = pd.read_csv(data_csv).values
    dataset = utils.DYDataSet(
        data_dir,
        data_array,
        transform
    )
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True)

    feature_extractor = torch.nn.DataParallel(feature_extractor).cuda()
    feature_extractor.eval()

    all_labels = []
    all_fts = []
    # Running count of part files written. Using a counter (instead of
    # dividing batch indices) keeps the suffix an integer under Python 3 and
    # guarantees the trailing part never reuses an earlier part's number.
    part = 0
    total_batches = len(data_loader)

    with torch.no_grad():

        print('extracting total %d images' % len(dataset))
        for i, (inputs, labels) in enumerate(data_loader):  # tensor type

            print('extracting batch: %d/%d' % (i, total_batches))

            bs, ncrops, c, h, w = inputs.size()
            # Fold the crops into the batch dimension for the forward pass.
            inputs = inputs.view(-1, c, h, w).cuda()
            output = feature_extractor(inputs)
            # Average over the crops -> one 2-D row per sample.
            output = output.view(bs, ncrops, -1).mean(1)
            all_labels.append(labels)
            all_fts.append(output.cpu())

            if (i + 1) % batches_per_part == 0:
                part += 1
                _save_feature_part(
                    all_fts, all_labels, prediction_file_path, part)
                all_labels = []
                all_fts = []

        # Flush the remainder. When the batch count is an exact multiple of
        # batches_per_part (or the dataset is empty) the buffers are empty
        # and torch.cat would raise on an empty list, so skip the write.
        if all_fts:
            part += 1
            _save_feature_part(
                all_fts, all_labels, prediction_file_path, part)
# Example #3
# 0
    def test_single_model(self,
                          checkpoint_file,
                          test_dir,
                          test_csv,
                          prediction_file_path='test_prediction.npy',
                          ten_crop=False,
                          prob=False):
        """Run a checkpointed model over a test set and save its predictions.

        Args:
            checkpoint_file: Checkpoint path handed to ``load_model_multiGPU``.
            test_dir: Forwarded to ``utils.DYDataSet``.
            test_csv: CSV file read with pandas; its ``.values`` array is
                forwarded to ``utils.DYDataSet``.
            prediction_file_path: Destination passed to ``np.save``.
            ten_crop: When true, use the ``'test'`` transforms and average
                the model output over the crop dimension of each sample.
            prob: When true, save the raw model outputs; otherwise save the
                top-1 index per sample.

        The saved array has the second item of every loader batch as its
        first column (named ``labels`` in the loop; presumably sample ids or
        labels -- confirm against DYDataSet), followed by the prediction
        column(s). The loaded model is stored on ``self.model``.
        """
        print('[+] checkpoint file:{0:s}'.format(checkpoint_file))

        transform = utils.get_transforms(
            mode='valid',
            input_size=self.input_size,
            resize_size=self.input_size + self.add_size,
        )
        if ten_crop:
            print('[+] Using Ten-Crop Testting strategy')
            transform = utils.get_transforms(
                mode='test',
                input_size=self.input_size,
                resize_size=self.input_size + self.add_size,
            )

        # Raw ndarray behind the DataFrame.
        test_rows = pd.read_csv(test_csv).values
        test_dataset = utils.DYDataSet(test_dir, test_rows, transform)
        test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=4,
            pin_memory=True,
        )

        self.model = get_model(self.model_name, pretrained=False)
        load_model_multiGPU(self.model, checkpoint_file)

        self.model = torch.nn.DataParallel(self.model).cuda()
        self.model.eval()

        first_columns = []   # second item of each batch, kept in order
        predictions = []     # top-1 indices or raw outputs, moved to CPU
        with torch.no_grad():
            print('testting total %d images' % len(test_dataset))
            for step, (batch, labels) in enumerate(test_loader):
                print('testting batch: %d/%d' %
                      (step, len(test_dataset) / self.batch_size))
                batch = batch.cuda()

                if ten_crop:
                    n, crops, ch, height, width = batch.size()
                    # Fold crops into the batch, then average them back out.
                    flat = batch.view(-1, ch, height, width)
                    scores = self.model(flat)
                    scores = scores.view(n, crops, -1).mean(1).view(n, -1)
                else:
                    scores = self.model(batch)  # 2-D tensor

                if prob:
                    result = scores
                else:
                    _, result = scores.topk(1)  # indices of the top-1 entry

                first_columns.append(labels)
                predictions.append(result.data.cpu())

        pred_array = torch.cat(predictions, dim=0).numpy()
        first_column = torch.cat(first_columns, dim=0).numpy().reshape(-1, 1)

        res = np.concatenate((first_column, pred_array), axis=1)
        print('writing pred file %s ...' % prediction_file_path)
        np.save(prediction_file_path, res)
        print('done.')