Example #1
import json
import os

from torch.utils.data import Dataset

# DataProcessor is the project's own image preprocessing helper (loading,
# resizing, augmentation, normalization); it is assumed to be importable
# from the surrounding project.


class PyTorchDataset(Dataset):
    def __init__(self,
                 txt,
                 config,
                 transform=None,
                 loader=None,
                 target_transform=None,
                 is_train_set=True):
        self.config = config
        imgs = []
        with open(txt, 'r') as f:
            data = json.load(f)
            for element in data:
                # each JSON element carries an image filename and a single
                # integer class label
                imgs.append(
                    (element['image_id'], int(element['disease_class'])))

        self.DataProcessor = DataProcessor(self.config)
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform
        self.is_train_set = is_train_set

    def __getitem__(self, index):
        fn, label = self.imgs[index]
        _root_dir = (self.config['train_data_root_dir'] if self.is_train_set
                     else self.config['val_data_root_dir'])
        image = self.self_defined_loader(os.path.join(_root_dir, fn))
        if self.transform is not None:
            image = self.transform(image)
        # apply the (previously ignored) label transform, if one was given
        if self.target_transform is not None:
            label = self.target_transform(label)

        return image, label

    def __len__(self):
        return len(self.imgs)

    def self_defined_loader(self, filename):
        image = self.DataProcessor.image_loader(filename)
        image = self.DataProcessor.image_resize(image)
        if self.is_train_set and self.config['data_aug']:
            image = self.DataProcessor.data_aug(image)
        image = self.DataProcessor.input_norm(image)
        return image
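
A minimal usage sketch (not part of the original example): it assumes the project's DataProcessor class is available and that the config keys mirror the ones PyTorchDataset reads above; the annotation file name, directory paths and batch size are hypothetical placeholders.

from torch.utils.data import DataLoader

# placeholder config; only the keys match what PyTorchDataset reads above
config = {
    'train_data_root_dir': '/data/train',
    'val_data_root_dir': '/data/val',
    'data_aug': True,
}

train_set = PyTorchDataset(txt='train_annotations.json', config=config,
                           transform=None, is_train_set=True)
train_loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=4)

for images, labels in train_loader:
    pass  # forward pass, loss and optimizer step would go here
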
Example #2
import os

import mxnet as mx
import numpy as np
from mxnet.gluon.data import dataset

# DataProcessor is the same project-specific preprocessing helper used above.


class MxnetDataset(dataset.Dataset):
    def __init__(self, txt, config, transform=None, is_train_set=True):
        self.config = config
        imgs = []
        # load image filename list here
        with open(txt, 'r') as f:
            for line in f:
                line = line.strip('\r\n')
                words = line.split(self.config['file_label_separator'])
                # single label here so we use int(words[1])
                imgs.append((words[0], int(words[1])))
        self.DataProcessor = DataProcessor(self.config)
        self.imgs = imgs
        self.transform = transform
        self.is_train_set = is_train_set

    def __getitem__(self, index):
        fn, label = self.imgs[index]
        _root_dir = (self.config['train_data_root_dir'] if self.is_train_set
                     else self.config['val_data_root_dir'])
        # image comes back as a preprocessed NumPy array (loaded e.g. via cv2)
        image = self.self_defined_loader(os.path.join(_root_dir, fn))
        # HWC -> CHW, then wrap the NumPy array as an MXNet NDArray
        image = np.transpose(image, (2, 0, 1))
        image = mx.nd.array(image)
        if self.transform is not None:
            image = self.transform(image)
        return image, float(label)

    def __len__(self):
        return len(self.imgs)

    def self_defined_loader(self, filename):
        image = self.DataProcessor.image_loader(filename)
        image = self.DataProcessor.image_resize(image)
        if self.is_train_set and self.config['data_aug']:
            image = self.DataProcessor.data_aug(image)
        image = self.DataProcessor.input_norm(image)
        return image
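
A similar usage sketch for the Gluon data pipeline, again with placeholder file names and config values; it assumes the same DataProcessor helper as above.

from mxnet.gluon.data import DataLoader

config = {
    'train_data_root_dir': '/data/train',
    'val_data_root_dir': '/data/val',
    'file_label_separator': ' ',
    'data_aug': True,
}

train_set = MxnetDataset(txt='train_list.txt', config=config, is_train_set=True)
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)

for images, labels in train_loader:
    pass  # images arrive as an NCHW mx.nd.array batch
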
Example #3
import math
import os

import keras
import numpy as np

# DataProcessor is the same project-specific preprocessing helper used above.


class KerasDataset(keras.utils.Sequence):
    def __init__(self,
                 txt,
                 config,
                 batch_size=1,
                 shuffle=True,
                 is_train_set=True):
        self.config = config
        self.batch_size = batch_size
        self.shuffle = shuffle
        imgs = []
        with open(txt, 'r') as f:
            for line in f:
                line = line.strip('\r\n')
                words = line.split(self.config['file_label_separator'])
                # single label here so we use int(words[1])
                imgs.append((words[0], int(words[1])))

        self.DataProcessor = DataProcessor(self.config)
        self.imgs = imgs
        self.is_train_set = is_train_set
        self.on_epoch_end()

    def __getitem__(self, index):
        # Generate indexes of the batch
        indexes = self.indexes[index * self.batch_size:(index + 1) *
                               self.batch_size]
        # Find list of IDs
        batch_data = [self.imgs[k] for k in indexes]
        # Generate data
        images, labels = self._data_generation(batch_data)

        return images, labels

    def __len__(self):
        # number of batches per epoch
        return math.ceil(len(self.imgs) / float(self.batch_size))

    def on_epoch_end(self):
        """Update (and optionally shuffle) the sample indexes after each epoch."""
        self.indexes = np.arange(len(self.imgs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def _data_generation(self, batch_data):
        # Initialization
        images, labels = [], []
        _root_dir = (self.config['train_data_root_dir'] if self.is_train_set
                     else self.config['val_data_root_dir'])
        # Generate data
        for path, label in batch_data:
            # Store sample
            filename = os.path.join(_root_dir, path)
            image = self.self_defined_loader(filename)
            images.append(image)
            # Store class
            labels.append(label)

        # one-hot encode the integer labels
        return np.array(images), keras.utils.to_categorical(
            labels, num_classes=self.config['num_classes'])

    def self_defined_loader(self, filename):
        image = self.DataProcessor.image_loader(filename)
        image = self.DataProcessor.image_resize(image)
        if self.is_train_set and self.config['data_aug']:
            image = self.DataProcessor.data_aug(image)
        image = self.DataProcessor.input_norm(image)
        return image
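
A usage sketch for the Keras Sequence, with placeholder file names and config values: a keras.utils.Sequence instance can be passed to model.fit (or model.fit_generator on older Keras versions), and Keras calls __getitem__ once per batch and on_epoch_end between epochs.

config = {
    'train_data_root_dir': '/data/train',
    'val_data_root_dir': '/data/val',
    'file_label_separator': ' ',
    'data_aug': True,
    'num_classes': 10,
}

train_seq = KerasDataset(txt='train_list.txt', config=config,
                         batch_size=32, shuffle=True, is_train_set=True)

# model = ...  # any compiled keras.Model whose input shape matches the images
# model.fit(train_seq, epochs=10)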