Пример #1
0
 def load_train_data(self,
                     split=False,
                     train_validate_split=0.8,
                     data_directory='./tmp/cifar100/'):
     """Read the pickled training batches and store them in ``self.data``.

     The files ``data_batch_1`` .. ``data_batch_5`` under
     ``<data_directory>cifar-10-batches/`` are all unpickled, but only the
     first four are decoded; their images and labels end up as arrays in
     ``self.data['train_images']`` and ``self.data['train_labels']``.

     :param split: not used by this method.
     :param train_validate_split: not used by this method.
     :param data_directory: directory prefix holding the batch folder; it
         is concatenated as-is, so it needs a trailing separator.
     :return: always ``True``.
     """
     print('Loading CIFAR 100 Training Dataset')
     # NOTE(review): the message says CIFAR 100 while the folder is named
     # 'cifar-10-batches/' -- confirm which one is intended.
     batch_prefix = data_directory + 'cifar-10-batches/' + 'data_batch_'
     batch_paths = [str(batch_prefix + str(number)) for number in range(1, 6)]

     unpickled = []
     for batch_path in batch_paths:
         print('Unpickling data file: %s' % batch_path)
         unpickled.append(file_utils.unpickle(batch_path))

     # NOTE(review): the final batch is unpickled above but is not decoded
     # here -- confirm whether it is meant as a hold-out or is an
     # off-by-one.
     all_labels, all_images = [], []
     for batch_path, batch in zip(batch_paths[:-1], unpickled[:-1]):
         print('Reading unpicked data file: %s' % batch_path)
         batch_images, batch_labels, _, _ = self.dict_read(batch)
         all_labels.extend(batch_labels)
         all_images.extend(batch_images)

     self.data['train_images'] = np.array(all_images)
     self.data['train_labels'] = np.array(all_labels)
     return True
Пример #2
0
 def load_train_data(self, split=False, data_directory='./tmp/STL10/'):
     """Load the five STL 10 training batches onto ``self.train``.

     The files ``data_batch_1`` .. ``data_batch_5`` under
     ``<data_directory>STL-10-batches/`` are unpickled and decoded with
     ``self.dict_read``. When ``self.train_validate_split`` is ``None`` the
     first ``self.num_images`` samples become the training set; otherwise
     the first ``self.num_train_images`` samples are used for training and
     the following ``self.num_validate_images`` samples are stored on
     ``self.validate``. Labels are one-hot encoded when
     ``self.one_hot_encode`` is ``True``.

     :param split: not used by this method.
     :param data_directory: directory prefix containing
         ``STL-10-batches/``; concatenated as-is.
     :return: always ``True``.
     """
     print('Loading STL 10 Training Dataset')
     batch_prefix = data_directory + 'STL-10-batches/' + 'data_batch_'
     batch_paths = [str(batch_prefix + str(number)) for number in range(1, 6)]
     batches = [file_utils.unpickle(batch_path) for batch_path in batch_paths]

     all_labels, all_images = [], []
     for batch_path, batch in zip(batch_paths, batches):
         print('Reading unpicked data file: %s' % batch_path)
         batch_images, batch_labels, _, _ = self.dict_read(batch)
         all_labels.extend(batch_labels)
         all_images.extend(batch_images)
     all_images = np.array(all_images)
     all_labels = np.array(all_labels)

     has_validation = self.train_validate_split is not None
     if has_validation:
         print('Requested to use only %d images' % self.num_images)
         self.train.data = np.array(all_images[:self.num_train_images, :])
         self.train.class_labels = np.array(
             all_labels[:self.num_train_images])
         self.train.class_names = np.array(
             [self.classes[label] for label in self.train.class_labels])
         # The validation samples directly follow the training samples.
         val_start = self.num_train_images
         val_stop = val_start + self.num_validate_images
         self.validate.data = np.array(all_images[val_start:val_stop, :])
         self.validate.class_labels = np.array(
             all_labels[val_start:val_stop])
         self.validate.class_names = np.array(
             [self.classes[label] for label in self.validate.class_labels])
     else:
         self.train.data = np.array(all_images[:self.num_images, :])
         self.train.class_labels = np.array(all_labels[:self.num_images])
         self.train.class_names = np.array(
             [self.classes[label] for label in self.train.class_labels])

     if self.one_hot_encode is True:
         self.convert_one_hot_encoding(self.train.class_labels,
                                       data_type='train')
         if has_validation:
             self.convert_one_hot_encoding(self.validate.class_labels,
                                           data_type='validate')
     return True
Пример #3
0
 def load_test_data(self, data_directory='/tmp/cifar10/'):
     """
     Load, preprocess and store the CIFAR 10 test batch on ``self.test``.

     The file ``<data_directory>cifar-10-batches/test_batch`` is unpickled
     and decoded with ``self.dict_read``; the images are passed through
     ``transform`` using ``self.preprocess`` and at most
     ``self.num_test_images`` samples are kept. When ``self.make_image`` is
     ``True`` every sample is also converted with ``self.convert_images``.
     Labels are one-hot encoded when ``self.one_hot_encode`` is ``True``,
     and the test data is written to the HDF5 file named by
     ``self.save_h5py`` unless that attribute is the empty string.

     :param data_directory: directory prefix containing
         ``cifar-10-batches/``; concatenated as-is, so it needs a trailing
         separator.
     :return: always ``True``.
     """
     print('Loading CIFAR 10 Test Dataset')
     test_file = str(data_directory + 'cifar-10-batches/' + 'test_batch')
     print('Unpickling test file: %s' % test_file)
     test_dict = file_utils.unpickle(test_file)
     print('Reading unpicked test file: %s' % test_file)
     # Decode the batch a single time; field 0 holds the images and
     # field 1 the labels.
     decoded = self.dict_read(test_dict)
     test_images = np.array(decoded[0])
     test_labels = np.array(decoded[1])
     preprocessed_images = transform(test_images,
                                     transform_method=self.preprocess)
     make_image = self.make_image is True
     if make_image:
         images = np.array([
             self.convert_images(preprocessed_images[fig_num, :],
                                 type=self.image_mode)
             for fig_num in range(preprocessed_images.shape[0])
         ])
     self.test.data = np.array(preprocessed_images[:self.num_test_images])
     if make_image:
         self.test.images = np.array(images[:self.num_test_images, :])
     self.test.fine_labels = np.array(test_labels[:self.num_test_images])
     self.test.fine_class_names = np.array(
         [self.fine_classes[label] for label in self.test.fine_labels])
     if self.one_hot_encode is True:
         self.convert_one_hot_encoding(self.test.fine_labels,
                                       data_type='test')
     if self.save_h5py != '':
         # The context manager closes the file even if writing the
         # dataset raises, so the handle is never leaked.
         with h5py.File(self.save_h5py, 'a') as h5f:
             h5f.create_dataset('test_dataset',
                                data=self.test.data,
                                compression="gzip",
                                compression_opts=9)
             print('Written CIFAR 10 test dataset to file: %s'
                   % self.save_h5py)
     print()
     return True
Пример #4
0
 def load_test_data(self, data_directory='/tmp/cifar100/'):
     """Read the pickled test batch and store it in ``self.data``.

     The file ``<data_directory>cifar-10-batches/test_batch`` is unpickled
     and decoded with ``self.dict_read``; the images and labels are stored
     as arrays under ``self.data['test_images']`` and
     ``self.data['test_labels']``.

     :param data_directory: directory prefix holding the batch folder; it
         is concatenated as-is, so it needs a trailing separator.
     :return: always ``True``.
     """
     print('Loading CIFAR 100 Test Dataset')
     # NOTE(review): the message says CIFAR 100 while the folder is named
     # 'cifar-10-batches/' -- confirm which one is intended.
     test_file = str(data_directory + 'cifar-10-batches/' + 'test_batch')
     print('Unpickling test file: %s' % test_file)
     test_dict = file_utils.unpickle(test_file)
     print('Reading unpicked test file: %s' % test_file)
     # Decode the batch a single time; field 0 holds the images and
     # field 1 the labels.
     decoded = self.dict_read(test_dict)
     self.data['test_images'] = np.array(decoded[0])
     self.data['test_labels'] = np.array(decoded[1])
     return True
Пример #5
0
 def load_test_data(self, data_directory='/tmp/cifar10/'):
     """Load, preprocess and store the CIFAR 10 test batch on ``self.test``.

     The file ``<data_directory>cifar-10-batches/test_batch`` is unpickled
     and decoded with ``self.dict_read``; the images are passed through
     ``transform`` using ``self.preprocess`` and at most
     ``self.num_test_images`` samples are kept. When ``self.make_image`` is
     ``True`` every sample is also converted with ``self.convert_images``,
     and labels are one-hot encoded when ``self.one_hot_encode`` is
     ``True``.

     :param data_directory: directory prefix containing
         ``cifar-10-batches/``; concatenated as-is, so it needs a trailing
         separator.
     :return: always ``True``.
     """
     print('Loading CIFAR 10 Test Dataset')
     test_file = str(data_directory + 'cifar-10-batches/' + 'test_batch')
     print('Unpickling test file: %s' % test_file)
     test_dict = file_utils.unpickle(test_file)
     print('Reading unpicked test file: %s' % test_file)
     # Decode the batch a single time; field 0 holds the images and
     # field 1 the labels.
     decoded = self.dict_read(test_dict)
     test_images = np.array(decoded[0])
     test_labels = np.array(decoded[1])
     preprocessed_images = transform(test_images,
                                     transform_method=self.preprocess)
     make_image = self.make_image is True
     if make_image:
         images = np.array([
             self.convert_images(preprocessed_images[fig_num, :],
                                 type=self.image_mode)
             for fig_num in range(preprocessed_images.shape[0])
         ])
     self.test.data = np.array(preprocessed_images[:self.num_test_images])
     if make_image:
         self.test.images = np.array(images[:self.num_test_images, :])
     self.test.class_labels = np.array(test_labels[:self.num_test_images])
     self.test.class_names = np.array(
         [self.classes[label] for label in self.test.class_labels])
     if self.one_hot_encode is True:
         self.convert_one_hot_encoding(self.test.class_labels,
                                       data_type='test')
     return True
Пример #6
0
 def load_test_data(self, data_directory='/tmp/STL10/'):
     """Load the STL 10 test batch and store it on ``self.test``.

     The file ``<data_directory>STL-10-batches/test_batch`` is unpickled
     and decoded with ``self.dict_read``; at most ``self.num_test_images``
     samples are kept. Class names are looked up in ``self.classes`` and
     labels are one-hot encoded when ``self.one_hot_encode`` is ``True``.

     :param data_directory: directory prefix containing
         ``STL-10-batches/``; concatenated as-is, so it needs a trailing
         separator.
     :return: always ``True``.
     """
     print('Loading STL 10 Test Dataset')
     test_file = str(data_directory + 'STL-10-batches/' + 'test_batch')
     print('Unpickling test file: %s' % test_file)
     test_dict = file_utils.unpickle(test_file)
     print('Reading unpicked test file: %s' % test_file)
     # Decode the batch a single time; field 0 holds the images and
     # field 1 the labels.
     decoded = self.dict_read(test_dict)
     test_images = np.array(decoded[0])
     test_labels = np.array(decoded[1])
     self.test.data = np.array(test_images[:self.num_test_images])
     self.test.class_labels = np.array(test_labels[:self.num_test_images])
     self.test.class_names = np.array(
         [self.classes[label] for label in self.test.class_labels])
     if self.one_hot_encode is True:
         self.convert_one_hot_encoding(self.test.class_labels,
                                       data_type='test')
     return True
Пример #7
0
    def load_train_data(self, data_directory='/tmp/cifar100/'):
        """
        Load, preprocess and store the CIFAR 100 training set.

        The file ``<data_directory>cifar-100-batches/train`` is unpickled
        and decoded with ``self.dict_read`` into images, fine labels and
        coarse labels. Images are passed through ``transform`` using
        ``self.preprocess`` and, when ``self.make_image`` is ``True``,
        converted with ``self.convert_images``.

        When ``self.train_validate_split`` is ``None`` the first
        ``self.num_images`` samples are stored on ``self.train``; otherwise
        the first ``self.num_train_images`` samples go to ``self.train`` and
        the following ``self.num_validate_images`` samples to
        ``self.validate``. Labels are one-hot encoded when
        ``self.one_hot_encode`` is ``True`` and the training data is written
        to the HDF5 file named by ``self.save_h5py`` unless that attribute
        is the empty string.

        :param data_directory: directory prefix containing
            ``cifar-100-batches/``; concatenated as-is, so it needs a
            trailing separator.
        :return: always ``True``.
        """
        print('Loading CIFAR 100 Train Dataset')
        data_file = data_directory + 'cifar-100-batches/' + 'train'
        data_dict = file_utils.unpickle(data_file)
        print('Reading unpicked data file: %s' % data_file)
        data, fine_labels, coarse_labels, _, _ = self.dict_read(data_dict)
        print(np.max(fine_labels))
        print(np.max(coarse_labels))
        data_images = np.array(data)
        data_fine_labels = np.array(fine_labels)
        data_coarse_labels = np.array(coarse_labels)
        print('Success')
        preprocessed_images = transform(data_images,
                                        transform_method=self.preprocess)
        make_image = self.make_image is True
        if make_image:
            images = np.array([
                self.convert_images(preprocessed_images[fig_num, :],
                                    type=self.image_mode)
                for fig_num in range(preprocessed_images.shape[0])
            ])
        use_validation = self.train_validate_split is not None
        if not use_validation:
            self.train.data = np.array(
                preprocessed_images[:self.num_images, :])
            if make_image:
                self.train.images = np.array(images[:self.num_images, :])
            self.train.fine_labels = np.array(
                data_fine_labels[:self.num_images])
            self.train.coarse_labels = np.array(
                data_coarse_labels[:self.num_images])
            self.train.fine_class_names = np.array(
                [self.fine_classes[label]
                 for label in self.train.fine_labels])
            print(self.fine_classes[:15])
            print(self.train.fine_labels[:15])
            print(self.train.fine_class_names[:15])
            self.train.coarse_class_names = np.array(
                [self.coarse_classes[label]
                 for label in self.train.coarse_labels])
        else:
            print('Requested to use only %d images' % self.num_images)
            self.train.data = np.array(
                preprocessed_images[:self.num_train_images, :])
            if make_image:
                self.train.images = np.array(
                    images[:self.num_train_images, :])
            self.train.fine_labels = np.array(
                data_fine_labels[:self.num_train_images])
            self.train.coarse_labels = np.array(
                data_coarse_labels[:self.num_train_images])
            self.train.fine_class_names = np.array(
                [self.fine_classes[label]
                 for label in self.train.fine_labels])
            self.train.coarse_class_names = np.array(
                [self.coarse_classes[label]
                 for label in self.train.coarse_labels])
            # The validation samples directly follow the training samples.
            val_start = self.num_train_images
            val_stop = val_start + self.num_validate_images
            self.validate.data = np.array(
                preprocessed_images[val_start:val_stop, :])
            if make_image:
                self.validate.images = np.array(
                    images[val_start:val_stop, :])
            self.validate.fine_labels = np.array(
                data_fine_labels[val_start:val_stop])
            self.validate.coarse_labels = np.array(
                data_coarse_labels[val_start:val_stop])
            self.validate.fine_class_names = np.array(
                [self.fine_classes[label]
                 for label in self.validate.fine_labels])
            self.validate.coarse_class_names = np.array(
                [self.coarse_classes[label]
                 for label in self.validate.coarse_labels])
        if self.one_hot_encode is True:
            self.convert_one_hot_encoding(self.train.fine_labels,
                                          data_type='train',
                                          class_type='fine')
            self.convert_one_hot_encoding(self.train.coarse_labels,
                                          data_type='train',
                                          class_type='coarse')
            if use_validation:
                self.convert_one_hot_encoding(self.validate.fine_labels,
                                              data_type='validate',
                                              class_type='fine')
                self.convert_one_hot_encoding(self.validate.coarse_labels,
                                              data_type='validate',
                                              class_type='coarse')

        if self.save_h5py != '':
            # The context manager closes the file even if writing the
            # dataset raises, so the handle is never leaked.
            with h5py.File(self.save_h5py, 'a') as h5f:
                h5f.create_dataset('train_dataset',
                                   data=self.train.data,
                                   compression="gzip",
                                   compression_opts=9)
                print('Written CIFAR 100 train dataset to file: %s' %
                      self.save_h5py)
        print()
        return True
Пример #8
0
 def load_train_data(self, split=False, data_directory='/tmp/cifar10/'):
     """Load, preprocess and store the CIFAR 10 training batches.

     The files ``data_batch_1`` .. ``data_batch_5`` under
     ``<data_directory>cifar-10-batches/`` are unpickled and decoded with
     ``self.dict_read``. Images are passed through ``transform`` using
     ``self.preprocess`` and, when ``self.make_image`` is ``True``,
     converted with ``self.convert_images``.

     When ``self.train_validate_split`` is ``None`` the first
     ``self.num_images`` samples are stored on ``self.train``; otherwise the
     first ``self.num_train_images`` samples go to ``self.train`` and the
     following ``self.num_validate_images`` samples to ``self.validate``.
     Labels are one-hot encoded when ``self.one_hot_encode`` is ``True``.

     :param split: not used by this method.
     :param data_directory: directory prefix containing
         ``cifar-10-batches/``; concatenated as-is.
     :return: always ``True``.
     """
     print('Loading CIFAR 10 Training Dataset')
     batch_prefix = data_directory + 'cifar-10-batches/' + 'data_batch_'
     batch_paths = [str(batch_prefix + str(number)) for number in range(1, 6)]
     batches = [file_utils.unpickle(batch_path) for batch_path in batch_paths]

     all_labels, all_images = [], []
     for batch_path, batch in zip(batch_paths, batches):
         print('Reading unpicked data file: %s' % batch_path)
         batch_images, batch_labels, _, _ = self.dict_read(batch)
         all_labels.extend(batch_labels)
         all_images.extend(batch_images)
     all_images = np.array(all_images)
     all_labels = np.array(all_labels)

     preprocessed = transform(all_images, transform_method=self.preprocess)
     wants_images = self.make_image is True
     if wants_images:
         rendered = np.array([
             self.convert_images(preprocessed[row, :], type=self.image_mode)
             for row in range(preprocessed.shape[0])
         ])

     has_validation = self.train_validate_split is not None
     if has_validation:
         print('Requested to use only %d images' % self.num_images)
         self.train.data = np.array(preprocessed[:self.num_train_images, :])
         if wants_images:
             self.train.images = np.array(
                 rendered[:self.num_train_images, :])
         self.train.class_labels = np.array(
             all_labels[:self.num_train_images])
         self.train.class_names = np.array(
             [self.classes[label] for label in self.train.class_labels])
         # The validation samples directly follow the training samples.
         val_start = self.num_train_images
         val_stop = val_start + self.num_validate_images
         self.validate.data = np.array(preprocessed[val_start:val_stop, :])
         if wants_images:
             self.validate.images = np.array(
                 rendered[val_start:val_stop, :])
         self.validate.class_labels = np.array(
             all_labels[val_start:val_stop])
         self.validate.class_names = np.array(
             [self.classes[label] for label in self.validate.class_labels])
     else:
         self.train.data = np.array(preprocessed[:self.num_images, :])
         if wants_images:
             self.train.images = np.array(rendered[:self.num_images, :])
         self.train.class_labels = np.array(all_labels[:self.num_images])
         self.train.class_names = np.array(
             [self.classes[label] for label in self.train.class_labels])

     if self.one_hot_encode is True:
         self.convert_one_hot_encoding(self.train.class_labels,
                                       data_type='train')
         if has_validation:
             self.convert_one_hot_encoding(self.validate.class_labels,
                                           data_type='validate')
     return True