def load_train_data(self, split=False, train_validate_split=0.8, data_directory='./tmp/cifar100/'):
    """Unpickle all five training batches and store the raw images and
    labels in ``self.data``.

    :param split: unused; kept for interface compatibility
    :param train_validate_split: unused; kept for interface compatibility
    :param data_directory: root directory containing the batch folder
    :return: True on success
    """
    print('Loading CIFAR 100 Training Dataset')
    # NOTE(review): the directory says 'cifar-10-batches' while the prints
    # say CIFAR 100 — confirm which dataset this loader actually targets.
    basic_dir_path = data_directory + 'cifar-10-batches/'
    data_batch_path = 'data_batch_'
    data_files = [str(basic_dir_path + data_batch_path + str(i)) for i in range(1, 6)]
    data_dict = []
    for file in data_files:
        print('Unpickling data file: %s' % file)
        data_dict.append(file_utils.unpickle(file))
    data_labels = []
    data_images = []
    # BUG FIX: the original loop ran range(len(data_dict) - 1), silently
    # dropping the last training batch; iterate over every batch instead.
    for i in range(len(data_dict)):
        print('Reading unpicked data file: %s' % data_files[i])
        data, labels, _, _ = self.dict_read(data_dict[i])
        data_labels.extend(labels)
        data_images.extend(data)
    self.data['train_images'] = np.array(data_images)
    self.data['train_labels'] = np.array(data_labels)
    # Release the large intermediate lists before returning.
    del data_labels
    del data_images
    return True
def load_train_data(self, split=False, data_directory='./tmp/STL10/'):
    """Read the five STL-10 training batches, fill the train (and, when a
    train/validate split is configured, the validate) dataset views, and
    one-hot encode labels if requested.

    :param split: unused; retained for interface compatibility
    :param data_directory: root directory holding the batch files
    :return: True on success
    """
    print('Loading STL 10 Training Dataset')
    batch_dir = data_directory + 'STL-10-batches/'
    batch_files = [str(batch_dir + 'data_batch_' + str(idx)) for idx in range(1, 6)]
    all_labels = []
    all_images = []
    for batch_file in batch_files:
        print('Reading unpicked data file: %s' % batch_file)
        images, labels, _, _ = self.dict_read(file_utils.unpickle(batch_file))
        all_labels.extend(labels)
        all_images.extend(images)
    image_arr = np.array(all_images)
    label_arr = np.array(all_labels)
    if self.train_validate_split is None:
        # No validation split: everything (up to num_images) is training data.
        self.train.data = np.array(image_arr[:self.num_images, :])
        self.train.class_labels = np.array(label_arr[:self.num_images])
        self.train.class_names = np.array(
            [self.classes[lbl] for lbl in self.train.class_labels])
    else:
        print('Requested to use only %d images' % self.num_images)
        n_train = self.num_train_images
        n_validate = self.num_validate_images
        self.train.data = np.array(image_arr[:n_train, :])
        self.train.class_labels = np.array(label_arr[:n_train])
        self.train.class_names = np.array(
            [self.classes[lbl] for lbl in self.train.class_labels])
        # The validation slice immediately follows the training slice.
        self.validate.data = np.array(image_arr[n_train:n_train + n_validate, :])
        self.validate.class_labels = np.array(label_arr[n_train:n_train + n_validate])
        self.validate.class_names = np.array(
            [self.classes[lbl] for lbl in self.validate.class_labels])
    if self.one_hot_encode is True:
        self.convert_one_hot_encoding(self.train.class_labels, data_type='train')
        if self.train_validate_split is not None:
            self.convert_one_hot_encoding(self.validate.class_labels, data_type='validate')
    del all_labels
    del all_images
    return True
def load_test_data(self, data_directory='/tmp/cifar10/'):
    """Unpickle the test batch, preprocess it, populate ``self.test``, and
    optionally write the preprocessed tensor to an HDF5 file.

    :param data_directory: root directory containing 'cifar-10-batches/'
    :return: True on success
    """
    print('Loading CIFAR 10 Test Dataset')
    basic_dir_path = data_directory + 'cifar-10-batches/'
    test_batch_path = 'test_batch'
    test_files = [str(basic_dir_path + test_batch_path)]
    print('Unpickling test file: %s' % test_files[0])
    test_dict = [file_utils.unpickle(test_files[0])]
    test_labels = []
    test_images = []
    print('Reading unpicked test file: %s' % test_files[0])
    # dict_read returns (data, labels, ...); index [1] is labels, [0] images.
    test_labels.extend(self.dict_read(test_dict[-1])[1])
    test_images.extend(self.dict_read(test_dict[-1])[0])
    test_images = np.array(test_images)
    # Apply the configured preprocessing (normalization etc.) to all images.
    preprocessed_images = transform(test_images, transform_method=self.preprocess)
    if self.make_image is True:
        # Also build displayable image objects, one per preprocessed row.
        images = []
        for fig_num in range(preprocessed_images.shape[0]):
            fig = preprocessed_images[fig_num, :]
            img = self.convert_images(fig, type=self.image_mode)
            images.append(img)
        images = np.array(images)
    test_labels = np.array(test_labels)
    # Keep only the first num_test_images samples.
    self.test.data = np.array(preprocessed_images[:self.num_test_images])
    if self.make_image is True:
        self.test.images = np.array(images[:self.num_test_images, :])
    # NOTE(review): this loader prints 'CIFAR 10' yet stores fine_labels /
    # fine_classes (CIFAR-100 terminology) — confirm which class this
    # method belongs to.
    self.test.fine_labels = np.array(test_labels[:self.num_test_images])
    self.test.fine_class_names = np.array(list(map(lambda x: self.fine_classes[x], self.test.fine_labels)))
    if self.one_hot_encode is True:
        self.convert_one_hot_encoding(self.test.fine_labels, data_type='test')
    if self.save_h5py != '':
        # Append the preprocessed test tensor to the HDF5 archive.
        h5f = h5py.File(self.save_h5py, 'a')
        h5f.create_dataset('test_dataset', data=self.test.data, compression="gzip", compression_opts=9)
        print('Written CIFAR 10 test dataset to file: %s' % self.save_h5py)
        h5f.close()
    # Release large intermediates before returning.
    del test_labels
    del test_images
    del preprocessed_images
    if self.make_image is True:
        del images
    print()
    return True
def load_test_data(self, data_directory='/tmp/cifar100/'):
    """Unpickle the test batch and stash the raw images and labels in
    ``self.data``.

    :param data_directory: root directory containing the batch folder
    :return: True on success
    """
    print('Loading CIFAR 100 Test Dataset')
    test_file = str(data_directory + 'cifar-10-batches/' + 'test_batch')
    print('Unpickling test file: %s' % test_file)
    unpickled = [file_utils.unpickle(test_file)]
    labels = []
    images = []
    print('Reading unpicked test file: %s' % test_file)
    # dict_read returns (data, labels, ...); index [1] is labels, [0] images.
    labels.extend(self.dict_read(unpickled[-1])[1])
    images.extend(self.dict_read(unpickled[-1])[0])
    image_arr = np.array(images)
    label_arr = np.array(labels)
    self.data['test_images'] = np.array(image_arr)
    self.data['test_labels'] = np.array(label_arr)
    del labels
    del images
    return True
def load_test_data(self, data_directory='/tmp/cifar10/'):
    """Unpickle the CIFAR-10 test batch, preprocess it, and populate
    ``self.test`` with data, labels, and class names.

    :param data_directory: root directory containing 'cifar-10-batches/'
    :return: True on success
    """
    print('Loading CIFAR 10 Test Dataset')
    test_file = str(data_directory + 'cifar-10-batches/' + 'test_batch')
    print('Unpickling test file: %s' % test_file)
    unpickled = [file_utils.unpickle(test_file)]
    labels = []
    raw_images = []
    print('Reading unpicked test file: %s' % test_file)
    labels.extend(self.dict_read(unpickled[-1])[1])
    raw_images.extend(self.dict_read(unpickled[-1])[0])
    # Apply the configured preprocessing to the whole batch at once.
    preprocessed = transform(np.array(raw_images), transform_method=self.preprocess)
    if self.make_image is True:
        # Build displayable image objects, one per preprocessed row.
        converted = np.array([
            self.convert_images(preprocessed[row, :], type=self.image_mode)
            for row in range(preprocessed.shape[0])])
    label_arr = np.array(labels)
    limit = self.num_test_images
    self.test.data = np.array(preprocessed[:limit])
    if self.make_image is True:
        self.test.images = np.array(converted[:limit, :])
    self.test.class_labels = np.array(label_arr[:limit])
    self.test.class_names = np.array(
        [self.classes[lbl] for lbl in self.test.class_labels])
    if self.one_hot_encode is True:
        self.convert_one_hot_encoding(self.test.class_labels, data_type='test')
    del labels
    del raw_images
    del preprocessed
    if self.make_image is True:
        del converted
    return True
def load_test_data(self, data_directory='/tmp/STL10/'):
    """Unpickle the STL-10 test batch and populate ``self.test`` with
    data, labels, and class names.

    :param data_directory: root directory containing 'STL-10-batches/'
    :return: True on success
    """
    print('Loading STL 10 Test Dataset')
    test_file = str(data_directory + 'STL-10-batches/' + 'test_batch')
    print('Unpickling test file: %s' % test_file)
    unpickled = [file_utils.unpickle(test_file)]
    labels = []
    images = []
    print('Reading unpicked test file: %s' % test_file)
    labels.extend(self.dict_read(unpickled[-1])[1])
    images.extend(self.dict_read(unpickled[-1])[0])
    image_arr = np.array(images)
    label_arr = np.array(labels)
    limit = self.num_test_images
    self.test.data = np.array(image_arr[:limit])
    self.test.class_labels = np.array(label_arr[:limit])
    self.test.class_names = np.array(
        [self.classes[lbl] for lbl in self.test.class_labels])
    if self.one_hot_encode is True:
        self.convert_one_hot_encoding(self.test.class_labels, data_type='test')
    del labels
    del images
    return True
def load_train_data(self, data_directory='/tmp/cifar100/'):
    """Load the CIFAR-100 'train' file, preprocess it, and populate the
    train (and optionally validate) splits with both fine (100-class) and
    coarse (20-superclass) labels; optionally archive to HDF5.

    :param data_directory: root directory containing 'cifar-100-batches/'
    :return: True on success
    """
    print('Loading CIFAR 100 Train Dataset')
    basic_dir_path = data_directory + 'cifar-100-batches/'
    data_batch_path = 'train'
    data_files = [basic_dir_path + data_batch_path]
    data_dict = [file_utils.unpickle(data_files[0])]
    print('Reading unpicked data file: %s' % data_files[0])
    # CIFAR-100 carries both a fine and a coarse label per image.
    data, fine_labels, coarse_labels, _, _ = self.dict_read(data_dict[0])
    # BUG FIX: removed leftover debug prints (np.max of the labels,
    # 'Success', and dumps of the first 15 fine labels/names) that
    # polluted normal output.
    data_images = np.array(data)
    data_fine_labels = np.array(fine_labels)
    data_coarse_labels = np.array(coarse_labels)
    # Apply the configured preprocessing to the whole batch.
    preprocessed_images = transform(data_images, transform_method=self.preprocess)
    if self.make_image is True:
        # Build displayable image objects, one per preprocessed row.
        images = []
        for fig_num in range(preprocessed_images.shape[0]):
            fig = preprocessed_images[fig_num, :]
            img = self.convert_images(fig, type=self.image_mode)
            images.append(img)
        images = np.array(images)
    if self.train_validate_split is None:
        # No validation split: the first num_images samples are training data.
        self.train.data = np.array(preprocessed_images[:self.num_images, :])
        if self.make_image is True:
            self.train.images = np.array(images[:self.num_images, :])
        self.train.fine_labels = np.array(data_fine_labels[:self.num_images])
        self.train.coarse_labels = np.array(data_coarse_labels[:self.num_images])
        self.train.fine_class_names = np.array(
            list(map(lambda x: self.fine_classes[x], self.train.fine_labels)))
        self.train.coarse_class_names = np.array(
            list(map(lambda x: self.coarse_classes[x], self.train.coarse_labels)))
    else:
        print('Requested to use only %d images' % self.num_images)
        self.train.data = np.array(preprocessed_images[:self.num_train_images, :])
        if self.make_image is True:
            self.train.images = np.array(images[:self.num_train_images, :])
        self.train.fine_labels = np.array(data_fine_labels[:self.num_train_images])
        self.train.coarse_labels = np.array(data_coarse_labels[:self.num_train_images])
        self.train.fine_class_names = np.array(
            list(map(lambda x: self.fine_classes[x], self.train.fine_labels)))
        self.train.coarse_class_names = np.array(
            list(map(lambda x: self.coarse_classes[x], self.train.coarse_labels)))
        # The validation slice immediately follows the training slice.
        self.validate.data = np.array(
            preprocessed_images[self.num_train_images:self.num_train_images + self.num_validate_images, :])
        if self.make_image is True:
            self.validate.images = np.array(
                images[self.num_train_images:self.num_train_images + self.num_validate_images, :])
        self.validate.fine_labels = np.array(
            data_fine_labels[self.num_train_images:self.num_train_images + self.num_validate_images])
        self.validate.coarse_labels = np.array(
            data_coarse_labels[self.num_train_images:self.num_train_images + self.num_validate_images])
        self.validate.fine_class_names = np.array(
            list(map(lambda x: self.fine_classes[x], self.validate.fine_labels)))
        self.validate.coarse_class_names = np.array(
            list(map(lambda x: self.coarse_classes[x], self.validate.coarse_labels)))
    if self.one_hot_encode is True:
        self.convert_one_hot_encoding(self.train.fine_labels, data_type='train', class_type='fine')
        self.convert_one_hot_encoding(self.train.coarse_labels, data_type='train', class_type='coarse')
        if self.train_validate_split is not None:
            self.convert_one_hot_encoding(self.validate.fine_labels, data_type='validate', class_type='fine')
            self.convert_one_hot_encoding(self.validate.coarse_labels, data_type='validate', class_type='coarse')
    if self.save_h5py != '':
        # Append the preprocessed train tensor to the HDF5 archive.
        h5f = h5py.File(self.save_h5py, 'a')
        h5f.create_dataset('train_dataset', data=self.train.data, compression="gzip", compression_opts=9)
        print('Written CIFAR 100 train dataset to file: %s' % self.save_h5py)
        h5f.close()
    # Release large intermediates before returning.
    del data_coarse_labels
    del data_fine_labels
    del data_images
    del preprocessed_images
    if self.make_image is True:
        del images
    print()
    return True
def load_train_data(self, split=False, data_directory='/tmp/cifar10/'):
    """Unpickle the five CIFAR-10 training batches, preprocess them, and
    populate the train (and optionally validate) splits.

    :param split: unused; kept for interface compatibility
    :param data_directory: root directory containing 'cifar-10-batches/'
    :return: True on success
    """
    print('Loading CIFAR 10 Training Dataset')
    basic_dir_path = data_directory + 'cifar-10-batches/'
    data_batch_path = 'data_batch_'
    data_files = []
    data_dict = []
    # CIFAR-10 training data ships in five files: data_batch_1 .. data_batch_5.
    for i in range(1, 6):
        data_files.append(str(basic_dir_path + data_batch_path + str(i)))
    for file in data_files:
        # print('Unpickling data file: %s' % file)
        data_dict.append(file_utils.unpickle(file))
    data_labels = []
    data_images = []
    # Concatenate images and labels across all five batches.
    for i in range(len(data_dict)):
        print('Reading unpicked data file: %s' % data_files[i])
        data, labels, _, _ = self.dict_read(data_dict[i])
        data_labels.extend(labels)
        data_images.extend(data)
    data_images = np.array(data_images)
    data_labels = np.array(data_labels)
    # Apply the configured preprocessing to the full training set.
    preprocessed_images = transform(data_images, transform_method=self.preprocess)
    if self.make_image is True:
        # Build displayable image objects, one per preprocessed row.
        images = []
        for fig_num in range(preprocessed_images.shape[0]):
            fig = preprocessed_images[fig_num, :]
            img = self.convert_images(fig, type=self.image_mode)
            images.append(img)
        images = np.array(images)
    if self.train_validate_split is None:
        # No validation split: the first num_images samples are training data.
        self.train.data = np.array(preprocessed_images[:self.num_images, :])
        if self.make_image is True:
            self.train.images = np.array(images[:self.num_images, :])
        self.train.class_labels = np.array(data_labels[:self.num_images])
        self.train.class_names = np.array(
            list(map(lambda x: self.classes[x], self.train.class_labels)))
    else:
        print('Requested to use only %d images' % self.num_images)
        self.train.data = np.array(preprocessed_images[:self.num_train_images, :])
        if self.make_image is True:
            self.train.images = np.array(images[:self.num_train_images, :])
        self.train.class_labels = np.array(data_labels[:self.num_train_images])
        self.train.class_names = np.array(
            list(map(lambda x: self.classes[x], self.train.class_labels)))
        # The validation slice immediately follows the training slice.
        self.validate.data = \
            np.array(preprocessed_images[self.num_train_images:self.num_train_images+self.num_validate_images, :])
        if self.make_image is True:
            self.validate.images = np.array(
                images[self.num_train_images:self.num_train_images + self.num_validate_images, :])
        self.validate.class_labels = \
            np.array(data_labels[self.num_train_images:self.num_train_images+self.num_validate_images])
        self.validate.class_names = np.array(
            list(map(lambda x: self.classes[x], self.validate.class_labels)))
    if self.one_hot_encode is True:
        self.convert_one_hot_encoding(self.train.class_labels, data_type='train')
        if self.train_validate_split is not None:
            self.convert_one_hot_encoding(self.validate.class_labels, data_type='validate')
    # Release large intermediates before returning.
    del data_labels
    del data_images
    del preprocessed_images
    if self.make_image is True:
        del images
    return True