def prepare(dataset_dir):
    """Runs download and conversion operation for the MNIST dataset.

    Args:
        dataset_dir: The dataset directory where the dataset is stored.
    """
    make_dataset_dir(dataset_dir)
    image_reader = PNGNumpyImageReader(
        shape=(_IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS))
    # Digit class names 0-9. Bug fix: 'six' was previously misspelled 'size'.
    classes = ['zero', 'one', 'two', 'three', 'four',
               'five', 'six', 'seven', 'eight', 'nine']
    converter = ImagesToTFExampleConverter(
        classes=classes,
        colorspace='grayscale',
        image_format='png',
        channels=_NUM_CHANNELS,
        image_reader=image_reader,
        height=_IMAGE_SIZE,
        width=_IMAGE_SIZE)
    # 60000 training images, of which 10000 are split off for evaluation;
    # the 10000-image test split is written as the predict set.
    prepare_dataset(converter, dataset_dir, Modes.TRAIN, 60000, num_eval=10000)
    prepare_dataset(converter, dataset_dir, Modes.PREDICT, 10000)

    # Finally, write the meta data:
    with open(META_DATA_FILENAME_FORMAT.format(dataset_dir), 'w') as meta_data_file:
        meta_data = converter.get_meta_data()
        meta_data['num_samples'] = {Modes.TRAIN: 50000,
                                    Modes.EVAL: 10000,
                                    Modes.PREDICT: 10000}
        meta_data['items_to_descriptions'] = {
            'image': 'A image of fixed size 28.',
            'label': 'A single integer between 0 and 9',
        }
        json.dump(meta_data, meta_data_file)

    print('\nFinished converting the MNIST dataset!')
def prepare(dataset_dir):
    """Runs download and conversion operation.

    Args:
        dataset_dir: The dataset directory where the dataset is stored.
    """
    make_dataset_dir(dataset_dir)
    download_datasets(dataset_dir, _DATA_URL, [_FILENAME], uncompress=True)

    reader = JPEGImageReader(channels=_NUM_CHANNELS)
    # flowers17 labels are the integer ids 0..16 themselves.
    converter = ImagesToTFExampleConverter(
        classes=list(range(17)),
        colorspace=_IMAGE_COLORSPACE,
        image_format=_IMAGE_FORMAT,
        channels=_NUM_CHANNELS,
        image_reader=reader,
        height=_IMAGE_SIZE,
        width=_IMAGE_SIZE)
    prepare_dataset(converter, dataset_dir, 1360, folds=_FOLDS)

    # Finally, write the meta data:
    # One fold each goes to eval and predict; the remainder is training data.
    fold_size = 1360 // _FOLDS
    with open(META_DATA_FILENAME_FORMAT.format(dataset_dir), 'w') as meta_data_file:
        meta_data = converter.get_meta_data()
        meta_data['num_samples'] = {
            Modes.TRAIN: 1360 - 2 * fold_size,
            Modes.EVAL: fold_size,
            Modes.PREDICT: fold_size,
        }
        meta_data['items_to_descriptions'] = {
            'image': 'A image of colorspace {} resized to {}.'.format(
                _IMAGE_COLORSPACE, _IMAGE_SIZE),
            'label': 'A single integer between 0 and 16',
        }
        json.dump(meta_data, meta_data_file)

    print('\nFinished converting the flowers17 dataset!')
def prepare(dataset_dir):
    """Runs download and conversion operation for the cifar10 dataset.

    Args:
        dataset_dir: The dataset directory where the dataset is stored.
    """
    make_dataset_dir(dataset_dir)
    download_datasets(dataset_dir, _DATA_URL, [_FILENAME], uncompress=True)
    image_reader = PNGNumpyImageReader(
        shape=(_IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS))
    classes = [
        'airplane', 'automobile', 'bird', 'cat', 'deer',
        'dog', 'frog', 'horse', 'ship', 'truck'
    ]
    converter = ImagesToTFExampleConverter(
        classes=classes,
        colorspace=_IMAGE_COLORSPACE,
        image_format=_IMAGE_FORMAT,
        channels=_NUM_CHANNELS,
        image_reader=image_reader,
        height=_IMAGE_SIZE,
        width=_IMAGE_SIZE)
    # Batches 1-4 become the training split, batch 5 the eval split.
    prepare_dataset(converter, dataset_dir, ModeKeys.TRAIN,
                    [_DATA_BATCH_FILENAME_FORMAT.format(dataset_dir, i)
                     for i in range(1, 5)])
    prepare_dataset(converter, dataset_dir, ModeKeys.EVAL,
                    [_DATA_BATCH_FILENAME_FORMAT.format(dataset_dir, 5)])
    # NOTE(review): the plain string 'test' is used here where the other calls
    # use ModeKeys members — confirm downstream readers expect this key.
    prepare_dataset(converter, dataset_dir, 'test',
                    [_TEST_DATA_BATCH_FILENAME.format(dataset_dir)])

    # Finally, write the meta data:
    # NOTE(review): constant is spelled MEAT_DATA_... — presumably a typo for
    # META_..., but it must match the name actually defined in this module.
    with open(MEAT_DATA_FILENAME_FORMAT.format(dataset_dir), 'w') as meta_data_file:
        meta_data = converter.get_meta_data()
        meta_data['num_samples'] = {
            ModeKeys.TRAIN: count_tfrecord_file_content(
                RECORD_FILE_NAME_FORMAT.format(dataset_dir, ModeKeys.TRAIN)),
            ModeKeys.EVAL: count_tfrecord_file_content(
                RECORD_FILE_NAME_FORMAT.format(dataset_dir, ModeKeys.EVAL)),
            'test': count_tfrecord_file_content(
                RECORD_FILE_NAME_FORMAT.format(dataset_dir, 'test'))
        }
        meta_data['items_to_descriptions'] = {
            'image': 'A image of colorspace {} resized to {}.'.format(
                _IMAGE_COLORSPACE, _IMAGE_SIZE),
            # Bug fix: labels are 0-indexed, so the maximum label is
            # len(classes) - 1 (9), not len(classes) (10).
            'label': 'A single integer between 0 and {}'.format(
                len(classes) - 1),
        }
        json.dump(meta_data, meta_data_file)

    print('\nFinished converting the cifar10 dataset!')
def prepare(dataset_dir):
    """Runs download and conversion operation for the IMDB dataset.

    Args:
        dataset_dir: The dataset directory where the dataset is stored.
    """
    make_dataset_dir(dataset_dir)

    # Skip all work when every tfrecord split is already present.
    if all([
        tf.gfile.Exists(
            RECORD_FILE_NAME_FORMAT.format(dataset_dir, Modes.TRAIN)),
        tf.gfile.Exists(
            RECORD_FILE_NAME_FORMAT.format(dataset_dir, Modes.EVAL)),
        tf.gfile.Exists(
            RECORD_FILE_NAME_FORMAT.format(dataset_dir, Modes.PREDICT)),
    ]):
        # Bug fix: the '{}' placeholder was previously printed literally
        # because .format() was never called.
        print('`{}` Dataset files already exist.'.format(dataset_dir))
        return

    download_datasets(dataset_dir, _DATA_URL, [_FILENAME])
    # SECURITY: pickle.load on a downloaded file executes arbitrary code if
    # the archive is tampered with; only safe while _DATA_URL is trusted.
    with open(os.path.join(dataset_dir, _FILENAME), 'rb') as f:
        train_set = pickle.load(f)
        test_set = pickle.load(f)

    converter = SequenceToTFExampleConverter(
        sequence_features_types={'source_token': 'int'},
        context_features_types={'label': 'int'})

    num_items = len(train_set[0])
    len_eval_data = int(num_items * 0.1)  # hold out 10% of train for eval
    len_test_data = len(test_set[0])
    prepare_dataset(converter, dataset_dir, train_set, Modes.TRAIN, num_items,
                    len_eval_data)
    prepare_dataset(converter, dataset_dir, test_set, Modes.PREDICT,
                    len_test_data)

    # Finally, write the meta data:
    with open(META_DATA_FILENAME_FORMAT.format(dataset_dir), 'w') as meta_data_file:
        meta_data = converter.get_meta_data()
        meta_data['num_samples'] = {
            Modes.TRAIN: num_items - len_eval_data,
            Modes.EVAL: len_eval_data,
            Modes.PREDICT: len_test_data
        }
        meta_data['items_to_descriptions'] = {
            'source_token': 'A sequence of word ids.',
            'label': 'A single integer 0 or 1',
        }
        meta_data['num_classes'] = 2
        json.dump(meta_data, meta_data_file)

    # Remove the raw pickle now that the tfrecords have been written.
    delete_datasets(dataset_dir, [_FILENAME])
    print('\nFinished converting the IMDB dataset!')
def prepare(dataset_dir):
    """Runs download and conversion operation for the MNIST dataset.

    Args:
        dataset_dir: The dataset directory where the dataset is stored.
    """
    make_dataset_dir(dataset_dir)
    image_reader = PNGNumpyImageReader(
        shape=(_IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS))
    # Digit class names 0-9. Bug fix: 'six' was previously misspelled 'size'.
    classes = [
        'zero', 'one', 'two', 'three', 'four',
        'five', 'six', 'seven', 'eight', 'nine'
    ]
    converter = ImagesToTFExampleConverter(
        classes=classes,
        colorspace='grayscale',
        image_format='png',
        channels=_NUM_CHANNELS,
        image_reader=image_reader,
        height=_IMAGE_SIZE,
        width=_IMAGE_SIZE)
    # 60000 training images, of which 10000 are split off for evaluation.
    prepare_dataset(converter, dataset_dir, ModeKeys.TRAIN, 60000,
                    num_eval=10000)
    # NOTE(review): the plain string 'test' is used here where the other call
    # uses a ModeKeys member — confirm downstream readers expect this key.
    prepare_dataset(converter, dataset_dir, 'test', 10000)

    # Finally, write the meta data:
    # NOTE(review): constant is spelled MEAT_DATA_... — presumably a typo for
    # META_..., but it must match the name actually defined in this module.
    with open(MEAT_DATA_FILENAME_FORMAT.format(dataset_dir), 'w') as meta_data_file:
        meta_data = converter.get_meta_data()
        meta_data['num_samples'] = {
            ModeKeys.TRAIN: 50000,
            ModeKeys.EVAL: 10000,
            ModeKeys.PREDICT: 10000
        }
        meta_data['items_to_descriptions'] = {
            'image': 'A image of fixed size 28.',
            'label': 'A single integer between 0 and 9',
        }
        json.dump(meta_data, meta_data_file)

    print('\nFinished converting the MNIST dataset!')
def prepare(dataset_dir):
    """Runs download and conversion operation.

    Args:
        dataset_dir: The dataset directory where the dataset is stored.
    """
    make_dataset_dir(dataset_dir)
    download_datasets(dataset_dir, _DATA_URL, [_FILENAME], uncompress=True)

    reader = JPEGImageReader(channels=_NUM_CHANNELS)
    # flowers17 labels are the integer ids 0..16 themselves.
    converter = ImagesToTFExampleConverter(
        classes=list(range(17)),
        colorspace=_IMAGE_COLORSPACE,
        image_format=_IMAGE_FORMAT,
        channels=_NUM_CHANNELS,
        image_reader=reader,
        height=_IMAGE_SIZE,
        width=_IMAGE_SIZE)
    prepare_dataset(converter, dataset_dir, 1360, folds=_FOLDS)

    # Finally, write the meta data:
    # One fold each goes to eval and predict; the remainder is training data.
    fold_size = 1360 // _FOLDS
    # NOTE(review): constant is spelled MEAT_DATA_... — presumably a typo for
    # META_..., but it must match the name actually defined in this module.
    with open(MEAT_DATA_FILENAME_FORMAT.format(dataset_dir), 'w') as meta_data_file:
        meta_data = converter.get_meta_data()
        meta_data['num_samples'] = {
            Modes.TRAIN: 1360 - 2 * fold_size,
            Modes.EVAL: fold_size,
            Modes.PREDICT: fold_size,
        }
        meta_data['items_to_descriptions'] = {
            'image': 'A image of colorspace {} resized to {}.'.format(
                _IMAGE_COLORSPACE, _IMAGE_SIZE),
            'label': 'A single integer between 0 and 16',
        }
        json.dump(meta_data, meta_data_file)

    print('\nFinished converting the flowers17 dataset!')