def transform(dataset_reader, data_dir, output_dir, splits, only_classes,
              only_images, limit_examples, limit_classes, seed, overrides,
              debug):
    """Prepares dataset for ingestion.

    Converts the dataset into different (one per split) TFRecords files.
    """
    # Log verbosity follows the debug flag.
    tf.logging.set_verbosity(tf.logging.DEBUG if debug else tf.logging.INFO)

    # We forcefully save modified datasets into subfolders to avoid
    # overwriting and/or unnecessary clutter.
    subfolder = get_output_subfolder(
        only_classes, only_images, limit_examples, limit_classes
    )
    if subfolder:
        output_dir = os.path.join(output_dir, subfolder)

    try:
        reader_cls = get_reader(dataset_reader)
    except ValueError as err:
        tf.logging.error('Error getting reader: {}'.format(err))
        return

    # All splits must have a consistent set of classes: the first split
    # read defines them, and every later split is forced to match.
    shared_classes = None
    extra_kwargs = parse_override(overrides)

    try:
        for split in splits:
            # Instantiate a reader for this particular split.
            current_reader = reader_cls(
                data_dir, split,
                only_classes=only_classes,
                only_images=only_images,
                limit_examples=limit_examples,
                limit_classes=limit_classes,
                seed=seed,
                **extra_kwargs
            )

            if shared_classes is None:
                # Remember the classes from the first split reader.
                shared_classes = current_reader.classes
            else:
                # Overwrite classes after the first split for consistency.
                current_reader.set_classes(shared_classes)

            # We assume we are saving object detection objects, but it
            # should be easy to modify once we have different types of
            # objects.
            ObjectDetectionWriter(current_reader, output_dir, split).save()
    except InvalidDataDirectory as err:
        tf.logging.error('Error reading dataset: {}'.format(err))
def transform(dataset_reader, data_dir, output_dir, splits, only_classes,
              only_images, limit_examples, class_examples, overrides, debug):
    """Prepares dataset for ingestion.

    Converts the dataset into different (one per split) TFRecords files.
    """
    # Default to INFO logging; raise to DEBUG when requested.
    tf.logging.set_verbosity(tf.logging.INFO)
    if debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)

    try:
        reader_cls = get_reader(dataset_reader)
    except ValueError as err:
        tf.logging.error('Error getting reader: {}'.format(err))
        return

    # All splits must have a consistent set of classes: the first split
    # read defines them, and every later split is forced to match.
    shared_classes = None
    extra_kwargs = parse_override(overrides)

    try:
        for split in splits:
            # Instantiate a reader for this particular split.
            current_reader = reader_cls(
                data_dir, split,
                only_classes=only_classes,
                only_images=only_images,
                limit_examples=limit_examples,
                class_examples=class_examples,
                **extra_kwargs
            )

            if shared_classes is None:
                # Remember the classes from the first split reader.
                shared_classes = current_reader.classes
            else:
                # Overwrite classes after the first split for consistency.
                current_reader.classes = shared_classes

            # We assume we are saving object detection objects, but it
            # should be easy to modify once we have different types of
            # objects.
            ObjectDetectionWriter(current_reader, output_dir, split).save()

            # Report how many examples each class contributed to this split.
            # NOTE(review): reads the reader's private `_per_class_counter`
            # attribute — presumably a collections.Counter; confirm against
            # the reader implementation.
            tf.logging.info('Composition per class ({}):'.format(split))
            counter = current_reader._per_class_counter
            for label, count in counter.most_common():
                tf.logging.info(
                    '\t%s: %d', current_reader.pretty_name(label), count
                )
    except InvalidDataDirectory as err:
        tf.logging.error('Error reading dataset: {}'.format(err))