Example #1
def main(_):
  data_dir = FLAGS.data_dir
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

  logging.info('Reading from Pet dataset.')
  image_dir = os.path.join(data_dir, 'images')
  annotations_dir = os.path.join(data_dir, 'annotations')
  examples_path = os.path.join(annotations_dir, 'trainval.txt')
  examples_list = dataset_util.read_examples_list(examples_path)

  # Test images are not included in the downloaded data set, so we shall perform
  # our own split.
  random.seed(42)
  random.shuffle(examples_list)
  num_examples = len(examples_list)
  num_train = int(0.7 * num_examples)
  train_examples = examples_list[:num_train]
  val_examples = examples_list[num_train:]
  logging.info('%d training and %d validation examples.',
               len(train_examples), len(val_examples))

  train_output_path = os.path.join(FLAGS.output_dir, 'pet_train.record')
  val_output_path = os.path.join(FLAGS.output_dir, 'pet_val.record')
  # The *_with_masks outputs are written when full-body (mask) annotations are
  # requested, i.e. when --faces_only is false.
  if not FLAGS.faces_only:
    train_output_path = os.path.join(FLAGS.output_dir,
                                     'pet_train_with_masks.record')
    val_output_path = os.path.join(FLAGS.output_dir,
                                   'pet_val_with_masks.record')
  create_tf_record(train_output_path, label_map_dict, annotations_dir,
                   image_dir, train_examples, faces_only=FLAGS.faces_only)
  create_tf_record(val_output_path, label_map_dict, annotations_dir,
                   image_dir, val_examples, faces_only=FLAGS.faces_only)
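For reference, a minimal sketch of what dataset_util.read_examples_list is expected to do here, assuming the trainval.txt format where each line starts with an example name, optionally followed by extra whitespace-separated fields that are ignored:

import tensorflow as tf

def read_examples_list(path):
  # Sketch: return the first token of each line as the example name.
  with tf.gfile.GFile(path) as fid:
    lines = fid.readlines()
  return [line.strip().split(' ')[0] for line in lines]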
Example #2
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    required_flags = [
        'input_annotations_csv', 'input_images_directory', 'input_label_map',
        'output_tf_record_path_prefix'
    ]
    for flag_name in required_flags:
        if not getattr(FLAGS, flag_name):
            raise ValueError('Flag --{} is required'.format(flag_name))

    label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map)
    all_annotations = pd.read_csv(FLAGS.input_annotations_csv)
    all_images = tf.gfile.Glob(
        os.path.join(FLAGS.input_images_directory, '*.jpg'))
    all_image_ids = [
        os.path.splitext(os.path.basename(v))[0] for v in all_images
    ]
    all_image_ids = pd.DataFrame({'ImageID': all_image_ids})
    # Concatenating the image IDs ensures every image on disk appears in the
    # groupby below, even if it has no annotation rows in the CSV.
    all_annotations = pd.concat([all_annotations, all_image_ids])

    tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))

    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = oid_tfrecord_creation.open_sharded_output_tfrecords(
            tf_record_close_stack, FLAGS.output_tf_record_path_prefix,
            FLAGS.num_shards)

        for counter, image_data in enumerate(
                all_annotations.groupby('ImageID')):
            tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...',
                                   1000, counter)

            image_id, image_annotations = image_data
            # In OID image file names are formed by appending ".jpg" to the image ID.
            image_path = os.path.join(FLAGS.input_images_directory,
                                      image_id + '.jpg')
            with tf.gfile.Open(image_path, 'rb') as image_file:
                encoded_image = image_file.read()

            tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
                image_annotations, label_map, encoded_image)
            if tf_example:
                # image_id is a hex string; hash it into a shard index.
                # (int replaces Python 2's long, which no longer exists.)
                shard_idx = int(image_id, 16) % FLAGS.num_shards
                output_tfrecords[shard_idx].write(
                    tf_example.SerializeToString())
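A minimal sketch of the sharded-writer helper used above, assuming it registers one TFRecordWriter per shard on the given ExitStack (so all writers close together) and names shards with the usual -NNNNN-of-NNNNN suffix:

import contextlib2
import tensorflow as tf

def open_sharded_output_tfrecords(exit_stack, base_path, num_shards):
  # Sketch: one writer per shard, all closed when the exit stack unwinds.
  shard_paths = [
      '{}-{:05d}-of-{:05d}'.format(base_path, idx, num_shards)
      for idx in range(num_shards)
  ]
  return [
      exit_stack.enter_context(tf.python_io.TFRecordWriter(path))
      for path in shard_paths
  ]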
Example #3
    def test_get_label_map_dict(self):
        label_map_string = """
      item {
        id:2
        name:'cat'
      }
      item {
        id:1
        name:'dog'
      }
    """
        label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
        with tf.gfile.Open(label_map_path, 'w') as f:
            f.write(label_map_string)

        label_map_dict = label_map_util.get_label_map_dict(label_map_path)
        self.assertEqual(label_map_dict['dog'], 1)
        self.assertEqual(label_map_dict['cat'], 2)
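The test above pins down the contract of label_map_util.get_label_map_dict: it maps class names to ids regardless of the order of items in the pbtxt. A minimal sketch, assuming the StringIntLabelMap proto shipped with the Object Detection API:

import tensorflow as tf
from google.protobuf import text_format
from object_detection.protos import string_int_label_map_pb2

def get_label_map_dict(label_map_path):
  # Sketch: parse the pbtxt label map and return {name: id}.
  with tf.gfile.GFile(label_map_path, 'r') as fid:
    label_map = string_int_label_map_pb2.StringIntLabelMap()
    text_format.Merge(fid.read(), label_map)
  return {item.name: item.id for item in label_map.item}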
Example #4
def main(_):
    if FLAGS.set not in SETS:
        raise ValueError('set must be in: {}'.format(SETS))
    if FLAGS.year not in YEARS:
        raise ValueError('year must be in: {}'.format(YEARS))

    data_dir = FLAGS.data_dir
    years = ['VOC2007', 'VOC2012']
    if FLAGS.year != 'merged':
        years = [FLAGS.year]

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    for year in years:
        logging.info('Reading from PASCAL %s dataset.', year)
        examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                     'aeroplane_' + FLAGS.set + '.txt')
        annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
        examples_list = dataset_util.read_examples_list(examples_path)
        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data, FLAGS.data_dir,
                                            label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())

    writer.close()
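For reference, a sketch of how dataset_util.recursive_parse_xml_to_dict could turn the parsed PASCAL XML into the nested dict consumed by dict_to_tf_example: leaf elements become tag -> text entries, and repeated object tags are collected into a list while every other tag maps to a single entry:

def recursive_parse_xml_to_dict(xml):
  # Sketch: a leaf element maps its tag to its text content.
  if not len(xml):
    return {xml.tag: xml.text}
  result = {}
  for child in xml:
    child_result = recursive_parse_xml_to_dict(child)
    if child.tag != 'object':
      result[child.tag] = child_result[child.tag]
    else:
      # PASCAL annotations may contain several <object> elements per image.
      result.setdefault(child.tag, []).append(child_result[child.tag])
  return result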
Example #5
def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use,
                               label_map_path, validation_set_size):
    """Convert the KITTI detection dataset to TFRecords.

    Args:
      data_dir: The full path to the folder containing the unzipped data from
        data_object_image_2.zip and data_object_label_2.zip. The folder
        structure is assumed to be data_dir/training/label_2 (annotations) and
        data_dir/data_object_image_2/training/image_2 (images).
      output_path: The path to which TFRecord files will be written. The
        TFRecord with the training set will be located at
        <output_path>_train.tfrecord, and the TFRecord with the validation set
        will be located at <output_path>_val.tfrecord.
      classes_to_use: List of strings naming the classes for which data should
        be converted. Use the same names as presented in the KITTI README
        file. Adding the dontcare class will remove all other bounding boxes
        that overlap with areas marked as dontcare regions.
      label_map_path: Path to the label map proto.
      validation_set_size: How many images should be left as the validation
        set. (The first `validation_set_size` examples are selected for the
        validation set.)
    """
    label_map_dict = label_map_util.get_label_map_dict(label_map_path)
    train_count = 0
    val_count = 0

    annotation_dir = os.path.join(data_dir, 'training', 'label_2')

    image_dir = os.path.join(data_dir, 'data_object_image_2', 'training',
                             'image_2')

    train_writer = tf.python_io.TFRecordWriter('%s_train.tfrecord' %
                                               output_path)
    val_writer = tf.python_io.TFRecordWriter('%s_val.tfrecord' % output_path)

    images = sorted(tf.gfile.ListDirectory(image_dir))
    for img_name in images:
        img_num = int(img_name.split('.')[0])
        is_validation_img = img_num < validation_set_size
        img_anno = read_annotation_file(
            os.path.join(annotation_dir,
                         str(img_num).zfill(6) + '.txt'))

        image_path = os.path.join(image_dir, img_name)

        # Filter all bounding boxes of this frame that are of a legal class, and
        # don't overlap with a dontcare region.
        # TODO(talremez) filter out targets that are truncated or heavily occluded.
        annotation_for_image = filter_annotations(img_anno, classes_to_use)

        example = prepare_example(image_path, annotation_for_image,
                                  label_map_dict)
        if is_validation_img:
            val_writer.write(example.SerializeToString())
            val_count += 1
        else:
            train_writer.write(example.SerializeToString())
            train_count += 1

    train_writer.close()
    val_writer.close()
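A sketch of the read_annotation_file helper used above, assuming the standard KITTI label format (type, truncation, occlusion, alpha, then the 2D box as left/top/right/bottom in pixels); only the fields a 2D detection pipeline typically needs are shown:

import numpy as np

def read_annotation_file(filename):
  # Sketch: parse one KITTI label_2 .txt file into numpy arrays.
  with open(filename) as f:
    lines = [line.strip().split(' ') for line in f if line.strip()]
  return {
      'type': np.array([x[0].lower() for x in lines]),
      'truncated': np.array([float(x[1]) for x in lines]),
      'occluded': np.array([int(x[2]) for x in lines]),
      # Fields 4-7 hold the 2D box as left, top, right, bottom in pixels.
      '2d_bbox_left': np.array([float(x[4]) for x in lines]),
      '2d_bbox_top': np.array([float(x[5]) for x in lines]),
      '2d_bbox_right': np.array([float(x[6]) for x in lines]),
      '2d_bbox_bottom': np.array([float(x[7]) for x in lines]),
  }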