Example #1
def load_pascal(data_dir, split, is_detect=True, is_seg=False):
    # Exactly one of is_detect / is_seg may be enabled.
    assert is_detect != is_seg

    annotations_dir = os.path.join(data_dir, "Annotations")
    image_dir = os.path.join(data_dir, "JPEGImages")

    pascal_ann_dict = dict()
    if is_detect:
        examples_path = os.path.join(data_dir, 'ImageSets', 'Main',
                                     split + '.txt')
    if is_seg:
        examples_path = os.path.join(data_dir, 'ImageSets', 'Segmentation',
                                     split + '.txt')
    examples_list = dataset_util.read_examples_list(examples_path)

    for example in examples_list:
        path = os.path.join(annotations_dir, example + '.xml')
        with open(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        data["img_path"] = os.path.join(image_dir, data['filename'])
        pascal_ann_dict[data['filename']] = data

    return pascal_ann_dict
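Every example on this page funnels a VOC-style XML file through dataset_util.recursive_parse_xml_to_dict from the TensorFlow Object Detection API. For orientation, here is a minimal self-contained sketch of the same pattern (a simplified rewrite, not the library code); the toy XML and the name parse_xml_to_dict are illustrative only:

from lxml import etree


def parse_xml_to_dict(xml):
    """Recursively fold an lxml element into nested dicts.

    Repeated <object> tags are collected into a list, which is why the
    examples on this page can iterate over data['object'].
    """
    if len(xml) == 0:  # leaf node: return its text
        return {xml.tag: xml.text}
    result = {}
    for child in xml:
        child_result = parse_xml_to_dict(child)
        if child.tag != 'object':
            result[child.tag] = child_result[child.tag]
        else:
            result.setdefault(child.tag, []).append(child_result[child.tag])
    return result


voc_xml = """
<annotation>
  <filename>dog.jpg</filename>
  <size><width>640</width><height>480</height><depth>3</depth></size>
  <object>
    <name>dog</name>
    <bndbox><xmin>10</xmin><ymin>20</ymin><xmax>300</xmax><ymax>400</ymax></bndbox>
  </object>
</annotation>
"""
data = parse_xml_to_dict(etree.fromstring(voc_xml))['annotation']
print(data['filename'])                     # dog.jpg
print(data['object'][0]['bndbox']['xmax'])  # 300 (values stay strings)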
Example #2
def load_anno_sample(image_id):
    # Relies on a module-level annotations_dir defined elsewhere in the file.
    anno_file = os.path.join(annotations_dir, image_id + '.xml')
    with open(anno_file, 'r') as fid:
        xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    anno_data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    return anno_data
def main(_):

    data_dir = FLAGS.data_dir
    annotations_dir = os.path.join(data_dir, 'labels')

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    img_path = os.path.join(data_dir, 'img')
    examples_list = glob.glob(img_path + '/*.jpg')

    # examples_list = dataset_util.read_examples_list(examples_path)

    for idx, example in enumerate(examples_list):

        # Strip the directory and extension to get the bare example id.
        example = example.split('/')[-1].split('.')[0]

        path = os.path.join(annotations_dir, example + '.xml')

        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(
                data=data,
                dataset_directory=data_dir,
                label_map_dict=label_map_dict,
                ignore_difficult_instances=FLAGS.ignore_difficult_instances,
                image_subdirectory='img')

            writer.write(tf_example.SerializeToString())

    writer.close()
Example #4
def main(_):
    #  if FLAGS.set not in SETS:
    #    raise ValueError('set must be in : {}'.format(SETS))
    #  if FLAGS.year not in YEARS:
    #    raise ValueError('year must be in : {}'.format(YEARS))

    data_dir = 'E:/computerscience/my projects/humanoid/VOCdevkit'
    years = ['VOC2012']
    #  if FLAGS.year != 'merged':
    #    years = [FLAGS.year]

    writer = tf.python_io.TFRecordWriter('pascal_train.record')

    label_map_dict = label_map_util.get_label_map_dict(
        'data/pascal_label_map.pbtxt')

    for year in years:
        logging.info('Reading from PASCAL %s dataset.', year)
        examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                     'aeroplane_' + 'train' + '.txt')
        annotations_dir = os.path.join(data_dir, year, 'Annotations')
        examples_list = dataset_util.read_examples_list(examples_path)
        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
            tf_example = dict_to_tf_example(data, data_dir, label_map_dict)
            print(tf_example)
            writer.write(tf_example.SerializeToString())
            # NOTE: stops after the first record (debugging leftover).
            break
    writer.close()
def main(_):
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))

  data_dir = FLAGS.data_dir

  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
  print ("output_path is :")
  print(FLAGS.output_path)

  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

  logging.info('Reading from VID dataset.')
  examples_path = os.path.join(data_dir, 'ImageSets', 'VID', 'list',
                               FLAGS.set + '_list.txt')
  annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir, 'VID', FLAGS.set)
  examples_list = dataset_util.read_examples_list(examples_path)
  for idx, example in enumerate(examples_list):
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples_list))
    path = os.path.join(annotations_dir, example)
    with tf.gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                    FLAGS.set)
    writer.write(tf_example.SerializeToString())

  writer.close()
Example #6
def main(_):
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    if FLAGS.year not in YEARS:
        raise ValueError('year must be in : {}'.format(YEARS))

    data_dir = FLAGS.data_dir
    years = ['VOC2007', 'VOC2012']
    if FLAGS.year != 'merged':
        years = [FLAGS.year]

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    for year in years:
        logging.info('Reading from PASCAL %s dataset.', year)
        examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                     'aeroplane_' + FLAGS.set + '.txt')
        annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
        examples_list = dataset_util.read_examples_list(examples_path)
        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data, FLAGS.data_dir,
                                            FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())

    writer.close()
def _create_tf_record_from_coco_annotations(fs, output_path, num_shards=100):
    # Despite the name, this reads PASCAL VOC-style XML annotations.
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_path, num_shards)

        label_map_dict = {"1": "1", "2": "2", "3": "others"}
        for idx, example in enumerate(fs):
            shard_idx = idx % num_shards
            image_path = example[0]
            annotation_file = example[1]
            if shard_idx == 0:
                logging.info('On image %d', idx)
            assert os.path.basename(image_path).replace(
                ".jpg",
                "") == os.path.basename(annotation_file).replace(".xml", "")
            with tf.gfile.GFile(annotation_file, 'r') as fid:
                xml_str = fid.read()
            try:
                xml = etree.fromstring(xml_str)
            except Exception as ex:
                # lxml rejects unicode strings that carry an encoding
                # declaration; drop the first line and retry.
                print(ex)
                xml_str = "\n".join(xml_str.split("\n")[1:])
                xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
            tf_example = dict_to_tf_example(data, image_path, label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            output_tfrecords[shard_idx].write(tf_example.SerializeToString())
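The sharded writer above names its files with the TF Object Detection API convention, output_path-00000-of-00100 through output_path-00099-of-00100. A sketch of globbing the shards back for reading, using the same TF 1.x-era APIs ('out/records' is a placeholder base path):

import tensorflow as tf

# Placeholder base path; the -NNNNN-of-00100 suffix matches what
# open_sharded_output_tfrecords produces with num_shards=100.
shard_files = tf.gfile.Glob('out/records-*-of-00100')
dataset = tf.data.TFRecordDataset(shard_files)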
Example #8
def main(_):
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))

    data_dir = FLAGS.data_dir

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading dataset.')
    examples_path = '/home/wangshiyao/Documents/data/imagenet/gen_list/combine_train_list.txt'
    annotations_dir = '/home/wangshiyao/Documents/data/imagenet/'
    examples_list = dataset_util.read_examples_list(examples_path)

    num_label = [0] * 31
    for idx, example in enumerate(examples_list):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples_list))
            print(idx, num_label)
        path = os.path.join(annotations_dir, example)
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        tf_example = dict_to_tf_example(data, example, FLAGS.data_dir,
                                        label_map_dict, FLAGS.set, num_label)
        # Writing is disabled here; the loop only accumulates num_label counts.
        #writer.write(tf_example.SerializeToString())

    writer.close()
Example #9
def create_record_file(data_dir, output_file, year, split_name):
    years = ['VOC2007', 'VOC2012']
    if year != 'merged':
        years = [year]

    # Create tf.Record writer
    writer = tf.python_io.TFRecordWriter(output_file)

    for year in years:
        print('Creating TFRecord file from PASCAL {} {} dataset'.format(
            year, split_name))

        examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                     split_name + '.txt')
        annotations_dir = os.path.join(data_dir, year, 'Annotations')
        examples_list = dataset_util.read_examples_list(examples_path)

        for idx in trange(0, len(examples_list)):
            example = examples_list[idx]

            # Find and parse annotation xml file
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            # Create tf.Example and add to tf.Record
            tf_example = _dict_to_tf_example(data, data_dir)
            writer.write(tf_example.SerializeToString())

    writer.close()
    print('Saved TFRecord to {}\n'.format(output_file))
Example #10
def load_pascal_single(anno_path, image_dir):
    with open(anno_path, 'r') as fid:
        xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    data["img_path"] = os.path.join(image_dir, data['filename'])
    return data
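A hypothetical call (both paths are placeholders) might look like:

# Hypothetical usage of load_pascal_single; the paths are placeholders.
data = load_pascal_single('VOCdevkit/VOC2012/Annotations/2007_000027.xml',
                          'VOCdevkit/VOC2012/JPEGImages')
print(data['img_path'])
for obj in data.get('object', []):
    print(obj['name'], obj['bndbox'])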
Example #11
def create_tf_record(output_filename, label_map_dict, annotations_dir,
                     image_dir, examples):
    """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
  """
    writer = tf.python_io.TFRecordWriter(output_filename)
    for idx, example in enumerate(examples):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples))
        path = os.path.join(annotations_dir, 'xmls', example + '.xml')

        if not os.path.exists(path):
            logging.warning('Could not find %s, ignoring example.', path)
            continue
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        tf_example = dict_to_tf_example(data, label_map_dict, image_dir)
        writer.write(tf_example.SerializeToString())

    writer.close()
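In the pets-style tutorials a helper like this is usually driven by a shuffle-and-split loop; a hedged sketch, where the 70/30 ratio and the surrounding names (examples_list, label_map_dict, annotations_dir, image_dir) are assumptions:

import random

# Hedged driver sketch for create_tf_record; ratio and names are assumptions.
random.seed(42)
random.shuffle(examples_list)
num_train = int(0.7 * len(examples_list))
create_tf_record('pet_train.record', label_map_dict, annotations_dir,
                 image_dir, examples_list[:num_train])
create_tf_record('pet_val.record', label_map_dict, annotations_dir,
                 image_dir, examples_list[num_train:])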
Example #12
def _load_anno_sample(anno_path):
    '''
    Load a single annotation.
    :param anno_path: path to a PASCAL VOC-format annotation file
    :return: the parsed annotation dict
    '''
    with open(anno_path, 'r') as fid:
        xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    anno_data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    return anno_data
def statistic():
    annotations_dir = "/Users/hy/Documents/coco/Annotations/"

    all_xml_fs = [
        os.path.join(annotations_dir, f)
        for f in sorted(os.listdir(annotations_dir))
    ]
    all_xml_fs = [f for f in all_xml_fs if f.endswith(".xml")]

    names = []
    pose = []
    truncated = []
    difficult = []

    width = []
    height = []
    depth = []

    xmin = []
    ymin = []
    xmax = []
    ymax = []

    for annotation_file in all_xml_fs:
        with tf.gfile.GFile(annotation_file, 'r') as fid:
            xml_str = fid.read()
        try:
            xml = etree.fromstring(xml_str)
        except Exception as ex:
            # lxml rejects unicode strings that carry an encoding
            # declaration; drop the first line and retry.
            print(ex)
            xml_str = "\n".join(xml_str.split("\n")[1:])
            xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        names.extend([obj["name"] for obj in data['object']])
        pose.extend([obj["pose"] for obj in data['object']])
        truncated.extend([obj["truncated"] for obj in data['object']])
        difficult.extend([obj["difficult"] for obj in data['object']])

        width.append(data["size"]["width"])
        height.append(data["size"]["height"])
        depth.append(data["size"]["depth"])

        xmin.append(min(float(obj['bndbox']['xmin']) for obj in data['object']))
        ymin.append(min(float(obj['bndbox']['ymin']) for obj in data['object']))
        xmax.append(max(float(obj['bndbox']['xmax']) for obj in data['object']))
        ymax.append(max(float(obj['bndbox']['ymax']) for obj in data['object']))

    print(set(names))
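If per-class frequencies are wanted rather than just the unique names, collections.Counter is a natural drop-in; a small sketch, not part of the original:

from collections import Counter

# Per-class counts instead of just the set of names.
print(Counter(names).most_common())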
Example #14
def create_tf_record(output_filename,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     examples,
                     faces_only=True,
                     mask_type='png'):
  """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
    faces_only: If True, generates bounding boxes for pet faces.  Otherwise
      generates bounding boxes (as well as segmentations for full pet bodies).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.
  """
  writer = tf.python_io.TFRecordWriter(output_filename)
  for idx, example in enumerate(examples):
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples))
    xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')
    mask_path = os.path.join(annotations_dir, 'trimaps', example + '.png')

    if not os.path.exists(xml_path):
      logging.warning('Could not find %s, ignoring example.', xml_path)
      continue
    with tf.gfile.GFile(xml_path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

    try:
      tf_example = dict_to_tf_example(
          data,
          mask_path,
          label_map_dict,
          image_dir,
          faces_only=faces_only,
          mask_type=mask_type)
      writer.write(tf_example.SerializeToString())
    except ValueError:
      logging.warning('Invalid example: %s, ignoring.', xml_path)

  writer.close()
Example #15
def _load_bboxes_names(anno_path):
    with open(anno_path, 'rb') as fid:
        xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    anno_data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    bboxes = []
    names = []
    if "object" in anno_data:
        for obj in anno_data["object"]:
            xmin = int(obj["bndbox"]["xmin"])
            ymin = int(obj["bndbox"]["ymin"])
            xmax = int(obj["bndbox"]["xmax"])
            ymax = int(obj["bndbox"]["ymax"])
            bboxes.append([xmin, ymin, xmax, ymax])
            names.append(obj["name"])
    return bboxes, names
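A hypothetical call (the annotation path is a placeholder):

# Hypothetical usage of _load_bboxes_names; 'sample.xml' is a placeholder.
bboxes, names = _load_bboxes_names('sample.xml')
for (xmin, ymin, xmax, ymax), name in zip(bboxes, names):
    print('{}: ({}, {})-({}, {})'.format(name, xmin, ymin, xmax, ymax))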
Example #16
def main(_):
    print(FLAGS.data_dir)
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    #if FLAGS.year not in YEARS:
    #    raise ValueError('year must be in : {}'.format(YEARS))

    data_dir = FLAGS.data_dir
    #years = ['VOC2007', 'VOC2012']
    #if FLAGS.year != 'merged':
    years = [FLAGS.year]

    ACTIONSET = ['tfrecord', 'imageset']
    if FLAGS.action not in ACTIONSET:
        raise ValueError('action must be in : {}'.format(ACTIONSET))
    if FLAGS.action == 'tfrecord':
        pass
    elif FLAGS.action == 'imageset':
        gen_image_set(FLAGS.data_dir, FLAGS.year, FLAGS.imageset)
        return

    writer = tf.io.TFRecordWriter(FLAGS.output_path)

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    for year in years:
        logging.info('Reading from PASCAL %s dataset.', year)
        examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                     FLAGS.imageset + '_' + FLAGS.set + '.txt')
        annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
        examples_list = dataset_util.read_examples_list(examples_path)
        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.io.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str.encode('utf-8'))
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data, FLAGS.data_dir,
                                            label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())

    writer.close()
Example #17
def main(_):
    logging.info('Preparing to process samples in {}'.format(FLAGS.data_dir))
    data_dir = FLAGS.data_dir

    years = [y.strip() for y in str(FLAGS.year).split(',')]
    label_map_file = FLAGS.label_map_path
    if not os.path.exists(label_map_file):
        label_map_file = os.path.join(data_dir, 'label_map.pbtxt')
        if not os.path.exists(label_map_file):
            raise FileNotFoundError('label map file does not exist.')

    label_map_dict = label_map_util.get_label_map_dict(label_map_file)

    output_path = FLAGS.output_path
    if not output_path:
        output_path = os.path.basename(os.path.dirname(data_dir + os.sep)) + '.tfrecord'
    logging.info('Preparing to write samples to {}'.format(output_path))

    writer = tf.io.TFRecordWriter(output_path)

    for year in years:
        logging.info('Reading from PASCAL %s dataset.', year)

        examples_path = gen_image_set(FLAGS.data_dir, year)
        examples_list = dataset_util.read_examples_list(examples_path)

        annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)

        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.io.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str.encode('utf-8'))
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data, FLAGS.data_dir, year, label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())

    writer.close()
Example #18
def gen_shard(examples_list, annotations_dir, out_filename, root_dir, _set):
    writer = tf.python_io.TFRecordWriter(out_filename)
    for example in examples_list:
        # Sample frames from this sequence's xml files.
        xml_pattern = os.path.join(annotations_dir, example + '/*.xml')
        xml_files = sorted(glob.glob(xml_pattern))
        samples = sample_frames(xml_files)
        for sample in samples:
            dicts = []
            for xml_file in sample:
                # Process a single xml file.
                with tf.gfile.GFile(xml_file, 'r') as fid:
                    xml_str = fid.read()
                xml = etree.fromstring(xml_str)
                dic = dataset_util.recursive_parse_xml_to_dict(
                    xml)['annotation']
                dicts.append(dic)
            tf_example = dicts_to_tf_example(dicts, root_dir, _set)
            writer.write(tf_example.SerializeToString())
    writer.close()
    return
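gen_shard depends on a sample_frames helper that is not shown. A purely hypothetical stand-in, just to make the data flow concrete:

# Purely hypothetical stand-in for the sample_frames helper used above:
# window the sorted xml list into fixed-length, non-overlapping clips.
def sample_frames(xml_files, clip_len=8):
    return [xml_files[i:i + clip_len]
            for i in range(0, len(xml_files) - clip_len + 1, clip_len)]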
Example #19
def create_tf_record(output_filename, label_map_dict, annotations_dir,
                     image_dir, examples):
    """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
  """
    writer = tf.python_io.TFRecordWriter(output_filename)
    for idx, example in enumerate(examples):

        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples))
        # Build the path to this example's xml annotation file.
        path = os.path.join(annotations_dir, 'xmls', example + '.xml')

        if not os.path.exists(path):
            logging.warning('Could not find %s, ignoring example.', path)
            continue
        with tf.gfile.GFile(path, 'r') as fid:  # open the xml file as a GFile
            xml_str = fid.read()

        xml = etree.fromstring(xml_str)  # parse the xml string

        # Annotation data as a dict.
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        # Yields the encoded image pixels, height, width, and
        # ground-truth coordinates.
        tf_example = dict_to_tf_example(data, label_map_dict, image_dir)

        # Write the serialized example to the TFRecord file.
        writer.write(tf_example.SerializeToString())

    writer.close()
Example #20
def main(_):
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))

    data_dir = FLAGS.data_dir

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    print('Reading from PASCAL dataset.')
    examples_path = os.path.join(data_dir, 'ImageSets', 'Main',
                                 FLAGS.set + '.txt')
    if FLAGS.include_segment_class or FLAGS.include_segment_object:
        examples_path = os.path.join(data_dir, 'ImageSets', 'Segmentation',
                                     FLAGS.set + '.txt')
    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples_list))
        path = os.path.join(annotations_dir, example + '.xml')
        mask_filename = None
        if FLAGS.include_segment_class or FLAGS.include_segment_object:
            mask_filename = example + ".png"
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                        FLAGS.ignore_difficult_instances, mask_filename=mask_filename,
                                        include_segment_class=FLAGS.include_segment_class,
                                        include_segment_object=FLAGS.include_segment_object)
        writer.write(tf_example.SerializeToString())

    writer.close()
Example #21
def main(_):
    data_dir = FLAGS.data_dir
    name = FLAGS.name
    data_folder = os.path.join(data_dir, name)
    annotations_dir = os.path.join(data_dir, name, FLAGS.annotations_dir)
    all_annotations = get_all_annotations(annotations_dir)

    train_set = os.path.join(data_folder, 'ImageSets', 'Main', 'train.txt')
    val_set = os.path.join(data_folder, 'ImageSets', 'Main', 'val.txt')

    if not os.path.exists(train_set):
        logging.info("Train set not fount, generate 80% from all data.")
        write_annotations(all_annotations[:int(len(all_annotations) * 0.8)],
                          data_folder, 'train')

    if not os.path.exists(val_set):
        logging.info("Validate set not fount, generate 20% from all data.")
        write_annotations(all_annotations[int(len(all_annotations) * 0.8):],
                          data_folder, 'val')

    label_map_path = FLAGS.label_map_path
    if not os.path.exists(label_map_path):
        logging.info("%s not fount, try to find at %s", label_map_path,
                     data_folder)
        label_map_path = os.path.join(data_folder, FLAGS.label_map_path)
        if not os.path.exists(label_map_path):
            logging.info("%s not fount, failed!", label_map_path)
            return

    output_path = FLAGS.output_path
    if not output_path:
        out_name = os.path.basename(data_folder)
        if not out_name:
            out_name = os.path.basename(data_dir)
        output_path = out_name + '.tfrecord'

    logging.info("Using label map path: %s.", label_map_path)
    logging.info("Using annotations dir: %s.", annotations_dir)
    logging.info("Using output path: %s.", output_path)
    label_map_dict = label_map_util.get_label_map_dict(label_map_path)

    # print(FLAGS.data_dir)
    # if FLAGS.set not in SETS:
    #     raise ValueError('set must be in : {}'.format(SETS))
    # if FLAGS.year not in YEARS:
    #    raise ValueError('year must be in : {}'.format(YEARS))

    # years = ['VOC2007', 'VOC2012']
    # if FLAGS.year != 'merged':
    # years = [FLAGS.year]

    # ACTIONSET = ['tfrecord', 'imageset']
    # if FLAGS.action not in ACTIONSET:
    #     raise ValueError('action must be in : {}'.format(ACTIONSET))
    # if FLAGS.action == 'tfrecord':
    #     pass
    # elif FLAGS.action == 'imageset':
    #     gen_image_set(FLAGS.data_dir, FLAGS.year, FLAGS.imageset)
    #     return

    for set_name, image_set_path in zip(('train', 'val'),
                                        (train_set, val_set)):
        logging.info("Generate data set %s in %s.", set_name, image_set_path)

        examples_list = dataset_util.read_examples_list(image_set_path)
        writer = tf.io.TFRecordWriter(
            os.path.splitext(output_path)[0] + '_' + set_name +
            os.path.splitext(output_path)[1])
        # Log roughly ten times per split, rounded down to a multiple of 100.
        step = max(len(examples_list) // 10 // 100 * 100, 10)
        for idx, example in enumerate(examples_list):
            if idx % step == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            if not Path(path).exists():
                logging.error(
                    'Annotation xml %s does not exist; press Enter to '
                    'continue or q to quit.', path)
                key = input()
                if key == 'q':
                    break
                else:
                    continue

            with tf.io.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str.encode('utf-8'))
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            # logging.info("Create tf example for %s.", path)

            tf_example = dict_to_tf_example(data, data_folder, label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            if tf_example:
                writer.write(tf_example.SerializeToString())

        writer.close()
Example #22
def _load_anno_sample(anno_path):
    with open(anno_path, 'r') as fid:
        xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    anno_data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    return anno_data
Example #23
def main(_):
    logging.info('Preparing to process samples in {}'.format(FLAGS.data_dir))
    data_dir = FLAGS.data_dir

    years = [y.strip() for y in str(FLAGS.year).split(',')]
    label_map_file = FLAGS.label_map_path
    if not os.path.exists(label_map_file):
        label_map_file = os.path.join(data_dir, 'label_map.pbtxt')
        if not os.path.exists(label_map_file):
            raise FileNotFoundError('label map file does not exist.')

    label_map_dict = label_map_util.get_label_map_dict(label_map_file)

    # output path
    output_path = FLAGS.output_path
    if not output_path:
        output_path = '.'  # os.path.basename(os.path.dirname(data_dir+os.sep)) + '.tfrecord'
    logging.info('Preparing to write samples to {}'.format(output_path))

    # Default split ratio 6:2:2 for train/valid/test.
    sample_name = os.path.basename(os.path.dirname(data_dir + os.sep))
    output_train = os.path.join(output_path, sample_name + '_train.tfrecord')
    output_valid = os.path.join(output_path, sample_name + '_valid.tfrecord')
    output_test = os.path.join(output_path, sample_name + '_test.tfrecord')

    writers = {
        output_train: tf.io.TFRecordWriter(output_train),
        output_valid: tf.io.TFRecordWriter(output_valid),
        output_test: tf.io.TFRecordWriter(output_test),
    }

    for year in years:
        logging.info('Reading from PASCAL %s dataset.', year)

        examples_path = gen_image_set(FLAGS.data_dir, year)
        examples_list = dataset_util.read_examples_list(examples_path)

        annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)

        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.io.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str.encode('utf-8'))
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data, FLAGS.data_dir, year, label_map_dict,
                                            FLAGS.ignore_difficult_instances)

            # 1-60 -> train, 61-80 -> valid, 81-100 -> test (60:20:20 split).
            random_val = random.randint(1, 100)
            writer = writers[output_train]

            if 60 < random_val <= 80:
                writer = writers[output_valid]
            elif random_val > 80:
                writer = writers[output_test]

            writer.write(tf_example.SerializeToString())

    for writer in writers.values():
        writer.close()
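The randint routing above gives a 60:20:20 split; a quick stdlib-only sanity check of those fractions:

import random
from collections import Counter

# Simulate the routing logic above to confirm the 60:20:20 ratio.
counts = Counter()
for _ in range(100000):
    v = random.randint(1, 100)
    if 60 < v <= 80:
        counts['valid'] += 1
    elif v > 80:
        counts['test'] += 1
    else:
        counts['train'] += 1
print({k: round(c / 100000, 3) for k, c in counts.items()})
# ~{'train': 0.6, 'valid': 0.2, 'test': 0.2}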
def xxx(fs):
    for idx, example in enumerate(fs):
        print("idx", idx)
        # Skip the first 10 examples.
        if idx < 10:
            continue

        image_path = example[0]
        annotation_file = example[1]
        print("image_path", image_path)
        print("annotation_file", annotation_file)
        assert os.path.basename(image_path).replace(
            ".jpg",
            "") == os.path.basename(annotation_file).replace(".xml", "")
        with tf.gfile.GFile(annotation_file, 'r') as fid:
            xml_str = fid.read()
        try:
            xml = etree.fromstring(xml_str)
        except Exception as ex:
            # lxml rejects unicode strings that carry an encoding
            # declaration; drop the first line and retry.
            print(ex)
            xml_str = "\n".join(xml_str.split("\n")[1:])
            xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        full_path = image_path
        with tf.gfile.GFile(full_path, 'rb') as fid:
            encoded_jpg = fid.read()
        encoded_jpg_io = io.BytesIO(encoded_jpg)
        image = PIL.Image.open(encoded_jpg_io)

        width = int(data['size']['width'])
        height = int(data['size']['height'])

        print("width", width)
        print("height", height)

        xmin = []
        ymin = []
        xmax = []
        ymax = []
        if 'object' in data:
            for obj in data['object']:
                # Keep only objects whose class id is 2.
                if int(obj['name']) != 2:
                    continue

                obj_xmin = float(obj['bndbox']['xmin'])
                obj_ymin = float(obj['bndbox']['ymin'])
                obj_xmax = float(obj['bndbox']['xmax'])
                obj_ymax = float(obj['bndbox']['ymax'])

                # assert width > obj_xmin > 0
                # assert height > obj_ymin > 0
                # assert width > obj_xmax > 0
                # assert height > obj_ymax > 0
                #
                # assert obj_xmin < obj_xmax
                # assert obj_ymin < obj_ymax

                xmin.append(obj_xmin / width)
                ymin.append(obj_ymin / height)
                xmax.append(obj_xmax / width)
                ymax.append(obj_ymax / height)

                # xmin.append(obj_xmin)
                # ymin.append(obj_ymin)
                # xmax.append(obj_xmax)
                # ymax.append(obj_ymax)

        bboxes = np.array([ymin, xmin, ymax, xmax]).transpose([1, 0])

        image = image.convert("RGB")
        draw_bounding_boxes_on_image(image, bboxes, color="red", thickness=4)

        image.show()