Example no. 1
0
    def _load_trainvalsplit(self, ):
        """Load the trainval2014 split: train2014 plus val2014 minus minival.

        Populates self._minival, self._imgs, self._data_size, self._cocos
        and self.classes.
        """
        assert self._split in ['trainval2014']

        ann_dir = os.path.join(self._data_dir, 'annotations')
        minival_file = os.path.join(ann_dir, 'instances_minival2014.json')
        minival2014_url = 'https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0'
        assert os.path.exists(
            minival_file), 'need to download %s minival split to %s' % (
                minival2014_url, minival_file)

        import ujson as json
        with open(minival_file, 'r') as fp:
            self._minival = json.load(fp)

        coco_train = COCO(os.path.join(ann_dir, 'instances_train2014.json'))
        coco_val = COCO(os.path.join(ann_dir, 'instances_val2014.json'))

        # image 320612 is skipped everywhere in this file (damaged image);
        # the third tuple element indexes into self._cocos
        train_imgs = [(iid, coco_train.imgs[iid], 0)
                      for iid in coco_train.imgs if iid != 320612]
        val_imgs = [(iid, coco_val.imgs[iid], 1) for iid in coco_val.imgs
                    if iid != 320612 and not self._is_in_minival(iid)]

        imgs = train_imgs + val_imgs

        self._imgs = imgs
        self._data_size = len(imgs)
        self._cocos = (coco_train, coco_val)
        cats = coco_train.loadCats(coco_train.getCatIds())
        self.classes = [u'background'] + [cat['name'] for cat in cats]

        return
Example no. 2
0
    def _load_trainvalsplit(self, ):
        """Load a pre-merged trainval2014 annotation file.

        Populates self._imgs, self._data_size, self._cocos and self.classes.
        """
        assert self._split in ['trainval2014']

        ann_file = os.path.join(self._data_dir, 'annotations',
                                'instances_trainval2014.json')
        coco = COCO(ann_file)

        # image 320612 is skipped (damaged image); trailing 0 indexes
        # into self._cocos
        imgs = [(iid, coco.imgs[iid], 0) for iid in coco.imgs
                if iid != 320612]

        self._imgs = imgs
        self._data_size = len(imgs)
        self._cocos = (coco, )
        cats = coco.loadCats(coco.getCatIds())
        self.classes = [u'background'] + [cat['name'] for cat in cats]

        return
Example no. 3
0
    def _load(self, ):
        """Load a single COCO split (train2014, val2014 or minival2014).

        Populates self._imgs, self._data_size, self._cocos and self.classes.
        """
        assert self._split in ['train2014', 'val2014', 'minival2014']

        ann_file = os.path.join(self._data_dir, 'annotations',
                                'instances_%s.json' % (self._split))
        coco = COCO(ann_file)

        # image 320612 is skipped (damaged image); trailing 0 indexes
        # into self._cocos
        imgs = [(iid, coco.imgs[iid], 0) for iid in coco.imgs
                if iid != 320612]

        self._imgs = imgs
        self._data_size = len(imgs)
        self._cocos = (coco, )
        cats = coco.loadCats(coco.getCatIds())
        self.classes = [u'background'] + [cat['name'] for cat in cats]

        return
def _add_to_tfrecord_trainvalsplit(record_dir, image_dir, annotation_dir,
                                   split_name):
    """Loads image files and writes files to a TFRecord.

    Builds either the trainval2014 split (train2014 + val2014 minus the
    minival images) or the minival2014 split, and serializes each image,
    its instance masks and ground-truth boxes into sharded, ZLIB-compressed
    TFRecord files.

    Args:
        record_dir: directory the TFRecord shards are written to.
        image_dir: root directory holding the train2014/val2014 folders.
        annotation_dir: directory holding the instances_*.json files.
        split_name: either 'trainval2014' or 'minival2014'.

    Note: masks and bboxes will lose shape info after converting to string.
    """

    assert split_name in ['trainval2014', 'minival2014']
    # NOTE: this instances_minival2014.json file cannot be processed by
    # official COCO API, so just use its id list, ['images']['id']
    minival_path = os.path.join(annotation_dir, 'instances_minival2014.json')
    minival2014_url = 'https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0'
    assert os.path.exists(
        minival_path), 'need to download %s minival split to %s' % (
            minival2014_url, minival_path)

    import ujson as json
    with open(minival_path, 'r') as f:
        minival = json.load(f)

    # membership is tested once per image below; scanning minival['images']
    # linearly for every image (as before) made the loop O(N * M)
    minival_ids = set(img['id'] for img in minival['images'])

    annFile = os.path.join(annotation_dir, 'instances_train2014.json')
    coco_train = COCO(annFile)
    annFile = os.path.join(annotation_dir, 'instances_val2014.json')
    coco_val = COCO(annFile)

    imgs1 = [(img_id, coco_train.imgs[img_id]) for img_id in coco_train.imgs]
    imgs2 = [(img_id, coco_val.imgs[img_id]) for img_id in coco_val.imgs]
    imgs = imgs1 + imgs2

    # indices [0, num_of_train) come from coco_train, the rest from coco_val
    num_of_train = len(coco_train.imgs)

    num_per_shard = 2500
    if split_name == 'minival2014':
        num_shards = int(
            np.ceil((len(minival['images']) + 0.0) / num_per_shard))
    else:
        num_shards = int(
            np.ceil((len(imgs) + 0.0 - len(minival['images'])) /
                    num_per_shard))

    with tf.Graph().as_default(), tf.device('/cpu:0'):

        with tf.Session('') as sess:

            cnt = 0
            shard_id = -1
            tfrecord_writer = None
            for i in range(len(imgs)):
                img_id = imgs[i][0]
                img_name = imgs[i][1]['file_name']
                split = img_name.split('_')[1]
                img_name = os.path.join(image_dir, split, img_name)

                # jump over the known-damaged image
                if str(img_id) == '320612':
                    continue
                is_minival = img_id in minival_ids

                if split_name == 'trainval2014' and is_minival:
                    continue
                if split_name == 'minival2014' and not is_minival:
                    continue

                cnt += 1

                # start a new shard every num_per_shard accepted images,
                # closing the previous writer first.  The original closed
                # only inside the loop (on cnt % num_per_shard == 0 or
                # i == len(imgs) - 1) and could leave the last, partial
                # shard unclosed when the trailing images were filtered
                # out by the `continue`s above.
                if cnt % num_per_shard == 1:
                    if tfrecord_writer is not None:
                        tfrecord_writer.close()
                    shard_id += 1
                    record_filename = _get_dataset_filename(
                        record_dir, split_name, shard_id, num_shards)
                    options = tf.python_io.TFRecordOptions(
                        TFRecordCompressionType.ZLIB)
                    tfrecord_writer = tf.python_io.TFRecordWriter(
                        record_filename, options=options)

                if cnt % 100 == 1:
                    print('%d (image_id: %d) of %d, split: %s, shard_id: %d' %
                          (i, img_id, len(imgs), split_name, shard_id))

                # process anns
                height, width = imgs[i][1]['height'], imgs[i][1]['width']
                coco = coco_train if i < num_of_train else coco_val

                gt_boxes, masks, mask = _get_coco_masks(
                    coco, img_id, height, width, img_name)

                # read image as RGB numpy; broadcast gray images to 3 channels
                img = np.array(Image.open(img_name))
                if img.size == height * width:
                    print('Gray Image %s' % str(img_id))
                    im = np.empty((height, width, 3), dtype=np.uint8)
                    im[:, :, :] = img[:, :, np.newaxis]
                    img = im

                img = img.astype(np.uint8)
                assert img.size == width * height * 3, '%s' % str(img_id)

                img_raw = img.tostring()
                mask_raw = mask.tostring()

                example = _to_tfexample_coco_raw(img_id, img_raw, mask_raw,
                                                 height, width,
                                                 gt_boxes.shape[0],
                                                 gt_boxes.tostring(),
                                                 masks.tostring())

                tfrecord_writer.write(example.SerializeToString())

            # close the final (possibly partial) shard
            if tfrecord_writer is not None:
                tfrecord_writer.close()
def _add_to_tfrecord(record_dir, image_dir, annotation_dir, split_name):
    """Loads image files and writes files to a TFRecord.

    Converts one COCO split into sharded, ZLIB-compressed TFRecord files
    holding the raw RGB image, the instance masks and the ground-truth boxes.

    Args:
        record_dir: directory the TFRecord shards are written to.
        image_dir: root directory holding the per-split image folders.
        annotation_dir: directory holding the instances_*.json files.
        split_name: one of 'train2014', 'val2014', 'valminusminival2014',
            'minival2014'.

    Note: masks and bboxes will lose shape info after converting to string.
    """

    assert split_name in [
        'train2014', 'val2014', 'valminusminival2014', 'minival2014'
    ]
    annFile = os.path.join(annotation_dir, 'instances_%s.json' % (split_name))

    coco = COCO(annFile)

    print('%s has %d images' % (split_name, len(coco.imgs)))
    imgs = [(img_id, coco.imgs[img_id]) for img_id in coco.imgs]

    # clamp to at least one shard: int(len(imgs) / 2500) is 0 for splits
    # smaller than 2500 images and the division below would then raise
    # ZeroDivisionError
    num_shards = max(1, int(len(imgs) / 2500))
    num_per_shard = int(math.ceil(len(imgs) / float(num_shards)))

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        with tf.Session('') as sess:
            for shard_id in range(num_shards):
                record_filename = _get_dataset_filename(
                    record_dir, split_name, shard_id, num_shards)
                options = tf.python_io.TFRecordOptions(
                    TFRecordCompressionType.ZLIB)
                with tf.python_io.TFRecordWriter(
                        record_filename, options=options) as tfrecord_writer:
                    start_ndx = shard_id * num_per_shard
                    end_ndx = min((shard_id + 1) * num_per_shard, len(imgs))
                    for i in range(start_ndx, end_ndx):
                        if i % 50 == 0:
                            sys.stdout.write(
                                '\r>> Converting image %d/%d shard %d\n' %
                                (i + 1, len(imgs), shard_id))
                            sys.stdout.flush()

                        # image id and path; the split folder is encoded in
                        # the file name, e.g. COCO_train2014_000000xxxx.jpg
                        img_id = imgs[i][0]
                        img_name = imgs[i][1]['file_name']
                        split = img_name.split('_')[1]
                        img_name = os.path.join(image_dir, split, img_name)

                        if FLAGS.vis:
                            im = Image.open(img_name)
                            im.save('img.png')
                            plt.figure(0)
                            plt.axis('off')
                            plt.imshow(im)

                        # jump over the damaged images
                        if str(img_id) == '320612':
                            continue

                        # process anns
                        height, width = imgs[i][1]['height'], imgs[i][1][
                            'width']
                        gt_boxes, masks, mask = _get_coco_masks(
                            coco, img_id, height, width, img_name)

                        # read image as RGB numpy; broadcast gray images
                        # to 3 channels
                        img = np.array(Image.open(img_name))
                        if img.size == height * width:
                            print('Gray Image %s' % str(img_id))
                            im = np.empty((height, width, 3), dtype=np.uint8)
                            im[:, :, :] = img[:, :, np.newaxis]
                            img = im

                        img = img.astype(np.uint8)
                        assert img.size == width * height * 3, '%s' % str(
                            img_id)

                        img_raw = img.tostring()
                        mask_raw = mask.tostring()

                        example = _to_tfexample_coco_raw(
                            img_id, img_raw, mask_raw, height, width,
                            gt_boxes.shape[0], gt_boxes.tostring(),
                            masks.tostring())

                        tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()
Example no. 6
0
def _add_to_tfrecord(record_dir, image_dir, annotation_dir, split_name):
    """Loads image files and writes files to a TFRecord.

    Converts train2014 or val2014 into sharded, ZLIB-compressed TFRecord
    files storing the encoded JPEG, a PNG-encoded label mask, and the
    instance classes, boxes and masks.

    Args:
        record_dir: directory the TFRecord shards are written to.
        image_dir: root directory holding the train2014/val2014 folders.
        annotation_dir: directory holding the instances_*.json files.
        split_name: 'train2014' or 'val2014'.

    Note: masks and bboxes will lose shape info after converting to string.
    """

    assert split_name in ['train2014', 'val2014']
    annFile = os.path.join(annotation_dir, 'instances_%s.json' % (split_name))

    coco = COCO(annFile)

    print('%s has %d images' % (split_name, len(coco.imgs)))
    imgs = [(img_id, coco.imgs[img_id]) for img_id in coco.imgs]

    num_shards = 40 if split_name == 'train2014' else 20
    num_per_shard = int(math.ceil(len(imgs) / float(num_shards)))

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        image_reader = ImageReader()

        # encode mask to png_string
        mask_placeholder = tf.placeholder(dtype=tf.uint8)
        encoded_image = tf.image.encode_png(mask_placeholder)

        with tf.Session('') as sess:
            for shard_id in range(num_shards):
                record_filename = _get_dataset_filename(
                    record_dir, split_name, shard_id, num_shards)
                options = tf.python_io.TFRecordOptions(
                    TFRecordCompressionType.ZLIB)
                with tf.python_io.TFRecordWriter(
                        record_filename, options=options) as tfrecord_writer:
                    start_ndx = shard_id * num_per_shard
                    end_ndx = min((shard_id + 1) * num_per_shard, len(imgs))
                    for i in range(start_ndx, end_ndx):
                        sys.stdout.write(
                            '\r>> Converting image %d/%d shard %d\n' %
                            (i + 1, len(imgs), shard_id))
                        sys.stdout.flush()

                        # image id and path
                        img_id = imgs[i][0]
                        img_name = imgs[i][1]['file_name']
                        img_name = os.path.join(image_dir, split_name,
                                                img_name)

                        if FLAGS.vis:
                            im = Image.open(img_name)
                            im.save('img.png')
                            plt.figure(0)
                            plt.axis('off')
                            plt.imshow(im)

                        # jump over the damaged images
                        if split_name == 'val2014' and str(img_id) == '320612':
                            continue

                        # process anns
                        h, w = imgs[i][1]['height'], imgs[i][1]['width']
                        classes, bboxes, masks, mask = _get_coco_masks(
                            coco, img_id, h, w)
                        assert classes.shape[0] == bboxes.shape[0] == masks.shape[0], \
                          'Check number of instances for %s' % (img_name)
                        # this encode matrix to png format string buff
                        label_data = sess.run(encoded_image,
                                              feed_dict={
                                                  mask_placeholder:
                                                  np.expand_dims(mask, axis=2)
                                              })

                        # read image bytes; JPEG is binary so the file must
                        # be opened in 'rb' mode ('r' fails on Python 3 and
                        # mangles bytes on Windows)
                        assert os.path.exists(
                            img_name), '%s dont exists' % img_name
                        image_data = tf.gfile.FastGFile(img_name, 'rb').read()
                        height, width, depth = image_reader.read_jpeg_dims(
                            sess, image_data)

                        # to tf-record
                        example = _to_tfexample_v2(image_data, 'jpg',
                                                   label_data, 'png', height,
                                                   width, classes.shape[0],
                                                   classes.tolist(),
                                                   bboxes.tostring(),
                                                   masks.tostring())
                        tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()
def _add_to_tfrecord_trainvalsplit(record_dir, image_dir, annotation_dir,
                                   split_name):
    """Loads image files and writes files to a TFRecord.

    Builds either the trainval2014 split (train2014 + val2014 minus the
    minival images) or the minival2014 split, and serializes each image,
    its instance masks and ground-truth boxes into sharded, ZLIB-compressed
    TFRecord files.

    Args:
        record_dir: directory the TFRecord shards are written to.
        image_dir: root directory holding the train2014/val2014 folders.
        annotation_dir: directory holding the instances_*.json files.
        split_name: either 'trainval2014' or 'minival2014'.

    Note: masks and bboxes will lose shape info after converting to string.
    """

    assert split_name in ['trainval2014', 'minival2014']
    # NOTE: this instances_minival2014.json file cannot be processed by
    # official COCO API, so just use its id list, ['images']['id']
    minival_path = os.path.join(annotation_dir, 'instances_minival2014.json')
    minival2014_url = 'https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0'
    assert os.path.exists(
        minival_path), 'need to download %s minival split to %s' % (
            minival2014_url, minival_path)

    import ujson as json
    with open(minival_path, 'r') as f:
        minival = json.load(f)

    # membership is tested once per image below; scanning minival['images']
    # linearly for every image (as before) made the loop O(N * M)
    minival_ids = set(img['id'] for img in minival['images'])

    annFile = os.path.join(annotation_dir, 'instances_train2014.json')
    coco_train = COCO(annFile)
    annFile = os.path.join(annotation_dir, 'instances_val2014.json')
    coco_val = COCO(annFile)

    imgs1 = [(img_id, coco_train.imgs[img_id]) for img_id in coco_train.imgs]
    imgs2 = [(img_id, coco_val.imgs[img_id]) for img_id in coco_val.imgs]
    imgs = imgs1 + imgs2

    # indices [0, num_of_train) come from coco_train, the rest from coco_val
    num_of_train = len(coco_train.imgs)

    num_per_shard = 2500
    if split_name == 'minival2014':
        num_shards = int(
            np.ceil((len(minival['images']) + 0.0) / num_per_shard))
    else:
        num_shards = int(
            np.ceil((len(imgs) + 0.0 - len(minival['images'])) /
                    num_per_shard))

    with tf.Graph().as_default(), tf.device('/cpu:0'):

        with tf.Session('') as sess:

            cnt = 0
            shard_id = -1
            tfrecord_writer = None
            for i in range(len(imgs)):
                img_id = imgs[i][0]
                img_name = imgs[i][1]['file_name']
                split = img_name.split('_')[1]
                img_name = os.path.join(image_dir, split, img_name)

                # jump over the known-damaged image
                if str(img_id) == '320612':
                    continue
                is_minival = img_id in minival_ids

                if split_name == 'trainval2014' and is_minival:
                    continue
                if split_name == 'minival2014' and not is_minival:
                    continue

                cnt += 1

                # start a new shard every num_per_shard accepted images,
                # closing the previous writer first.  The original closed
                # only inside the loop (on cnt % num_per_shard == 0 or
                # i == len(imgs) - 1) and could leave the last, partial
                # shard unclosed when the trailing images were filtered
                # out by the `continue`s above.
                if cnt % num_per_shard == 1:
                    if tfrecord_writer is not None:
                        tfrecord_writer.close()
                    shard_id += 1
                    record_filename = _get_dataset_filename(
                        record_dir, split_name, shard_id, num_shards)
                    options = tf.python_io.TFRecordOptions(
                        TFRecordCompressionType.ZLIB)
                    tfrecord_writer = tf.python_io.TFRecordWriter(
                        record_filename, options=options)

                if cnt % 100 == 1:
                    print('%d (image_id: %d) of %d, split: %s, shard_id: %d' %
                          (i, img_id, len(imgs), split_name, shard_id))

                # process anns
                height, width = imgs[i][1]['height'], imgs[i][1]['width']
                coco = coco_train if i < num_of_train else coco_val

                gt_boxes, masks, mask = _get_coco_masks(
                    coco, img_id, height, width, img_name)

                # read image as RGB numpy; broadcast gray images to 3 channels
                img = np.array(Image.open(img_name))
                if img.size == height * width:
                    print('Gray Image %s' % str(img_id))
                    im = np.empty((height, width, 3), dtype=np.uint8)
                    im[:, :, :] = img[:, :, np.newaxis]
                    img = im

                img = img.astype(np.uint8)
                assert img.size == width * height * 3, '%s' % str(img_id)

                img_raw = img.tostring()
                mask_raw = mask.tostring()

                example = _to_tfexample_coco_raw(img_id, img_raw, mask_raw,
                                                 height, width,
                                                 gt_boxes.shape[0],
                                                 gt_boxes.tostring(),
                                                 masks.tostring())

                tfrecord_writer.write(example.SerializeToString())

            # close the final (possibly partial) shard
            if tfrecord_writer is not None:
                tfrecord_writer.close()
def _add_to_tfrecord(record_dir, image_dir, annotation_dir, split_name):
    """Loads image files and writes files to a TFRecord.

    Converts one COCO split into sharded, ZLIB-compressed TFRecord files
    holding the raw RGB image, the instance masks and the ground-truth boxes.

    Args:
        record_dir: directory the TFRecord shards are written to.
        image_dir: root directory holding the per-split image folders.
        annotation_dir: directory holding the instances_*.json files.
        split_name: one of 'train2014', 'val2014', 'valminusminival2014',
            'minival2014'.

    Note: masks and bboxes will lose shape info after converting to string.
    """

    assert split_name in [
        'train2014', 'val2014', 'valminusminival2014', 'minival2014'
    ]
    annFile = os.path.join(annotation_dir, 'instances_%s.json' % (split_name))

    coco = COCO(annFile)

    print('%s has %d images' % (split_name, len(coco.imgs)))
    imgs = [(img_id, coco.imgs[img_id]) for img_id in coco.imgs]

    # clamp to at least one shard: int(len(imgs) / 2500) is 0 for splits
    # smaller than 2500 images and the division below would then raise
    # ZeroDivisionError
    num_shards = max(1, int(len(imgs) / 2500))
    num_per_shard = int(math.ceil(len(imgs) / float(num_shards)))

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        with tf.Session('') as sess:
            for shard_id in range(num_shards):
                record_filename = _get_dataset_filename(
                    record_dir, split_name, shard_id, num_shards)
                options = tf.python_io.TFRecordOptions(
                    TFRecordCompressionType.ZLIB)
                with tf.python_io.TFRecordWriter(
                        record_filename, options=options) as tfrecord_writer:
                    start_ndx = shard_id * num_per_shard
                    end_ndx = min((shard_id + 1) * num_per_shard, len(imgs))
                    for i in range(start_ndx, end_ndx):
                        if i % 50 == 0:
                            sys.stdout.write(
                                '\r>> Converting image %d/%d shard %d\n' %
                                (i + 1, len(imgs), shard_id))
                            sys.stdout.flush()

                        # image id and path; the split folder is encoded in
                        # the file name, e.g. COCO_train2014_000000xxxx.jpg
                        img_id = imgs[i][0]
                        img_name = imgs[i][1]['file_name']
                        split = img_name.split('_')[1]
                        img_name = os.path.join(image_dir, split, img_name)

                        if FLAGS.vis:
                            im = Image.open(img_name)
                            im.save('img.png')
                            plt.figure(0)
                            plt.axis('off')
                            plt.imshow(im)

                        # jump over the damaged images
                        if str(img_id) == '320612':
                            continue

                        # process anns
                        height, width = imgs[i][1]['height'], imgs[i][1][
                            'width']
                        gt_boxes, masks, mask = _get_coco_masks(
                            coco, img_id, height, width, img_name)

                        # read image as RGB numpy; broadcast gray images
                        # to 3 channels
                        img = np.array(Image.open(img_name))
                        if img.size == height * width:
                            print('Gray Image %s' % str(img_id))
                            im = np.empty((height, width, 3), dtype=np.uint8)
                            im[:, :, :] = img[:, :, np.newaxis]
                            img = im

                        img = img.astype(np.uint8)
                        assert img.size == width * height * 3, '%s' % str(
                            img_id)

                        img_raw = img.tostring()
                        mask_raw = mask.tostring()

                        example = _to_tfexample_coco_raw(
                            img_id, img_raw, mask_raw, height, width,
                            gt_boxes.shape[0], gt_boxes.tostring(),
                            masks.tostring())

                        tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()