def _load_trainvalsplit(self, ):
    """Populate dataset state for the combined train+val split minus minival.

    Loads the train2014 and val2014 COCO annotation files, drops any val
    image belonging to the minival id list (and the known-damaged image),
    and records image entries, COCO handles and class names on ``self``.
    """
    assert self._split in ['trainval2014']
    annotation_dir = os.path.join(self._data_dir, 'annotations')
    minival_path = os.path.join(annotation_dir, 'instances_minival2014.json')
    minival2014_url = 'https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0'
    assert os.path.exists(
        minival_path), 'need to download %s minival split to %s' % (
            minival2014_url, minival_path)
    import ujson as json
    with open(minival_path, 'r') as fp:
        self._minival = json.load(fp)

    coco_train = COCO(os.path.join(annotation_dir, 'instances_train2014.json'))
    coco_val = COCO(os.path.join(annotation_dir, 'instances_val2014.json'))

    damaged_id = 320612  # known-bad image, always excluded
    # third tuple field marks the source COCO handle: 0 -> train, 1 -> val
    train_entries = [(iid, coco_train.imgs[iid], 0)
                     for iid in coco_train.imgs
                     if iid != damaged_id]
    val_entries = [(iid, coco_val.imgs[iid], 1)
                   for iid in coco_val.imgs
                   if not self._is_in_minival(iid) and iid != damaged_id]
    entries = train_entries + val_entries

    self._data_size = len(entries)
    self._imgs = entries
    self._cocos = (coco_train, coco_val)
    category_names = [
        cls['name'] for cls in coco_train.loadCats(coco_train.getCatIds())
    ]
    self.classes = [u'background'] + category_names
    return
def _load_trainvalsplit(self, ):
    """Populate dataset state from a single merged trainval2014 annotation file.

    Reads ``instances_trainval2014.json``, skips the known-damaged image,
    and records image entries, the COCO handle and class names on ``self``.
    """
    assert self._split in ['trainval2014']
    ann_path = os.path.join(self._data_dir, 'annotations',
                            'instances_trainval2014.json')
    coco_train = COCO(ann_path)

    damaged_id = 320612  # known-bad image, always excluded
    entries = []
    for iid in coco_train.imgs:
        if iid == damaged_id:
            continue
        entries.append((iid, coco_train.imgs[iid], 0))

    self._data_size = len(entries)
    self._imgs = entries
    self._cocos = (coco_train, )
    self.classes = [u'background'] + [
        c['name'] for c in coco_train.loadCats(coco_train.getCatIds())
    ]
    return
def _load(self, ):
    """Populate dataset state for one plain COCO 2014 split.

    Loads ``instances_<split>.json``, skips the known-damaged image
    (id 320612), and records image entries, the COCO handle and class
    names on ``self``.
    """
    assert self._split in ['train2014', 'val2014', 'minival2014']
    ann_file = os.path.join(self._data_dir, 'annotations',
                            'instances_%s.json' % (self._split))
    coco = COCO(ann_file)
    # image id 320612 is damaged; filter it out while building entries
    entries = [(iid, coco.imgs[iid], 0)
               for iid in coco.imgs
               if iid != 320612]
    self._data_size = len(entries)
    self._imgs = entries
    self._cocos = (coco, )
    self.classes = [u'background'] + [
        c['name'] for c in coco.loadCats(coco.getCatIds())
    ]
    return
def _add_to_tfrecord_trainvalsplit(record_dir, image_dir, annotation_dir,
                                   split_name):
    """Loads image files and writes files to a TFRecord.

    Writes either the trainval-minus-minival split or the minival split of
    COCO 2014 into sharded, ZLIB-compressed TFRecord files (2500 images per
    shard) containing raw RGB bytes, instance boxes and masks.

    Note: masks and bboxes will lose shape info after converting to string.

    Args:
        record_dir: directory to write the TFRecord shards into.
        image_dir: root directory containing train2014/ and val2014/ folders.
        annotation_dir: directory holding the instances_*.json files.
        split_name: 'trainval2014' or 'minival2014'.
    """
    assert split_name in ['trainval2014', 'minival2014']
    # NOTE: this instances_minival2014.json file cannot be processed by official COCO API,
    # so just use its id list, ['images']['id']
    minival_path = os.path.join(annotation_dir, 'instances_minival2014.json')
    minival2014_url = 'https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0'
    assert os.path.exists(
        minival_path), 'need to download %s minival split to %s' % (
            minival2014_url, minival_path)
    import ujson as json
    with open(minival_path, 'r') as f:
        minival = json.load(f)

    # FIX: membership used to be a linear scan of minival['images'] for every
    # image (O(n*m) overall); a precomputed set makes each lookup O(1).
    minival_ids = set(img['id'] for img in minival['images'])

    coco_train = COCO(os.path.join(annotation_dir, 'instances_train2014.json'))
    coco_val = COCO(os.path.join(annotation_dir, 'instances_val2014.json'))

    imgs1 = [(img_id, coco_train.imgs[img_id]) for img_id in coco_train.imgs]
    imgs2 = [(img_id, coco_val.imgs[img_id]) for img_id in coco_val.imgs]
    imgs = imgs1 + imgs2
    # indices below num_of_train come from coco_train, the rest from coco_val
    num_of_train = len(coco_train.imgs)

    num_per_shard = 2500
    num_shards = int(
        np.ceil((len(imgs) + 0.0 - len(minival['images'])) / num_per_shard))
    if split_name == 'minival2014':
        num_shards = int(
            np.ceil((len(minival['images']) + 0.0) / num_per_shard))

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # NOTE(review): image_reader/encoded_image are not used below; kept
        # because constructing them adds ops to the graph (conservative).
        image_reader = ImageReader()
        # encode mask to png_string
        mask_placeholder = tf.placeholder(dtype=tf.uint8)
        encoded_image = tf.image.encode_png(mask_placeholder)
        with tf.Session('') as sess:
            cnt = 0
            shard_id = -1
            tfrecord_writer = None  # FIX: track the open writer explicitly
            for i in range(len(imgs)):
                img_id = imgs[i][0]
                img_name = imgs[i][1]['file_name']
                # file names look like COCO_train2014_000000xxxxxx.jpg;
                # the middle token selects the image sub-directory
                split = img_name.split('_')[1]
                img_name = os.path.join(image_dir, split, img_name)
                # jump over the damaged image
                if str(img_id) == '320612':
                    continue
                is_minival = img_id in minival_ids
                if split_name == 'trainval2014' and is_minival:
                    continue
                if split_name == 'minival2014' and not is_minival:
                    continue
                cnt += 1
                if cnt % num_per_shard == 1:
                    # first image of a new shard: open its writer
                    shard_id += 1
                    record_filename = _get_dataset_filename(
                        record_dir, split_name, shard_id, num_shards)
                    options = tf.python_io.TFRecordOptions(
                        TFRecordCompressionType.ZLIB)
                    tfrecord_writer = tf.python_io.TFRecordWriter(
                        record_filename, options=options)
                if cnt % 100 == 1:
                    print('%d (image_id: %d) of %d, split: %s, shard_id: %d' %
                          (i, img_id, len(imgs), split_name, shard_id))
                # process anns
                height, width = imgs[i][1]['height'], imgs[i][1]['width']
                coco = coco_train if i < num_of_train else coco_val
                gt_boxes, masks, mask = _get_coco_masks(
                    coco, img_id, height, width, img_name)
                # read image as RGB numpy
                img = np.array(Image.open(img_name))
                if img.size == height * width:
                    # grayscale image: replicate the single channel to RGB
                    print('Gray Image %s' % str(img_id))
                    im = np.empty((height, width, 3), dtype=np.uint8)
                    im[:, :, :] = img[:, :, np.newaxis]
                    img = im
                img = img.astype(np.uint8)
                assert img.size == width * height * 3, '%s' % str(img_id)
                img_raw = img.tostring()
                mask_raw = mask.tostring()
                example = _to_tfexample_coco_raw(img_id, img_raw, mask_raw,
                                                 height, width,
                                                 gt_boxes.shape[0],
                                                 gt_boxes.tostring(),
                                                 masks.tostring())
                tfrecord_writer.write(example.SerializeToString())
                if cnt % num_per_shard == 0:
                    # shard full: close it; the next kept image opens a new one
                    tfrecord_writer.close()
                    tfrecord_writer = None
            # FIX: the old `i == len(imgs) - 1` close was skipped whenever the
            # final images were filtered out via `continue`, leaving the last
            # shard unclosed/unflushed. Always close any writer still open.
            if tfrecord_writer is not None:
                tfrecord_writer.close()
def _add_to_tfrecord(record_dir, image_dir, annotation_dir, split_name):
    """Loads image files and writes files to a TFRecord.

    Converts one COCO 2014 split into sharded, ZLIB-compressed TFRecord
    files (~2500 images per shard) containing raw RGB bytes, instance
    boxes and masks.

    Note: masks and bboxes will lose shape info after converting to string.

    Args:
        record_dir: directory to write the TFRecord shards into.
        image_dir: root directory containing the per-split image folders.
        annotation_dir: directory holding the instances_*.json files.
        split_name: one of 'train2014', 'val2014', 'valminusminival2014',
            'minival2014'.
    """
    assert split_name in [
        'train2014', 'val2014', 'valminusminival2014', 'minival2014'
    ]
    annFile = os.path.join(annotation_dir, 'instances_%s.json' % (split_name))
    coco = COCO(annFile)

    print('%s has %d images' % (split_name, len(coco.imgs)))
    imgs = [(img_id, coco.imgs[img_id]) for img_id in coco.imgs]

    # FIX: int(len(imgs) / 2500) is 0 for any split with fewer than 2500
    # images, which made the num_per_shard division crash with
    # ZeroDivisionError. Always use at least one shard.
    num_shards = max(1, int(len(imgs) / 2500))
    num_per_shard = int(math.ceil(len(imgs) / float(num_shards)))

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # NOTE(review): image_reader/encoded_image are not used below; kept
        # because constructing them adds ops to the graph (conservative).
        image_reader = ImageReader()
        # encode mask to png_string
        mask_placeholder = tf.placeholder(dtype=tf.uint8)
        encoded_image = tf.image.encode_png(mask_placeholder)
        with tf.Session('') as sess:
            for shard_id in range(num_shards):
                record_filename = _get_dataset_filename(
                    record_dir, split_name, shard_id, num_shards)
                options = tf.python_io.TFRecordOptions(
                    TFRecordCompressionType.ZLIB)
                with tf.python_io.TFRecordWriter(
                        record_filename, options=options) as tfrecord_writer:
                    start_ndx = shard_id * num_per_shard
                    end_ndx = min((shard_id + 1) * num_per_shard, len(imgs))
                    for i in range(start_ndx, end_ndx):
                        if i % 50 == 0:
                            sys.stdout.write(
                                '\r>> Converting image %d/%d shard %d\n' %
                                (i + 1, len(imgs), shard_id))
                            sys.stdout.flush()
                        # image id and path
                        img_id = imgs[i][0]
                        img_name = imgs[i][1]['file_name']
                        # middle token of COCO_<split>_<id>.jpg names the
                        # image sub-directory
                        split = img_name.split('_')[1]
                        img_name = os.path.join(image_dir, split, img_name)
                        if FLAGS.vis:
                            im = Image.open(img_name)
                            im.save('img.png')
                            plt.figure(0)
                            plt.axis('off')
                            plt.imshow(im)
                            # plt.show()
                            # plt.close()
                        # jump over the damaged images
                        if str(img_id) == '320612':
                            continue
                        # process anns
                        height, width = imgs[i][1]['height'], imgs[i][1][
                            'width']
                        gt_boxes, masks, mask = _get_coco_masks(
                            coco, img_id, height, width, img_name)
                        # read image as RGB numpy
                        img = np.array(Image.open(img_name))
                        if img.size == height * width:
                            # grayscale: replicate single channel to RGB
                            print('Gray Image %s' % str(img_id))
                            im = np.empty((height, width, 3), dtype=np.uint8)
                            im[:, :, :] = img[:, :, np.newaxis]
                            img = im
                        img = img.astype(np.uint8)
                        assert img.size == width * height * 3, '%s' % str(
                            img_id)
                        img_raw = img.tostring()
                        mask_raw = mask.tostring()
                        example = _to_tfexample_coco_raw(
                            img_id, img_raw, mask_raw, height, width,
                            gt_boxes.shape[0], gt_boxes.tostring(),
                            masks.tostring())
                        tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()
def _add_to_tfrecord(record_dir, image_dir, annotation_dir, split_name):
    """Loads image files and writes files to a TFRecord.

    Converts one COCO 2014 split into sharded, ZLIB-compressed TFRecord
    files containing the encoded JPEG bytes, a PNG-encoded label mask,
    per-instance classes, boxes and masks.

    Note: masks and bboxes will lose shape info after converting to string.

    Args:
        record_dir: directory to write the TFRecord shards into.
        image_dir: root directory containing the per-split image folders.
        annotation_dir: directory holding the instances_*.json files.
        split_name: 'train2014' or 'val2014'.
    """
    assert split_name in ['train2014', 'val2014']
    annFile = os.path.join(annotation_dir, 'instances_%s.json' % (split_name))
    coco = COCO(annFile)

    print('%s has %d images' % (split_name, len(coco.imgs)))
    imgs = [(img_id, coco.imgs[img_id]) for img_id in coco.imgs]

    num_shards = 40 if split_name == 'train2014' else 20
    num_per_shard = int(math.ceil(len(imgs) / float(num_shards)))

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        image_reader = ImageReader()
        # encode mask to png_string
        mask_placeholder = tf.placeholder(dtype=tf.uint8)
        encoded_image = tf.image.encode_png(mask_placeholder)
        with tf.Session('') as sess:
            for shard_id in range(num_shards):
                record_filename = _get_dataset_filename(
                    record_dir, split_name, shard_id, num_shards)
                options = tf.python_io.TFRecordOptions(
                    TFRecordCompressionType.ZLIB)
                with tf.python_io.TFRecordWriter(
                        record_filename, options=options) as tfrecord_writer:
                    start_ndx = shard_id * num_per_shard
                    end_ndx = min((shard_id + 1) * num_per_shard, len(imgs))
                    for i in range(start_ndx, end_ndx):
                        sys.stdout.write(
                            '\r>> Converting image %d/%d shard %d\n' %
                            (i + 1, len(imgs), shard_id))
                        sys.stdout.flush()
                        # image id and path
                        img_id = imgs[i][0]
                        img_name = imgs[i][1]['file_name']
                        img_name = os.path.join(image_dir, split_name,
                                                img_name)
                        if FLAGS.vis:
                            im = Image.open(img_name)
                            im.save('img.png')
                            plt.figure(0)
                            plt.axis('off')
                            plt.imshow(im)
                            # plt.show()
                            # plt.close()
                        # jump over the damaged images
                        if split_name == 'val2014' and str(
                                img_id) == '320612':
                            continue
                        # process anns
                        h, w = imgs[i][1]['height'], imgs[i][1]['width']
                        classes, bboxes, masks, mask = _get_coco_masks(
                            coco, img_id, h, w)
                        assert classes.shape[0] == bboxes.shape[0] == masks.shape[0], \
                            'Check number of instances for %s' % (img_name)
                        # this encode matrix to png format string buff
                        label_data = sess.run(
                            encoded_image,
                            feed_dict={
                                mask_placeholder: np.expand_dims(mask, axis=2)
                            })
                        # read image
                        assert os.path.exists(
                            img_name), '%s dont exists' % img_name
                        # FIX: JPEG bytes must be read in binary mode; text
                        # mode 'r' breaks under Python 3 and corrupts the
                        # stream on platforms with newline translation.
                        image_data = tf.gfile.FastGFile(img_name, 'rb').read()
                        height, width, depth = image_reader.read_jpeg_dims(
                            sess, image_data)
                        # to tf-record
                        example = _to_tfexample_v2(image_data, 'jpg',
                                                   label_data, 'png', height,
                                                   width, classes.shape[0],
                                                   classes.tolist(),
                                                   bboxes.tostring(),
                                                   masks.tostring())
                        tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()
def _add_to_tfrecord_trainvalsplit(record_dir, image_dir, annotation_dir,
                                   split_name):
    """Loads image files and writes files to a TFRecord.

    Writes either the trainval-minus-minival split or the minival split of
    COCO 2014 into sharded, ZLIB-compressed TFRecord files (2500 images per
    shard) containing raw RGB bytes, instance boxes and masks.

    Note: masks and bboxes will lose shape info after converting to string.

    Args:
        record_dir: directory to write the TFRecord shards into.
        image_dir: root directory containing train2014/ and val2014/ folders.
        annotation_dir: directory holding the instances_*.json files.
        split_name: 'trainval2014' or 'minival2014'.
    """
    assert split_name in ['trainval2014', 'minival2014']
    # NOTE: this instances_minival2014.json file cannot be processed by official COCO API,
    # so just use its id list, ['images']['id']
    minival_path = os.path.join(annotation_dir, 'instances_minival2014.json')
    minival2014_url = 'https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0'
    assert os.path.exists(minival_path), 'need to download %s minival split to %s' % (minival2014_url, minival_path)
    import ujson as json
    with open(minival_path, 'r') as f:
        minival = json.load(f)

    # FIX: membership used to be a linear scan of minival['images'] for every
    # image (O(n*m) overall); a precomputed set makes each lookup O(1).
    minival_ids = set(img['id'] for img in minival['images'])

    coco_train = COCO(os.path.join(annotation_dir, 'instances_train2014.json'))
    coco_val = COCO(os.path.join(annotation_dir, 'instances_val2014.json'))

    imgs1 = [(img_id, coco_train.imgs[img_id]) for img_id in coco_train.imgs]
    imgs2 = [(img_id, coco_val.imgs[img_id]) for img_id in coco_val.imgs]
    imgs = imgs1 + imgs2
    # indices below num_of_train come from coco_train, the rest from coco_val
    num_of_train = len(coco_train.imgs)

    num_per_shard = 2500
    num_shards = int(np.ceil((len(imgs) + 0.0 - len(minival['images'])) / num_per_shard))
    if split_name == 'minival2014':
        num_shards = int(np.ceil((len(minival['images']) + 0.0) / num_per_shard))

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # NOTE(review): image_reader/encoded_image are not used below; kept
        # because constructing them adds ops to the graph (conservative).
        image_reader = ImageReader()
        # encode mask to png_string
        mask_placeholder = tf.placeholder(dtype=tf.uint8)
        encoded_image = tf.image.encode_png(mask_placeholder)
        with tf.Session('') as sess:
            cnt = 0
            shard_id = -1
            tfrecord_writer = None  # FIX: track the open writer explicitly
            for i in range(len(imgs)):
                img_id = imgs[i][0]
                img_name = imgs[i][1]['file_name']
                # file names look like COCO_train2014_000000xxxxxx.jpg;
                # the middle token selects the image sub-directory
                split = img_name.split('_')[1]
                img_name = os.path.join(image_dir, split, img_name)
                # jump over the damaged image
                if str(img_id) == '320612':
                    continue
                is_minival = img_id in minival_ids
                if split_name == 'trainval2014' and is_minival:
                    continue
                if split_name == 'minival2014' and not is_minival:
                    continue
                cnt += 1
                if cnt % num_per_shard == 1:
                    # first image of a new shard: open its writer
                    shard_id += 1
                    record_filename = _get_dataset_filename(record_dir, split_name, shard_id, num_shards)
                    options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB)
                    tfrecord_writer = tf.python_io.TFRecordWriter(record_filename, options=options)
                if cnt % 100 == 1:
                    print('%d (image_id: %d) of %d, split: %s, shard_id: %d' % (i, img_id, len(imgs), split_name, shard_id))
                # process anns
                height, width = imgs[i][1]['height'], imgs[i][1]['width']
                coco = coco_train if i < num_of_train else coco_val
                gt_boxes, masks, mask = _get_coco_masks(coco, img_id, height, width, img_name)
                # read image as RGB numpy
                img = np.array(Image.open(img_name))
                if img.size == height * width:
                    # grayscale image: replicate the single channel to RGB
                    print('Gray Image %s' % str(img_id))
                    im = np.empty((height, width, 3), dtype=np.uint8)
                    im[:, :, :] = img[:, :, np.newaxis]
                    img = im
                img = img.astype(np.uint8)
                assert img.size == width * height * 3, '%s' % str(img_id)
                img_raw = img.tostring()
                mask_raw = mask.tostring()
                example = _to_tfexample_coco_raw(
                    img_id, img_raw, mask_raw, height, width,
                    gt_boxes.shape[0], gt_boxes.tostring(), masks.tostring())
                tfrecord_writer.write(example.SerializeToString())
                if cnt % num_per_shard == 0:
                    # shard full: close it; the next kept image opens a new one
                    tfrecord_writer.close()
                    tfrecord_writer = None
            # FIX: the old `i == len(imgs)-1` close was skipped whenever the
            # final images were filtered out via `continue`, leaving the last
            # shard unclosed/unflushed. Always close any writer still open.
            if tfrecord_writer is not None:
                tfrecord_writer.close()
def _add_to_tfrecord(record_dir, image_dir, annotation_dir, split_name):
    """Loads image files and writes files to a TFRecord.

    Converts one COCO 2014 split into sharded, ZLIB-compressed TFRecord
    files (~2500 images per shard) containing raw RGB bytes, instance
    boxes and masks.

    Note: masks and bboxes will lose shape info after converting to string.

    Args:
        record_dir: directory to write the TFRecord shards into.
        image_dir: root directory containing the per-split image folders.
        annotation_dir: directory holding the instances_*.json files.
        split_name: one of 'train2014', 'val2014', 'valminusminival2014',
            'minival2014'.
    """
    assert split_name in ['train2014', 'val2014', 'valminusminival2014', 'minival2014']
    annFile = os.path.join(annotation_dir, 'instances_%s.json' % (split_name))
    coco = COCO(annFile)

    print('%s has %d images' % (split_name, len(coco.imgs)))
    imgs = [(img_id, coco.imgs[img_id]) for img_id in coco.imgs]

    # FIX: int(len(imgs) / 2500) is 0 for any split with fewer than 2500
    # images, which made the num_per_shard division crash with
    # ZeroDivisionError. Always use at least one shard.
    num_shards = max(1, int(len(imgs) / 2500))
    num_per_shard = int(math.ceil(len(imgs) / float(num_shards)))

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # NOTE(review): image_reader/encoded_image are not used below; kept
        # because constructing them adds ops to the graph (conservative).
        image_reader = ImageReader()
        # encode mask to png_string
        mask_placeholder = tf.placeholder(dtype=tf.uint8)
        encoded_image = tf.image.encode_png(mask_placeholder)
        with tf.Session('') as sess:
            for shard_id in range(num_shards):
                record_filename = _get_dataset_filename(record_dir, split_name, shard_id, num_shards)
                options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB)
                with tf.python_io.TFRecordWriter(record_filename, options=options) as tfrecord_writer:
                    start_ndx = shard_id * num_per_shard
                    end_ndx = min((shard_id + 1) * num_per_shard, len(imgs))
                    for i in range(start_ndx, end_ndx):
                        if i % 50 == 0:
                            sys.stdout.write('\r>> Converting image %d/%d shard %d\n' % (
                                i + 1, len(imgs), shard_id))
                            sys.stdout.flush()
                        # image id and path
                        img_id = imgs[i][0]
                        img_name = imgs[i][1]['file_name']
                        # middle token of COCO_<split>_<id>.jpg names the
                        # image sub-directory
                        split = img_name.split('_')[1]
                        img_name = os.path.join(image_dir, split, img_name)
                        if FLAGS.vis:
                            im = Image.open(img_name)
                            im.save('img.png')
                            plt.figure(0)
                            plt.axis('off')
                            plt.imshow(im)
                            # plt.show()
                            # plt.close()
                        # jump over the damaged images
                        if str(img_id) == '320612':
                            continue
                        # process anns
                        height, width = imgs[i][1]['height'], imgs[i][1]['width']
                        gt_boxes, masks, mask = _get_coco_masks(coco, img_id, height, width, img_name)
                        # read image as RGB numpy
                        img = np.array(Image.open(img_name))
                        if img.size == height * width:
                            # grayscale: replicate single channel to RGB
                            print('Gray Image %s' % str(img_id))
                            im = np.empty((height, width, 3), dtype=np.uint8)
                            im[:, :, :] = img[:, :, np.newaxis]
                            img = im
                        img = img.astype(np.uint8)
                        assert img.size == width * height * 3, '%s' % str(img_id)
                        img_raw = img.tostring()
                        mask_raw = mask.tostring()
                        example = _to_tfexample_coco_raw(
                            img_id, img_raw, mask_raw, height, width,
                            gt_boxes.shape[0], gt_boxes.tostring(), masks.tostring())
                        tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()