def test_result_shape(self, image_height, image_width, num_instances,
                      regenerate_source_id):
  """Decodes a synthetic detection example and checks output tensor shapes."""
  decoder = tf_example_decoder.TfExampleDecoder(
      include_mask=True, regenerate_source_id=regenerate_source_id)
  serialized_example = tfexample_utils.create_detection_test_example(
      image_height=image_height,
      image_width=image_width,
      image_channel=3,
      num_instances=num_instances).SerializeToString()

  decoded_tensors = decoder.decode(
      tf.convert_to_tensor(value=serialized_example))
  results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors)

  self.assertAllEqual((image_height, image_width, 3), results['image'].shape)
  # The original source_id is only preserved when regeneration is disabled.
  if not regenerate_source_id:
    self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
  self.assertEqual(image_height, results['height'])
  self.assertEqual(image_width, results['width'])
  # Per-instance 1-D tensors all share the leading num_instances dimension.
  for key in ('groundtruth_classes', 'groundtruth_is_crowd',
              'groundtruth_area'):
    self.assertAllEqual((num_instances,), results[key].shape)
  self.assertAllEqual((num_instances, 4), results['groundtruth_boxes'].shape)
  self.assertAllEqual((num_instances, image_height, image_width),
                      results['groundtruth_instance_masks'].shape)
  self.assertAllEqual((num_instances,),
                      results['groundtruth_instance_masks_png'].shape)
def build_inputs(
    self,
    params: exp_cfg.DataConfig,
    input_context: Optional[tf.distribute.InputContext] = None,
    dataset_fn: Optional[dataset_fn_lib.PossibleDatasetType] = None
) -> tf.data.Dataset:
  """Build input dataset."""
  decoder_cfg = params.decoder.get()
  include_mask = self._task_config.model.include_mask

  # Pick the example decoder implementation configured for this dataset.
  if params.decoder.type == 'simple_decoder':
    decoder = tf_example_decoder.TfExampleDecoder(
        include_mask=include_mask,
        regenerate_source_id=decoder_cfg.regenerate_source_id,
        mask_binarize_threshold=decoder_cfg.mask_binarize_threshold)
  elif params.decoder.type == 'label_map_decoder':
    decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
        label_map=decoder_cfg.label_map,
        include_mask=include_mask,
        regenerate_source_id=decoder_cfg.regenerate_source_id,
        mask_binarize_threshold=decoder_cfg.mask_binarize_threshold)
  else:
    raise ValueError('Unknown decoder type: {}!'.format(params.decoder.type))

  model_cfg = self.task_config.model
  parser_cfg = params.parser
  parser = maskrcnn_input.Parser(
      output_size=model_cfg.input_size[:2],
      min_level=model_cfg.min_level,
      max_level=model_cfg.max_level,
      num_scales=model_cfg.anchor.num_scales,
      aspect_ratios=model_cfg.anchor.aspect_ratios,
      anchor_size=model_cfg.anchor.anchor_size,
      dtype=params.dtype,
      rpn_match_threshold=parser_cfg.rpn_match_threshold,
      rpn_unmatched_threshold=parser_cfg.rpn_unmatched_threshold,
      rpn_batch_size_per_im=parser_cfg.rpn_batch_size_per_im,
      rpn_fg_fraction=parser_cfg.rpn_fg_fraction,
      aug_rand_hflip=parser_cfg.aug_rand_hflip,
      aug_scale_min=parser_cfg.aug_scale_min,
      aug_scale_max=parser_cfg.aug_scale_max,
      aug_type=parser_cfg.aug_type,
      skip_crowd_during_training=parser_cfg.skip_crowd_during_training,
      max_num_instances=parser_cfg.max_num_instances,
      include_mask=include_mask,
      mask_crop_size=parser_cfg.mask_crop_size)

  # Allow callers to inject a dataset factory; otherwise derive it from the
  # configured file type.
  if not dataset_fn:
    dataset_fn = dataset_fn_lib.pick_dataset_fn(params.file_type)

  reader = input_reader_factory.input_reader_generator(
      params,
      dataset_fn=dataset_fn,
      decoder_fn=decoder.decode,
      parser_fn=parser.parse_fn(params.is_training))
  return reader.read(input_context=input_context)
def _parse_single_example(self, example):
  """Parses a single serialized tf.Example proto.

  Args:
    example: a serialized tf.Example proto string.

  Returns:
    A dictionary of groundtruth with the following fields:
      source_id: a scalar tensor of int64 representing the image source_id.
      height: a scalar tensor of int64 representing the image height.
      width: a scalar tensor of int64 representing the image width.
      boxes: a float tensor of shape [K, 4], representing the groundtruth
        boxes in absolute coordinates with respect to the original image size.
      classes: a int64 tensor of shape [K], representing the class labels of
        each instances.
      is_crowds: a bool tensor of shape [K], indicating whether the instance
        is crowd.
      areas: a float tensor of shape [K], indicating the area of each
        instance.
      masks: a string tensor of shape [K], containing the bytes of the png
        mask of each instance.
  """
  decoder = tf_example_decoder.TfExampleDecoder(
      include_mask=self._include_mask,
      regenerate_source_id=self._regenerate_source_id)
  decoded_tensors = decoder.decode(example)

  image = decoded_tensors['image']
  image_size = tf.shape(image)[0:2]
  # Decoded boxes are normalized; convert to absolute pixel coordinates of
  # the original image.
  boxes = box_ops.denormalize_boxes(decoded_tensors['groundtruth_boxes'],
                                    image_size)
  source_id = decoded_tensors['source_id']
  # Fix: compare DTypes with `==` rather than `is`. DType supports equality
  # comparison; relying on object identity for the check is fragile.
  if source_id.dtype == tf.string:
    source_id = tf.strings.to_number(source_id, out_type=tf.int64)

  groundtruths = {
      'source_id': source_id,
      'height': decoded_tensors['height'],
      'width': decoded_tensors['width'],
      'num_detections': tf.shape(decoded_tensors['groundtruth_classes'])[0],
      'boxes': boxes,
      'classes': decoded_tensors['groundtruth_classes'],
      'is_crowds': decoded_tensors['groundtruth_is_crowd'],
      'areas': decoded_tensors['groundtruth_area'],
  }
  if self._include_mask:
    groundtruths.update({
        'masks': decoded_tensors['groundtruth_instance_masks_png'],
    })
  return groundtruths
def build_inputs(
    self,
    params: exp_cfg.DataConfig,
    input_context: Optional[tf.distribute.InputContext] = None):
  """Build input dataset."""
  # TFDS datasets ship their own decoder; otherwise build one from config.
  if params.tfds_name:
    decoder = tfds_factory.get_detection_decoder(params.tfds_name)
  else:
    decoder_cfg = params.decoder.get()
    if params.decoder.type == 'simple_decoder':
      decoder = tf_example_decoder.TfExampleDecoder(
          regenerate_source_id=decoder_cfg.regenerate_source_id)
    elif params.decoder.type == 'label_map_decoder':
      decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
          label_map=decoder_cfg.label_map,
          regenerate_source_id=decoder_cfg.regenerate_source_id)
    else:
      raise ValueError('Unknown decoder type: {}!'.format(
          params.decoder.type))

  model_cfg = self.task_config.model
  parser_cfg = params.parser
  parser = retinanet_input.Parser(
      output_size=model_cfg.input_size[:2],
      min_level=model_cfg.min_level,
      max_level=model_cfg.max_level,
      num_scales=model_cfg.anchor.num_scales,
      aspect_ratios=model_cfg.anchor.aspect_ratios,
      anchor_size=model_cfg.anchor.anchor_size,
      dtype=params.dtype,
      match_threshold=parser_cfg.match_threshold,
      unmatched_threshold=parser_cfg.unmatched_threshold,
      aug_type=parser_cfg.aug_type,
      aug_rand_hflip=parser_cfg.aug_rand_hflip,
      aug_scale_min=parser_cfg.aug_scale_min,
      aug_scale_max=parser_cfg.aug_scale_max,
      skip_crowd_during_training=parser_cfg.skip_crowd_during_training,
      max_num_instances=parser_cfg.max_num_instances)

  reader = input_reader_factory.input_reader_generator(
      params,
      dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
      decoder_fn=decoder.decode,
      parser_fn=parser.parse_fn(params.is_training))
  return reader.read(input_context=input_context)
def build_inputs(
    self,
    params: exp_cfg.DataConfig,
    input_context: Optional[tf.distribute.InputContext] = None):
  """Build input dataset."""
  # TFDS datasets ship their own decoder; otherwise build one from config.
  if params.tfds_name:
    decoder = tfds_factory.get_detection_decoder(params.tfds_name)
  else:
    decoder_cfg = params.decoder.get()
    if params.decoder.type == 'simple_decoder':
      decoder = tf_example_decoder.TfExampleDecoder(
          regenerate_source_id=decoder_cfg.regenerate_source_id)
    elif params.decoder.type == 'label_map_decoder':
      decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
          label_map=decoder_cfg.label_map,
          regenerate_source_id=decoder_cfg.regenerate_source_id)
    else:
      raise ValueError('Unknown decoder type: {}!'.format(
          params.decoder.type))

  model_cfg = self.task_config.model
  parser_cfg = params.parser
  parser = centernet_input.CenterNetParser(
      output_height=model_cfg.input_size[0],
      output_width=model_cfg.input_size[1],
      max_num_instances=model_cfg.max_num_instances,
      bgr_ordering=parser_cfg.bgr_ordering,
      channel_means=parser_cfg.channel_means,
      channel_stds=parser_cfg.channel_stds,
      aug_rand_hflip=parser_cfg.aug_rand_hflip,
      aug_scale_min=parser_cfg.aug_scale_min,
      aug_scale_max=parser_cfg.aug_scale_max,
      aug_rand_hue=parser_cfg.aug_rand_hue,
      aug_rand_brightness=parser_cfg.aug_rand_brightness,
      aug_rand_contrast=parser_cfg.aug_rand_contrast,
      aug_rand_saturation=parser_cfg.aug_rand_saturation,
      odapi_augmentation=parser_cfg.odapi_augmentation,
      dtype=params.dtype)

  reader = input_reader.InputReader(
      params,
      dataset_fn=tf.data.TFRecordDataset,
      decoder_fn=decoder.decode,
      parser_fn=parser.parse_fn(params.is_training))
  return reader.read(input_context=input_context)
def build_inputs(
    self,
    params,
    input_context: Optional[tf.distribute.InputContext] = None):
  """Build input dataset."""
  # COCO-specific configs use the dedicated COCO loader directly.
  if isinstance(params, coco.COCODataConfig):
    return coco.COCODataLoader(params).load(input_context)

  # TFDS datasets ship their own decoder; otherwise build one from config.
  if params.tfds_name:
    decoder = tfds_factory.get_detection_decoder(params.tfds_name)
  else:
    decoder_cfg = params.decoder.get()
    if params.decoder.type == 'simple_decoder':
      decoder = tf_example_decoder.TfExampleDecoder(
          regenerate_source_id=decoder_cfg.regenerate_source_id)
    elif params.decoder.type == 'label_map_decoder':
      decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
          label_map=decoder_cfg.label_map,
          regenerate_source_id=decoder_cfg.regenerate_source_id)
    else:
      raise ValueError('Unknown decoder type: {}!'.format(
          params.decoder.type))

  parser = detr_input.Parser(
      class_offset=self._task_config.losses.class_offset,
      output_size=self._task_config.model.input_size[:2],
  )

  reader = input_reader_factory.input_reader_generator(
      params,
      dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
      decoder_fn=decoder.decode,
      parser_fn=parser.parse_fn(params.is_training))
  return reader.read(input_context=input_context)
def test_result_content(self):
  """Checks decoded values against a hand-constructed tf.Example."""
  decoder = tf_example_decoder.TfExampleDecoder(include_mask=True)

  # 4x4 RGB image: black border with a white 2x2 center.
  image_content = [[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
                   [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
                   [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
                   [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]]
  image = tfexample_utils.encode_image(np.uint8(image_content), fmt='PNG')
  image_height = 4
  image_width = 4
  num_instances = 2
  xmins = [0, 0.25]
  xmaxs = [0.5, 1.0]
  ymins = [0, 0]
  ymaxs = [0.5, 1.0]
  labels = [3, 1]
  areas = [
      0.25 * image_height * image_width, 0.75 * image_height * image_width
  ]
  is_crowds = [1, 0]
  # One binary mask per instance, matching the boxes above.
  mask_content = [[[255, 255, 0, 0], [255, 255, 0, 0], [0, 0, 0, 0],
                   [0, 0, 0, 0]],
                  [[0, 255, 255, 255], [0, 255, 255, 255], [0, 255, 255, 255],
                   [0, 255, 255, 255]]]
  masks = [
      tfexample_utils.encode_image(np.uint8(m), fmt='PNG')
      for m in list(mask_content)
  ]

  # Small local builders keep the feature map below readable.
  def _bytes_feature(values):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))

  def _int64_feature(values):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

  def _float_feature(values):
    return tf.train.Feature(float_list=tf.train.FloatList(value=values))

  serialized_example = tf.train.Example(
      features=tf.train.Features(
          feature={
              'image/encoded': _bytes_feature([image]),
              'image/source_id':
                  _bytes_feature([tfexample_utils.DUMP_SOURCE_ID]),
              'image/height': _int64_feature([image_height]),
              'image/width': _int64_feature([image_width]),
              'image/object/bbox/xmin': _float_feature(xmins),
              'image/object/bbox/xmax': _float_feature(xmaxs),
              'image/object/bbox/ymin': _float_feature(ymins),
              'image/object/bbox/ymax': _float_feature(ymaxs),
              'image/object/class/label': _int64_feature(labels),
              'image/object/is_crowd': _int64_feature(is_crowds),
              'image/object/area': _float_feature(areas),
              'image/object/mask': _bytes_feature(masks),
          })).SerializeToString()

  decoded_tensors = decoder.decode(
      tf.convert_to_tensor(value=serialized_example))
  results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors)

  # Shape checks.
  self.assertAllEqual((image_height, image_width, 3), results['image'].shape)
  self.assertAllEqual(image_content, results['image'])
  self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
  self.assertEqual(image_height, results['height'])
  self.assertEqual(image_width, results['width'])
  self.assertAllEqual((num_instances,),
                      results['groundtruth_classes'].shape)
  self.assertAllEqual((num_instances,),
                      results['groundtruth_is_crowd'].shape)
  self.assertAllEqual((num_instances,), results['groundtruth_area'].shape)
  self.assertAllEqual((num_instances, 4), results['groundtruth_boxes'].shape)
  self.assertAllEqual((num_instances, image_height, image_width),
                      results['groundtruth_instance_masks'].shape)
  self.assertAllEqual((num_instances,),
                      results['groundtruth_instance_masks_png'].shape)

  # Value checks.
  self.assertAllEqual([3, 1], results['groundtruth_classes'])
  self.assertAllEqual([True, False], results['groundtruth_is_crowd'])
  self.assertNDArrayNear(
      [0.25 * image_height * image_width, 0.75 * image_height * image_width],
      results['groundtruth_area'], 1e-4)
  self.assertNDArrayNear([[0, 0, 0.5, 0.5], [0, 0.25, 1.0, 1.0]],
                         results['groundtruth_boxes'], 1e-4)
  self.assertNDArrayNear(mask_content, results['groundtruth_instance_masks'],
                         1e-4)
  self.assertAllEqual(masks, results['groundtruth_instance_masks_png'])