Example #1
    def build_inputs(
            self,
            params: exp_cfg.DataConfig,
            input_context: Optional[tf.distribute.InputContext] = None):
        """Build input dataset."""

        if params.tfds_name:
            if params.tfds_name in tfds_detection_decoders.TFDS_ID_TO_DECODER_MAP:
                decoder = tfds_detection_decoders.TFDS_ID_TO_DECODER_MAP[
                    params.tfds_name]()
            else:
                raise ValueError('TFDS {} is not supported'.format(
                    params.tfds_name))
        else:
            decoder_cfg = params.decoder.get()
            if params.decoder.type == 'simple_decoder':
                decoder = tf_example_decoder.TfExampleDecoder(
                    regenerate_source_id=decoder_cfg.regenerate_source_id)
            elif params.decoder.type == 'label_map_decoder':
                decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
                    label_map=decoder_cfg.label_map,
                    regenerate_source_id=decoder_cfg.regenerate_source_id)
            else:
                raise ValueError('Unknown decoder type: {}!'.format(
                    params.decoder.type))

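        # The parser converts decoded examples into model-ready images plus
        # per-level anchor classification and box-regression targets.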
        parser = retinanet_input.Parser(
            output_size=self.task_config.model.input_size[:2],
            min_level=self.task_config.model.min_level,
            max_level=self.task_config.model.max_level,
            num_scales=self.task_config.model.anchor.num_scales,
            aspect_ratios=self.task_config.model.anchor.aspect_ratios,
            anchor_size=self.task_config.model.anchor.anchor_size,
            dtype=params.dtype,
            match_threshold=params.parser.match_threshold,
            unmatched_threshold=params.parser.unmatched_threshold,
            aug_rand_hflip=params.parser.aug_rand_hflip,
            aug_scale_min=params.parser.aug_scale_min,
            aug_scale_max=params.parser.aug_scale_max,
            skip_crowd_during_training=params.parser.skip_crowd_during_training,
            max_num_instances=params.parser.max_num_instances)

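        # The reader wires file reading, decoding and parsing into a single
        # tf.data pipeline; pick_dataset_fn() chooses the dataset class that
        # matches params.file_type (e.g. TFRecord files).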
        reader = input_reader_factory.input_reader_generator(
            params,
            dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
            decoder_fn=decoder.decode,
            parser_fn=parser.parse_fn(params.is_training))
        dataset = reader.read(input_context=input_context)

        return dataset
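A minimal driver for the method above, sketched under assumptions: the task object, the tfds_name/tfds_split values and the batch size are illustrative and not part of the original example.

    # Hypothetical usage of Example #1's build_inputs(); `task` is assumed
    # to be an already-constructed RetinaNet task instance.
    params = exp_cfg.DataConfig(
        tfds_name='coco/2017',
        tfds_split='validation',
        global_batch_size=8,
        is_training=False)
    dataset = task.build_inputs(params)
    images, labels = next(iter(dataset))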
Example #2
    def build_inputs(self, params, input_context=None):
        """Build input dataset."""
        decoder_cfg = params.decoder.get()
        if params.decoder.type == 'simple_decoder':
            decoder = tf_example_decoder.TfExampleDecoder(
                regenerate_source_id=decoder_cfg.regenerate_source_id)
        elif params.decoder.type == 'label_map_decoder':
            decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
                label_map=decoder_cfg.label_map,
                regenerate_source_id=decoder_cfg.regenerate_source_id)
        else:
            raise ValueError('Unknown decoder type: {}!'.format(
                params.decoder.type))
        parser = retinanet_input.Parser(
            output_size=self.task_config.model.input_size[:2],
            min_level=self.task_config.model.min_level,
            max_level=self.task_config.model.max_level,
            num_scales=self.task_config.model.anchor.num_scales,
            aspect_ratios=self.task_config.model.anchor.aspect_ratios,
            anchor_size=self.task_config.model.anchor.anchor_size,
            dtype=params.dtype,
            match_threshold=params.parser.match_threshold,
            unmatched_threshold=params.parser.unmatched_threshold,
            aug_rand_hflip=params.parser.aug_rand_hflip,
            aug_scale_min=params.parser.aug_scale_min,
            aug_scale_max=params.parser.aug_scale_max,
            skip_crowd_during_training=params.parser.skip_crowd_during_training,
            max_num_instances=params.parser.max_num_instances)

        reader = input_reader.InputReader(params,
                                          dataset_fn=tf.data.TFRecordDataset,
                                          decoder_fn=decoder.decode,
                                          parser_fn=parser.parse_fn(
                                              params.is_training))
        dataset = reader.read(input_context=input_context)

        return dataset
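Compared with Example #1, this variant hard-codes input_reader.InputReader with tf.data.TFRecordDataset, while Example #1 goes through input_reader_factory.input_reader_generator and picks the dataset function from params.file_type, which keeps the method agnostic to the on-disk file format.

A hedged sketch of steering the decoder one-of before calling build_inputs(); the field layout follows the params.decoder.type / params.decoder.get() pattern used above, and the label-map path is a placeholder:

    # Hypothetical override selecting the label-map decoder variant.
    params.decoder.type = 'label_map_decoder'
    params.decoder.label_map_decoder.label_map = '/path/to/label_map.yaml'
    dataset = task.build_inputs(params)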
Example #3
    def testRetinanetInputReader(self, output_size, skip_crowd_during_training,
                                 use_autoaugment, is_training):

        batch_size = 2
        min_level = 3
        max_level = 7
        num_scales = 3
        aspect_ratios = [0.5, 1.0, 2.0]
        anchor_size = 3
        max_num_instances = 100

        params = cfg.DataConfig(
            input_path='/placer/prod/home/snaggletooth/test/data/coco/val*',
            global_batch_size=batch_size,
            is_training=is_training)

        decoder = tf_example_decoder.TfExampleDecoder()
        parser = retinanet_input.Parser(
            output_size=output_size,
            min_level=min_level,
            max_level=max_level,
            num_scales=num_scales,
            aspect_ratios=aspect_ratios,
            anchor_size=anchor_size,
            skip_crowd_during_training=skip_crowd_during_training,
            use_autoaugment=use_autoaugment,
            max_num_instances=max_num_instances,
            dtype='bfloat16')

        reader = input_reader.InputReader(params,
                                          dataset_fn=tf.data.TFRecordDataset,
                                          decoder_fn=decoder.decode,
                                          parser_fn=parser.parse_fn(
                                              params.is_training))

        dataset = reader.read()

        iterator = iter(dataset)
        image, labels = next(iterator)
        np_image = image.numpy()
        np_labels = tf.nest.map_structure(lambda x: x.numpy(), labels)

        # Checks image shape.
        self.assertEqual(list(np_image.shape),
                         [batch_size, output_size[0], output_size[1], 3])
        # Checks keys in labels.
        if is_training:
            self.assertCountEqual(np_labels.keys(), [
                'cls_targets', 'box_targets', 'anchor_boxes', 'cls_weights',
                'box_weights', 'image_info'
            ])
        else:
            self.assertCountEqual(np_labels.keys(), [
                'cls_targets', 'box_targets', 'anchor_boxes', 'cls_weights',
                'box_weights', 'groundtruths', 'image_info'
            ])
        # Checks shapes of `image_info` and `anchor_boxes`.
        self.assertEqual(np_labels['image_info'].shape, (batch_size, 4, 2))
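        # image_info rows hold [original size, desired size, scale, offset],
        # each a (height, width) pair, hence the (batch_size, 4, 2) shape.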
        n_anchors = 0
        for level in range(min_level, max_level + 1):
            stride = 2**level
            output_size_l = [output_size[0] // stride,
                             output_size[1] // stride]
            anchors_per_location = num_scales * len(aspect_ratios)
            self.assertEqual(list(np_labels['anchor_boxes'][level].shape), [
                batch_size, output_size_l[0], output_size_l[1],
                4 * anchors_per_location
            ])
            n_anchors += (output_size_l[0] * output_size_l[1] *
                          anchors_per_location)
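        # Worked example: with output_size=[640, 640], levels 3..7 yield
        # 80x80, 40x40, 20x20, 10x10 and 5x5 feature maps (8525 locations);
        # at 9 anchors per location, n_anchors = 8525 * 9 = 76725.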
        # Checks shapes of training objectives.
        self.assertEqual(np_labels['cls_weights'].shape,
                         (batch_size, n_anchors))
        for level in range(min_level, max_level + 1):
            stride = 2**level
            output_size_l = [output_size[0] // stride,
                             output_size[1] // stride]
            anchors_per_location = num_scales * len(aspect_ratios)
            self.assertEqual(list(np_labels['cls_targets'][level].shape), [
                batch_size, output_size_l[0], output_size_l[1],
                anchors_per_location
            ])
            self.assertEqual(list(np_labels['box_targets'][level].shape), [
                batch_size, output_size_l[0], output_size_l[1],
                4 * anchors_per_location
            ])
        # Checks shape of groundtruths for eval.
        if not is_training:
            self.assertEqual(np_labels['groundtruths']['source_id'].shape,
                             (batch_size, ))
            self.assertEqual(np_labels['groundtruths']['classes'].shape,
                             (batch_size, max_num_instances))
            self.assertEqual(np_labels['groundtruths']['boxes'].shape,
                             (batch_size, max_num_instances, 4))
            self.assertEqual(np_labels['groundtruths']['areas'].shape,
                             (batch_size, max_num_instances))
            self.assertEqual(np_labels['groundtruths']['is_crowds'].shape,
                             (batch_size, max_num_instances))
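The extra arguments to testRetinanetInputReader imply the case is parameter-driven, most likely via absl.testing's parameterized module. A plausible decoration, sketched with illustrative tuples that are not taken from the original source:

    # Assumed parameterization; needs `from absl.testing import
    # parameterized` and a test class deriving from parameterized.TestCase.
    @parameterized.parameters(
        ([640, 640], True, False, True),
        ([512, 512], False, True, False),
    )
    def testRetinanetInputReader(self, output_size, skip_crowd_during_training,
                                 use_autoaugment, is_training):
        ...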