Example #1
    def test_video_audio_input(self):
        params = exp_cfg.kinetics600(is_training=True)
        params.feature_shape = (2, 224, 224, 3)
        params.min_image_size = 224
        params.output_audio = True
        params.audio_feature = AUDIO_KEY
        params.audio_feature_shape = (15, 256)

        # Register the extra audio feature so the decoder also parses it.
        decoder = video_input.Decoder()
        decoder.add_feature(params.audio_feature,
                            tf.io.VarLenFeature(dtype=tf.float32))
        parser = video_input.Parser(params).parse_fn(params.is_training)

        seq_example, label = fake_seq_example()

        input_tensor = tf.constant(seq_example.SerializeToString())
        decoded_tensors = decoder.decode(input_tensor)
        output_tensor = parser(decoded_tensors)
        features, label = output_tensor
        image = features['image']
        audio = features['audio']

        self.assertAllEqual(image.shape, (2, 224, 224, 3))
        self.assertAllEqual(label.shape, (600,))
        self.assertEqual(audio.shape, (15, 256))
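Both this test and Example #4 rely on a fake_seq_example() helper that is not shown in these excerpts. Below is a minimal sketch of what such a helper might look like, modeled on the manual tf.train.SequenceExample construction in Example #2; the audio layout (15 steps of 256 floats under AUDIO_KEY) and the label value 42 are assumptions chosen only to match the assertions above, and video_input / AUDIO_KEY are the test module's own constants.

import io

import numpy as np
import tensorflow as tf
from PIL import Image


def fake_seq_example():
  """Hypothetical helper: one fake video clip with audio and a class label."""
  # Encode a random RGB frame as JPEG bytes.
  random_image = np.random.randint(0, 256, size=(263, 320, 3), dtype=np.uint8)
  with io.BytesIO() as buffer:
    Image.fromarray(random_image).save(buffer, format='JPEG')
    raw_image_bytes = buffer.getvalue()

  seq_example = tf.train.SequenceExample()
  # Two identical frames, matching feature_shape[0] == 2.
  for _ in range(2):
    seq_example.feature_lists.feature_list.get_or_create(
        video_input.IMAGE_KEY).feature.add().bytes_list.value[:] = [
            raw_image_bytes
        ]
  # Fake audio: 15 time steps of 256 floats under the assumed AUDIO_KEY.
  for step in np.random.normal(size=(15, 256)).tolist():
    seq_example.feature_lists.feature_list.get_or_create(
        AUDIO_KEY).feature.add().float_list.value[:] = step
  label = 42
  seq_example.context.feature[video_input.LABEL_KEY].int64_list.value[:] = [
      label
  ]
  return seq_example, label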
Example #2
  def test_video_input(self):
    params = exp_cfg.kinetics600(is_training=True)
    params.feature_shape = (2, 224, 224, 3)
    params.min_image_size = 224
    decoder = video_input.Decoder()
    parser = video_input.Parser(params).parse_fn(params.is_training)

    # Create fake data: encode one random 263x320 RGB frame as JPEG bytes.
    random_image = np.random.randint(
        0, 256, size=(263, 320, 3), dtype=np.uint8)
    random_image = Image.fromarray(random_image)
    with io.BytesIO() as buffer:
      random_image.save(buffer, format='JPEG')
      raw_image_bytes = buffer.getvalue()

    # Add two identical JPEG frames (feature_shape expects 2 frames) and one
    # int64 class label in the context.
    seq_example = tf.train.SequenceExample()
    seq_example.feature_lists.feature_list.get_or_create(
        video_input.IMAGE_KEY).feature.add().bytes_list.value[:] = [
            raw_image_bytes
        ]
    seq_example.feature_lists.feature_list.get_or_create(
        video_input.IMAGE_KEY).feature.add().bytes_list.value[:] = [
            raw_image_bytes
        ]
    seq_example.context.feature[video_input.LABEL_KEY].int64_list.value[:] = [
        42
    ]

    input_tensor = tf.constant(seq_example.SerializeToString())
    decoded_tensors = decoder.decode(input_tensor)
    output_tensor = parser(decoded_tensors)
    image_features, label = output_tensor
    image = image_features['image']

    self.assertAllEqual(image.shape, (2, 224, 224, 3))
    self.assertAllEqual(label.shape, (600,))
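The same decode-then-parse steps also compose into a tf.data pipeline. Here is a minimal sketch, continuing from the seq_example, decoder, and parser built above; it is not part of the original test.

    # Serialize once, then map the decoder and parser over a tiny dataset.
    serialized = tf.constant(seq_example.SerializeToString())
    dataset = (
        tf.data.Dataset.from_tensors(serialized)
        .repeat(2)
        .map(decoder.decode, num_parallel_calls=tf.data.AUTOTUNE)
        .map(parser, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(2))

    for features, labels in dataset:
      # Batched shapes: image (2, 2, 224, 224, 3), labels (2, 600).
      print(features['image'].shape, labels.shape)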
Example #3
  def build_inputs(self, params: exp_cfg.DataConfig, input_context=None):
    """Builds classification input."""

    parser = video_input.Parser(input_params=params)
    postprocess_fn = video_input.PostBatchProcessor(params)

    reader = input_reader.InputReader(
        params,
        dataset_fn=self._get_dataset_fn(params),
        decoder_fn=self._get_decoder_fn(params),
        parser_fn=parser.parse_fn(params.is_training),
        postprocess_fn=postprocess_fn)

    dataset = reader.read(input_context=input_context)

    return dataset
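A hedged sketch of how this build_inputs might be exercised directly, for instance from a test: `task` stands for an instance of the class this method belongs to, and `global_batch_size` is the usual DataConfig batch-size field; both are assumptions, not part of the snippet above.

# Build a training dataset and pull a single batch eagerly.
params = exp_cfg.kinetics600(is_training=True)
params.global_batch_size = 2
params.feature_shape = (2, 224, 224, 3)

dataset = task.build_inputs(params)
features, labels = next(iter(dataset))
# features['image']: (batch, frames, height, width, channels)
# labels: (batch, num_classes)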
Example #4
    def test_video_input(self):
        params = exp_cfg.kinetics600(is_training=True)
        params.feature_shape = (2, 224, 224, 3)
        params.min_image_size = 224

        decoder = video_input.Decoder()
        parser = video_input.Parser(params).parse_fn(params.is_training)

        seq_example, label = fake_seq_example()

        input_tensor = tf.constant(seq_example.SerializeToString())
        decoded_tensors = decoder.decode(input_tensor)
        output_tensor = parser(decoded_tensors)
        image_features, label = output_tensor
        image = image_features['image']

        self.assertAllEqual(image.shape, (2, 224, 224, 3))
        self.assertAllEqual(label.shape, (600,))
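The (600,) label shape asserted here and in the earlier tests comes from encoding the single integer context label over the 600 Kinetics-600 classes. A quick illustration, assuming the parser one-hot encodes that label:

# One-hot encoding the fake label 42 over 600 classes yields shape (600,).
one_hot = tf.one_hot(42, depth=600)
print(one_hot.shape)            # (600,)
print(int(tf.argmax(one_hot)))  # 42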
Example #5
    def build_inputs(
            self,
            params: exp_cfg.DataConfig,
            input_context: Optional[tf.distribute.InputContext] = None):
        """Builds classification input."""

        parser = video_input.Parser(input_params=params,
                                    image_key=params.image_field_key,
                                    label_key=params.label_field_key)
        postprocess_fn = video_input.PostBatchProcessor(params)

        reader = input_reader_factory.input_reader_generator(
            params,
            dataset_fn=self._get_dataset_fn(params),
            decoder_fn=self._get_decoder_fn(params),
            parser_fn=parser.parse_fn(params.is_training),
            postprocess_fn=postprocess_fn)

        dataset = reader.read(input_context=input_context)

        return dataset
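Since this variant accepts a tf.distribute.InputContext, it can be handed to a distribution strategy that supplies one InputContext per input pipeline. A minimal sketch follows; the MirroredStrategy choice and the `task` / `params` names are assumptions for illustration.

strategy = tf.distribute.MirroredStrategy()

def dataset_fn(input_context):
  # The strategy passes an InputContext so each pipeline can shard its input.
  return task.build_inputs(params, input_context=input_context)

distributed_dataset = strategy.distribute_datasets_from_function(dataset_fn)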