Example #1
import tensorflow as tf

from mediapipe.util.sequence import media_sequence as ms


def parse_fn(sequence_example):
  """Parses a clip classification example."""
  # Context features carry per-clip metadata; each default parser returns
  # the tf.io feature config expected by parse_single_sequence_example.
  context_features = {
      ms.get_example_id_key(): ms.get_example_id_default_parser(),
      ms.get_clip_label_index_key(): ms.get_clip_label_index_default_parser(),
      ms.get_clip_label_string_key(): ms.get_clip_label_string_default_parser(),
  }
  # Sequence features carry the per-frame data, here JPEG-encoded images.
  sequence_features = {
      ms.get_image_encoded_key(): ms.get_image_encoded_default_parser(),
  }
  parsed_context, parsed_sequence = tf.io.parse_single_sequence_example(
      sequence_example, context_features, sequence_features)
  example_id = parsed_context[ms.get_example_id_key()]
  # The clip label index parses as a SparseTensor; densify it before
  # one-hot encoding. NUM_CLASSES is a module-level constant.
  classification_target = tf.one_hot(
      tf.sparse.to_dense(parsed_context[ms.get_clip_label_index_key()]),
      NUM_CLASSES)
  # Decode each JPEG frame to a [height, width, channels] uint8 tensor.
  images = tf.map_fn(
      tf.image.decode_jpeg,
      parsed_sequence[ms.get_image_encoded_key()],
      fn_output_signature=tf.uint8)
  return {
      "id": example_id,
      "labels": classification_target,
      "images": images,
  }
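
A minimal usage sketch: the parser maps directly over a tf.data pipeline of serialized SequenceExamples. The TFRecord path and the NUM_CLASSES value are hypothetical; frame counts vary per clip, so batching would need padded_batch.

import tensorflow as tf

NUM_CLASSES = 400  # Hypothetical; set by the dataset being parsed.

# Hypothetical TFRecord of serialized tf.train.SequenceExamples.
dataset = tf.data.TFRecordDataset("/tmp/clips.tfrecord")
dataset = dataset.map(parse_fn, num_parallel_calls=tf.data.AUTOTUNE)
for example in dataset.take(1):
  print(example["id"], example["labels"].shape, example["images"].shape)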
Example #2
import tensorflow as tf

from mediapipe.util.sequence import media_sequence as ms


def parse_fn(sequence_example, parse_labels=False):
    """Parses a Kinetics example."""
    # In the original code `parse_labels` is captured from the enclosing
    # dataset-construction function; it is surfaced as a parameter here so
    # the snippet is self-contained.
    context_features = {
        ms.get_example_id_key(): ms.get_example_id_default_parser(),
    }
    if parse_labels:
        context_features[ms.get_clip_label_string_key()] = (
            tf.io.FixedLenFeature((), tf.string))
        context_features[ms.get_clip_label_index_key()] = (
            tf.io.FixedLenFeature((), tf.int64))

    sequence_features = {
        ms.get_image_encoded_key():
            ms.get_image_encoded_default_parser(),
        ms.get_forward_flow_encoded_key():
            ms.get_forward_flow_encoded_default_parser(),
    }
    parsed_context, parsed_sequence = tf.io.parse_single_sequence_example(
        sequence_example, context_features, sequence_features)

    # Decode the JPEG frames and rescale from uint8 to floats in [0, 1].
    images = tf.image.convert_image_dtype(
        tf.map_fn(tf.image.decode_jpeg,
                  parsed_sequence[ms.get_image_encoded_key()],
                  fn_output_signature=tf.uint8), tf.float32)
    num_frames = tf.shape(images)[0]

    flow = tf.image.convert_image_dtype(
        tf.map_fn(tf.image.decode_jpeg,
                  parsed_sequence[ms.get_forward_flow_encoded_key()],
                  fn_output_signature=tf.uint8), tf.float32)
    # The flow is quantized for storage in JPEGs by the FlowToImageCalculator,
    # so the quantization must be inverted: keep the first two channels and
    # map [0, 1] back to the [-20, 20] pixel-displacement range.
    flow = (flow[:, :, :, :2] - 0.5) * 2 * 20.

    output_dict = {
        "images": images,
        "flow": flow,
        "num_frames": num_frames,
    }
    if parse_labels:
        # 700 is the number of classes in Kinetics-700.
        target = tf.one_hot(
            parsed_context[ms.get_clip_label_index_key()], 700)
        output_dict["labels"] = target
    return output_dict
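
Since `parse_labels` is surfaced as a parameter above, binding it at map time keeps the tf.data call site clean. A sketch, with a hypothetical TFRecord path:

import functools

import tensorflow as tf

dataset = tf.data.TFRecordDataset("/tmp/kinetics.tfrecord")  # Hypothetical path.
dataset = dataset.map(functools.partial(parse_fn, parse_labels=True),
                      num_parallel_calls=tf.data.AUTOTUNE)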
Example #3
import tensorflow as tf

from mediapipe.util.sequence import media_sequence as ms


def parse_fn(sequence_example):
    """Parses a Charades example."""
    # Context features: per-clip metadata plus the annotated action
    # segments (start/end indices, timestamps, and label indices/strings).
    context_features = {
        ms.get_example_id_key():
            ms.get_example_id_default_parser(),
        ms.get_segment_start_index_key():
            ms.get_segment_start_index_default_parser(),
        ms.get_segment_end_index_key():
            ms.get_segment_end_index_default_parser(),
        ms.get_segment_label_index_key():
            ms.get_segment_label_index_default_parser(),
        ms.get_segment_label_string_key():
            ms.get_segment_label_string_default_parser(),
        ms.get_segment_start_timestamp_key():
            ms.get_segment_start_timestamp_default_parser(),
        ms.get_segment_end_timestamp_key():
            ms.get_segment_end_timestamp_default_parser(),
        ms.get_image_frame_rate_key():
            ms.get_image_frame_rate_default_parser(),
    }

    sequence_features = {
        ms.get_image_encoded_key(): ms.get_image_encoded_default_parser(),
    }
    parsed_context, parsed_sequence = tf.io.parse_single_sequence_example(
        sequence_example, context_features, sequence_features)

    sequence_length = tf.shape(
        parsed_sequence[ms.get_image_encoded_key()])[0]
    num_segments = tf.shape(
        parsed_context[ms.get_segment_label_index_key()])[0]
    # Segment membership matrix and per-timestep targets for training.
    # one_hot_segments, timepoint_classification_target, NUM_CLASSES,
    # CLASS_LABEL_OFFSET, and SECONDS_TO_MICROSECONDS are helpers and
    # constants defined elsewhere in the original module.
    segments_matrix, indicator = one_hot_segments(
        tf.sparse.to_dense(
            parsed_context[ms.get_segment_start_index_key()]),
        tf.sparse.to_dense(
            parsed_context[ms.get_segment_end_index_key()]),
        sequence_length)

    classification_target = timepoint_classification_target(
        segments_matrix,
        tf.sparse.to_dense(
            parsed_context[ms.get_segment_label_index_key()]) +
        CLASS_LABEL_OFFSET, NUM_CLASSES + CLASS_LABEL_OFFSET)

    # [segments, 2] start and end times in seconds.
    gt_segment_seconds = tf.cast(
        tf.stack([
            tf.sparse.to_dense(
                parsed_context[ms.get_segment_start_timestamp_key()]),
            tf.sparse.to_dense(
                parsed_context[ms.get_segment_end_timestamp_key()]),
        ], axis=1), tf.float32) / float(SECONDS_TO_MICROSECONDS)
    gt_segment_classes = tf.sparse.to_dense(parsed_context[
        ms.get_segment_label_index_key()]) + CLASS_LABEL_OFFSET
    example_id = parsed_context[ms.get_example_id_key()]
    sampling_rate = parsed_context[ms.get_image_frame_rate_key()]

    # Decode each JPEG frame to a [height, width, channels] uint8 tensor.
    images = tf.map_fn(tf.image.decode_jpeg,
                       parsed_sequence[ms.get_image_encoded_key()],
                       fn_output_signature=tf.uint8)

    output_dict = {
        "segment_matrix": segments_matrix,
        "indicator_matrix": indicator,
        "classification_target": classification_target,
        "example_id": example_id,
        "sampling_rate": sampling_rate,
        "gt_segment_seconds": gt_segment_seconds,
        "gt_segment_classes": gt_segment_classes,
        "num_segments": num_segments,
        "num_timesteps": sequence_length,
        "images": images,
    }
    return output_dict
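
Because frame and segment counts differ per clip, batching this parser's output needs per-batch padding. A sketch under the same assumptions as above (hypothetical TFRecord path; the helpers and constants referenced by parse_fn must be defined first):

import tensorflow as tf

dataset = tf.data.TFRecordDataset("/tmp/charades.tfrecord")  # Hypothetical path.
dataset = dataset.map(parse_fn, num_parallel_calls=tf.data.AUTOTUNE)
# padded_batch pads every variable-length dimension (frames, segments)
# to the maximum within each batch; scalar entries pass through unchanged.
dataset = dataset.padded_batch(4)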