import tensorflow as tf

# `ms` is MediaPipe's media sequence utility module. `NUM_CLASSES` is a
# module-level constant (the number of clip classes) defined elsewhere in
# the original source.
from mediapipe.util.sequence import media_sequence as ms


def parse_fn(sequence_example):
  """Parses a clip classification example."""
  # Per-clip (context) features: the example id and the clip-level label.
  context_features = {
      ms.get_example_id_key(): ms.get_example_id_default_parser(),
      ms.get_clip_label_index_key(): ms.get_clip_label_index_default_parser(),
      ms.get_clip_label_string_key(): ms.get_clip_label_string_default_parser()
  }
  # Per-frame (sequence) features: the JPEG-encoded images.
  sequence_features = {
      ms.get_image_encoded_key(): ms.get_image_encoded_default_parser(),
  }
  parsed_context, parsed_sequence = tf.io.parse_single_sequence_example(
      sequence_example, context_features, sequence_features)

  example_id = parsed_context[ms.get_example_id_key()]
  # The label index parses as a SparseTensor; densify it before one-hot
  # encoding the classification target.
  classification_target = tf.one_hot(
      tf.sparse.to_dense(parsed_context[ms.get_clip_label_index_key()]),
      NUM_CLASSES)
  # Decode every JPEG frame into a [num_frames, height, width, 3] tensor.
  images = tf.map_fn(tf.image.decode_jpeg,
                     parsed_sequence[ms.get_image_encoded_key()],
                     back_prop=False,
                     dtype=tf.uint8)
  return {
      "id": example_id,
      "labels": classification_target,
      "images": images,
  }
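
# A minimal usage sketch (an assumption, not part of the original): if the
# serialized SequenceExamples live in TFRecord files, parse_fn plugs directly
# into tf.data. The path "/tmp/clips.tfrecord" is a placeholder, and the
# batch size of 1 sidesteps padding across variable-length videos.
dataset = tf.data.TFRecordDataset("/tmp/clips.tfrecord")
dataset = dataset.map(parse_fn).batch(1).prefetch(1)
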
def parse_fn(sequence_example):
  """Parses a Kinetics example."""
  # Per-clip (context) features. `parse_labels` is a bool captured from the
  # enclosing scope in the original source (the function is wrapped where
  # `parse_labels` is a parameter, since some splits lack labels).
  context_features = {
      ms.get_example_id_key(): ms.get_example_id_default_parser(),
  }
  if parse_labels:
    context_features[ms.get_clip_label_string_key()] = tf.io.FixedLenFeature(
        (), tf.string)
    context_features[ms.get_clip_label_index_key()] = tf.io.FixedLenFeature(
        (), tf.int64)
  # Per-frame (sequence) features: JPEG-encoded RGB frames and JPEG-encoded
  # forward optical flow.
  sequence_features = {
      ms.get_image_encoded_key(): ms.get_image_encoded_default_parser(),
      ms.get_forward_flow_encoded_key():
          ms.get_forward_flow_encoded_default_parser(),
  }
  parsed_context, parsed_sequence = tf.io.parse_single_sequence_example(
      sequence_example, context_features, sequence_features)

  # Decode the frames and rescale them from uint8 to floats in [0, 1].
  images = tf.image.convert_image_dtype(
      tf.map_fn(tf.image.decode_jpeg,
                parsed_sequence[ms.get_image_encoded_key()],
                back_prop=False,
                dtype=tf.uint8), tf.float32)
  num_frames = tf.shape(images)[0]
  flow = tf.image.convert_image_dtype(
      tf.map_fn(tf.image.decode_jpeg,
                parsed_sequence[ms.get_forward_flow_encoded_key()],
                back_prop=False,
                dtype=tf.uint8), tf.float32)
  # The flow is quantized for storage in JPEGs by the FlowToImageCalculator.
  # The quantization needs to be inverted: keep the two channels that carry
  # flow and map [0, 1] back to [-20, 20].
  flow = (flow[:, :, :, :2] - 0.5) * 2 * 20.
  output_dict = {
      "images": images,
      "flow": flow,
      "num_frames": num_frames,
  }
  if parse_labels:
    # 700 is the Kinetics-700 class count.
    target = tf.one_hot(parsed_context[ms.get_clip_label_index_key()], 700)
    output_dict["labels"] = target
  return output_dict
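
# A quick self-contained check (an assumption, not part of the original) of
# the dequantization above: uint8 pixels 0, 127, and 255, once rescaled to
# [0, 1] by convert_image_dtype, map back to roughly -20, ~0, and +20.
quantized = tf.constant([[0], [127], [255]], dtype=tf.uint8)
rescaled = tf.image.convert_image_dtype(quantized, tf.float32)
dequantized = (rescaled - 0.5) * 2 * 20.  # ~[[-20.0], [-0.08], [20.0]]
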
def parse_fn(sequence_example):
  """Parses a Charades example."""
  # Per-video (context) features: the example id, the annotated action
  # segments (frame indices, labels, and microsecond timestamps), and the
  # frame rate of the stored images.
  context_features = {
      ms.get_example_id_key(): ms.get_example_id_default_parser(),
      ms.get_segment_start_index_key(): (
          ms.get_segment_start_index_default_parser()),
      ms.get_segment_end_index_key(): (
          ms.get_segment_end_index_default_parser()),
      ms.get_segment_label_index_key(): (
          ms.get_segment_label_index_default_parser()),
      ms.get_segment_label_string_key(): (
          ms.get_segment_label_string_default_parser()),
      ms.get_segment_start_timestamp_key(): (
          ms.get_segment_start_timestamp_default_parser()),
      ms.get_segment_end_timestamp_key(): (
          ms.get_segment_end_timestamp_default_parser()),
      ms.get_image_frame_rate_key(): (
          ms.get_image_frame_rate_default_parser()),
  }
  sequence_features = {
      ms.get_image_encoded_key(): ms.get_image_encoded_default_parser()
  }
  parsed_context, parsed_sequence = tf.io.parse_single_sequence_example(
      sequence_example, context_features, sequence_features)

  sequence_length = tf.shape(parsed_sequence[ms.get_image_encoded_key()])[0]
  # The segment features parse as SparseTensors; densify the label indices
  # once before taking their shape or offsetting them.
  segment_label_indices = tf.sparse.to_dense(
      parsed_context[ms.get_segment_label_index_key()])
  num_segments = tf.shape(segment_label_indices)[0]
  # Segments matrix and targets for training. `CLASS_LABEL_OFFSET` and
  # `NUM_CLASSES` are module-level constants from the original source;
  # `one_hot_segments` and `timepoint_classification_target` are helpers
  # defined alongside this function.
  segments_matrix, indicator = one_hot_segments(
      tf.sparse.to_dense(parsed_context[ms.get_segment_start_index_key()]),
      tf.sparse.to_dense(parsed_context[ms.get_segment_end_index_key()]),
      sequence_length)
  classification_target = timepoint_classification_target(
      segments_matrix,
      segment_label_indices + CLASS_LABEL_OFFSET,
      NUM_CLASSES + CLASS_LABEL_OFFSET)
  # [num_segments, 2] ground-truth start and end times, converted from
  # microseconds to seconds.
  gt_segment_seconds = tf.cast(
      tf.concat([
          tf.expand_dims(
              tf.sparse.to_dense(
                  parsed_context[ms.get_segment_start_timestamp_key()]), 1),
          tf.expand_dims(
              tf.sparse.to_dense(
                  parsed_context[ms.get_segment_end_timestamp_key()]), 1)
      ], 1), tf.float32) / float(SECONDS_TO_MICROSECONDS)
  gt_segment_classes = segment_label_indices + CLASS_LABEL_OFFSET
  example_id = parsed_context[ms.get_example_id_key()]
  sampling_rate = parsed_context[ms.get_image_frame_rate_key()]
  # Decode every JPEG frame into a [num_timesteps, height, width, 3] tensor.
  images = tf.map_fn(tf.image.decode_jpeg,
                     parsed_sequence[ms.get_image_encoded_key()],
                     back_prop=False,
                     dtype=tf.uint8)
  output_dict = {
      "segment_matrix": segments_matrix,
      "indicator_matrix": indicator,
      "classification_target": classification_target,
      "example_id": example_id,
      "sampling_rate": sampling_rate,
      "gt_segment_seconds": gt_segment_seconds,
      "gt_segment_classes": gt_segment_classes,
      "num_segments": num_segments,
      "num_timesteps": sequence_length,
      "images": images,
  }
  return output_dict
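
# A minimal sketch (an assumption, not part of the original) of what the
# `one_hot_segments` helper above is taken to compute: a binary
# [num_segments, sequence_length] matrix marking which timesteps each
# annotated segment covers, plus a [sequence_length] indicator of timesteps
# covered by any segment. The shapes, orientation, and inclusive end index
# are assumptions; the real helper lives alongside parse_fn in the original.
def one_hot_segments(segment_starts, segment_ends, sequence_length):
  timesteps = tf.range(sequence_length)                          # [T]
  starts = tf.expand_dims(tf.cast(segment_starts, tf.int32), 1)  # [S, 1]
  ends = tf.expand_dims(tf.cast(segment_ends, tf.int32), 1)      # [S, 1]
  # segments[s, t] is 1.0 iff starts[s] <= t <= ends[s].
  segments = tf.cast(
      tf.logical_and(timesteps >= starts, timesteps <= ends), tf.float32)
  # indicator[t] is 1.0 iff at least one segment covers timestep t.
  indicator = tf.minimum(tf.reduce_sum(segments, axis=0), 1.0)
  return segments, indicator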