Example #1
import tensorflow as tf
import numpy as np
import os

from datautil.ssd_vgg_preprocessing import preprocess_for_train, preprocess_for_eval
from model import ssd_common
from tfutil import tf_utils

features = {
    'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
    'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
    'image/height': tf.FixedLenFeature([1], tf.int64),
    'image/width': tf.FixedLenFeature([1], tf.int64),
    'image/channels': tf.FixedLenFeature([1], tf.int64),
    'image/shape': tf.FixedLenFeature([3], tf.int64),
    'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
    'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
    'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
    'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
    'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
    'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
    'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64),
}


def get_parser_func(anchors, num_classes, is_training, var_scope):
    '''
    Dataset parser function for training and evaluation

    Arguments:
        preprocess_fn - function that does preprocessing
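The snippet above is cut off right after its docstring opens. Purely as an illustration (the real body is not shown), a standalone sketch of how the `features` map above is typically consumed follows; `example_parser` is a hypothetical name, and the calls into `preprocess_for_train`/`preprocess_for_eval` and `ssd_common` are left out because their signatures are not part of the snippet:

def example_parser(serialized_example, is_training=True):
    """Illustrative only: decode one serialized Example with the feature map above."""
    parsed = tf.parse_single_example(serialized_example, features)
    image = tf.image.decode_jpeg(parsed['image/encoded'], channels=3)
    # VarLenFeature entries come back as SparseTensors; densify the box data
    xmin = tf.sparse_tensor_to_dense(parsed['image/object/bbox/xmin'])
    ymin = tf.sparse_tensor_to_dense(parsed['image/object/bbox/ymin'])
    xmax = tf.sparse_tensor_to_dense(parsed['image/object/bbox/xmax'])
    ymax = tf.sparse_tensor_to_dense(parsed['image/object/bbox/ymax'])
    bboxes = tf.stack([ymin, xmin, ymax, xmax], axis=1)
    labels = tf.sparse_tensor_to_dense(parsed['image/object/bbox/label'])
    # preprocess_for_train / preprocess_for_eval and the anchor encoding in
    # ssd_common would normally be applied to (image, labels, bboxes) here
    return image, labels, bboxes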
Example #2
    def parse_example_proto(
            self, example_serialized):  #TODO(lowres check this mainly)
        # Dense features in Example proto.
        feature_map = {
            'image/encoded':
            tf.VarLenFeature(dtype=tf.string),
            'image/speeds':
            tf.VarLenFeature(dtype=tf.float32),
            'image/class/video_name':
            tf.FixedLenFeature([1], dtype=tf.string, default_value=''),
        }
        if FLAGS.only_seg == 1:
            feature_map.update({
                'image/segmentation':
                tf.VarLenFeature(dtype=tf.string),
                'image/context':
                tf.VarLenFeature(dtype=tf.string)
            })

        if FLAGS.use_speed_yaw:
            feature_map.update({
                'sensor/yaw_imu':
                tf.VarLenFeature(dtype=tf.float32),
                'sensor/speed_steer':
                tf.VarLenFeature(dtype=tf.float32)
            })

        features = tf.parse_single_example(example_serialized, feature_map)

        # if the data is downsampled by a temporal factor, the starting point should be random, such that we could use
        # all the data
        if FLAGS.non_random_temporal_downsample:
            tstart = 0
        else:
            tstart = tf.random_uniform([],
                                       minval=0,
                                       maxval=FLAGS.temporal_downsample_factor,
                                       dtype=tf.int32)
        len_downsampled = FLAGS.FRAMES_IN_SEG // FLAGS.temporal_downsample_factor
        if FLAGS.only_seg == 1:
            seg = features['image/segmentation'].values[:]
            seg.set_shape([len_downsampled])
            ctx = features['image/context'].values[:]
            ctx.set_shape([len_downsampled])

        name = features['image/class/video_name']

        encoded = features['image/encoded'].values[:FLAGS.FRAMES_IN_SEG]
        encoded_sub = encoded[tstart::FLAGS.temporal_downsample_factor]
        encoded_sub.set_shape([len_downsampled])
        if FLAGS.no_image_input:
            # no image input is used, but the previous steps are done because
            # we assume we have a list of empty image inputs
            decoded = tf.zeros([
                len_downsampled,
                FLAGS.IM_HEIGHT // FLAGS.decode_downsample_factor,
                FLAGS.IM_WIDTH // FLAGS.decode_downsample_factor, 3
            ], tf.uint8)
        else:
            decoded = self.decode_jpeg(encoded_sub)
            if FLAGS.only_seg == 1:
                seg_decoded = self.decode_png(seg)
                ctx_decoded = tf.py_func(self.read_array, [ctx],
                                         [tf.float32])[0]
                ctx_decoded.set_shape(
                    [len_downsampled, ctx_channel, ctx_height, ctx_width])

        decoded_raw = decoded
        if FLAGS.resize_images != "":
            # should have format: new_height, new_width
            sp_size = FLAGS.resize_images.split(",")
            assert (len(sp_size) == 2)
            new_size = (int(sp_size[0]), int(sp_size[1]))
            decoded = tf.image.resize_bilinear(decoded, new_size)
            #decoded = tf.image.resize_nearest_neighbor(decoded, new_size)
            decoded = tf.cast(decoded, tf.uint8)

        if FLAGS.crop_car_hood > 0:
            decoded = decoded[:, :-FLAGS.crop_car_hood, :, :]

        speed = features['image/speeds'].values
        speed = tf.reshape(speed, [-1, 2])
        speed = speed[:FLAGS.FRAMES_IN_SEG, :]
        speed = speed[tstart::FLAGS.temporal_downsample_factor, :]
        speed.set_shape([len_downsampled, 2])

        # from speed to stop labels
        stop_label = tf.py_func(
            self.speed_to_future_has_stop,
            [speed, FLAGS.stop_future_frames, FLAGS.speed_limit_as_stop],
            [tf.int32])[0]  #TODO(lowres: length of smoothed time)
        stop_label.set_shape([len_downsampled])

        # Note that the turning heuristic is tuned for 3Hz video and urban area
        # Note also that stop_future_frames is reused for the turn
        turn = tf.py_func(
            self.turn_future_smooth,
            [speed, FLAGS.stop_future_frames, FLAGS.speed_limit_as_stop],
            [tf.float32])[0]  #TODO(lowres)
        turn.set_shape([len_downsampled, self.naction])

        if FLAGS.use_speed_yaw:
            yaw = features['sensor/yaw_imu'].values
            spd = features['sensor/speed_steer'].values
            ys = tf.stack([yaw, spd], axis=1, name="stack_yaw_speed")
            # Now the shape is N*2

            ys = ys[
                tstart:FLAGS.FRAMES_IN_SEG:FLAGS.temporal_downsample_factor, :]
            ys.set_shape([len_downsampled, 2])

            if not FLAGS.use_nan_padding:
                # compute locs from ys
                ys = tf.pad(ys, [[0, FLAGS.stop_future_frames], [0, 0]],
                            mode="SYMMETRIC",
                            name="pad_afterwards")
            else:
                # invalidate the last stop_future_frames entries by setting them to NaN
                nan_const = tf.constant(float('NaN'),
                                        dtype=tf.float32,
                                        shape=(FLAGS.stop_future_frames, 2),
                                        name="NaN_constant")
                ys = tf.concat([ys, nan_const], axis=0, name="nan_pad_afterwards")

            ys = ys[FLAGS.stop_future_frames:, :]
            ys.set_shape([len_downsampled, 2])
            locs = ys
            print("data loader is using raw yaw and speed")
        else:
            # get the relative future location
            # Note that we again abuse the notation a little bit, reusing stop_future_frames
            # TODO: normalize the course and speed by time
            locs = tf.py_func(self.relative_future_course_speed, [
                speed, FLAGS.stop_future_frames,
                FLAGS.frame_rate / FLAGS.temporal_downsample_factor
            ], [tf.float32])[0]
            locs.set_shape([len_downsampled, 2])

        # batching one 10-second segment into several smaller segments
        batching_inputs = [decoded, speed, stop_label, turn, locs]
        if FLAGS.only_seg == 1:
            batching_inputs += [seg_decoded, ctx_decoded]
            decoded_raw_loc = 7
        else:
            decoded_raw_loc = 5
        batching_inputs += [decoded_raw]
        batched = [self.batching(x, len_downsampled) for x in batching_inputs]

        name = tf.tile(name, [batched[0].get_shape()[0].value])

        ins = batched[0:2] + [name]
        outs = batched[2:5]
        if FLAGS.city_data:
            # city_batch is the number of batches each video sequence forms
            FLAGS.city_batch = len_downsampled // FLAGS.n_sub_frame

            # here we want to read in the cityscape data and downsample in the loop
            city_im_queue, city_seg_queue = self.queue_cityscape(
                FLAGS.city_image_list, FLAGS.city_label_list)

            global city_pointer
            city_pointer = 0
            read_n = city_frames * FLAGS.city_batch
            city_im, city_seg = tf.py_func(
                self.read_cityscape, [city_im_queue, city_seg_queue, read_n],
                [tf.float32, tf.int32])

            city_im = tf.reshape(city_im, [
                FLAGS.city_batch, city_frames, FLAGS.IM_HEIGHT, FLAGS.IM_WIDTH,
                city_im_channel
            ])
            city_seg = tf.reshape(city_seg, [
                FLAGS.city_batch, city_frames, FLAGS.IM_HEIGHT, FLAGS.IM_WIDTH,
                city_seg_channel
            ])

            if FLAGS.resize_images != "":
                # should have format: new_height, new_width
                sp_size = FLAGS.resize_images.split(",")
                assert (len(sp_size) == 2)
                new_size = (int(sp_size[0]), int(sp_size[1]))
                city_im = tf.reshape(city_im, [
                    FLAGS.city_batch * city_frames, FLAGS.IM_HEIGHT,
                    FLAGS.IM_WIDTH, city_im_channel
                ])
                city_seg = tf.reshape(city_seg, [
                    FLAGS.city_batch * city_frames, FLAGS.IM_HEIGHT,
                    FLAGS.IM_WIDTH, city_seg_channel
                ])
                city_im = tf.image.resize_bilinear(city_im, new_size)
                city_seg = tf.image.resize_nearest_neighbor(city_seg, new_size)
                city_im = tf.reshape(city_im, [
                    FLAGS.city_batch, city_frames, new_size[0], new_size[1],
                    city_im_channel
                ])
                city_seg = tf.reshape(city_seg, [
                    FLAGS.city_batch, city_frames, new_size[0], new_size[1],
                    city_seg_channel
                ])
            ins += [city_im]
            outs += [city_seg]
        if FLAGS.only_seg == 1:
            ins = ins + batched[5:7]
            outs = outs

        # adding the raw images
        ins += batched[decoded_raw_loc:(decoded_raw_loc + 1)]

        if FLAGS.action_mapping_loss:
            assert not FLAGS.city_data
            assert not FLAGS.only_seg
            outs += [ins[-2]]

        # dropout non-stop videos
        if FLAGS.balance_drop_prob > 0:
            retained = tf.py_func(self.no_stop_dropout_valid,
                                  [outs[0], FLAGS.balance_drop_prob],
                                  [tf.bool])[0]
            retained.set_shape([outs[0].get_shape()[0].value])

            select = lambda tensors, valid: [
                util.bool_select(x, valid) for x in tensors
            ]
            ins = select(ins, retained)
            outs = select(outs, retained)
        return ins, outs
Example #3
def get_split(split_name, dataset_dir, file_pattern, reader, split_to_sizes,
              items_to_descriptions, num_classes):
    """Gets a dataset tuple with instructions for reading Pascal VOC dataset.

    Args:
      split_name: A train/test split name.
      dataset_dir: The base directory of the dataset sources.
      file_pattern: The file pattern to use when matching the dataset sources.
        It is assumed that the pattern contains a '%s' string so that the split
        name can be inserted.
      reader: The TensorFlow reader type.

    Returns:
      A `Dataset` namedtuple.

    Raises:
        ValueError: if `split_name` is not a valid train/test split.
    """
    if split_name not in split_to_sizes:
        raise ValueError('split name %s was not recognized.' % split_name)
    file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader
    # Features in Pascal VOC TFRecords.
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string,
                                           default_value='jpeg'),
        'image/height': tf.FixedLenFeature([1], tf.int64),
        'image/width': tf.FixedLenFeature([1], tf.int64),
        'image/channels': tf.FixedLenFeature([1], tf.int64),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64),
    }
    items_to_handlers = {
        'image':
        slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'shape':
        slim.tfexample_decoder.Tensor('image/shape'),
        'object/bbox':
        slim.tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                           'image/object/bbox/'),
        'object/label':
        slim.tfexample_decoder.Tensor('image/object/bbox/label'),
        'object/difficult':
        slim.tfexample_decoder.Tensor('image/object/bbox/difficult'),
        'object/truncated':
        slim.tfexample_decoder.Tensor('image/object/bbox/truncated'),
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    labels_to_names = None
    if dataset_utils.has_labels(dataset_dir):
        labels_to_names = dataset_utils.read_label_file(dataset_dir)

    return slim.dataset.Dataset(data_sources=file_pattern,
                                reader=reader,
                                decoder=decoder,
                                num_samples=split_to_sizes[split_name],
                                items_to_descriptions=items_to_descriptions,
                                num_classes=num_classes,
                                labels_to_names=labels_to_names)
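The returned slim `Dataset` is usually consumed through a `DatasetDataProvider`. A short usage sketch follows; the queue sizes and reader count are placeholder values, not taken from the snippet:

provider = slim.dataset_data_provider.DatasetDataProvider(
    dataset,                      # the Dataset returned by get_split(...)
    num_readers=4,
    shuffle=True,
    common_queue_capacity=512,
    common_queue_min=128)
image, shape, bboxes, labels = provider.get(
    ['image', 'shape', 'object/bbox', 'object/label'])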
Example #4
def verify_bboxes(tfrecords):

    filename_queue = tf.train.string_input_producer(
        tfrecords,
        num_epochs=1
    )

    # Construct a Reader to read examples from the .tfrecords file
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    features = tf.parse_single_example(
        serialized_example,
        features={
            'image/id' : tf.FixedLenFeature([], tf.string),
            'image/height' : tf.FixedLenFeature([], tf.int64),
            'image/width' : tf.FixedLenFeature([], tf.int64),
            'image/object/bbox/xmin' : tf.VarLenFeature(dtype=tf.float32),
            'image/object/bbox/ymin' : tf.VarLenFeature(dtype=tf.float32),
            'image/object/bbox/xmax' : tf.VarLenFeature(dtype=tf.float32),
            'image/object/bbox/ymax' : tf.VarLenFeature(dtype=tf.float32),
            'image/object/count' : tf.FixedLenFeature([], tf.int64)
        }
    )

    image_height = tf.cast(features['image/height'], tf.float32)
    image_width = tf.cast(features['image/width'], tf.float32)

    image_id = features['image/id']

    xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0)
    ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0)
    xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0)
    ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0)

    num_bboxes = tf.cast(features['image/object/count'], tf.int32)

    bboxes = tf.concat(axis=0, values=[xmin, ymin, xmax, ymax])
    bboxes = tf.transpose(bboxes, [1, 0])

    fetches = [image_id, image_height, image_width, bboxes, num_bboxes]

    image_count = 0
    bbox_widths = []
    bbox_heights = []
    images_with_small_bboxes = set()
    images_with_reversed_coords = set()
    images_with_bbox_count_mismatch = set()

    coord = tf.train.Coordinator()
    with tf.Session() as sess:

        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:

            while not coord.should_stop():

                outputs = sess.run(fetches)

                img_id = outputs[0]
                img_h = outputs[1]
                img_w = outputs[2]
                img_bboxes = outputs[3]
                img_num_bboxes = outputs[4]

                if img_bboxes.shape[0] != img_num_bboxes:
                    images_with_bbox_count_mismatch.add(img_id)

                for img_bbox in img_bboxes:
                    x1, y1, x2, y2 = img_bbox

                    # Reversed coordinates?
                    if x1 > x2:
                        images_with_reversed_coords.add(img_id)
                        t = x1
                        x1 = x2
                        x2 = t
                    if y1 > y2:
                        images_with_reversed_coords.add(img_id)
                        t = y1
                        y1 = y2
                        y2 = t

                    w = (x2 - x1) * img_w
                    h = (y2 - y1) * img_h

                    # Too small of an area?
                    if w * h < 10:
                        images_with_small_bboxes.add(img_id)

                    bbox_widths.append(w)
                    bbox_heights.append(h)

                image_count += 1


        except tf.errors.OutOfRangeError:
            pass
        finally:
            # shut down the queue runner threads cleanly
            coord.request_stop()
            coord.join(threads)

    # Basic info
    print("Found %d images" % (image_count,))
    print()
    print("Found %d images with small bboxes" % (len(images_with_small_bboxes),))
    #print("Images with areas < 10:")
    #for img_id in images_with_small_bboxes:
    #    print(img_id)
    print()
    print("Found %d images with reversed coordinates" %
          (len(images_with_reversed_coords),))
    #print("Images with reversed coordinates:")
    #for img_id in images_with_reversed_coords:
    #    print(img_id)
    print()
    print("Found %d images with bbox count mismatches" %
          (len(images_with_bbox_count_mismatch),))
    #for img_id in images_with_bbox_count_mismatch:
    #    print(img_id)
    print()

    bbox_widths = np.round(np.array(bbox_widths)).astype(int)
    bbox_heights = np.round(np.array(bbox_heights)).astype(int)

    print("Mean width: %0.4f" % (np.mean(bbox_widths),))
    print("Median width: %d" % (np.median(bbox_widths),))
    print("Max width: %d" % (np.max(bbox_widths),))
    print("Min width: %d" % (np.min(bbox_widths),))
    print()
    print("Mean height: %0.4f" % (np.mean(bbox_heights),))
    print("Median height: %d" % (np.median(bbox_heights),))
    print("Max height: %d" % (np.max(bbox_heights),))
    print("Min height: %d" % (np.min(bbox_heights),))
Example #5
        def parser(record):
            # preprocess "inp_perm" and "tgt_perm"
            def _process_perm_feature(example, prefix):
                for b in range(len(bin_sizes)):
                    cnt = example.pop("{}_cnt_{}".format(prefix, b))[0]
                    tup = example.pop("{}_tup_{}".format(prefix, b))

                    tup = tf.reshape(tf.sparse_tensor_to_dense(tup),
                                     shape=[cnt, 2])

                    # tf.float32
                    perm = tf.sparse_to_dense(
                        sparse_indices=tup,
                        output_shape=[tgt_len, bin_sizes[b]],
                        sparse_values=1.0,
                        default_value=0.0)

                    example["{}_perm_{}".format(prefix, b)] = perm

            # on TPU every feature must have a fixed length; otherwise the last
            # batch is allowed to have a potentially shorter length
            if use_tpu:
                record_spec = {
                    "inputs": tf.FixedLenFeature([tgt_len], tf.int64),
                    "labels": tf.FixedLenFeature([tgt_len], tf.int64),
                }
            else:
                record_spec = {
                    "inputs": tf.VarLenFeature(tf.int64),
                    "labels": tf.VarLenFeature(tf.int64),
                }

            # permutation related features
            if bin_sizes and use_tpu:
                # tf.float32
                record_spec["inp_mask"] = tf.FixedLenFeature([tgt_len],
                                                             tf.float32)
                record_spec["tgt_mask"] = tf.FixedLenFeature([tgt_len],
                                                             tf.float32)

                record_spec["head_labels"] = tf.FixedLenFeature([tgt_len],
                                                                tf.int64)

                for b in range(len(bin_sizes)):
                    record_spec["inp_cnt_{}".format(b)] = tf.FixedLenFeature(
                        [1], tf.int64)
                    record_spec["inp_tup_{}".format(b)] = tf.VarLenFeature(
                        tf.int64)
                    record_spec["tgt_cnt_{}".format(b)] = tf.FixedLenFeature(
                        [1], tf.int64)
                    record_spec["tgt_tup_{}".format(b)] = tf.VarLenFeature(
                        tf.int64)

            # retrieve serialized example
            example = tf.parse_single_example(serialized=record,
                                              features=record_spec)

            # transform permutation tuples to permutation matrices
            if bin_sizes and use_tpu:
                _process_perm_feature(example, "inp")
                _process_perm_feature(example, "tgt")

            # cast int64 into int32
            # cast sparse to dense
            for key in list(example.keys()):
                val = example[key]
                if tf.keras.backend.is_sparse(val):
                    val = tf.sparse.to_dense(val)
                if val.dtype == tf.int64:
                    val = tf.to_int32(val)
                example[key] = val

            if use_tpu:
                return example
            else:
                return example["inputs"], example["labels"]
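How `parser` gets wired into an input pipeline is not shown above. A minimal sketch under the assumption that the TFRecord paths live in `file_paths` and that a standard `tf.data` pipeline is used (`batch_size` is likewise an assumed variable):

dataset = tf.data.TFRecordDataset(file_paths)
dataset = dataset.map(parser, num_parallel_calls=4)
# TPU runs need fixed batch shapes, so drop the remainder there
dataset = dataset.batch(batch_size, drop_remainder=use_tpu)
dataset = dataset.prefetch(1)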
Example #6
    def prepare_serialized_examples(self,
                                    serialized_example,
                                    max_quantized_value=2,
                                    min_quantized_value=-2):
        """Parse single serialized SequenceExample from the TFRecords."""

        # Read/parse frame/segment-level labels.
        context_features = {
            "id": tf.FixedLenFeature([], tf.string),
        }
        if self.segment_labels:
            context_features.update({
                # There is no need to read the end time given we always assume the
                # segments have the same size.
                "segment_labels":
                tf.VarLenFeature(tf.int64),
                "segment_start_times":
                tf.VarLenFeature(tf.int64),
                "segment_scores":
                tf.VarLenFeature(tf.float32)
            })
        else:
            context_features.update({"labels": tf.VarLenFeature(tf.int64)})
        sequence_features = {
            feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string)
            for feature_name in self.feature_names
        }
        contexts, features = tf.parse_single_sequence_example(
            serialized_example,
            context_features=context_features,
            sequence_features=sequence_features)

        # loads (potentially) different types of features and concatenates them
        num_features = len(self.feature_names)
        assert num_features > 0, "No feature selected: feature_names is empty!"

        assert len(self.feature_names) == len(self.feature_sizes), (
            "length of feature_names (={}) != length of feature_sizes (={})".
            format(len(self.feature_names), len(self.feature_sizes)))

        num_frames = -1  # the number of frames in the video
        feature_matrices = [None] * num_features  # an array of different features
        for feature_index in range(num_features):
            feature_matrix, num_frames_in_this_feature = self.get_video_matrix(
                features[self.feature_names[feature_index]],
                self.feature_sizes[feature_index], self.max_frames,
                max_quantized_value, min_quantized_value)
            if num_frames == -1:
                num_frames = num_frames_in_this_feature

            feature_matrices[feature_index] = feature_matrix

        # cap the number of frames at self.max_frames
        num_frames = tf.minimum(num_frames, self.max_frames)

        # concatenate different features
        video_matrix = tf.concat(feature_matrices, 1)

        # Partition frame-level feature matrix to segment-level feature matrix.
        if self.segment_labels:
            start_times = contexts["segment_start_times"].values
            # Here we assume all the segments that start at the same start time have
            # the same segment_size.
            uniq_start_times, seg_idxs = tf.unique(start_times,
                                                   out_idx=tf.dtypes.int64)
            # TODO(zhengxu): Ensure the segment_sizes are all same.
            segment_size = self.segment_size
            # Range gather matrix, e.g., [[0,1,2],[1,2,3]] for segment_size == 3.
            range_mtx = tf.expand_dims(
                uniq_start_times, axis=-1) + tf.expand_dims(
                    tf.range(0, segment_size, dtype=tf.int64), axis=0)
            # Shape: [num_segment, segment_size, feature_dim].
            batch_video_matrix = tf.gather_nd(
                video_matrix, tf.expand_dims(range_mtx, axis=-1))
            num_segment = tf.shape(batch_video_matrix)[0]
            batch_video_ids = tf.reshape(
                tf.tile([contexts["id"]], [num_segment]), (num_segment, ))
            batch_frames = tf.reshape(tf.tile([segment_size], [num_segment]),
                                      (num_segment, ))

            # For segment labels, not all labels are exhaustively rated, so we only
            # evaluate the rated labels.

            # Label indices for each segment, shape: [num_segment, 2].
            label_indices = tf.stack(
                [seg_idxs, contexts["segment_labels"].values], axis=-1)
            label_values = contexts["segment_scores"].values
            sparse_labels = tf.sparse.SparseTensor(
                label_indices, label_values, (num_segment, self.num_classes))
            batch_labels = tf.sparse.to_dense(sparse_labels,
                                              validate_indices=False)

            sparse_label_weights = tf.sparse.SparseTensor(
                label_indices, tf.ones_like(label_values, dtype=tf.float32),
                (num_segment, self.num_classes))
            batch_label_weights = tf.sparse.to_dense(sparse_label_weights,
                                                     validate_indices=False)
        else:
            # Process video-level labels.
            label_indices = contexts["labels"].values
            sparse_labels = tf.sparse.SparseTensor(
                tf.expand_dims(label_indices, axis=-1),
                tf.ones_like(contexts["labels"].values, dtype=tf.bool),
                (self.num_classes, ))
            labels = tf.sparse.to_dense(sparse_labels,
                                        default_value=False,
                                        validate_indices=False)
            # convert to batch format.
            batch_video_ids = tf.expand_dims(contexts["id"], 0)
            batch_video_matrix = tf.expand_dims(video_matrix, 0)
            batch_labels = tf.expand_dims(labels, 0)
            batch_frames = tf.expand_dims(num_frames, 0)
            batch_label_weights = None

        output_dict = {
            "video_ids": batch_video_ids,
            "video_matrix": batch_video_matrix,
            "labels": batch_labels,
            "num_frames": batch_frames,
        }
        if batch_label_weights is not None:
            output_dict["label_weights"] = batch_label_weights

        return output_dict
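The range-gather step above is the least obvious part of the segment path. A tiny self-contained NumPy sketch (made-up start times and a toy feature matrix) shows the equivalent indexing:

import numpy as np

video_matrix = np.arange(10 * 4).reshape(10, 4)   # 10 frames, feature_dim = 4
uniq_start_times = np.array([0, 5])               # two segments
segment_size = 3

# [[0, 1, 2], [5, 6, 7]] -- the frame indices belonging to each segment
range_mtx = uniq_start_times[:, None] + np.arange(segment_size)[None, :]

# Shape [num_segment, segment_size, feature_dim], matching tf.gather_nd above.
batch_video_matrix = video_matrix[range_mtx]
print(batch_video_matrix.shape)  # (2, 3, 4)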
Example #7
File: fsns.py  Project: miglopst/models
def get_split(split_name, dataset_dir=None, config=None):
    """Returns a dataset tuple for FSNS dataset.

  Args:
    split_name: A train/test split name.
    dataset_dir: The base directory of the dataset sources, by default it uses
      a predefined CNS path (see DEFAULT_DATASET_DIR).
    config: A dictionary with dataset configuration. If None - will use the
      DEFAULT_CONFIG.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/test split.
  """
    if not dataset_dir:
        dataset_dir = DEFAULT_DATASET_DIR

    if not config:
        config = DEFAULT_CONFIG

    if split_name not in config['splits']:
        raise ValueError('split name %s was not recognized.' % split_name)

    logging.info('Using %s dataset split_name=%s dataset_dir=%s',
                 config['name'], split_name, dataset_dir)

    # Ignores the 'image/height' feature.
    zero = tf.zeros([1], dtype=tf.int64)
    keys_to_features = {
        'image/encoded':
        tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
        tf.FixedLenFeature((), tf.string, default_value='png'),
        'image/width':
        tf.FixedLenFeature([1], tf.int64, default_value=zero),
        'image/orig_width':
        tf.FixedLenFeature([1], tf.int64, default_value=zero),
        'image/class':
        tf.FixedLenFeature([config['max_sequence_length']], tf.int64),
        'image/unpadded_class':
        tf.VarLenFeature(tf.int64),
        'image/text':
        tf.FixedLenFeature([1], tf.string, default_value=''),
    }
    items_to_handlers = {
        'image':
        slim.tfexample_decoder.Image(shape=config['image_shape'],
                                     image_key='image/encoded',
                                     format_key='image/format'),
        'label':
        slim.tfexample_decoder.Tensor(tensor_key='image/class'),
        'text':
        slim.tfexample_decoder.Tensor(tensor_key='image/text'),
        'num_of_views':
        _NumOfViewsHandler(width_key='image/width',
                           original_width_key='image/orig_width',
                           num_of_views=config['num_of_views'])
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)
    charset_file = os.path.join(dataset_dir, config['charset_filename'])
    charset = read_charset(charset_file)
    file_pattern = os.path.join(dataset_dir,
                                config['splits'][split_name]['pattern'])
    return slim.dataset.Dataset(
        data_sources=file_pattern,
        reader=tf.TFRecordReader,
        decoder=decoder,
        num_samples=config['splits'][split_name]['size'],
        items_to_descriptions=config['items_to_descriptions'],
        #  additional parameters for convenience.
        charset=charset,
        num_char_classes=len(charset),
        num_of_views=config['num_of_views'],
        max_sequence_length=config['max_sequence_length'],
        null_code=config['null_code'])
Example #8
    def _parse_function(example):
        # parsing
        context_feature_info = {
            'cameras': tf.VarLenFeature(dtype=tf.string),
            'gazemaps': tf.VarLenFeature(dtype=tf.string),
            'video_id': tf.FixedLenFeature(shape=[], dtype=tf.int64)
        }
        sequence_feature_info = {
            'feature_maps':
            tf.FixedLenSequenceFeature(shape=[], dtype=tf.string),
            'gaze_ps':
            tf.FixedLenSequenceFeature(shape=[], dtype=tf.string),
            'predicted_time_points':
            tf.FixedLenSequenceFeature(shape=[], dtype=tf.int64)
        }
        context_features, sequence_features = tf.parse_single_sequence_example(
            example,
            context_features=context_feature_info,
            sequence_features=sequence_feature_info)

        cameras = tf.sparse_tensor_to_dense(context_features["cameras"],
                                            default_value='')
        gazemaps = tf.sparse_tensor_to_dense(context_features["gazemaps"],
                                             default_value='')
        video_id = context_features['video_id']

        feature_maps = tf.reshape(
            tf.decode_raw(sequence_features["feature_maps"], tf.float32), [
                -1,
            ] + args.feature_map_size + [args.feature_map_channels])
        predicted_time_points = sequence_features["predicted_time_points"]

        if include_labels:
            labels = tf.reshape(
                tf.decode_raw(sequence_features["gaze_ps"], tf.float32),
                [-1, args.gazemap_size[0] * args.gazemap_size[1]])

        if n_steps is not None:
            #select a subsequence
            length = tf.shape(cameras)[0]

            offset = tf.random_uniform(shape=[],
                                       minval=0,
                                       maxval=tf.maximum(
                                           length - n_steps + 1, 1),
                                       dtype=tf.int32)
            end = tf.minimum(offset + n_steps, length)
            cameras = cameras[offset:end]
            feature_maps = feature_maps[offset:end]
            gazemaps = gazemaps[offset:end]
            predicted_time_points = predicted_time_points[offset:end]
            if include_labels:
                labels = labels[offset:end]

        # decode jpg's
        cameras = tf.map_fn(tf.image.decode_jpeg,
                            cameras,
                            dtype=tf.uint8,
                            back_prop=False)
        gazemaps = tf.map_fn(tf.image.decode_jpeg,
                             gazemaps,
                             dtype=tf.uint8,
                             back_prop=False)

        # return features and labels
        features = {}
        features['cameras'] = cameras
        features['feature_maps'] = feature_maps
        features['gazemaps'] = gazemaps
        features['video_id'] = video_id
        features['predicted_time_points'] = predicted_time_points

        if include_labels:
            return features, labels
        else:
            return features
Example #9
    def __init__(self, dtype='uint8'):
        """Constructor sets keys_to_features and items_to_handlers.

        Args:
          dtype: data type of the decoded image ('uint8', 'uint16' or 'float32').
        """

        if dtype == 'float32':
            self._dtype = tf.float32
        elif dtype == 'uint16':
            self._dtype = tf.uint16
        else:
            self._dtype = tf.uint8

        self.keys_to_features = {
            'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
            'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
            'image/filename': tf.FixedLenFeature((), tf.string, default_value=''),
            'image/key/sha256': tf.FixedLenFeature((), tf.string, default_value=''),
            'image/source_id': tf.FixedLenFeature((), tf.string, default_value=''),
            'image/sensor': tf.FixedLenFeature((), tf.string, default_value=''),
            'image/height': tf.FixedLenFeature((), tf.int64, 1),
            'image/width': tf.FixedLenFeature((), tf.int64, 1),
            'image/gsd': tf.FixedLenFeature((), tf.float32, 1),

            # Object boxes.
            'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
            'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
            'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
            'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),

            # Object rotated boxes.
            'image/object/rbbox/cy': tf.VarLenFeature(tf.float32),
            'image/object/rbbox/cx': tf.VarLenFeature(tf.float32),
            'image/object/rbbox/h': tf.VarLenFeature(tf.float32),
            'image/object/rbbox/w': tf.VarLenFeature(tf.float32),
            'image/object/rbbox/ang': tf.VarLenFeature(tf.float32),

            # Object classes.
            'image/object/class/label': tf.VarLenFeature(tf.int64),
            'image/object/area': tf.VarLenFeature(tf.float32),
            'image/object/is_crowd': tf.VarLenFeature(tf.int64),
            'image/object/difficult': tf.VarLenFeature(tf.int64),

            # Instance masks and classes.
            'image/segmentation/object': tf.VarLenFeature(tf.int64),
            'image/segmentation/object/class': tf.VarLenFeature(tf.int64)
        }
        self.items_to_handlers = {
            fields.InputDataFields.image: Image(dtype=self._dtype),
            fields.InputDataFields.source_id: (
                slim_example_decoder.Tensor('image/source_id')),
            fields.InputDataFields.sensor: (
                slim_example_decoder.Tensor('image/sensor')),
            fields.InputDataFields.key: (
                slim_example_decoder.Tensor('image/key/sha256')),
            fields.InputDataFields.filename: (
                slim_example_decoder.Tensor('image/filename')),
            fields.InputDataFields.gsd: (
                slim_example_decoder.Tensor('image/gsd')),
            # Object boxes.
            fields.InputDataFields.groundtruth_boxes: (
                slim_example_decoder.BoundingBox(
                    ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/')),
            # Object rotated boxes.
            fields.InputDataFields.groundtruth_rboxes: (
                RotatedBoundingBox(
                    ['cy', 'cx', 'h', 'w', 'ang'], 'image/object/rbbox/')),
            # Object classes.
            fields.InputDataFields.groundtruth_classes: (
                slim_example_decoder.Tensor('image/object/class/label')),
            fields.InputDataFields.groundtruth_area: slim_example_decoder.Tensor(
                'image/object/area'),
            fields.InputDataFields.groundtruth_is_crowd: (
                slim_example_decoder.Tensor('image/object/is_crowd')),
            fields.InputDataFields.groundtruth_difficult: (
                slim_example_decoder.Tensor('image/object/difficult')),
            # Instance masks and classes.
            fields.InputDataFields.groundtruth_instance_masks: (
                slim_example_decoder.ItemHandlerCallback(
                    ['image/segmentation/object', 'image/height', 'image/width'],
                    self._reshape_instance_masks)),
            fields.InputDataFields.groundtruth_instance_classes: (
                slim_example_decoder.Tensor('image/segmentation/object/class')),
        }
Example #10
def distorted_inputs(data_dir, batch_size):

    file_dir = os.path.join(data_dir, 'train.tfrecord')
    num_examples_per_epoch = NUM_EXAMPLES_FOR_TRAIN
    if not tf.gfile.Exists(file_dir):
        raise ValueError('Failed to find file %s' % file_dir)
    with tf.name_scope('input'):

        file_queue = tf.train.string_input_producer([file_dir], num_epochs=50)
        reader = tf.TFRecordReader()
        _, serialized = reader.read(file_queue)
        feature = tf.parse_single_example(serialized=serialized,
                                          features={
                                              'image/shape':
                                              tf.FixedLenFeature([3],
                                                                 tf.int64),
                                              'label':
                                              tf.FixedLenFeature([], tf.int64),
                                              'image_raw':
                                              tf.FixedLenFeature([],
                                                                 tf.string),
                                              'object/name':
                                              tf.VarLenFeature(tf.string),
                                              'object/truncated':
                                              tf.VarLenFeature(tf.int64),
                                              'object/difficult':
                                              tf.VarLenFeature(tf.int64),
                                              'object/xmin':
                                              tf.VarLenFeature(tf.float32),
                                              'object/ymin':
                                              tf.VarLenFeature(tf.float32),
                                              'object/xmax':
                                              tf.VarLenFeature(tf.float32),
                                              'object/ymax':
                                              tf.VarLenFeature(tf.float32)
                                          })
        shape, label = feature['image/shape'], feature['label']
        image_raw = feature['image_raw']

        # process label
        # change 42 classes to 2 classes
        #label = 0 if label == [33] else 1
        label = tf.cast(label, tf.int32)

        # process image
        image = tf.image.decode_jpeg(image_raw)
        float_image = tf.cast(image, tf.float32)
        float_image = tf.reshape(float_image, [224, 224, 3])

        # data augmentation

        # Randomly crop a [height, width] section of the image.
        #distorted_image = tf.random_crop(image, shape)
        resized_image = tf.image.resize_images(float_image, (224, 224))

        # Randomly flip the image horizontally.
        distorted_image = tf.image.random_flip_left_right(resized_image)

        # Because these operations are not commutative, consider randomizing
        # the order of their operations.
        # NOTE: since per_image_standardization zeros the mean and makes
        # the stddev unit, this likely has no effect; see tensorflow#1458.
        distorted_image = tf.image.random_brightness(distorted_image,
                                                     max_delta=63)
        distorted_image = tf.image.random_contrast(distorted_image,
                                                   lower=0.2,
                                                   upper=1.8)

        standard_image = tf.image.per_image_standardization(distorted_image)

        #standard_image.setshape(shape)
        #label.setshape([1])

        min_fraction_of_examples_in_queue = 0.4
        min_queue_examples = int(num_examples_per_epoch *
                                 min_fraction_of_examples_in_queue)

    return _generate_image_and_label_batch(standard_image,
                                           label,
                                           min_queue_examples,
                                           batch_size,
                                           shuffle=True)
Example #11
def inputs(is_train, data_dir, batch_size):
    if is_train:
        file_dir = os.path.join(data_dir, 'train.tfrecord')
        num_examples_per_epoch = NUM_EXAMPLES_FOR_TRAIN

    else:
        file_dir = os.path.join(data_dir, 'test.tfrecord')
        num_examples_per_epoch = NUM_EXAMPLES_FOR_EVAL

    if not tf.gfile.Exists(file_dir):
        raise ValueError('Failed to find file %s' % file_dir)
    with tf.name_scope('input'):

        file_queue = tf.train.string_input_producer([file_dir], num_epochs=50)
        reader = tf.TFRecordReader()
        _, serialized = reader.read(file_queue)
        feature = tf.parse_single_example(serialized=serialized,
                                          features={
                                              'image/shape':
                                              tf.FixedLenFeature([3],
                                                                 tf.int64),
                                              'label':
                                              tf.FixedLenFeature([], tf.int64),
                                              'image_raw':
                                              tf.FixedLenFeature([],
                                                                 tf.string),
                                              'object/name':
                                              tf.VarLenFeature(tf.string),
                                              'object/truncated':
                                              tf.VarLenFeature(tf.int64),
                                              'object/difficult':
                                              tf.VarLenFeature(tf.int64),
                                              'object/xmin':
                                              tf.VarLenFeature(tf.float32),
                                              'object/ymin':
                                              tf.VarLenFeature(tf.float32),
                                              'object/xmax':
                                              tf.VarLenFeature(tf.float32),
                                              'object/ymax':
                                              tf.VarLenFeature(tf.float32)
                                          })
        shape, label = feature['image/shape'], feature['label']
        image_raw = feature['image_raw']

        # process label
        #change 42 classes to 2 classes
        #label = 0 if label==33 else 1
        label = tf.cast(label, tf.int32)

        # process image
        image = tf.image.decode_jpeg(image_raw)
        float_image = tf.cast(image, tf.float32)
        float_image = tf.reshape(float_image, [1920, 2560, 3])

        #reshape_image = tf.reshape(image,[1920,2560,3])
        #tf.decode_raw() decodes binary data into a 1-D list, which then needs tf.reshape() to become a 3-D image
        #tf.image.decode_jpeg() decodes the binary data and directly returns the decoded JPEG image

        resized_image = tf.image.resize_images(float_image, [224, 224])
        standard_image = tf.image.per_image_standardization(resized_image)
        standard_image = tf.reshape(standard_image, [224, 224, 3])

        #standard_image.setshape(shape)
        #label.setshape([1])

        min_fraction_of_examples_in_queue = 0.2
        min_queue_examples = int(num_examples_per_epoch *
                                 min_fraction_of_examples_in_queue)

    return _generate_image_and_label_batch(standard_image,
                                           label,
                                           min_queue_examples,
                                           batch_size,
                                           shuffle=True)
Example #12
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
    """Gets a dataset tuple with instructions for reading ImageNet.

  Args:
    split_name: A train/test split name.
    dataset_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/test split.
  """
    assert FLAGS.num_classes == 0 or FLAGS.num_classes == DEFAULT_NUM_CLASSES
    num_classes = FLAGS.num_classes or DEFAULT_NUM_CLASSES

    _SPLITS_TO_SIZES = {
        'train': FLAGS.train_size or _DEFAULT_TRAIN_SIZE,
        'validation': FLAGS.validation_size or _DEFAULT_VALIDATION_SIZE,
    }
    if split_name not in _SPLITS_TO_SIZES:
        raise ValueError('split name %s was not recognized.' % split_name)

    if not file_pattern:
        file_pattern = _FILE_PATTERN
    file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader

    keys_to_features = {
        'image/encoded':
        tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
        tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/class/label':
        tf.VarLenFeature(dtype=tf.int64),
        'image/class/text':
        tf.FixedLenFeature([], dtype=tf.string, default_value=''),
        'image/filename':
        tf.FixedLenFeature([], dtype=tf.string, default_value=''),
    }

    output_name = 'target' if FLAGS.dataset_use_target else 'source'
    items_to_handlers = {
        output_name:
        slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'conditional_labels':
        dataset_utils.OneHotLabelTensor(
            'image/class/text',
            tags_id_lookup_file=FLAGS.tags_id_lookup_file,
            num_classes=num_classes,
            tags_key_column_index=FLAGS.tags_key_column_index,
            tags_value_column_index=FLAGS.tags_value_column_index),
        'label_text':
        slim.tfexample_decoder.Tensor('image/class/text'),
        'filename':
        slim.tfexample_decoder.Tensor('image/filename'),
    }
    items_used = [output_name, 'conditional_labels', 'filename', 'label_text']
    items_need_preprocessing = [
        output_name,
        'conditional_labels',
    ]

    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    return slim.dataset.Dataset(
        data_sources=file_pattern,
        reader=reader,
        decoder=decoder,
        num_samples=_SPLITS_TO_SIZES[split_name],
        items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
        items_used=items_used,
        items_need_preprocessing=items_need_preprocessing,
        num_classes=num_classes,
        has_source=True)
Example #13
def get_split(split_name, dataset_dir, file_pattern, num_samples, reader=None):
    dataset_dir = util.io.get_absolute_path(dataset_dir)

    if util.str.contains(file_pattern, '%'):
        file_pattern = util.io.join_path(dataset_dir,
                                         file_pattern % split_name)
    else:
        file_pattern = util.io.join_path(dataset_dir, file_pattern)
    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string,
                                           default_value='jpeg'),
        'image/filename': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x1': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x2': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x3': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x4': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y1': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y2': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y3': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y4': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
    }
    items_to_handlers = {
        'image':
        slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'shape':
        slim.tfexample_decoder.Tensor('image/shape'),
        'filename':
        slim.tfexample_decoder.Tensor('image/filename'),
        'object/bbox':
        slim.tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                           'image/object/bbox/'),
        'object/oriented_bbox/x1':
        slim.tfexample_decoder.Tensor('image/object/bbox/x1'),
        'object/oriented_bbox/x2':
        slim.tfexample_decoder.Tensor('image/object/bbox/x2'),
        'object/oriented_bbox/x3':
        slim.tfexample_decoder.Tensor('image/object/bbox/x3'),
        'object/oriented_bbox/x4':
        slim.tfexample_decoder.Tensor('image/object/bbox/x4'),
        'object/oriented_bbox/y1':
        slim.tfexample_decoder.Tensor('image/object/bbox/y1'),
        'object/oriented_bbox/y2':
        slim.tfexample_decoder.Tensor('image/object/bbox/y2'),
        'object/oriented_bbox/y3':
        slim.tfexample_decoder.Tensor('image/object/bbox/y3'),
        'object/oriented_bbox/y4':
        slim.tfexample_decoder.Tensor('image/object/bbox/y4'),
        'object/label':
        slim.tfexample_decoder.Tensor('image/object/bbox/label')
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    labels_to_names = {0: 'background', 1: 'text'}
    items_to_descriptions = {
        'image': 'A color image of varying height and width.',
        'shape': 'Shape of the image',
        'object/bbox': 'A list of bounding boxes, one per each object.',
        'object/label': 'A list of labels, one per each object.',
    }

    return slim.dataset.Dataset(data_sources=file_pattern,
                                reader=reader,
                                decoder=decoder,
                                num_samples=num_samples,
                                items_to_descriptions=items_to_descriptions,
                                num_classes=2,
                                labels_to_names=labels_to_names)
Example #14
from tensorflow_transform.tf_metadata import dataset_schema as sch

test_feature_spec = {
    # FixedLenFeatures
    'fixed_categorical_int_with_range':
    tf.FixedLenFeature(shape=[], dtype=tf.int64),
    'fixed_int':
    tf.FixedLenFeature(shape=[5], dtype=tf.int64),
    'fixed_float':
    tf.FixedLenFeature(shape=[5], dtype=tf.float32),
    'fixed_string':
    tf.FixedLenFeature(shape=[5], dtype=tf.string),

    # VarLenFeatures
    'var_int':
    tf.VarLenFeature(dtype=tf.int64),
    'var_float':
    tf.VarLenFeature(dtype=tf.float32),
    'var_string':
    tf.VarLenFeature(dtype=tf.string),
}


def get_test_schema():
    return sch.from_feature_spec(test_feature_spec)


def get_manually_created_schema():
    """Provide a test schema built from scratch using the Schema classes."""
    return sch.Schema({
        # FixedLenFeatures
Example #15
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
    """Gets a dataset tuple with instructions for reading ImageNet.

    Args:
        split_name: A train/test split name.
        dataset_dir: The base directory of the dataset sources.
        file_pattern: The file pattern to use when matching the dataset sources.
            It is assumed that the pattern contains a '%s' string so that the split
            name can be inserted.
        reader: The TensorFlow reader type.

    Returns:
        A `Dataset` namedtuple.

    Raises:
        ValueError: if `split_name` is not a valid train/test split.
    """
    if split_name not in _SPLITS_TO_SIZES:
        raise ValueError('split name %s was not recognized.' % split_name)

    if not file_pattern:
        file_pattern = _FILE_PATTERN
    file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader

    keys_to_features = {
        'image/encoded':
        tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
        tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/class/label':
        tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
        'image/class/text':
        tf.FixedLenFeature([], dtype=tf.string, default_value=''),
        'image/object/bbox/xmin':
        tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin':
        tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax':
        tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax':
        tf.VarLenFeature(dtype=tf.float32),
        'image/object/class/label':
        tf.VarLenFeature(dtype=tf.int64),
    }

    items_to_handlers = {
        'image':
        slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'label':
        slim.tfexample_decoder.Tensor('image/class/label'),
        'label_text':
        slim.tfexample_decoder.Tensor('image/class/text'),
        'object/bbox':
        slim.tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                           'image/object/bbox/'),
        'object/label':
        slim.tfexample_decoder.Tensor('image/object/class/label'),
    }

    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    labels_to_names = None
    if dataset_utils.has_labels(dataset_dir):
        labels_to_names = dataset_utils.read_label_file(dataset_dir)
    else:
        labels_to_names = create_readable_names_for_imagenet_labels()
        dataset_utils.write_label_file(labels_to_names, dataset_dir)

    return slim.dataset.Dataset(data_sources=file_pattern,
                                reader=reader,
                                decoder=decoder,
                                num_samples=_SPLITS_TO_SIZES[split_name],
                                items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
                                num_classes=_NUM_CLASSES,
                                labels_to_names=labels_to_names)
def get_next_batch():
    """
    这块就是从tfrecord文件中读取已经保存的数据;
    """
    # 获取指定目录下的所有tfrecord文件
    # #加上r让字符串不转义
    tfrecords = glob.glob(f'{hp.TRAIN_DATASET_PATH}/*.tfrecord')
    # print("line23: tfrecords = "+str(tfrecords))
    """
    tf.train.string_input_producer(
        string_tensor,
        num_epochs=None,  # NUM_EPOCHS = 150;从string_tensor中产生 num_epochs 次字符串;如果未指定,则可以无限次循环遍历字符串
        shuffle=True,     # shuffle:布尔值。如果为true,则在每个epoch内随机打乱顺序
        seed=None,
        capacity=32,
        shared_name=None,
        name=None,
        cancel_op=None )
    输出字符串到一个输入管道队列
    :从TFRecords文件中读取数据, 首先需要用tf.train.string_input_producer()生成一个解析队列。
    之后调用 tf.TFRecordReader 的 tf.parse_single_example 解析器
    https://blog.csdn.net/tefuirnever/article/details/90271862
    """
    # Enqueue the filename strings into an input pipeline queue
    filename_queue = tf.train.string_input_producer(tfrecords,
                                                    shuffle=True,
                                                    num_epochs=hp.NUM_EPOCHS)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(
        filename_queue)  # the reader first pulls from the filename queue and returns a serialized_example
    # tf.parse_single_example then parses the Example protocol buffer into tensors.
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'ori_spkid':
                                           tf.FixedLenFeature(shape=(1, ),
                                                              dtype=tf.int64),
                                           'ori_mel':
                                           tf.VarLenFeature(dtype=tf.float32),
                                           'ori_mel_shape':
                                           tf.FixedLenFeature(shape=(2, ),
                                                              dtype=tf.int64),
                                           'aim_spkid':
                                           tf.FixedLenFeature(shape=(1, ),
                                                              dtype=tf.int64),
                                           'aim_mel':
                                           tf.VarLenFeature(dtype=tf.float32),
                                           'aim_mel_shape':
                                           tf.FixedLenFeature(shape=(2, ),
                                                              dtype=tf.int64),
                                       })
    # tf.sparse_tensor_to_dense converts a SparseTensor to a dense tensor (i.e. fills in the default value for missing entries).
    features['ori_mel'] = tf.sparse_tensor_to_dense(features['ori_mel'])
    features['aim_mel'] = tf.sparse_tensor_to_dense(features['aim_mel'])
    ori_spk = features['ori_spkid']
    ori_mel = tf.reshape(features['ori_mel'], features['ori_mel_shape'])
    aim_spk = features['aim_spkid']
    aim_mel = tf.reshape(features['aim_mel'], features['aim_mel_shape'])
    # self.CODED_DIM = 60  # compressed to 60 dimensions

    ori_mel = tf.reshape(ori_mel, [-1, hp.CODED_DIM])
    aim_mel = tf.reshape(aim_mel, [-1, hp.CODED_DIM])  # 80-dim mel
    ori_spk_batch, ori_mel_batch, aim_spk_batch, aim_mel_batch = tf.train.batch(
        [ori_spk, ori_mel, aim_spk, aim_mel],
        batch_size=hp.BATCH_SIZE,
        capacity=100,
        num_threads=10,
        dynamic_pad=True,
        allow_smaller_final_batch=False)
    """
    是说在这里,get_next_batch()函数,返回之前,就可以做 pad 操作吗?
    """
    # tf.shape(ori_mel_batch)[1]
    max_frame = tf.maximum(
        tf.shape(ori_mel_batch)[1],
        tf.shape(aim_mel_batch)[1])  # the larger of the two frame counts
    gap_frame = max_frame - tf.minimum(
        tf.shape(ori_mel_batch)[1],
        tf.shape(aim_mel_batch)[1])  # difference between the two frame counts

    # print(tf.math.subtract(max_frame, tf.shape(aim_mel_batch)[1]))
    padded = tf.zeros([
        tf.shape(aim_mel_batch)[0],
        tf.subtract(max_frame,
                    tf.shape(aim_mel_batch)[1]),
        tf.shape(aim_mel_batch)[2]
    ],
                      dtype=tf.float32)
    # a = padded
    aim_mel_batch = tf.concat((aim_mel_batch, padded), axis=1)
    # concated_1 = aim_mel_batch
    padded = tf.zeros([
        tf.shape(ori_mel_batch)[0],
        tf.subtract(max_frame,
                    tf.shape(ori_mel_batch)[1]),
        tf.shape(ori_mel_batch)[2]
    ],
                      dtype=tf.float32)
    # b = padded
    # padded = tf.zeros_like([1, tf.math.subtract(max_frame, tf.shape(ori_mel_batch)[1]), 1], dtype=tf.float32)
    ori_mel_batch = tf.concat((ori_mel_batch, padded), axis=1)

    # concated_2 = ori_mel_batch

    # padded = tf.zeros_like([0, gap_frame, 0])
    # aim_mel_batch = tf.concat((aim_mel_batch, padded), axis=1)

    # aim_mel_batch = tf.pad(aim_mel_batch, [[0, 0], [0, tf.math.subtract(max_frame, tf.shape(aim_mel_batch)[1])], [0, 0]], "CONSTANT")
    # ori_mel_batch = tf.pad(ori_mel_batch, [[0, 0], [0, tf.math.subtract(max_frame, tf.shape(ori_mel_batch)[1])], [0, 0]], "CONSTANT")

    # return ori_spk_batch, ori_mel_batch, aim_spk_batch, aim_mel_batch, a, b,concated_1,concated_2,max_frame
    return ori_spk_batch, ori_mel_batch, aim_spk_batch, aim_mel_batch
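# A usage sketch (not part of the original snippet): the batches returned by
# get_next_batch() are produced by TF queue runners, so a Coordinator and the
# queue-runner threads must be started before they can be evaluated.
def run_one_batch():
    ori_spk, ori_mel, aim_spk, aim_mel = get_next_batch()
    with tf.Session() as sess:
        sess.run([tf.global_variables_initializer(),
                  tf.local_variables_initializer()])
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            # Each mel batch has shape [hp.BATCH_SIZE, max_frame, hp.CODED_DIM].
            ori_mel_val, aim_mel_val = sess.run([ori_mel, aim_mel])
        finally:
            coord.request_stop()
            coord.join(threads)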
def main():
  # Change these for different models
  FEATURE_SIZE = 124
  LABEL_SIZE = 2
  TRAIN_TFRECORDS_FILE = "data/a8a_train.libsvm.tfrecords"
  VALIDATE_TFRECORDS_FILE = "data/a8a_test.libsvm.tfrecords"

  learning_rate = FLAGS.learning_rate
  epoch_number = FLAGS.epoch_number
  thread_number = FLAGS.thread_number
  batch_size = FLAGS.batch_size
  validate_batch_size = FLAGS.validate_batch_size
  min_after_dequeue = FLAGS.min_after_dequeue
  capacity = thread_number * batch_size + min_after_dequeue
  mode = FLAGS.mode
  checkpoint_dir = FLAGS.checkpoint_dir
  if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)
  tensorboard_dir = FLAGS.tensorboard_dir
  if not os.path.exists(tensorboard_dir):
    os.makedirs(tensorboard_dir)

  def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    return serialized_example

  # Read TFRecords files for training
  filename_queue = tf.train.string_input_producer(
      tf.train.match_filenames_once(TRAIN_TFRECORDS_FILE),
      num_epochs=epoch_number)
  serialized_example = read_and_decode(filename_queue)
  batch_serialized_example = tf.train.shuffle_batch(
      [serialized_example],
      batch_size=batch_size,
      num_threads=thread_number,
      capacity=capacity,
      min_after_dequeue=min_after_dequeue)
  features = tf.parse_example(batch_serialized_example,
                              features={
                                  "label": tf.FixedLenFeature(
                                      [], tf.float32),
                                  "ids": tf.VarLenFeature(tf.int64),
                                  "values": tf.VarLenFeature(tf.float32),
                              })
  batch_labels = features["label"]
  batch_ids = features["ids"]
  batch_values = features["values"]

  # Read TFRecords file for validation
  validate_filename_queue = tf.train.string_input_producer(
      tf.train.match_filenames_once(VALIDATE_TFRECORDS_FILE),
      num_epochs=epoch_number)
  validate_serialized_example = read_and_decode(validate_filename_queue)
  validate_batch_serialized_example = tf.train.shuffle_batch(
      [validate_serialized_example],
      batch_size=validate_batch_size,
      num_threads=thread_number,
      capacity=capacity,
      min_after_dequeue=min_after_dequeue)
  validate_features = tf.parse_example(
      validate_batch_serialized_example,
      features={
          "label": tf.FixedLenFeature(
              [], tf.float32),
          "ids": tf.VarLenFeature(tf.int64),
          "values": tf.VarLenFeature(tf.float32),
      })
  validate_batch_labels = validate_features["label"]
  validate_batch_ids = validate_features["ids"]
  validate_batch_values = validate_features["values"]

  # Define the model
  input_units = FEATURE_SIZE
  hidden1_units = 128
  hidden2_units = 32
  hidden3_units = 8
  output_units = LABEL_SIZE

  def full_connect(inputs, weights_shape, biases_shape, is_train=True):
    with tf.device('/cpu:0'):
      weights = tf.get_variable("weights",
                                weights_shape,
                                initializer=tf.random_normal_initializer())
      biases = tf.get_variable("biases",
                               biases_shape,
                               initializer=tf.random_normal_initializer())
      layer = tf.matmul(inputs, weights) + biases

      if FLAGS.enable_bn and is_train:
        mean, var = tf.nn.moments(layer, axes=[0])
        scale = tf.get_variable("scale",
                                biases_shape,
                                initializer=tf.random_normal_initializer())
        shift = tf.get_variable("shift",
                                biases_shape,
                                initializer=tf.random_normal_initializer())
        layer = tf.nn.batch_normalization(layer, mean, var, shift, scale,
                                          FLAGS.bn_epsilon)
    return layer

  def sparse_full_connect(sparse_ids,
                          sparse_values,
                          weights_shape,
                          biases_shape,
                          is_train=True):
    with tf.device('/cpu:0'):
      weights = tf.get_variable("weights",
                                weights_shape,
                                initializer=tf.random_normal_initializer())
      biases = tf.get_variable("biases",
                               biases_shape,
                               initializer=tf.random_normal_initializer())
    return tf.nn.embedding_lookup_sparse(weights,
                                         sparse_ids,
                                         sparse_values,
                                         combiner="sum") + biases

  def full_connect_relu(inputs, weights_shape, biases_shape, is_train=True):
    return tf.nn.relu(full_connect(inputs, weights_shape, biases_shape,
                                   is_train))

  def dnn_inference(sparse_ids, sparse_values, is_train=True):
    with tf.variable_scope("layer1"):
      sparse_layer = sparse_full_connect(sparse_ids, sparse_values,
                                         [input_units, hidden1_units],
                                         [hidden1_units], is_train)
      layer = tf.nn.relu(sparse_layer)
    with tf.variable_scope("layer2"):
      layer = full_connect_relu(layer, [hidden1_units, hidden2_units],
                                [hidden2_units], is_train)
    with tf.variable_scope("layer3"):
      layer = full_connect_relu(layer, [hidden2_units, hidden3_units],
                                [hidden3_units], is_train)
    if FLAGS.enable_dropout and is_train:
      layer = tf.nn.dropout(layer, FLAGS.dropout_keep_prob)
    with tf.variable_scope("output"):
      layer = full_connect(layer, [hidden3_units, output_units],
                           [output_units], is_train)
    return layer

  def lr_inference(sparse_ids, sparse_values, is_train=True):
    with tf.variable_scope("logistic_regression"):
      layer = sparse_full_connect(sparse_ids, sparse_values,
                                  [input_units, output_units], [output_units])
    return layer

  def wide_and_deep_inference(sparse_ids, sparse_values, is_train=True):
    return lr_inference(sparse_ids, sparse_values, is_train) + dnn_inference(
        sparse_ids, sparse_values, is_train)

  def inference(sparse_ids, sparse_values, is_train=True):
    print("Use the model: {}".format(FLAGS.model))
    if FLAGS.model == "lr":
      return lr_inference(sparse_ids, sparse_values, is_train)
    elif FLAGS.model == "dnn":
      return dnn_inference(sparse_ids, sparse_values, is_train)
    elif FLAGS.model == "wide_and_deep":
      return wide_and_deep_inference(sparse_ids, sparse_values, is_train)
    else:
      print("Unknown model, exit now")
      exit(1)

  logits = inference(batch_ids, batch_values, True)
  batch_labels = tf.to_int64(batch_labels)
  cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits,
                                                                 batch_labels)
  loss = tf.reduce_mean(cross_entropy, name='loss')

  print("Use the optimizer: {}".format(FLAGS.optimizer))
  if FLAGS.optimizer == "sgd":
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
  elif FLAGS.optimizer == "momentum":
    # optimizer = tf.train.MomentumOptimizer(learning_rate)
    print("Not support optimizer: {} yet, exit now".format(FLAGS.optimizer))
    exit(1)
  elif FLAGS.optimizer == "adadelta":
    optimizer = tf.train.AdadeltaOptimizer(learning_rate)
  elif FLAGS.optimizer == "adagrad":
    optimizer = tf.train.AdagradOptimizer(learning_rate)
  elif FLAGS.optimizer == "adam":
    optimizer = tf.train.AdamOptimizer(learning_rate)
  elif FLAGS.optimizer == "ftrl":
    optimizer = tf.train.FtrlOptimizer(learning_rate)
  elif FLAGS.optimizer == "rmsprop":
    optimizer = tf.train.RMSPropOptimizer(learning_rate)
  else:
    print("Unknow optimizer: {}, exit now".format(FLAGS.optimizer))
    exit(1)

  with tf.device('/cpu:0'):
    global_step = tf.Variable(0, name='global_step', trainable=False)
  train_op = optimizer.minimize(loss, global_step=global_step)

  tf.get_variable_scope().reuse_variables()

  # Define accuracy op for train data
  train_accuracy_logits = inference(batch_ids, batch_values, False)
  train_softmax = tf.nn.softmax(train_accuracy_logits)
  train_correct_prediction = tf.equal(
      tf.argmax(train_softmax, 1), batch_labels)
  train_accuracy = tf.reduce_mean(tf.cast(train_correct_prediction,
                                          tf.float32))

  # Define auc op for train data
  batch_labels = tf.cast(batch_labels, tf.int32)
  sparse_labels = tf.reshape(batch_labels, [-1, 1])
  derived_size = tf.shape(batch_labels)[0]
  indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1])
  concated = tf.concat(1, [indices, sparse_labels])
  outshape = tf.pack([derived_size, LABEL_SIZE])
  new_train_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0)
  _, train_auc = tf.contrib.metrics.streaming_auc(train_softmax,
                                                  new_train_batch_labels)

  # Define accuracy op for validate data
  validate_accuracy_logits = inference(validate_batch_ids,
                                       validate_batch_values, False)
  validate_softmax = tf.nn.softmax(validate_accuracy_logits)
  validate_batch_labels = tf.to_int64(validate_batch_labels)
  validate_correct_prediction = tf.equal(
      tf.argmax(validate_softmax, 1), validate_batch_labels)
  validate_accuracy = tf.reduce_mean(tf.cast(validate_correct_prediction,
                                             tf.float32))

  # Define auc op for validate data
  validate_batch_labels = tf.cast(validate_batch_labels, tf.int32)
  sparse_labels = tf.reshape(validate_batch_labels, [-1, 1])
  derived_size = tf.shape(validate_batch_labels)[0]
  indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1])
  concated = tf.concat(1, [indices, sparse_labels])
  outshape = tf.pack([derived_size, LABEL_SIZE])
  new_validate_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0)
  _, validate_auc = tf.contrib.metrics.streaming_auc(validate_softmax,
                                                     new_validate_batch_labels)

  # Define inference op
  sparse_index = tf.placeholder(tf.int64, [None, 2])
  sparse_ids = tf.placeholder(tf.int64, [None])
  sparse_values = tf.placeholder(tf.float32, [None])
  sparse_shape = tf.placeholder(tf.int64, [2])
  inference_ids = tf.SparseTensor(sparse_index, sparse_ids, sparse_shape)
  inference_values = tf.SparseTensor(sparse_index, sparse_values, sparse_shape)
  inference_logits = inference(inference_ids, inference_values, False)
  inference_softmax = tf.nn.softmax(inference_logits)
  inference_op = tf.argmax(inference_softmax, 1)

  # Initialize saver and summary
  checkpoint_file = checkpoint_dir + "/checkpoint.ckpt"
  steps_to_validate = FLAGS.steps_to_validate
  tf.scalar_summary("loss", loss)
  tf.scalar_summary("train_accuracy", train_accuracy)
  tf.scalar_summary("train_auc", train_auc)
  tf.scalar_summary("validate_accuracy", validate_accuracy)
  tf.scalar_summary("validate_auc", validate_auc)
  saver = tf.train.Saver()
  keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1])
  keys = tf.identity(keys_placeholder)

  # Create session to run
  with tf.Session() as sess:
    summary_op = tf.merge_all_summaries()
    writer = tf.train.SummaryWriter(tensorboard_dir, sess.graph)
    sess.run(tf.initialize_all_variables())
    sess.run(tf.initialize_local_variables())

    if mode == "train":
      ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
      if ckpt and ckpt.model_checkpoint_path:
        print("Continue training from the model {}".format(
            ckpt.model_checkpoint_path))
        saver.restore(sess, ckpt.model_checkpoint_path)

      # Get coordinator and run queues to read data
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(coord=coord, sess=sess)

      start_time = datetime.datetime.now()
      try:
        while not coord.should_stop():
          _, loss_value, step = sess.run([train_op, loss, global_step])

          if step % steps_to_validate == 0:
            train_accuracy_value, train_auc_value, validate_accuracy_value, auc_value, summary_value = sess.run(
                [train_accuracy, train_auc, validate_accuracy, validate_auc,
                 summary_op])
            end_time = datetime.datetime.now()
            print(
                "[{}] Step: {}, loss: {}, train_acc: {}, train_auc: {}, valid_acc: {}, valid_auc: {}".format(
                    end_time - start_time, step, loss_value,
                    train_accuracy_value, train_auc_value,
                    validate_accuracy_value, auc_value))

            writer.add_summary(summary_value, step)
            saver.save(sess, checkpoint_file, global_step=step)
            start_time = end_time
      except tf.errors.OutOfRangeError:
        print("Done training after reading all data")
        print("Exporting trained model to {}".format(FLAGS.model_path))
        model_exporter = exporter.Exporter(saver)
        model_exporter.init(
            sess.graph.as_graph_def(),
            named_graph_signatures={
                'inputs': exporter.generic_signature({"keys": keys_placeholder,
                                                      "indexs": sparse_index,
                                                      "ids": sparse_ids,
                                                      "values": sparse_values,
                                                      "shape": sparse_shape}),
                'outputs': exporter.generic_signature(
                    {"keys": keys,
                     "softmax": inference_softmax,
                     "prediction": inference_op})
            })
        model_exporter.export(FLAGS.model_path,
                              tf.constant(FLAGS.export_version), sess)
      finally:
        coord.request_stop()

      # Wait for threads to exit
      coord.join(threads)

    elif mode == "export":
      print("Start to export model directly")

      # Load the checkpoint files
      ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
      if ckpt and ckpt.model_checkpoint_path:
        print("Load the model from {}".format(ckpt.model_checkpoint_path))
        saver.restore(sess, ckpt.model_checkpoint_path)
      else:
        print("No checkpoint found, exit now")
        exit(1)

      # Export the model files
      print("Exporting trained model to {}".format(FLAGS.model_path))
      model_exporter = exporter.Exporter(saver)
      model_exporter.init(
          sess.graph.as_graph_def(),
          named_graph_signatures={
              'inputs': exporter.generic_signature({"keys": keys_placeholder,
                                                    "indexs": sparse_index,
                                                    "ids": sparse_ids,
                                                    "values": sparse_values,
                                                    "shape": sparse_shape}),
              'outputs': exporter.generic_signature(
                  {"keys": keys,
                   "softmax": inference_softmax,
                   "prediction": inference_op})
          })
      model_exporter.export(FLAGS.model_path,
                            tf.constant(FLAGS.export_version), sess)

    elif mode == "inference":
      print("Start to run inference")
      start_time = datetime.datetime.now()

      inference_result_file_name = "./inference_result.txt"
      inference_test_file_name = "./data/a8a_test.libsvm"
      labels = []
      feature_ids = []
      feature_values = []
      feature_index = []
      ins_num = 0
      for line in open(inference_test_file_name, "r"):
        tokens = line.split(" ")
        labels.append(int(tokens[0]))

        feature_num = 0
        for feature in tokens[1:]:
          feature_id, feature_value = feature.split(":")
          feature_ids.append(int(feature_id))
          feature_values.append(float(feature_value))
          feature_index.append([ins_num, feature_num])
          feature_num += 1
        ins_num += 1

      ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
      if ckpt and ckpt.model_checkpoint_path:
        print("Use the model {}".format(ckpt.model_checkpoint_path))
        saver.restore(sess, ckpt.model_checkpoint_path)
      else:
        print("No model found, exit now")
        exit(1)

      prediction, prediction_softmax = sess.run(
          [inference_op, inference_softmax],
          feed_dict={sparse_index: feature_index,
                     sparse_ids: feature_ids,
                     sparse_values: feature_values,
                     sparse_shape: [ins_num, FEATURE_SIZE]})

      end_time = datetime.datetime.now()
      print("[{}] Inference result: {}".format(end_time - start_time,
                                               prediction))

      # Compute accuracy
      label_number = len(labels)
      correct_label_number = 0
      for i in range(label_number):
        if labels[i] == prediction[i]:
          correct_label_number += 1
      accuracy = float(correct_label_number) / label_number

      # Compute auc
      expected_labels = np.array(labels)
      predict_labels = prediction_softmax[:, 0]
      fpr, tpr, thresholds = metrics.roc_curve(expected_labels,
                                               predict_labels,
                                               pos_label=0)
      auc = metrics.auc(fpr, tpr)
      print("For inference data, accuracy: {}, auc: {}".format(accuracy, auc))

      # Save inference result into file
      np.savetxt(inference_result_file_name, prediction, delimiter=",")
      print("Save result to file: {}".format(inference_result_file_name))
def simple_fake_sequence_to_prediction(export_path, eval_export_path):
  """Trains and exports a fake_sequence_to_prediction model."""

  input_feature_spec = {
      'values_t1': tf.VarLenFeature(dtype=tf.float32),
      'values_t2': tf.VarLenFeature(dtype=tf.float32),
      'values_t3': tf.VarLenFeature(dtype=tf.float32)
  }
  label_feature_spec = dict(input_feature_spec)
  label_feature_spec['label'] = tf.FixedLenFeature([1], dtype=tf.float32)

  def _make_embedding_and_sparse_values(features):
    """Make "embedding" and "sparse_values" features."""
    embedding_dim = 3
    sparse_dims = 3
    sparse_timesteps = 3

    # Create a three-dimensional "embedding" based on the value of the feature
    # The embedding is simply [1, 1, 1] * feature_value
    # (or [0, 0, 0] if the feature is missing).
    batch_size = tf.cast(tf.shape(features['values_t1'])[0], dtype=tf.int64)

    ones = tf.ones(shape=[embedding_dim])
    dense_t1 = tf.sparse_tensor_to_dense(features['values_t1'])
    dense_t2 = tf.sparse_tensor_to_dense(features['values_t2'])
    dense_t3 = tf.sparse_tensor_to_dense(features['values_t3'])
    embedding_t1 = ones * dense_t1
    embedding_t2 = ones * dense_t2
    embedding_t3 = ones * dense_t3
    embeddings = tf.stack([embedding_t1, embedding_t2, embedding_t3], axis=1)
    features['embedding'] = embeddings
    del features['values_t1']
    del features['values_t2']
    del features['values_t3']

    # Make the "sparse_values" feature.
    sparse_values = tf.squeeze(
        tf.concat(
            [
                dense_t1, dense_t1**2, dense_t1**3, dense_t2, dense_t2**2,
                dense_t2**3, dense_t3, dense_t3**2, dense_t3**3
            ],
            axis=0))
    sparse_total_elems = batch_size * sparse_dims * sparse_timesteps
    seq = tf.range(0, sparse_total_elems, dtype=tf.int64)
    batch_num = seq % batch_size
    timestep = tf.div(seq, batch_size * sparse_dims)
    offset = tf.div(seq, batch_size) % sparse_dims
    sparse_indices = tf.stack([batch_num, timestep, offset], axis=1)
    features['sparse_values'] = tf.SparseTensor(
        indices=sparse_indices,
        values=sparse_values,
        dense_shape=[batch_size, sparse_timesteps, sparse_dims])

  def model_fn(features, labels, mode, params):
    """Model function for custom estimator."""
    del params
    dense_values = tf.sparse_tensor_to_dense(
        features['sparse_values'], validate_indices=False)
    a = tf.Variable(1.0, dtype=tf.float32, name='a')
    b = tf.Variable(2.0, dtype=tf.float32, name='b')
    c = tf.Variable(3.0, dtype=tf.float32, name='c')
    d = tf.Variable(4.0, dtype=tf.float32, name='d')
    e = tf.Variable(5.0, dtype=tf.float32, name='e')
    f = tf.Variable(6.0, dtype=tf.float32, name='f')
    predictions = (
        a * tf.reduce_sum(features['embedding'][:, 0, :], axis=1) +
        b * tf.reduce_sum(features['embedding'][:, 1, :], axis=1) +
        c * tf.reduce_sum(features['embedding'][:, 2, :], axis=1) +
        d * tf.reduce_sum(dense_values[:, 0, :], axis=1) +
        e * tf.reduce_sum(dense_values[:, 1, :], axis=1) +
        f * tf.reduce_sum(dense_values[:, 2, :], axis=1))

    if mode == tf.estimator.ModeKeys.PREDICT:
      return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions={'score': predictions},
          export_outputs={
              'score': tf.estimator.export.RegressionOutput(predictions)
          })

    loss = tf.losses.mean_squared_error(labels,
                                        tf.expand_dims(predictions, axis=-1))

    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001)
    train_op = optimizer.minimize(
        loss=loss, global_step=tf.train.get_global_step())

    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        eval_metric_ops={
            'mean_squared_error':
                tf.metrics.mean_squared_error(
                    labels, tf.expand_dims(predictions, axis=-1)),
            'mean_prediction':
                tf.metrics.mean(predictions),
        },
        predictions=predictions)

  def train_input_fn():
    """Train input function."""

    def make_example_with_label(values_t1=None, values_t2=None, values_t3=None):
      """Make example with label."""
      effective_t1 = 0.0
      effective_t2 = 0.0
      effective_t3 = 0.0
      args = {}
      if values_t1 is not None:
        args['values_t1'] = float(values_t1)
        effective_t1 = values_t1
      if values_t2 is not None:
        args['values_t2'] = float(values_t2)
        effective_t2 = values_t2
      if values_t3 is not None:
        args['values_t3'] = float(values_t3)
        effective_t3 = values_t3
      label = (3 * effective_t1 + 6 * effective_t2 + 9 * effective_t3 +
               4 * (effective_t1 + effective_t1**2 + effective_t1**3) +
               5 * (effective_t2 + effective_t2**2 + effective_t2**3) +
               6 * (effective_t3 + effective_t3**2 + effective_t3**3))
      args['label'] = float(label)
      return util.make_example(**args)

    examples = [
        make_example_with_label(values_t1=1.0),
        make_example_with_label(values_t2=1.0),
        make_example_with_label(values_t3=1.0),
        make_example_with_label(values_t1=2.0, values_t2=3.0),
        make_example_with_label(values_t1=5.0, values_t3=7.0),
        make_example_with_label(values_t2=11.0, values_t3=13.0),
        make_example_with_label(values_t1=2.0, values_t2=3.0, values_t3=5.0),
    ]
    serialized_examples = [x.SerializeToString() for x in examples]
    features = tf.parse_example(serialized_examples, label_feature_spec)
    _make_embedding_and_sparse_values(features)
    label = features.pop('label')
    return features, label

  def serving_input_receiver_fn():
    """Serving input receiver function."""
    serialized_tf_example = tf.placeholder(
        dtype=tf.string, shape=[None], name='input_example_tensor')
    receiver_tensors = {'examples': serialized_tf_example}
    features = tf.parse_example(serialized_tf_example, input_feature_spec)
    _make_embedding_and_sparse_values(features)
    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

  def eval_input_receiver_fn():
    """Eval input receiver function."""
    serialized_tf_example = tf.placeholder(
        dtype=tf.string, shape=[None], name='input_example_tensor')
    receiver_tensors = {'examples': serialized_tf_example}
    features = tf.parse_example(serialized_tf_example, label_feature_spec)
    _make_embedding_and_sparse_values(features)

    return export.EvalInputReceiver(
        features=features,
        receiver_tensors=receiver_tensors,
        labels=features['label'])

  estimator = tf.estimator.Estimator(model_fn=model_fn)
  estimator.train(input_fn=train_input_fn, steps=10)

  export_dir = None
  eval_export_dir = None
  if export_path:
    export_dir = estimator.export_savedmodel(
        export_dir_base=export_path,
        serving_input_receiver_fn=serving_input_receiver_fn)

  if eval_export_path:
    eval_export_dir = export.export_eval_savedmodel(
        estimator=estimator,
        export_dir_base=eval_export_path,
        eval_input_receiver_fn=eval_input_receiver_fn)

  return export_dir, eval_export_dir
Example #19
def parse_function(serialize_string):
    feature_description = {
        'input': tf.VarLenFeature(dtype=tf.int64),
        'output': tf.VarLenFeature(dtype=tf.int64),
    }
    return tf.io.parse_single_example(serialize_string, feature_description)
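# A usage sketch (not part of the original snippet): parse_function is meant to
# be mapped over a TFRecordDataset; the file path below is hypothetical.
def make_dataset(tfrecord_path='data/train.tfrecord'):
    dataset = tf.data.TFRecordDataset([tfrecord_path])
    dataset = dataset.map(parse_function)
    # Each element is a dict of SparseTensors ('input', 'output'); densify and
    # pad so that variable-length sequences can be batched together.
    dataset = dataset.map(lambda d: (tf.sparse.to_dense(d['input']),
                                     tf.sparse.to_dense(d['output'])))
    return dataset.padded_batch(8, padded_shapes=([None], [None]))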
def simple_control_dependency_estimator(export_path, eval_export_path):
    """Exports a simple estimator with control dependencies."""
    def control_dependency_metric(increment, target):
        """Metric that introduces a control dependency on target.

        The value is incremented by increment each time the metric is called
        (so the value can vary depending on how things are batched). This is
        mainly to verify that the metric was called.

        Args:
          increment: Amount to increment the value by each time the metric is
            called.
          target: Tensor to introduce the control dependency on.

        Returns:
          value_op, update_op for the metric.
        """

        total_value = tf.Variable(initial_value=0.0,
                                  dtype=tf.float64,
                                  trainable=False,
                                  collections=[
                                      tf.GraphKeys.METRIC_VARIABLES,
                                      tf.GraphKeys.LOCAL_VARIABLES
                                  ],
                                  validate_shape=True)

        with tf.control_dependencies([target]):
            update_op = tf.assign_add(total_value, increment)
        value_op = tf.identity(total_value)
        return value_op, update_op

    def model_fn(features, labels, mode, params):
        """Model function for custom estimator."""
        del params
        predictions = features['prediction']
        predictions_dict = {
            prediction_keys.PredictionKeys.PREDICTIONS: predictions,
        }

        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions_dict,
                export_outputs={
                    tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                    tf.estimator.export.RegressionOutput(predictions)
                })

        loss = tf.losses.mean_squared_error(predictions,
                                            labels['actual_label'])
        train_op = tf.assign_add(tf.train.get_global_step(), 1)

        eval_metric_ops = {}
        if mode == tf.estimator.ModeKeys.EVAL:
            eval_metric_ops = {
                metric_keys.MetricKeys.LOSS_MEAN:
                tf.metrics.mean(loss),
                'control_dependency_on_fixed_float':
                control_dependency_metric(1.0, features['fixed_float']),
                # Introduce a direct dependency on the values Tensor. If we
                # introduce another intervening op like sparse_tensor_to_dense then
                # regardless of whether TFMA correctly wraps SparseTensors we will not
                # encounter the TF bug.
                'control_dependency_on_var_float':
                control_dependency_metric(10.0, features['var_float'].values),
                'control_dependency_on_actual_label':
                control_dependency_metric(100.0, labels['actual_label']),
                'control_dependency_on_var_int_label':
                control_dependency_metric(1000.0, labels['var_int'].values),
                # Note that TFMA does *not* wrap predictions, so in most cases
                # if there's a control dependency on predictions they will be
                # recomputed.
                'control_dependency_on_prediction':
                control_dependency_metric(10000.0, predictions),
            }

        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          predictions=predictions_dict,
                                          eval_metric_ops=eval_metric_ops)

    def train_input_fn():
        """Train input function."""
        return {
            'prediction': tf.constant([[1.0], [2.0], [3.0], [4.0]]),
        }, {
            'actual_label': tf.constant([[1.0], [2.0], [3.0], [4.0]])
        }

    feature_spec = {'prediction': tf.FixedLenFeature([1], dtype=tf.float32)}
    eval_feature_spec = {
        'prediction': tf.FixedLenFeature([1], dtype=tf.float32),
        'label': tf.FixedLenFeature([1], dtype=tf.float32),
        'fixed_float': tf.FixedLenFeature([1], dtype=tf.float32),
        'fixed_string': tf.FixedLenFeature([1], dtype=tf.string),
        'fixed_int': tf.FixedLenFeature([1], dtype=tf.int64),
        'var_float': tf.VarLenFeature(dtype=tf.float32),
        'var_string': tf.VarLenFeature(dtype=tf.string),
        'var_int': tf.VarLenFeature(dtype=tf.int64),
    }

    estimator = tf.estimator.Estimator(model_fn=model_fn)
    estimator.train(input_fn=train_input_fn, steps=1)

    def eval_input_receiver_fn():
        """An input_fn that expects a serialized tf.Example."""
        serialized_tf_example = tf.placeholder(dtype=tf.string,
                                               shape=[None],
                                               name='input_example_tensor')
        features = tf.parse_example(serialized_tf_example, eval_feature_spec)
        labels = {
            'actual_label': features['label'],
            'var_int': features['var_int']
        }
        return export.EvalInputReceiver(
            features=features,
            labels=labels,
            receiver_tensors={'examples': serialized_tf_example})

    return util.export_model_and_eval_model(
        estimator=estimator,
        serving_input_receiver_fn=(
            tf.estimator.export.build_parsing_serving_input_receiver_fn(
                feature_spec)),
        eval_input_receiver_fn=eval_input_receiver_fn,
        export_path=export_path,
        eval_export_path=eval_export_path)
Example #21
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
    """Gets a dataset tuple with instructions
    Args:
      split_name: A train/test split name.
      dataset_dir: The base directory of the dataset sources.
      file_pattern: The file pattern to use when matching the dataset sources.
        It is assumed that the pattern contains a '%s' string so that the split
        name can be inserted.
      reader: The TensorFlow reader type.
    Returns:
      A `Dataset` namedtuple.
    Raises:
      ValueError: if `split_name` is not a valid train/test split.
    """
    if split_name not in SPLITS_TO_SIZES:
        raise ValueError('split name %s was not recognized.' % split_name)

    if not file_pattern:
        file_pattern = FILE_PATTERN
    file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader
#     # filename format
#     if file_pattern is None:
#         file_pattern = _get_output_filename('tfrecords','voc_2007_train')  # adjust to your filename
#     print(file_pattern)

    # Adapter 1: deserialize the Example back into the format it was stored in; handled by TF.
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpg'),
        'image/height': tf.FixedLenFeature([1], tf.int64),
        'image/width': tf.FixedLenFeature([1], tf.int64),
        'image/channels': tf.FixedLenFeature([1], tf.int64),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64),
    }

    # Adapter 2: assemble the deserialized data into higher-level items; handled by slim.
    items_to_handlers = {
        'image':
        slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'shape':
        slim.tfexample_decoder.Tensor('image/shape'),
        'object/bbox':
        slim.tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                           'image/object/bbox/'),
        'object/label':
        slim.tfexample_decoder.Tensor('image/object/bbox/label'),
        'object/difficult':
        slim.tfexample_decoder.Tensor('image/object/bbox/difficult'),
        'object/truncated':
        slim.tfexample_decoder.Tensor('image/object/bbox/truncated'),
    }
    # Decoder
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    # The Dataset object records metadata such as the dataset's file locations and how to decode them.
    dataset = slim.dataset.Dataset(
        data_sources=file_pattern,
        reader=reader,
        num_samples=SPLITS_TO_SIZES['test'],  # three files were generated manually, each containing a single example
        decoder=decoder,
        items_to_descriptions=ITEMS_TO_DESCRIPTIONS,
        num_classes=NUM_CLASSES)
    return dataset
def read_labels(data_pattern, cache_path=""):
    """Read labels from TFRecords.

  Args:
    data_pattern: the data pattern to the TFRecords.
    cache_path: the cache path for the label file.

  Returns:
    a Labels object.
  """
    if cache_path:
        if tf.gfile.Exists(cache_path):
            tf.logging.info("Reading cached labels from %s..." % cache_path)
            return Labels.from_file(cache_path)
    tf.enable_eager_execution()

    if 'validate' in data_pattern:
        with tf.name_scope("eval_input"):
            # randomly chosen 60 validate files
            # note that validate file names are different on gcloud and locally, due to `curl` download command
            results = []
            for i in range(3844):
                results.append(str(i).zfill(4))
            random.seed(7)
            random.shuffle(results)
            validate_file_nums = results[:300]

            validate_file_list_60 = [data_pattern.split('*')[0]\
                                     + x +'.tfrecord' for x in validate_file_nums]
            data_paths = validate_file_list_60
    else:
        data_paths = tf.gfile.Glob(data_pattern)

    ds = tf.data.TFRecordDataset(data_paths, num_parallel_reads=50)
    context_features = {
        "id": tf.FixedLenFeature([], tf.string),
        "segment_labels": tf.VarLenFeature(tf.int64),
        "segment_start_times": tf.VarLenFeature(tf.int64),
        "segment_scores": tf.VarLenFeature(tf.float32)
    }

    def _parse_se_func(sequence_example):
        return tf.parse_single_sequence_example(
            sequence_example, context_features=context_features)

    ds = ds.map(_parse_se_func)
    rated_labels = {}
    tf.logging.info("Reading labels from TFRecords...")
    last_batch = 0
    batch_size = 5000
    for cxt_feature_val, _ in ds:
        video_id = cxt_feature_val["id"].numpy()
        segment_labels = cxt_feature_val["segment_labels"].values.numpy()
        segment_start_times = cxt_feature_val[
            "segment_start_times"].values.numpy()
        segment_scores = cxt_feature_val["segment_scores"].values.numpy()
        for label, start_time, score in zip(segment_labels,
                                            segment_start_times,
                                            segment_scores):
            rated_labels[("%s:%d" % (video_id.decode("utf-8"), start_time),
                          label)] = score
        batch_id = len(rated_labels) // batch_size
        if batch_id != last_batch:
            tf.logging.info("%d examples processed.", len(rated_labels))
            last_batch = batch_id
    tf.logging.info("Finish reading labels from TFRecords...")
    labels_obj = Labels(rated_labels)
    if cache_path:
        tf.logging.info("Caching labels to %s..." % cache_path)
        labels_obj.to_file(cache_path)
    return labels_obj
Example #23
def get_datasets(data_dir, file_pattern='*.tfrecord'):
    file_patterns = os.path.join(data_dir, file_pattern)
    print('file_path: {}'.format(file_patterns))
    file_path_list = glob.glob(file_patterns)
    # num_samples = 0      # only for icdar 2015 dataset
    # num_samples = 288688 # only for ppt datasets
    # num_samples = 858750 # only for synth datasets
    num_samples = 0  # counted from the tfrecord files below

    for file_path in file_path_list:
        for _ in tf.python_io.tf_record_iterator(file_path):
            num_samples += 1
    print('num_samples:', num_samples)

    reader = tf.TFRecordReader

    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string,
                                           default_value='jpeg'),
        'image/filename': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x1': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x2': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x3': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x4': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y1': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y2': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y3': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y4': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ignored': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
    }

    items_to_handlers = {
        'image':
        slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'shape':
        slim.tfexample_decoder.Tensor('image/shape'),
        'filename':
        slim.tfexample_decoder.Tensor('image/filename'),
        'object/bbox':
        slim.tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                           'image/object/bbox/'),
        'object/oriented_bbox/x1':
        slim.tfexample_decoder.Tensor('image/object/bbox/x1'),
        'object/oriented_bbox/x2':
        slim.tfexample_decoder.Tensor('image/object/bbox/x2'),
        'object/oriented_bbox/x3':
        slim.tfexample_decoder.Tensor('image/object/bbox/x3'),
        'object/oriented_bbox/x4':
        slim.tfexample_decoder.Tensor('image/object/bbox/x4'),
        'object/oriented_bbox/y1':
        slim.tfexample_decoder.Tensor('image/object/bbox/y1'),
        'object/oriented_bbox/y2':
        slim.tfexample_decoder.Tensor('image/object/bbox/y2'),
        'object/oriented_bbox/y3':
        slim.tfexample_decoder.Tensor('image/object/bbox/y3'),
        'object/oriented_bbox/y4':
        slim.tfexample_decoder.Tensor('image/object/bbox/y4'),
        'object/label':
        slim.tfexample_decoder.Tensor('image/object/bbox/label'),
        'object/ignored':
        slim.tfexample_decoder.Tensor('image/object/bbox/ignored')
    }

    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    labels_to_names = {0: 'background', 1: 'text'}

    return slim.dataset.Dataset(data_sources=file_patterns,
                                reader=reader,
                                decoder=decoder,
                                num_samples=num_samples,
                                items_to_descriptions=ITEMS_TO_DESCRIPTIONS,
                                num_classes=NUM_CLASSES,
                                labels_to_names=labels_to_names)
Example #24
    def _parse_function(self, sequence_example_proto):
        """Parse a SequenceExample in the AutoDL/TensorFlow format.

        Args:
          sequence_example_proto: a SequenceExample with "x_dense_input" or sparse
              input representation.
        Returns:
          An array of tensors. For first edition of AutoDl challenge, returns a
              pair `(features, labels)` where `features` is a Tensor of shape
                [sequence_size, row_count, col_count, num_channels]
              and `labels` a Tensor of shape
                [output_dim, ]
        """
        sequence_features = {}
        for i in range(self.metadata_.get_bundle_size()):
            if self.metadata_.is_sparse(i):
                sequence_features[self._feature_key(
                    i, "sparse_col_index")] = tf.VarLenFeature(tf.int64)
                sequence_features[self._feature_key(
                    i, "sparse_row_index")] = tf.VarLenFeature(tf.int64)
                sequence_features[self._feature_key(
                    i, "sparse_value")] = tf.VarLenFeature(tf.float32)
            elif self.metadata_.is_compressed(i):
                sequence_features[self._feature_key(
                    i, "compressed")] = tf.VarLenFeature(tf.string)
            else:
                sequence_features[self._feature_key(
                    i, "dense_input")] = tf.FixedLenSequenceFeature(
                    self.metadata_.get_tensor_size(i), dtype=tf.float32)
        print('sequence_features')
        print(sequence_features)
        contexts, features = tf.parse_single_sequence_example(
            sequence_example_proto,
            context_features={
                "label_index": tf.VarLenFeature(tf.int64),
                "label_score": tf.VarLenFeature(tf.float32)
            },
            sequence_features=sequence_features)
        print('features')
        print(features)
        sample = []
        for i in range(self.metadata_.get_bundle_size()):
            key_dense = self._feature_key(i, "dense_input")
            row_count, col_count = self.metadata_.get_matrix_size(i)
            num_channels = self.metadata_.get_num_channels(i)
            sequence_size = self.metadata_.get_sequence_size()
            fixed_matrix_size = row_count > 0 and col_count > 0
            row_count = row_count if row_count > 0 else None
            col_count = col_count if col_count > 0 else None
            if key_dense in features:
                f = features[key_dense]
                if not fixed_matrix_size:
                    raise ValueError("To parse dense data, the tensor shape should " +
                                     "be known but got {} instead..." \
                                     .format((sequence_size, row_count, col_count)))
                f = tf.reshape(f, [sequence_size, row_count, col_count, num_channels])
                sample.append(f)

            sequence_size = sequence_size if sequence_size > 0 else None
            key_compressed = self._feature_key(i, "compressed")
            if key_compressed in features:
                compressed_images = features[key_compressed].values
                decompress_image_func = \
                    lambda x: dataset_utils.decompress_image(x, num_channels=num_channels)
                # `images` here is a 4D-tensor of shape [T, H, W, C], some of which
                # might be unknown
                images = tf.map_fn(
                    decompress_image_func,
                    compressed_images, dtype=tf.float32)
                images.set_shape([sequence_size, row_count, col_count, num_channels])
                sample.append(images)

            key_sparse_val = self._feature_key(i, "sparse_value")
            if key_sparse_val in features:
                key_sparse_col = self._feature_key(i, "sparse_col_index")
                key_sparse_row = self._feature_key(i, "sparse_row_index")
                sparse_col = features[key_sparse_col].values
                sparse_row = features[key_sparse_row].values
                sparse_val = features[key_sparse_val]
                indices = sparse_val.indices
                indices = tf.concat([
                    tf.reshape(indices[:, 0], [-1, 1]),
                    tf.reshape(sparse_row, [-1, 1]),
                    tf.reshape(sparse_col, [-1, 1])
                ], 1)
                sparse_tensor = tf.sparse_reorder(
                    tf.SparseTensor(
                        indices, sparse_val.values,
                        [sequence_size, row_count, col_count]))
                # TODO: see how we can keep sparse tensors instead of
                # returning dense ones.
                tensor = tf.sparse_tensor_to_dense(sparse_tensor)
                tensor = tf.reshape(tensor,
                                    [sequence_size, row_count, col_count, 1])
                sample.append(tensor)

        labels = tf.sparse_to_dense(
            contexts["label_index"].values,
            (self.metadata_.get_output_size(),),
            contexts["label_score"].values,
            validate_indices=False)
        # sparse_tensor = tf.sparse.SparseTensor(indices=(contexts["label_index"].values,),
        #                                       values=contexts["label_score"].values,
        #                                       dense_shape=(self.metadata_.get_output_size(),))
        # labels = tf.sparse.to_dense(sparse_tensor, validate_indices=False)
        sample.append(labels)
        return sample
Example #25
def slim_get_batch(num_classes,
                   batch_size,
                   split_name,
                   file_pattern,
                   num_readers,
                   num_epochs=None,
                   is_training=True):
    """获取一个数据集元组,其中包含有关读取P数据集的说明。
    Args:
      num_classes:数据集中的总类数。
      batch_size: the size of each batch.
      split_name: 'train' of 'val'.
      file_pattern: 匹配数据集源时使用的文件模式(完整路径)。
      num_readers: 用于阅读tfrecords的最大阅读器数量。
      num_preprocessing_threads: 用于运行预处理功能的最大线程数。
      image_preprocessing_fn: 用于数据集扩充的函数。
      anchor_encoder: 用于编码所有锚点的函数。
      num_epochs: 用于迭代此数据集的总epoches。
      is_training:
    Returns:     allow_smaller_final_batch=(not is_training),
                    num_threads=num_preprocessing_threads,
                    capacity=64 * batch_size)
    """
    if split_name not in data_splits_num:
        raise ValueError('split name %s was not recognized.' % split_name)

    # Features in Pascal VOC TFRecords.
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string,
                                           default_value='jpeg'),
        'image/height': tf.FixedLenFeature([1], tf.int64),
        'image/width': tf.FixedLenFeature([1], tf.int64),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
    }
    items_to_handlers = {
        'image':
        slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'shape':
        slim.tfexample_decoder.Tensor('image/shape'),
        'object/bbox':
        slim.tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                           'image/object/bbox/'),
        'object/label':
        slim.tfexample_decoder.Tensor('image/object/bbox/label'),
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    labels_to_names = {}
    for name, pair in classes.items():
        labels_to_names[pair[0]] = name
    # print('label_names',labels_to_names)
    dataset = slim.dataset.Dataset(data_sources=file_pattern,
                                   reader=tf.TFRecordReader,
                                   decoder=decoder,
                                   num_samples=data_splits_num[split_name],
                                   items_to_descriptions=None,
                                   num_classes=num_classes,
                                   labels_to_names=labels_to_names)

    with tf.name_scope('dataset_data_provider'):
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=num_readers,
            common_queue_capacity=32 * batch_size,
            common_queue_min=8 * batch_size,
            shuffle=is_training,
            num_epochs=num_epochs)

    # [image, shape, glabels_raw, gbboxes_raw] = provider.get(['image', 'shape',
    #                                                                    'object/label','object/bbox'])

    with tf.Session() as sess:
        sess.run([
            tf.local_variables_initializer(),
            tf.global_variables_initializer()
        ])
        tf.train.start_queue_runners()
        for i in range(provider._num_samples):
            [image, labelList, boxList, shape] = provider.get(
                ['image', 'object/label', 'object/bbox', 'shape'])
            img, labels, boxes, shape = sess.run(
                [image, labelList, boxList, shape])
            print(labels)

            # img=cv2.cvtColor(img, cv2.COLOR_BGR2RGB)#不转换反而是RGB显示
            # print('{}is ,has shape :{}'.format(img, shape))
            # img=cv2.imread(img)
            # img = img / 255.0  # after normalizing, the image renders as black
            for j in range(len(labels)):
                print('value:', (boxes[j][0], boxes[j][1]),
                      (boxes[j][2], boxes[j][3]))
                # boxes are [ymin, xmin, ymax, xmax] (normalized); shape is
                # [height, width, channels], and cv2.rectangle expects (x, y) points.
                cv2.rectangle(
                    img,
                    (int(boxes[j][1] * shape[1]), int(boxes[j][0] * shape[0])),
                    (int(boxes[j][3] * shape[1]), int(boxes[j][2] * shape[0])),
                    (0, 255, 0), 3)
            plt.imshow(img)
            plt.show()

            cv2.imwrite("./rec.jpg", img, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
            # cv2.waitKey(0)
            # cv2.destroyAllWindows()
            # plt.show()

            break
        capacity=capacity,
        min_after_dequeue=min_after_dequeue,
        enqueue_many=True)

else:
    serialized_example = read_and_decode(filename_queue)
    batch_serialized_example = tf.train.shuffle_batch(
        [serialized_example],
        batch_size=batch_size,
        num_threads=thread_number,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue)
features = tf.parse_example(batch_serialized_example,
                            features={
                                "label": tf.FixedLenFeature([], tf.float32),
                                "ids": tf.VarLenFeature(tf.int64),
                                "values": tf.VarLenFeature(tf.float32),
                            })

batch_labels = features["label"]
batch_ids = features["ids"]
batch_values = features["values"]
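# Note: "ids" and "values" come back as SparseTensors (VarLenFeature), the usual
# format for sparse inputs to e.g. tf.nn.embedding_lookup_sparse or
# tf.sparse_tensor_dense_matmul.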

init_op = tf.global_variables_initializer()

sess = tf.Session()
run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
run_metadata = tf.RunMetadata()
sess.run(init_op, options=run_options, run_metadata=run_metadata)
sess.run(tf.local_variables_initializer(),
         options=run_options,
         run_metadata=run_metadata)
Example #27
0
def parse_example_proto(example_serialized):
    """Parses an Example proto containing a training example of an image.

  The output of the build_image_data.py image preprocessing script is a dataset
  containing serialized Example protocol buffers. Each Example proto contains
  the following fields:

    image/height: 462
    image/width: 581
    image/colorspace: 'RGB'
    image/channels: 3
    image/class/label: 615
    image/class/synset: 'n03623198'
    image/class/text: 'knee pad'
    image/object/bbox/xmin: 0.1
    image/object/bbox/xmax: 0.9
    image/object/bbox/ymin: 0.2
    image/object/bbox/ymax: 0.6
    image/object/bbox/label: 615
    image/format: 'JPEG'
    image/filename: 'ILSVRC2012_val_00041207.JPEG'
    image/encoded: <JPEG encoded string>

  Args:
    example_serialized: scalar Tensor tf.string containing a serialized
      Example protocol buffer.

  Returns:
    filename: Tensor tf.string containing the filename
    label: Tensor tf.int32 containing the label.
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged as
      [ymin, xmin, ymax, xmax].
    text: Tensor tf.string containing the human-readable label.
  """
    # Dense features in Example proto.
    feature_map = {
        'image/filename':
        tf.FixedLenFeature([], dtype=tf.string, default_value=''),
        'image/class/label':
        tf.FixedLenFeature([1], dtype=tf.int64, default_value=-1),
        'image/class/text':
        tf.FixedLenFeature([], dtype=tf.string, default_value=''),
    }
    sparse_float32 = tf.VarLenFeature(dtype=tf.float32)
    # Sparse features in Example proto.
    feature_map.update({
        k: sparse_float32
        for k in [
            'image/object/bbox/xmin', 'image/object/bbox/ymin',
            'image/object/bbox/xmax', 'image/object/bbox/ymax'
        ]
    })

    features = tf.parse_single_example(example_serialized, feature_map)
    label = tf.cast(features['image/class/label'], dtype=tf.int32)

    xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0)
    ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0)
    xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0)
    ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0)

    # Note that we impose an ordering of (y, x) just to make life difficult.
    bbox = tf.concat([ymin, xmin, ymax, xmax], 0)

    # Force the variable number of bounding boxes into the shape
    # [1, num_boxes, coords].
    bbox = tf.expand_dims(bbox, 0)
    bbox = tf.transpose(bbox, [0, 2, 1])

    return features['image/filename'], label, bbox, features[
        'image/class/text']
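A minimal usage sketch for parse_example_proto in a queue-based TF 1.x pipeline; the file pattern and queue settings below are assumptions for illustration, not part of the original example.

# Hedged usage sketch (assumed file pattern and queue parameters).
import tensorflow as tf

filenames = tf.gfile.Glob('/tmp/imagenet/validation-*')  # assumed path
filename_queue = tf.train.string_input_producer(filenames, num_epochs=1)
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
filename, label, bbox, text = parse_example_proto(serialized_example)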
Example #28
0
def get_split(split_name,
              dataset_dir,
              data_name='Market1501',
              file_pattern=None,
              reader=None):
    """Gets a dataset tuple with instructions for reading Market1501.

  Args:
    split_name: A train/validation split name.
    dataset_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/validation split.
  """
    if split_name not in SPLITS_TO_SIZES:
        raise ValueError('split name %s was not recognized.' % split_name)

    if not file_pattern:
        file_pattern = _FILE_PATTERN
    file_pattern = os.path.join(dataset_dir,
                                file_pattern % (data_name, split_name))

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader

    keys_to_features = {
        'image_raw_0': tf.FixedLenFeature([], tf.string),
        'image_raw_1': tf.FixedLenFeature([], tf.string),
        'label':
        tf.FixedLenFeature([],
                           tf.int64),  # For FixedLenFeature, [] means scalar
        'id_0': tf.FixedLenFeature([], tf.int64),
        'id_1': tf.FixedLenFeature([], tf.int64),
        'cam_0': tf.FixedLenFeature([], tf.int64),
        'cam_1': tf.FixedLenFeature([], tf.int64),
        'image_format': tf.FixedLenFeature([], tf.string, default_value='jpg'),
        'image_height': tf.FixedLenFeature([], tf.int64, default_value=128),
        'image_width': tf.FixedLenFeature([], tf.int64, default_value=64),
        'real_data': tf.FixedLenFeature([], tf.int64, default_value=1),
        'pose_peaks_0': tf.FixedLenFeature([16 * 8 * 18], tf.float32),
        'pose_peaks_1': tf.FixedLenFeature([16 * 8 * 18], tf.float32),
        'pose_mask_r4_0': tf.FixedLenFeature([128 * 64 * 1], tf.int64),
        'pose_mask_r4_1': tf.FixedLenFeature([128 * 64 * 1], tf.int64),
        'shape': tf.FixedLenFeature([1], tf.int64),
        'indices_r4_0': tf.VarLenFeature(dtype=tf.int64),
        'values_r4_0': tf.VarLenFeature(dtype=tf.float32),
        'indices_r4_1': tf.VarLenFeature(dtype=tf.int64),
        'values_r4_1': tf.VarLenFeature(dtype=tf.float32),
        'pose_subs_0': tf.FixedLenFeature([20], tf.float32),
        'pose_subs_1': tf.FixedLenFeature([20], tf.float32),
    }

    items_to_handlers = {
        'image_raw_0':
        slim.tfexample_decoder.Image(image_key='image_raw_0',
                                     format_key='image_format'),
        'image_raw_1':
        slim.tfexample_decoder.Image(image_key='image_raw_1',
                                     format_key='image_format'),
        'label':
        slim.tfexample_decoder.Tensor('label'),
        'id_0':
        slim.tfexample_decoder.Tensor('id_0'),
        'id_1':
        slim.tfexample_decoder.Tensor('id_1'),
        'pose_peaks_0':
        slim.tfexample_decoder.Tensor('pose_peaks_0', shape=[16 * 8 * 18]),
        'pose_peaks_1':
        slim.tfexample_decoder.Tensor('pose_peaks_1', shape=[16 * 8 * 18]),
        'pose_mask_r4_0':
        slim.tfexample_decoder.Tensor('pose_mask_r4_0', shape=[128 * 64 * 1]),
        'pose_mask_r4_1':
        slim.tfexample_decoder.Tensor('pose_mask_r4_1', shape=[128 * 64 * 1]),
        'pose_sparse_r4_0':
        slim.tfexample_decoder.SparseTensor(indices_key='indices_r4_0',
                                            values_key='values_r4_0',
                                            shape_key='shape',
                                            densify=False),
        'pose_sparse_r4_1':
        slim.tfexample_decoder.SparseTensor(indices_key='indices_r4_1',
                                            values_key='values_r4_1',
                                            shape_key='shape',
                                            densify=False),
        'pose_subs_0':
        slim.tfexample_decoder.Tensor('pose_subs_0', shape=[20]),
        'pose_subs_1':
        slim.tfexample_decoder.Tensor('pose_subs_1', shape=[20]),
    }

    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    labels_to_names = None
    if dataset_utils.has_labels(dataset_dir):
        labels_to_names = dataset_utils.read_label_file(dataset_dir)

    print('load pn_pairs_num ......')
    fpath = os.path.join(dataset_dir, 'pn_pairs_num_' + split_name + '.p')
    with open(fpath, 'rb') as f:  # binary mode: the file holds pickled data
        pn_pairs_num = pickle.load(f)

    return slim.dataset.Dataset(data_sources=file_pattern,
                                reader=reader,
                                decoder=decoder,
                                num_samples=pn_pairs_num,
                                items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
                                num_classes=_NUM_CLASSES,
                                labels_to_names=labels_to_names)
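A minimal sketch of consuming the returned Dataset with slim's data provider; the dataset_dir path and the 'train' split name are assumptions, and the split's TFRecords plus the pn_pairs_num pickle are expected to exist there.

# Hedged usage sketch (assumed dataset_dir; slim is imported by the example above).
dataset = get_split('train', '/tmp/Market1501_tfrecords')
provider = slim.dataset_data_provider.DatasetDataProvider(dataset, shuffle=True)
image_0, image_1, label = provider.get(['image_raw_0', 'image_raw_1', 'label'])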
Example #29
0
    def example_reading_spec(self):
        data_fields, _ = super(Seq2editsGec, self).example_reading_spec()
        data_fields['targets_error_tag'] = tf.VarLenFeature(tf.int64)
        return data_fields, None
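For context, a small sketch of what the added VarLenFeature yields at parse time: a SparseTensor whose .values holds the variable-length error-tag sequence. The tag values below are made up for illustration.

# Hedged sketch: parsing a toy Example with the extended reading spec.
import tensorflow as tf

toy_example = tf.train.Example(features=tf.train.Features(feature={
    'targets_error_tag': tf.train.Feature(
        int64_list=tf.train.Int64List(value=[3, 0, 0, 7]))  # made-up tags
}))
parsed = tf.parse_single_example(
    toy_example.SerializeToString(),
    {'targets_error_tag': tf.VarLenFeature(tf.int64)})
error_tags = parsed['targets_error_tag'].values  # 1-D int64 tensor of the tags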
Example #30
0
def test():
    vocab_size = len(open(FLAGS.vocab_file).readlines())
    id_to_label = load_id_to_label()
    num_label = len(id_to_label)
    print('#vocab={} #label={}'.format(vocab_size, num_label))

    data_sources = [
        FLAGS.train_tfrecord,
    ]
    is_training = True
    reader = tf.TFRecordReader
    keys_to_features = {
        TEXT_KEY:
        tf.VarLenFeature(dtype=tf.string),
        LABELS_KEY:
        tf.FixedLenFeature([num_label],
                           tf.float32,
                           default_value=tf.zeros([num_label],
                                                  dtype=tf.float32)),
    }

    items_to_handlers = {
        'text': slim.tfexample_decoder.Tensor(TEXT_KEY,
                                              default_value=DEFAULT_WORD),
        'labels': slim.tfexample_decoder.Tensor(LABELS_KEY),
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)
    num_samples = 1  # np.inf
    items_to_descriptions = {
        'text': 'text',
        'labels': 'labels',
    }
    dataset = slim.dataset.Dataset(
        data_sources=data_sources,
        reader=reader,
        decoder=decoder,
        num_samples=num_samples,
        items_to_descriptions=items_to_descriptions,
    )
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset, shuffle=is_training)
    text_ts, labels_ts = provider.get(['text', 'labels'])

    # with tf.Session() as sess:
    #     with slim.queues.QueueRunners(sess):
    #         for i in range(10000):
    #             text_np, labels_np = sess.run([text_ts, labels_ts])
    #             label_ids = [i for i in range(num_label) if labels_np[i] != 0]
    #             labels = [id_to_label[label_id] for label_id in label_ids]
    #             text = [text_np[i].decode('utf-8') for i in range(text_np.shape[0]) if text_np[i] != b' ']
    #             text = ' '.join(text)
    #             print(str(text), labels)
    #             input()

    text_bt, labels_bt = tf.train.batch([text_ts, labels_ts],
                                        batch_size=FLAGS.batch_size,
                                        dynamic_pad=True)

    with tf.Session() as sess:
        with slim.queues.QueueRunners(sess):
            for i in range(10000):
                text_np, labels_np = sess.run([text_bt, labels_bt])
                print(type(text_np), type(labels_np))
                print(text_np.shape, labels_np.shape)
                input()
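A small helper sketch for turning one padded text batch back into strings, mirroring the commented-out single-example loop above; it assumes the pad / default word is b' ' as in that loop (dynamic_pad itself pads with the empty string).

# Hedged helper sketch (assumes b' ' is the default word used for padding).
def batch_to_sentences(text_np):
    """Convert a [batch_size, max_len] array of byte strings into sentences."""
    sentences = []
    for row in text_np:
        words = [w.decode('utf-8') for w in row if w not in (b'', b' ')]
        sentences.append(' '.join(words))
    return sentences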