import tensorflow as tf
import numpy as np
import os

from datautil.ssd_vgg_preprocessing import preprocess_for_train, preprocess_for_eval
from model import ssd_common
from tfutil import tf_utils

features = {
    'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
    'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
    'image/height': tf.FixedLenFeature([1], tf.int64),
    'image/width': tf.FixedLenFeature([1], tf.int64),
    'image/channels': tf.FixedLenFeature([1], tf.int64),
    'image/shape': tf.FixedLenFeature([3], tf.int64),
    'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
    'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
    'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
    'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
    'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
    'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
    'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64),
}


def get_parser_func(anchors, num_classes, is_training, var_scope):
    '''Dataset parser function for training and evaluation.

    Arguments:
        preprocess_fn - function that does preprocessing
def parse_example_proto( self, example_serialized): #TODO(lowres check this mainly) # Dense features in Example proto. feature_map = { 'image/encoded': tf.VarLenFeature(dtype=tf.string), 'image/speeds': tf.VarLenFeature(dtype=tf.float32), 'image/class/video_name': tf.FixedLenFeature([1], dtype=tf.string, default_value=''), } if FLAGS.only_seg == 1: feature_map.update({ 'image/segmentation': tf.VarLenFeature(dtype=tf.string), 'image/context': tf.VarLenFeature(dtype=tf.string) }) if FLAGS.use_speed_yaw: feature_map.update({ 'sensor/yaw_imu': tf.VarLenFeature(dtype=tf.float32), 'sensor/speed_steer': tf.VarLenFeature(dtype=tf.float32) }) features = tf.parse_single_example(example_serialized, feature_map) # if the data is downsampled by a temporal factor, the starting point should be random, such that we could use # all the data if FLAGS.non_random_temporal_downsample: tstart = 0 else: tstart = tf.random_uniform([], minval=0, maxval=FLAGS.temporal_downsample_factor, dtype=tf.int32) len_downsampled = FLAGS.FRAMES_IN_SEG // FLAGS.temporal_downsample_factor if FLAGS.only_seg == 1: seg = features['image/segmentation'].values[:] seg.set_shape([len_downsampled]) ctx = features['image/context'].values[:] ctx.set_shape([len_downsampled]) name = features['image/class/video_name'] encoded = features['image/encoded'].values[:FLAGS.FRAMES_IN_SEG] encoded_sub = encoded[tstart::FLAGS.temporal_downsample_factor] encoded_sub.set_shape([len_downsampled]) if FLAGS.no_image_input: # no image input is used, but the previous steps is done because # we assume we have an list of empty image inputs decoded = tf.zeros([ len_downsampled, FLAGS.IM_HEIGHT / FLAGS.decode_downsample_factor, FLAGS.IM_WIDTH / FLAGS.decode_downsample_factor, 3 ], tf.uint8) else: decoded = self.decode_jpeg(encoded_sub) if FLAGS.only_seg == 1: seg_decoded = self.decode_png(seg) ctx_decoded = tf.py_func(self.read_array, [ctx], [tf.float32])[0] ctx_decoded.set_shape( [len_downsampled, ctx_channel, ctx_height, ctx_width]) decoded_raw = decoded if FLAGS.resize_images != "": # should have format: new_height, new_width sp_size = FLAGS.resize_images.split(",") assert (len(sp_size) == 2) new_size = (int(sp_size[0]), int(sp_size[1])) decoded = tf.image.resize_bilinear(decoded, new_size) #decoded = tf.image.resize_nearest_neighbor(decoded, new_size) decoded = tf.cast(decoded, tf.uint8) if FLAGS.crop_car_hood > 0: decoded = decoded[:, :-FLAGS.crop_car_hood, :, :] speed = features['image/speeds'].values speed = tf.reshape(speed, [-1, 2]) speed = speed[:FLAGS.FRAMES_IN_SEG, :] speed = speed[tstart::FLAGS.temporal_downsample_factor, :] speed.set_shape([len_downsampled, 2]) # from speed to stop labels stop_label = tf.py_func( self.speed_to_future_has_stop, [speed, FLAGS.stop_future_frames, FLAGS.speed_limit_as_stop], [tf.int32])[0] #TODO(lowres: length of smoothed time) stop_label.set_shape([len_downsampled]) # Note that the turning heuristic is tuned for 3Hz video and urban area # Note also that stop_future_frames is reused for the turn turn = tf.py_func( self.turn_future_smooth, [speed, FLAGS.stop_future_frames, FLAGS.speed_limit_as_stop], [tf.float32])[0] #TODO(lowres) turn.set_shape([len_downsampled, self.naction]) if FLAGS.use_speed_yaw: yaw = features['sensor/yaw_imu'].values spd = features['sensor/speed_steer'].values ys = tf.pack([yaw, spd], axis=1, name="stack_yaw_speed") # Now the shape is N*2 ys = ys[ tstart:FLAGS.FRAMES_IN_SEG:FLAGS.temporal_downsample_factor, :] ys.set_shape([len_downsampled, 2]) if not FLAGS.use_nan_padding: # compute locs from 
ys ys = tf.pad(ys, [[0, FLAGS.stop_future_frames], [0, 0]], mode="SYMMETRIC", name="pad_afterwards") else: # invalidate the last two entries by setting it to NaN nan_const = tf.constant(float('NaN'), dtype=tf.float32, shape=(FLAGS.stop_future_frames, 2), name="NaN_constant") ys = tf.concat(0, [ys, nan_const], name="nan_pad_afterwards") ys = ys[FLAGS.stop_future_frames:, :] ys.set_shape([len_downsampled, 2]) locs = ys print("data loader is using raw yaw and speed") else: # get the relative future location # Note that we again abuse the notation a little bit, reusing stop_future_frames # TODO: normalize the course and speed by time locs = tf.py_func(self.relative_future_course_speed, [ speed, FLAGS.stop_future_frames, FLAGS.frame_rate / FLAGS.temporal_downsample_factor ], [tf.float32])[0] locs.set_shape([len_downsampled, 2]) # batching one 10 second segments into several smaller segments batching_inputs = [decoded, speed, stop_label, turn, locs] if FLAGS.only_seg == 1: batching_inputs += [seg_decoded, ctx_decoded] decoded_raw_loc = 7 else: decoded_raw_loc = 5 batching_inputs += [decoded_raw] batched = [self.batching(x, len_downsampled) for x in batching_inputs] name = tf.tile(name, [batched[0].get_shape()[0].value]) ins = batched[0:2] + [name] outs = batched[2:5] if FLAGS.city_data: # city batch means how many batch does each video sequence forms FLAGS.city_batch = len_downsampled // FLAGS.n_sub_frame # here we want to read in the cityscape data and downsample in the loop city_im_queue, city_seg_queue = self.queue_cityscape( FLAGS.city_image_list, FLAGS.city_label_list) global city_pointer city_pointer = 0 read_n = city_frames * FLAGS.city_batch city_im, city_seg = tf.py_func( self.read_cityscape, [city_im_queue, city_seg_queue, read_n], [tf.float32, tf.int32]) city_im = tf.reshape(city_im, [ FLAGS.city_batch, city_frames, FLAGS.IM_HEIGHT, FLAGS.IM_WIDTH, city_im_channel ]) city_seg = tf.reshape(city_seg, [ FLAGS.city_batch, city_frames, FLAGS.IM_HEIGHT, FLAGS.IM_WIDTH, city_seg_channel ]) if FLAGS.resize_images != "": # should have format: new_height, new_width sp_size = FLAGS.resize_images.split(",") assert (len(sp_size) == 2) new_size = (int(sp_size[0]), int(sp_size[1])) city_im = tf.reshape(city_im, [ FLAGS.city_batch * city_frames, FLAGS.IM_HEIGHT, FLAGS.IM_WIDTH, city_im_channel ]) city_seg = tf.reshape(city_seg, [ FLAGS.city_batch * city_frames, FLAGS.IM_HEIGHT, FLAGS.IM_WIDTH, city_seg_channel ]) city_im = tf.image.resize_bilinear(city_im, new_size) city_seg = tf.image.resize_nearest_neighbor(city_seg, new_size) city_im = tf.reshape(city_im, [ FLAGS.city_batch, city_frames, new_size[0], new_size[1], city_im_channel ]) city_seg = tf.reshape(city_seg, [ FLAGS.city_batch, city_frames, new_size[0], new_size[1], city_seg_channel ]) ins += [city_im] outs += [city_seg] if FLAGS.only_seg == 1: ins = ins + batched[5:7] outs = outs # adding the raw images ins += batched[decoded_raw_loc:(decoded_raw_loc + 1)] if FLAGS.action_mapping_loss: assert not FLAGS.city_data assert not FLAGS.only_seg outs += [ins[-2]] # dropout non-stop videos if FLAGS.balance_drop_prob > 0: retained = tf.py_func(self.no_stop_dropout_valid, [outs[0], FLAGS.balance_drop_prob], [tf.bool])[0] retained.set_shape([outs[0].get_shape()[0].value]) select = lambda tensors, valid: [ util.bool_select(x, valid) for x in tensors ] ins = select(ins, retained) outs = select(outs, retained) return ins, outs
def get_split(split_name, dataset_dir, file_pattern, reader,
              split_to_sizes, items_to_descriptions, num_classes):
    """Gets a dataset tuple with instructions for reading Pascal VOC dataset.

    Args:
      split_name: A train/test split name.
      dataset_dir: The base directory of the dataset sources.
      file_pattern: The file pattern to use when matching the dataset sources.
        It is assumed that the pattern contains a '%s' string so that the split
        name can be inserted.
      reader: The TensorFlow reader type.

    Returns:
      A `Dataset` namedtuple.

    Raises:
      ValueError: if `split_name` is not a valid train/test split.
    """
    if split_name not in split_to_sizes:
        raise ValueError('split name %s was not recognized.' % split_name)
    file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader

    # Features in Pascal VOC TFRecords.
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/height': tf.FixedLenFeature([1], tf.int64),
        'image/width': tf.FixedLenFeature([1], tf.int64),
        'image/channels': tf.FixedLenFeature([1], tf.int64),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64),
    }
    items_to_handlers = {
        'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'shape': slim.tfexample_decoder.Tensor('image/shape'),
        'object/bbox': slim.tfexample_decoder.BoundingBox(
            ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
        'object/label': slim.tfexample_decoder.Tensor('image/object/bbox/label'),
        'object/difficult': slim.tfexample_decoder.Tensor('image/object/bbox/difficult'),
        'object/truncated': slim.tfexample_decoder.Tensor('image/object/bbox/truncated'),
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    labels_to_names = None
    if dataset_utils.has_labels(dataset_dir):
        labels_to_names = dataset_utils.read_label_file(dataset_dir)

    return slim.dataset.Dataset(data_sources=file_pattern,
                                reader=reader,
                                decoder=decoder,
                                num_samples=split_to_sizes[split_name],
                                items_to_descriptions=items_to_descriptions,
                                num_classes=num_classes,
                                labels_to_names=labels_to_names)
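# Usage sketch (not part of the original snippet): a Dataset built by get_split()
# is typically consumed through slim's DatasetDataProvider. The directory, file
# pattern, split sizes, descriptions and reader counts below are illustrative
# assumptions.
voc_train = get_split(
    'train', '/data/voc', 'voc_%s_*.tfrecord', None,
    split_to_sizes={'train': 17125, 'test': 4952},
    items_to_descriptions={'image': 'A color image of varying size.'},
    num_classes=21)
provider = slim.dataset_data_provider.DatasetDataProvider(
    voc_train, num_readers=4, common_queue_capacity=512, common_queue_min=128)
image, shape, bboxes, labels = provider.get(
    ['image', 'shape', 'object/bbox', 'object/label'])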
def verify_bboxes(tfrecords): filename_queue = tf.train.string_input_producer( tfrecords, num_epochs=1 ) # Construct a Reader to read examples from the .tfrecords file reader = tf.TFRecordReader() _, serialized_example = reader.read(filename_queue) features = tf.parse_single_example( serialized_example, features={ 'image/id' : tf.FixedLenFeature([], tf.string), 'image/height' : tf.FixedLenFeature([], tf.int64), 'image/width' : tf.FixedLenFeature([], tf.int64), 'image/object/bbox/xmin' : tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/ymin' : tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/xmax' : tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/ymax' : tf.VarLenFeature(dtype=tf.float32), 'image/object/count' : tf.FixedLenFeature([], tf.int64) } ) image_height = tf.cast(features['image/height'], tf.float32) image_width = tf.cast(features['image/width'], tf.float32) image_id = features['image/id'] xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) num_bboxes = tf.cast(features['image/object/count'], tf.int32) bboxes = tf.concat(axis=0, values=[xmin, ymin, xmax, ymax]) bboxes = tf.transpose(bboxes, [1, 0]) fetches = [image_id, image_height, image_width, bboxes, num_bboxes] image_count = 0 bbox_widths = [] bbox_heights = [] images_with_small_bboxes = set() images_with_reversed_coords = set() images_with_bbox_count_mismatch = set() coord = tf.train.Coordinator() with tf.Session() as sess: tf.global_variables_initializer().run() tf.local_variables_initializer().run() tf.train.start_queue_runners(sess=sess, coord=coord) try: while not coord.should_stop(): outputs = sess.run(fetches) img_id = outputs[0] img_h = outputs[1] img_w = outputs[2] img_bboxes = outputs[3] img_num_bboxes = outputs[4] if img_bboxes.shape[0] != img_num_bboxes: images_with_bbox_count_mismatch.add(img_id) for img_bbox in img_bboxes: x1, y1, x2, y2 = img_bbox # Reversed coordinates? if x1 > x2: images_with_reversed_coords.add(img_id) t = x1 x1 = x2 x2 = t if y1 > y2: images_with_reversed_coords.add(img_id) t = y1 y1 = y2 y2 = t w = (x2 - x1) * img_w h = (y2 - y1) * img_h # Too small of an area? 
if w * h < 10: images_with_small_bboxes.add(img_id) bbox_widths.append(w) bbox_heights.append(h) image_count += 1 except tf.errors.OutOfRangeError as e: pass # Basic info print("Found %d images" % (image_count,)) print() print("Found %d images with small bboxes" % (len(images_with_small_bboxes),)) #print("Images with areas < 10:") #for img_id in images_with_small_bboxes: # print(img_id) print() print("Found %d images with reversed coordinates" % (len(images_with_reversed_coords),)) #print("Images with reversed coordinates:") #for img_id in images_with_reversed_coords: # print(img_id) print() print("Found %d images with bbox count mismatches" % (len(images_with_bbox_count_mismatch),)) #for img_id in images_with_bbox_count_mismatch: # print(img_id) print() bbox_widths = np.round(np.array(bbox_widths)).astype(int) bbox_heights = np.round(np.array(bbox_heights)).astype(int) print("Mean width: %0.4f" % (np.mean(bbox_widths),)) print("Median width: %d" % (np.median(bbox_widths),)) print("Max width: %d" % (np.max(bbox_widths),)) print("Min width: %d" % (np.min(bbox_widths),)) print() print("Mean height: %0.4f" % (np.mean(bbox_heights),)) print("Median height: %d" % (np.median(bbox_heights),)) print("Max height: %d" % (np.max(bbox_heights),)) print("Min height: %d" % (np.min(bbox_heights),))
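# Usage sketch for the verifier above (the file pattern is an illustrative
# assumption): it only needs the list of TFRecord shards to scan.
if __name__ == '__main__':
    import glob
    verify_bboxes(glob.glob('/data/dataset/train-*.tfrecord'))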
def parser(record):
    # preprocess "inp_perm" and "tgt_perm"
    def _process_perm_feature(example, prefix):
        for b in range(len(bin_sizes)):
            cnt = example.pop("{}_cnt_{}".format(prefix, b))[0]
            tup = example.pop("{}_tup_{}".format(prefix, b))

            tup = tf.reshape(tf.sparse_tensor_to_dense(tup), shape=[cnt, 2])

            # tf.float32
            perm = tf.sparse_to_dense(
                sparse_indices=tup,
                output_shape=[tgt_len, bin_sizes[b]],
                sparse_values=1.0,
                default_value=0.0)

            example["{}_perm_{}".format(prefix, b)] = perm

    # whether to allow the last batch with a potentially shorter length
    if use_tpu:
        record_spec = {
            "inputs": tf.FixedLenFeature([tgt_len], tf.int64),
            "labels": tf.FixedLenFeature([tgt_len], tf.int64),
        }
    else:
        record_spec = {
            "inputs": tf.VarLenFeature(tf.int64),
            "labels": tf.VarLenFeature(tf.int64),
        }

    # permutation related features
    if bin_sizes and use_tpu:
        # tf.float32
        record_spec["inp_mask"] = tf.FixedLenFeature([tgt_len], tf.float32)
        record_spec["tgt_mask"] = tf.FixedLenFeature([tgt_len], tf.float32)

        record_spec["head_labels"] = tf.FixedLenFeature([tgt_len], tf.int64)

        for b in range(len(bin_sizes)):
            record_spec["inp_cnt_{}".format(b)] = tf.FixedLenFeature([1], tf.int64)
            record_spec["inp_tup_{}".format(b)] = tf.VarLenFeature(tf.int64)
            record_spec["tgt_cnt_{}".format(b)] = tf.FixedLenFeature([1], tf.int64)
            record_spec["tgt_tup_{}".format(b)] = tf.VarLenFeature(tf.int64)

    # retrieve serialized example
    example = tf.parse_single_example(serialized=record, features=record_spec)

    # transform permutation tuples to permutation matrices
    if bin_sizes and use_tpu:
        _process_perm_feature(example, "inp")
        _process_perm_feature(example, "tgt")

    # cast int64 into int32
    # cast sparse to dense
    for key in list(example.keys()):
        val = example[key]
        if tf.keras.backend.is_sparse(val):
            val = tf.sparse.to_dense(val)
        if val.dtype == tf.int64:
            val = tf.to_int32(val)
        example[key] = val

    if use_tpu:
        return example
    else:
        return example["inputs"], example["labels"]
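# Sketch (not from the original source) of how a parser closure like the one
# above is usually attached to a tf.data pipeline; `file_names`, the batch size
# and the parallelism are illustrative assumptions.
def get_dataset(file_names, batch_size):
    dataset = tf.data.TFRecordDataset(file_names)
    dataset = dataset.map(parser, num_parallel_calls=4)
    # TPU execution needs fully defined batch shapes, so drop the remainder there.
    dataset = dataset.batch(batch_size, drop_remainder=use_tpu)
    return dataset.prefetch(1)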
def prepare_serialized_examples(self, serialized_example, max_quantized_value=2, min_quantized_value=-2): """Parse single serialized SequenceExample from the TFRecords.""" # Read/parse frame/segment-level labels. context_features = { "id": tf.FixedLenFeature([], tf.string), } if self.segment_labels: context_features.update({ # There is no need to read end-time given we always assume the segment # has the same size. "segment_labels": tf.VarLenFeature(tf.int64), "segment_start_times": tf.VarLenFeature(tf.int64), "segment_scores": tf.VarLenFeature(tf.float32) }) else: context_features.update({"labels": tf.VarLenFeature(tf.int64)}) sequence_features = { feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string) for feature_name in self.feature_names } contexts, features = tf.parse_single_sequence_example( serialized_example, context_features=context_features, sequence_features=sequence_features) # loads (potentially) different types of features and concatenates them num_features = len(self.feature_names) assert num_features > 0, "No feature selected: feature_names is empty!" assert len(self.feature_names) == len(self.feature_sizes), ( "length of feature_names (={}) != length of feature_sizes (={})". format(len(self.feature_names), len(self.feature_sizes))) num_frames = -1 # the number of frames in the video feature_matrices = [None ] * num_features # an array of different features for feature_index in range(num_features): feature_matrix, num_frames_in_this_feature = self.get_video_matrix( features[self.feature_names[feature_index]], self.feature_sizes[feature_index], self.max_frames, max_quantized_value, min_quantized_value) if num_frames == -1: num_frames = num_frames_in_this_feature feature_matrices[feature_index] = feature_matrix # cap the number of frames at self.max_frames num_frames = tf.minimum(num_frames, self.max_frames) # concatenate different features video_matrix = tf.concat(feature_matrices, 1) # Partition frame-level feature matrix to segment-level feature matrix. if self.segment_labels: start_times = contexts["segment_start_times"].values # Here we assume all the segments that started at the same start time has # the same segment_size. uniq_start_times, seg_idxs = tf.unique(start_times, out_idx=tf.dtypes.int64) # TODO(zhengxu): Ensure the segment_sizes are all same. segment_size = self.segment_size # Range gather matrix, e.g., [[0,1,2],[1,2,3]] for segment_size == 3. range_mtx = tf.expand_dims( uniq_start_times, axis=-1) + tf.expand_dims( tf.range(0, segment_size, dtype=tf.int64), axis=0) # Shape: [num_segment, segment_size, feature_dim]. batch_video_matrix = tf.gather_nd( video_matrix, tf.expand_dims(range_mtx, axis=-1)) num_segment = tf.shape(batch_video_matrix)[0] batch_video_ids = tf.reshape( tf.tile([contexts["id"]], [num_segment]), (num_segment, )) batch_frames = tf.reshape(tf.tile([segment_size], [num_segment]), (num_segment, )) # For segment labels, all labels are not exhausively rated. So we only # evaluate the rated labels. # Label indices for each segment, shape: [num_segment, 2]. 
label_indices = tf.stack( [seg_idxs, contexts["segment_labels"].values], axis=-1) label_values = contexts["segment_scores"].values sparse_labels = tf.sparse.SparseTensor( label_indices, label_values, (num_segment, self.num_classes)) batch_labels = tf.sparse.to_dense(sparse_labels, validate_indices=False) sparse_label_weights = tf.sparse.SparseTensor( label_indices, tf.ones_like(label_values, dtype=tf.float32), (num_segment, self.num_classes)) batch_label_weights = tf.sparse.to_dense(sparse_label_weights, validate_indices=False) else: # Process video-level labels. label_indices = contexts["labels"].values sparse_labels = tf.sparse.SparseTensor( tf.expand_dims(label_indices, axis=-1), tf.ones_like(contexts["labels"].values, dtype=tf.bool), (self.num_classes, )) labels = tf.sparse.to_dense(sparse_labels, default_value=False, validate_indices=False) # convert to batch format. batch_video_ids = tf.expand_dims(contexts["id"], 0) batch_video_matrix = tf.expand_dims(video_matrix, 0) batch_labels = tf.expand_dims(labels, 0) batch_frames = tf.expand_dims(num_frames, 0) batch_label_weights = None output_dict = { "video_ids": batch_video_ids, "video_matrix": batch_video_matrix, "labels": batch_labels, "num_frames": batch_frames, } if batch_label_weights is not None: output_dict["label_weights"] = batch_label_weights return output_dict
def get_split(split_name, dataset_dir=None, config=None):
    """Returns a dataset tuple for FSNS dataset.

    Args:
      split_name: A train/test split name.
      dataset_dir: The base directory of the dataset sources, by default it uses
        a predefined CNS path (see DEFAULT_DATASET_DIR).
      config: A dictionary with dataset configuration. If None - will use the
        DEFAULT_CONFIG.

    Returns:
      A `Dataset` namedtuple.

    Raises:
      ValueError: if `split_name` is not a valid train/test split.
    """
    if not dataset_dir:
        dataset_dir = DEFAULT_DATASET_DIR

    if not config:
        config = DEFAULT_CONFIG

    if split_name not in config['splits']:
        raise ValueError('split name %s was not recognized.' % split_name)

    logging.info('Using %s dataset split_name=%s dataset_dir=%s', config['name'],
                 split_name, dataset_dir)

    # Ignores the 'image/height' feature.
    zero = tf.zeros([1], dtype=tf.int64)
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),
        'image/width': tf.FixedLenFeature([1], tf.int64, default_value=zero),
        'image/orig_width': tf.FixedLenFeature([1], tf.int64, default_value=zero),
        'image/class': tf.FixedLenFeature([config['max_sequence_length']], tf.int64),
        'image/unpadded_class': tf.VarLenFeature(tf.int64),
        'image/text': tf.FixedLenFeature([1], tf.string, default_value=''),
    }
    items_to_handlers = {
        'image': slim.tfexample_decoder.Image(
            shape=config['image_shape'],
            image_key='image/encoded',
            format_key='image/format'),
        'label': slim.tfexample_decoder.Tensor(tensor_key='image/class'),
        'text': slim.tfexample_decoder.Tensor(tensor_key='image/text'),
        'num_of_views': _NumOfViewsHandler(
            width_key='image/width',
            original_width_key='image/orig_width',
            num_of_views=config['num_of_views'])
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)
    charset_file = os.path.join(dataset_dir, config['charset_filename'])
    charset = read_charset(charset_file)
    file_pattern = os.path.join(dataset_dir,
                                config['splits'][split_name]['pattern'])
    return slim.dataset.Dataset(
        data_sources=file_pattern,
        reader=tf.TFRecordReader,
        decoder=decoder,
        num_samples=config['splits'][split_name]['size'],
        items_to_descriptions=config['items_to_descriptions'],
        # additional parameters for convenience.
        charset=charset,
        num_char_classes=len(charset),
        num_of_views=config['num_of_views'],
        max_sequence_length=config['max_sequence_length'],
        null_code=config['null_code'])
def _parse_function(example):
    # parsing
    context_feature_info = {
        'cameras': tf.VarLenFeature(dtype=tf.string),
        'gazemaps': tf.VarLenFeature(dtype=tf.string),
        'video_id': tf.FixedLenFeature(shape=[], dtype=tf.int64)
    }
    sequence_feature_info = {
        'feature_maps': tf.FixedLenSequenceFeature(shape=[], dtype=tf.string),
        'gaze_ps': tf.FixedLenSequenceFeature(shape=[], dtype=tf.string),
        'predicted_time_points': tf.FixedLenSequenceFeature(shape=[], dtype=tf.int64)
    }
    context_features, sequence_features = tf.parse_single_sequence_example(
        example,
        context_features=context_feature_info,
        sequence_features=sequence_feature_info)

    cameras = tf.sparse_tensor_to_dense(context_features["cameras"], default_value='')
    gazemaps = tf.sparse_tensor_to_dense(context_features["gazemaps"], default_value='')
    video_id = context_features['video_id']

    feature_maps = tf.reshape(
        tf.decode_raw(sequence_features["feature_maps"], tf.float32),
        [-1, ] + args.feature_map_size + [args.feature_map_channels])
    predicted_time_points = sequence_features["predicted_time_points"]

    if include_labels:
        labels = tf.reshape(
            tf.decode_raw(sequence_features["gaze_ps"], tf.float32),
            [-1, args.gazemap_size[0] * args.gazemap_size[1]])

    if n_steps is not None:
        # select a subsequence
        length = tf.shape(cameras)[0]
        offset = tf.random_uniform(
            shape=[],
            minval=0,
            maxval=tf.maximum(length - n_steps + 1, 1),
            dtype=tf.int32)
        end = tf.minimum(offset + n_steps, length)
        cameras = cameras[offset:end]
        feature_maps = feature_maps[offset:end]
        gazemaps = gazemaps[offset:end]
        predicted_time_points = predicted_time_points[offset:end]
        if include_labels:
            labels = labels[offset:end]

    # decode jpg's
    cameras = tf.map_fn(tf.image.decode_jpeg, cameras, dtype=tf.uint8, back_prop=False)
    gazemaps = tf.map_fn(tf.image.decode_jpeg, gazemaps, dtype=tf.uint8, back_prop=False)

    # return features and labels
    features = {}
    features['cameras'] = cameras
    features['feature_maps'] = feature_maps
    features['gazemaps'] = gazemaps
    features['video_id'] = video_id
    features['predicted_time_points'] = predicted_time_points

    if include_labels:
        return features, labels
    else:
        return features
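# Sketch (assumed, not from the original source) of wiring _parse_function into
# an input pipeline; `tfrecord_paths` and the batch size are placeholders.
def make_dataset(tfrecord_paths, batch_size):
    dataset = tf.data.TFRecordDataset(tfrecord_paths)
    dataset = dataset.map(_parse_function, num_parallel_calls=4)
    # Sequences vary in length, so pad each batch up to its longest element.
    dataset = dataset.padded_batch(batch_size, padded_shapes=dataset.output_shapes)
    return dataset.prefetch(1)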
def __init__(self, dtype='uint8'): """Constructor sets keys_to_features and items_to_handlers. Args: image_shape: image shape for raw data format. """ if dtype == 'float32': self._dtype = tf.float32 elif dtype == 'uint16': self._dtype = tf.uint16 else: self._dtype = tf.uint8 self.keys_to_features = { 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'), 'image/filename': tf.FixedLenFeature((), tf.string, default_value=''), 'image/key/sha256': tf.FixedLenFeature((), tf.string, default_value=''), 'image/source_id': tf.FixedLenFeature((), tf.string, default_value=''), 'image/sensor': tf.FixedLenFeature((), tf.string, default_value=''), 'image/height': tf.FixedLenFeature((), tf.int64, 1), 'image/width': tf.FixedLenFeature((), tf.int64, 1), 'image/gsd': tf.FixedLenFeature((), tf.float32, 1), # Object boxes. 'image/object/bbox/xmin': tf.VarLenFeature(tf.float32), 'image/object/bbox/xmax': tf.VarLenFeature(tf.float32), 'image/object/bbox/ymin': tf.VarLenFeature(tf.float32), 'image/object/bbox/ymax': tf.VarLenFeature(tf.float32), # Object rotated boxes. 'image/object/rbbox/cy': tf.VarLenFeature(tf.float32), 'image/object/rbbox/cx': tf.VarLenFeature(tf.float32), 'image/object/rbbox/h': tf.VarLenFeature(tf.float32), 'image/object/rbbox/w': tf.VarLenFeature(tf.float32), 'image/object/rbbox/ang': tf.VarLenFeature(tf.float32), # Object classes. 'image/object/class/label': tf.VarLenFeature(tf.int64), 'image/object/area': tf.VarLenFeature(tf.float32), 'image/object/is_crowd': tf.VarLenFeature(tf.int64), 'image/object/difficult': tf.VarLenFeature(tf.int64), # Instance masks and classes. 'image/segmentation/object': tf.VarLenFeature(tf.int64), 'image/segmentation/object/class': tf.VarLenFeature(tf.int64) } self.items_to_handlers = { fields.InputDataFields.image: Image(dtype=self._dtype), fields.InputDataFields.source_id: ( slim_example_decoder.Tensor('image/source_id')), fields.InputDataFields.sensor: ( slim_example_decoder.Tensor('image/sensor')), fields.InputDataFields.key: ( slim_example_decoder.Tensor('image/key/sha256')), fields.InputDataFields.filename: ( slim_example_decoder.Tensor('image/filename')), fields.InputDataFields.gsd: ( slim_example_decoder.Tensor('image/gsd')), # Object boxes. fields.InputDataFields.groundtruth_boxes: ( slim_example_decoder.BoundingBox( ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/')), # Object rotated boxes. fields.InputDataFields.groundtruth_rboxes: ( RotatedBoundingBox( ['cy', 'cx', 'h', 'w', 'ang'], 'image/object/rbbox/')), # Object classes. fields.InputDataFields.groundtruth_classes: ( slim_example_decoder.Tensor('image/object/class/label')), fields.InputDataFields.groundtruth_area: slim_example_decoder.Tensor( 'image/object/area'), fields.InputDataFields.groundtruth_is_crowd: ( slim_example_decoder.Tensor('image/object/is_crowd')), fields.InputDataFields.groundtruth_difficult: ( slim_example_decoder.Tensor('image/object/difficult')), # Instance masks and classes. fields.InputDataFields.groundtruth_instance_masks: ( slim_example_decoder.ItemHandlerCallback( ['image/segmentation/object', 'image/height', 'image/width'], self._reshape_instance_masks)), fields.InputDataFields.groundtruth_instance_classes: ( slim_example_decoder.Tensor('image/segmentation/object/class')), }
def distorted_inputs(data_dir, batch_size):
    file_dir = os.path.join(data_dir, 'train.tfrecord')
    num_examples_per_epoch = NUM_EXAMPLES_FOR_TRAIN
    if not tf.gfile.Exists(file_dir):
        raise ValueError('Failed to find file %s' % file_dir)

    with tf.name_scope('input'):
        file_queue = tf.train.string_input_producer([file_dir], num_epochs=50)
        reader = tf.TFRecordReader()
        _, serialized = reader.read(file_queue)
        feature = tf.parse_single_example(
            serialized=serialized,
            features={
                'image/shape': tf.FixedLenFeature([3], tf.int64),
                'label': tf.FixedLenFeature([], tf.int64),
                'image_raw': tf.FixedLenFeature([], tf.string),
                'object/name': tf.VarLenFeature(tf.string),
                'object/truncated': tf.VarLenFeature(tf.int64),
                'object/difficult': tf.VarLenFeature(tf.int64),
                'object/xmin': tf.VarLenFeature(tf.float32),
                'object/ymin': tf.VarLenFeature(tf.float32),
                'object/xmax': tf.VarLenFeature(tf.float32),
                'object/ymax': tf.VarLenFeature(tf.float32)
            })
        shape, label = feature['image/shape'], feature['label']
        image_raw = feature['image_raw']

        # process label
        # change 42 classes to 2 classes
        # label = 0 if label == [33] else 1
        label = tf.cast(label, tf.int32)

        # process image
        image = tf.image.decode_jpeg(image_raw)
        float_image = tf.cast(image, tf.float32)
        float_image = tf.reshape(float_image, [224, 224, 3])

        # data augmentation
        # Randomly crop a [height, width] section of the image.
        # distorted_image = tf.random_crop(image, shape)
        resized_image = tf.image.resize_images(float_image, (224, 224))
        # Randomly flip the image horizontally.
        distorted_image = tf.image.random_flip_left_right(resized_image)
        # Because these operations are not commutative, consider randomizing
        # the order of their operation.
        # NOTE: since per_image_standardization zeros the mean and makes
        # the stddev unit, this likely has no effect; see tensorflow#1458.
        distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
        distorted_image = tf.image.random_contrast(distorted_image, lower=0.2, upper=1.8)
        standard_image = tf.image.per_image_standardization(distorted_image)
        # standard_image.set_shape(shape)
        # label.set_shape([1])

        min_fraction_of_examples_in_queue = 0.4
        min_queue_examples = int(num_examples_per_epoch *
                                 min_fraction_of_examples_in_queue)

    return _generate_image_and_label_batch(standard_image, label,
                                           min_queue_examples, batch_size,
                                           shuffle=True)
def inputs(is_train, data_dir, batch_size):
    if is_train:
        file_dir = os.path.join(data_dir, 'train.tfrecord')
        num_examples_per_epoch = NUM_EXAMPLES_FOR_TRAIN
    else:
        file_dir = os.path.join(data_dir, 'test.tfrecord')
        num_examples_per_epoch = NUM_EXAMPLES_FOR_EVAL
    if not tf.gfile.Exists(file_dir):
        raise ValueError('Failed to find file %s' % file_dir)

    with tf.name_scope('input'):
        file_queue = tf.train.string_input_producer([file_dir], num_epochs=50)
        reader = tf.TFRecordReader()
        _, serialized = reader.read(file_queue)
        feature = tf.parse_single_example(
            serialized=serialized,
            features={
                'image/shape': tf.FixedLenFeature([3], tf.int64),
                'label': tf.FixedLenFeature([], tf.int64),
                'image_raw': tf.FixedLenFeature([], tf.string),
                'object/name': tf.VarLenFeature(tf.string),
                'object/truncated': tf.VarLenFeature(tf.int64),
                'object/difficult': tf.VarLenFeature(tf.int64),
                'object/xmin': tf.VarLenFeature(tf.float32),
                'object/ymin': tf.VarLenFeature(tf.float32),
                'object/xmax': tf.VarLenFeature(tf.float32),
                'object/ymax': tf.VarLenFeature(tf.float32)
            })
        shape, label = feature['image/shape'], feature['label']
        image_raw = feature['image_raw']

        # process label
        # change 42 classes to 2 classes
        # label = 0 if label == 33 else 1
        label = tf.cast(label, tf.int32)

        # process image
        image = tf.image.decode_jpeg(image_raw)
        float_image = tf.cast(image, tf.float32)
        float_image = tf.reshape(float_image, [1920, 2560, 3])
        # reshape_image = tf.reshape(image, [1920, 2560, 3])
        # tf.decode_raw() returns a flat (dim=1) list when decoding the raw bytes,
        # so tf.reshape() is needed to recover the dim=3 image layout;
        # tf.image.decode_jpeg() directly returns the decoded JPEG image.
        resized_image = tf.image.resize_images(float_image, [224, 224])
        standard_image = tf.image.per_image_standardization(resized_image)
        standard_image = tf.reshape(standard_image, [224, 224, 3])
        # standard_image.set_shape(shape)
        # label.set_shape([1])

        min_fraction_of_examples_in_queue = 0.2
        min_queue_examples = int(num_examples_per_epoch *
                                 min_fraction_of_examples_in_queue)

    return _generate_image_and_label_batch(standard_image, label,
                                           min_queue_examples, batch_size,
                                           shuffle=True)
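# `_generate_image_and_label_batch` is called by both input functions above but
# is not shown here. A minimal sketch in the spirit of the CIFAR-10 input
# pipeline might look like this (thread count and capacity are assumptions):
def _generate_image_and_label_batch(image, label, min_queue_examples,
                                    batch_size, shuffle):
    num_preprocess_threads = 4
    if shuffle:
        images, labels = tf.train.shuffle_batch(
            [image, label],
            batch_size=batch_size,
            num_threads=num_preprocess_threads,
            capacity=min_queue_examples + 3 * batch_size,
            min_after_dequeue=min_queue_examples)
    else:
        images, labels = tf.train.batch(
            [image, label],
            batch_size=batch_size,
            num_threads=num_preprocess_threads,
            capacity=min_queue_examples + 3 * batch_size)
    return images, tf.reshape(labels, [batch_size])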
def get_split(split_name, dataset_dir, file_pattern=None, reader=None): """Gets a dataset tuple with instructions for reading ImageNet. Args: split_name: A train/test split name. dataset_dir: The base directory of the dataset sources. file_pattern: The file pattern to use when matching the dataset sources. It is assumed that the pattern contains a '%s' string so that the split name can be inserted. reader: The TensorFlow reader type. Returns: A `Dataset` namedtuple. Raises: ValueError: if `split_name` is not a valid train/test split. """ assert FLAGS.num_classes == 0 or FLAGS.num_classes == DEFAULT_NUM_CLASSES num_classes = FLAGS.num_classes or DEFAULT_NUM_CLASSES _SPLITS_TO_SIZES = { 'train': FLAGS.train_size or _DEFAULT_TRAIN_SIZE, 'validation': FLAGS.validation_size or _DEFAULT_VALIDATION_SIZE, } if split_name not in _SPLITS_TO_SIZES: raise ValueError('split name %s was not recognized.' % split_name) if not file_pattern: file_pattern = _FILE_PATTERN file_pattern = os.path.join(dataset_dir, file_pattern % split_name) # Allowing None in the signature so that dataset_factory can use the default. if reader is None: reader = tf.TFRecordReader keys_to_features = { 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'), 'image/class/label': tf.VarLenFeature(dtype=tf.int64), 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, default_value=''), 'image/filename': tf.FixedLenFeature([], dtype=tf.string, default_value=''), } output_name = 'target' if FLAGS.dataset_use_target else 'source' items_to_handlers = { output_name: slim.tfexample_decoder.Image('image/encoded', 'image/format'), 'conditional_labels': dataset_utils.OneHotLabelTensor( 'image/class/text', tags_id_lookup_file=FLAGS.tags_id_lookup_file, num_classes=num_classes, tags_key_column_index=FLAGS.tags_key_column_index, tags_value_column_index=FLAGS.tags_value_column_index), 'label_text': slim.tfexample_decoder.Tensor('image/class/text'), 'filename': slim.tfexample_decoder.Tensor('image/filename'), } items_used = [output_name, 'conditional_labels', 'filename', 'label_text'] items_need_preprocessing = [ output_name, 'conditional_labels', ] decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers) return slim.dataset.Dataset( data_sources=file_pattern, reader=reader, decoder=decoder, num_samples=_SPLITS_TO_SIZES[split_name], items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, items_used=items_used, items_need_preprocessing=items_need_preprocessing, num_classes=num_classes, has_source=True)
def get_split(split_name, dataset_dir, file_pattern, num_samples, reader=None):
    dataset_dir = util.io.get_absolute_path(dataset_dir)

    if util.str.contains(file_pattern, '%'):
        file_pattern = util.io.join_path(dataset_dir, file_pattern % split_name)
    else:
        file_pattern = util.io.join_path(dataset_dir, file_pattern)

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/filename': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x1': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x2': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x3': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x4': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y1': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y2': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y3': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y4': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
    }
    items_to_handlers = {
        'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'shape': slim.tfexample_decoder.Tensor('image/shape'),
        'filename': slim.tfexample_decoder.Tensor('image/filename'),
        'object/bbox': slim.tfexample_decoder.BoundingBox(
            ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
        'object/oriented_bbox/x1': slim.tfexample_decoder.Tensor('image/object/bbox/x1'),
        'object/oriented_bbox/x2': slim.tfexample_decoder.Tensor('image/object/bbox/x2'),
        'object/oriented_bbox/x3': slim.tfexample_decoder.Tensor('image/object/bbox/x3'),
        'object/oriented_bbox/x4': slim.tfexample_decoder.Tensor('image/object/bbox/x4'),
        'object/oriented_bbox/y1': slim.tfexample_decoder.Tensor('image/object/bbox/y1'),
        'object/oriented_bbox/y2': slim.tfexample_decoder.Tensor('image/object/bbox/y2'),
        'object/oriented_bbox/y3': slim.tfexample_decoder.Tensor('image/object/bbox/y3'),
        'object/oriented_bbox/y4': slim.tfexample_decoder.Tensor('image/object/bbox/y4'),
        'object/label': slim.tfexample_decoder.Tensor('image/object/bbox/label')
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)
    labels_to_names = {0: 'background', 1: 'text'}
    items_to_descriptions = {
        'image': 'A color image of varying height and width.',
        'shape': 'Shape of the image',
        'object/bbox': 'A list of bounding boxes, one per each object.',
        'object/label': 'A list of labels, one per each object.',
    }
    return slim.dataset.Dataset(data_sources=file_pattern,
                                reader=reader,
                                decoder=decoder,
                                num_samples=num_samples,
                                items_to_descriptions=items_to_descriptions,
                                num_classes=2,
                                labels_to_names=labels_to_names)
from tensorflow_transform.tf_metadata import dataset_schema as sch

test_feature_spec = {
    # FixedLenFeatures
    'fixed_categorical_int_with_range': tf.FixedLenFeature(shape=[], dtype=tf.int64),
    'fixed_int': tf.FixedLenFeature(shape=[5], dtype=tf.int64),
    'fixed_float': tf.FixedLenFeature(shape=[5], dtype=tf.float32),
    'fixed_string': tf.FixedLenFeature(shape=[5], dtype=tf.string),
    # VarLenFeatures
    'var_int': tf.VarLenFeature(dtype=tf.int64),
    'var_float': tf.VarLenFeature(dtype=tf.float32),
    'var_string': tf.VarLenFeature(dtype=tf.string),
}


def get_test_schema():
    return sch.from_feature_spec(test_feature_spec)


def get_manually_created_schema():
    """Provide a test schema built from scratch using the Schema classes."""
    return sch.Schema({
        # FixedLenFeatures
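# Small sketch (assumed, not from the original source) showing the feature spec
# above applied directly with tf.parse_example; FixedLenFeatures come back as
# dense tensors and VarLenFeatures as SparseTensors.
serialized_examples = tf.placeholder(tf.string, shape=[None], name='examples')
parsed_features = tf.parse_example(serialized_examples, test_feature_spec)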
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
    """Gets a dataset tuple with instructions for reading ImageNet.

    Args:
      split_name: A train/test split name.
      dataset_dir: The base directory of the dataset sources.
      file_pattern: The file pattern to use when matching the dataset sources.
        It is assumed that the pattern contains a '%s' string so that the split
        name can be inserted.
      reader: The TensorFlow reader type.

    Returns:
      A `Dataset` namedtuple.

    Raises:
      ValueError: if `split_name` is not a valid train/test split.
    """
    if split_name not in _SPLITS_TO_SIZES:
        raise ValueError('split name %s was not recognized.' % split_name)

    if not file_pattern:
        file_pattern = _FILE_PATTERN
    file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader

    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/class/label': tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
        'image/class/text': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/class/label': tf.VarLenFeature(dtype=tf.int64),
    }
    items_to_handlers = {
        'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'label': slim.tfexample_decoder.Tensor('image/class/label'),
        'label_text': slim.tfexample_decoder.Tensor('image/class/text'),
        'object/bbox': slim.tfexample_decoder.BoundingBox(
            ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
        'object/label': slim.tfexample_decoder.Tensor('image/object/class/label'),
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    labels_to_names = None
    if dataset_utils.has_labels(dataset_dir):
        labels_to_names = dataset_utils.read_label_file(dataset_dir)
    else:
        labels_to_names = create_readable_names_for_imagenet_labels()
        dataset_utils.write_label_file(labels_to_names, dataset_dir)

    return slim.dataset.Dataset(data_sources=file_pattern,
                                reader=reader,
                                decoder=decoder,
                                num_samples=_SPLITS_TO_SIZES[split_name],
                                items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
                                num_classes=_NUM_CLASSES,
                                labels_to_names=labels_to_names)
def get_next_batch():
    """Read the data that has already been saved in the tfrecord files."""
    # Collect all tfrecord files under the given directory.
    # (An r-prefix would keep the pattern string from being escaped.)
    tfrecords = glob.glob(f'{hp.TRAIN_DATASET_PATH}/*.tfrecord')
    # print("line23: tfrecords = " + str(tfrecords))
    """
    tf.train.string_input_producer(
        string_tensor,
        num_epochs=None,   # NUM_EPOCHS = 150; each string in string_tensor is produced
                           # num_epochs times; if unspecified, the strings can be cycled
                           # through an unlimited number of times
        shuffle=True,      # shuffle: boolean; if true, the order is shuffled within each epoch
        seed=None,
        capacity=32,
        shared_name=None,
        name=None,
        cancel_op=None
    )
    Outputs the strings to an input pipeline queue: to read data from TFRecords files,
    first create a parsing queue with tf.train.string_input_producer(), then use
    tf.TFRecordReader together with the tf.parse_single_example parser.
    https://blog.csdn.net/tefuirnever/article/details/90271862
    """
    # Push the file names into an input pipeline queue.
    filename_queue = tf.train.string_input_producer(tfrecords,
                                                    shuffle=True,
                                                    num_epochs=hp.NUM_EPOCHS)
    reader = tf.TFRecordReader()
    # The reader first consumes the parsing queue and returns a serialized_example object.
    _, serialized_example = reader.read(filename_queue)
    # tf.parse_single_example then parses the Example protocol buffer into tensors.
    features = tf.parse_single_example(
        serialized_example,
        features={
            'ori_spkid': tf.FixedLenFeature(shape=(1, ), dtype=tf.int64),
            'ori_mel': tf.VarLenFeature(dtype=tf.float32),
            'ori_mel_shape': tf.FixedLenFeature(shape=(2, ), dtype=tf.int64),
            'aim_spkid': tf.FixedLenFeature(shape=(1, ), dtype=tf.int64),
            'aim_mel': tf.VarLenFeature(dtype=tf.float32),
            'aim_mel_shape': tf.FixedLenFeature(shape=(2, ), dtype=tf.int64),
        })

    # tf.sparse_tensor_to_dense converts a SparseTensor into a dense tensor
    # (i.e. the sparse matrix is filled in with the default value).
    features['ori_mel'] = tf.sparse_tensor_to_dense(features['ori_mel'])
    features['aim_mel'] = tf.sparse_tensor_to_dense(features['aim_mel'])

    ori_spk = features['ori_spkid']
    ori_mel = tf.reshape(features['ori_mel'], features['ori_mel_shape'])
    aim_spk = features['aim_spkid']
    aim_mel = tf.reshape(features['aim_mel'], features['aim_mel_shape'])

    # self.CODED_DIM = 60  # compressed to 60 dimensions
    ori_mel = tf.reshape(ori_mel, [-1, hp.CODED_DIM])
    aim_mel = tf.reshape(aim_mel, [-1, hp.CODED_DIM])  # 80-dim mel

    ori_spk_batch, ori_mel_batch, aim_spk_batch, aim_mel_batch = tf.train.batch(
        [ori_spk, ori_mel, aim_spk, aim_mel],
        batch_size=hp.BATCH_SIZE,
        capacity=100,
        num_threads=10,
        dynamic_pad=True,
        allow_smaller_final_batch=False)

    """Could the pad operation be done here, inside get_next_batch(), before returning?"""
    # tf.shape(ori_mel_batch)[1]
    max_frame = tf.maximum(
        tf.shape(ori_mel_batch)[1], tf.shape(aim_mel_batch)[1])  # maximum frame count
    gap_frame = max_frame - tf.minimum(
        tf.shape(ori_mel_batch)[1], tf.shape(aim_mel_batch)[1])  # difference in frame counts
    # print(tf.math.subtract(max_frame, tf.shape(aim_mel_batch)[1]))

    padded = tf.zeros([
        tf.shape(aim_mel_batch)[0],
        tf.subtract(max_frame, tf.shape(aim_mel_batch)[1]),
        tf.shape(aim_mel_batch)[2]
    ], dtype=tf.float32)
    # a = padded
    aim_mel_batch = tf.concat((aim_mel_batch, padded), axis=1)
    # concated_1 = aim_mel_batch

    padded = tf.zeros([
        tf.shape(ori_mel_batch)[0],
        tf.subtract(max_frame, tf.shape(ori_mel_batch)[1]),
        tf.shape(ori_mel_batch)[2]
    ], dtype=tf.float32)
    # b = padded
    # padded = tf.zeros_like([1, tf.math.subtract(max_frame, tf.shape(ori_mel_batch)[1]), 1], dtype=tf.float32)
    ori_mel_batch = tf.concat((ori_mel_batch, padded), axis=1)
    # concated_2 = ori_mel_batch

    # padded = tf.zeros_like([0, gap, 0])
    # aim_mel_batch = tf.concat((aim_mel_batch, padded), axis=1)
    # aim_mel_batch = tf.pad(aim_mel_batch, [[0, 0], [0, tf.math.subtract(max_frame, tf.shape(aim_mel_batch)[1])], [0, 0]], "CONSTANT")
    # ori_mel_batch = tf.pad(ori_mel_batch, [[0, 0], [0, tf.math.subtract(max_frame, tf.shape(ori_mel_batch)[1])], [0, 0]], "CONSTANT")

    # return ori_spk_batch, ori_mel_batch, aim_spk_batch, aim_mel_batch, a, b, concated_1, concated_2, max_frame
    return ori_spk_batch, ori_mel_batch, aim_spk_batch, aim_mel_batch
def main(): # Change these for different models FEATURE_SIZE = 124 LABEL_SIZE = 2 TRAIN_TFRECORDS_FILE = "data/a8a_train.libsvm.tfrecords" VALIDATE_TFRECORDS_FILE = "data/a8a_test.libsvm.tfrecords" learning_rate = FLAGS.learning_rate epoch_number = FLAGS.epoch_number thread_number = FLAGS.thread_number batch_size = FLAGS.batch_size validate_batch_size = FLAGS.validate_batch_size min_after_dequeue = FLAGS.min_after_dequeue capacity = thread_number * batch_size + min_after_dequeue mode = FLAGS.mode checkpoint_dir = FLAGS.checkpoint_dir if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) tensorboard_dir = FLAGS.tensorboard_dir if not os.path.exists(tensorboard_dir): os.makedirs(tensorboard_dir) def read_and_decode(filename_queue): reader = tf.TFRecordReader() _, serialized_example = reader.read(filename_queue) return serialized_example # Read TFRecords files for training filename_queue = tf.train.string_input_producer( tf.train.match_filenames_once(TRAIN_TFRECORDS_FILE), num_epochs=epoch_number) serialized_example = read_and_decode(filename_queue) batch_serialized_example = tf.train.shuffle_batch( [serialized_example], batch_size=batch_size, num_threads=thread_number, capacity=capacity, min_after_dequeue=min_after_dequeue) features = tf.parse_example(batch_serialized_example, features={ "label": tf.FixedLenFeature( [], tf.float32), "ids": tf.VarLenFeature(tf.int64), "values": tf.VarLenFeature(tf.float32), }) batch_labels = features["label"] batch_ids = features["ids"] batch_values = features["values"] # Read TFRecords file for validation validate_filename_queue = tf.train.string_input_producer( tf.train.match_filenames_once(VALIDATE_TFRECORDS_FILE), num_epochs=epoch_number) validate_serialized_example = read_and_decode(validate_filename_queue) validate_batch_serialized_example = tf.train.shuffle_batch( [validate_serialized_example], batch_size=validate_batch_size, num_threads=thread_number, capacity=capacity, min_after_dequeue=min_after_dequeue) validate_features = tf.parse_example( validate_batch_serialized_example, features={ "label": tf.FixedLenFeature( [], tf.float32), "ids": tf.VarLenFeature(tf.int64), "values": tf.VarLenFeature(tf.float32), }) validate_batch_labels = validate_features["label"] validate_batch_ids = validate_features["ids"] validate_batch_values = validate_features["values"] # Define the model input_units = FEATURE_SIZE hidden1_units = 128 hidden2_units = 32 hidden3_units = 8 output_units = LABEL_SIZE def full_connect(inputs, weights_shape, biases_shape, is_train=True): with tf.device('/cpu:0'): weights = tf.get_variable("weights", weights_shape, initializer=tf.random_normal_initializer()) biases = tf.get_variable("biases", biases_shape, initializer=tf.random_normal_initializer()) layer = tf.matmul(inputs, weights) + biases if FLAGS.enable_bn and is_train: mean, var = tf.nn.moments(layer, axes=[0]) scale = tf.get_variable("scale", biases_shape, initializer=tf.random_normal_initializer()) shift = tf.get_variable("shift", biases_shape, initializer=tf.random_normal_initializer()) layer = tf.nn.batch_normalization(layer, mean, var, shift, scale, FLAGS.bn_epsilon) return layer def sparse_full_connect(sparse_ids, sparse_values, weights_shape, biases_shape, is_train=True): with tf.device('/cpu:0'): weights = tf.get_variable("weights", weights_shape, initializer=tf.random_normal_initializer()) biases = tf.get_variable("biases", biases_shape, initializer=tf.random_normal_initializer()) return tf.nn.embedding_lookup_sparse(weights, sparse_ids, sparse_values, 
combiner="sum") + biases def full_connect_relu(inputs, weights_shape, biases_shape, is_train=True): return tf.nn.relu(full_connect(inputs, weights_shape, biases_shape, is_train)) def dnn_inference(sparse_ids, sparse_values, is_train=True): with tf.variable_scope("layer1"): sparse_layer = sparse_full_connect(sparse_ids, sparse_values, [input_units, hidden1_units], [hidden1_units], is_train) layer = tf.nn.relu(sparse_layer) with tf.variable_scope("layer2"): layer = full_connect_relu(layer, [hidden1_units, hidden2_units], [hidden2_units], is_train) with tf.variable_scope("layer3"): layer = full_connect_relu(layer, [hidden2_units, hidden3_units], [hidden3_units], is_train) if FLAGS.enable_dropout and is_train: layer = tf.nn.dropout(layer, FLAGS.dropout_keep_prob) with tf.variable_scope("output"): layer = full_connect(layer, [hidden3_units, output_units], [output_units], is_train) return layer def lr_inference(sparse_ids, sparse_values, is_train=True): with tf.variable_scope("logistic_regression"): layer = sparse_full_connect(sparse_ids, sparse_values, [input_units, output_units], [output_units]) return layer def wide_and_deep_inference(sparse_ids, sparse_values, is_train=True): return lr_inference(sparse_ids, sparse_values, is_train) + dnn_inference( sparse_ids, sparse_values, is_train) def inference(sparse_ids, sparse_values, is_train=True): print("Use the model: {}".format(FLAGS.model)) if FLAGS.model == "lr": return lr_inference(sparse_ids, sparse_values, is_train) elif FLAGS.model == "dnn": return dnn_inference(sparse_ids, sparse_values, is_train) elif FLAGS.model == "wide_and_deep": return wide_and_deep_inference(sparse_ids, sparse_values, is_train) else: print("Unknown model, exit now") exit(1) logits = inference(batch_ids, batch_values, True) batch_labels = tf.to_int64(batch_labels) cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, batch_labels) loss = tf.reduce_mean(cross_entropy, name='loss') print("Use the optimizer: {}".format(FLAGS.optimizer)) if FLAGS.optimizer == "sgd": optimizer = tf.train.GradientDescentOptimizer(learning_rate) elif FLAGS.optimizer == "momentum": # optimizer = tf.train.MomentumOptimizer(learning_rate) print("Not support optimizer: {} yet, exit now".format(FLAGS.optimizer)) exit(1) elif FLAGS.optimizer == "adadelta": optimizer = tf.train.AdadeltaOptimizer(learning_rate) elif FLAGS.optimizer == "adagrad": optimizer = tf.train.AdagradOptimizer(learning_rate) elif FLAGS.optimizer == "adam": optimizer = tf.train.AdamOptimizer(learning_rate) elif FLAGS.optimizer == "ftrl": optimizer = tf.train.FtrlOptimizer(learning_rate) elif FLAGS.optimizer == "rmsprop": optimizer = tf.train.RMSPropOptimizer(learning_rate) else: print("Unknow optimizer: {}, exit now".format(FLAGS.optimizer)) exit(1) with tf.device('/cpu:0'): global_step = tf.Variable(0, name='global_step', trainable=False) train_op = optimizer.minimize(loss, global_step=global_step) tf.get_variable_scope().reuse_variables() # Define accuracy op for train data train_accuracy_logits = inference(batch_ids, batch_values, False) train_softmax = tf.nn.softmax(train_accuracy_logits) train_correct_prediction = tf.equal( tf.argmax(train_softmax, 1), batch_labels) train_accuracy = tf.reduce_mean(tf.cast(train_correct_prediction, tf.float32)) # Define auc op for train data batch_labels = tf.cast(batch_labels, tf.int32) sparse_labels = tf.reshape(batch_labels, [-1, 1]) derived_size = tf.shape(batch_labels)[0] indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1]) concated = tf.concat(1, [indices, 
sparse_labels]) outshape = tf.pack([derived_size, LABEL_SIZE]) new_train_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0) _, train_auc = tf.contrib.metrics.streaming_auc(train_softmax, new_train_batch_labels) # Define accuracy op for validate data validate_accuracy_logits = inference(validate_batch_ids, validate_batch_values, False) validate_softmax = tf.nn.softmax(validate_accuracy_logits) validate_batch_labels = tf.to_int64(validate_batch_labels) validate_correct_prediction = tf.equal( tf.argmax(validate_softmax, 1), validate_batch_labels) validate_accuracy = tf.reduce_mean(tf.cast(validate_correct_prediction, tf.float32)) # Define auc op for validate data validate_batch_labels = tf.cast(validate_batch_labels, tf.int32) sparse_labels = tf.reshape(validate_batch_labels, [-1, 1]) derived_size = tf.shape(validate_batch_labels)[0] indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1]) concated = tf.concat(1, [indices, sparse_labels]) outshape = tf.pack([derived_size, LABEL_SIZE]) new_validate_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0) _, validate_auc = tf.contrib.metrics.streaming_auc(validate_softmax, new_validate_batch_labels) # Define inference op sparse_index = tf.placeholder(tf.int64, [None, 2]) sparse_ids = tf.placeholder(tf.int64, [None]) sparse_values = tf.placeholder(tf.float32, [None]) sparse_shape = tf.placeholder(tf.int64, [2]) inference_ids = tf.SparseTensor(sparse_index, sparse_ids, sparse_shape) inference_values = tf.SparseTensor(sparse_index, sparse_values, sparse_shape) inference_logits = inference(inference_ids, inference_values, False) inference_softmax = tf.nn.softmax(inference_logits) inference_op = tf.argmax(inference_softmax, 1) # Initialize saver and summary checkpoint_file = checkpoint_dir + "/checkpoint.ckpt" steps_to_validate = FLAGS.steps_to_validate tf.scalar_summary("loss", loss) tf.scalar_summary("train_accuracy", train_accuracy) tf.scalar_summary("train_auc", train_auc) tf.scalar_summary("validate_accuracy", validate_accuracy) tf.scalar_summary("validate_auc", validate_auc) saver = tf.train.Saver() keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1]) keys = tf.identity(keys_placeholder) # Create session to run with tf.Session() as sess: summary_op = tf.merge_all_summaries() writer = tf.train.SummaryWriter(tensorboard_dir, sess.graph) sess.run(tf.initialize_all_variables()) sess.run(tf.initialize_local_variables()) if mode == "train": ckpt = tf.train.get_checkpoint_state(checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: print("Continue training from the model {}".format( ckpt.model_checkpoint_path)) saver.restore(sess, ckpt.model_checkpoint_path) # Get coordinator and run queues to read data coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord, sess=sess) start_time = datetime.datetime.now() try: while not coord.should_stop(): _, loss_value, step = sess.run([train_op, loss, global_step]) if step % steps_to_validate == 0: train_accuracy_value, train_auc_value, validate_accuracy_value, auc_value, summary_value = sess.run( [train_accuracy, train_auc, validate_accuracy, validate_auc, summary_op]) end_time = datetime.datetime.now() print( "[{}] Step: {}, loss: {}, train_acc: {}, train_auc: {}, valid_acc: {}, valid_auc: {}".format( end_time - start_time, step, loss_value, train_accuracy_value, train_auc_value, validate_accuracy_value, auc_value)) writer.add_summary(summary_value, step) saver.save(sess, checkpoint_file, global_step=step) start_time = end_time except 
tf.errors.OutOfRangeError: print("Done training after reading all data") print("Exporting trained model to {}".format(FLAGS.model_path)) model_exporter = exporter.Exporter(saver) model_exporter.init( sess.graph.as_graph_def(), named_graph_signatures={ 'inputs': exporter.generic_signature({"keys": keys_placeholder, "indexs": sparse_index, "ids": sparse_ids, "values": sparse_values, "shape": sparse_shape}), 'outputs': exporter.generic_signature( {"keys": keys, "softmax": inference_softmax, "prediction": inference_op}) }) model_exporter.export(FLAGS.model_path, tf.constant(FLAGS.export_version), sess) finally: coord.request_stop() # Wait for threads to exit coord.join(threads) elif mode == "export": print("Start to export model directly") # Load the checkpoint files ckpt = tf.train.get_checkpoint_state(checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: print("Load the model from {}".format(ckpt.model_checkpoint_path)) saver.restore(sess, ckpt.model_checkpoint_path) else: print("No checkpoint found, exit now") exit(1) # Export the model files print("Exporting trained model to {}".format(FLAGS.model_path)) model_exporter = exporter.Exporter(saver) model_exporter.init( sess.graph.as_graph_def(), named_graph_signatures={ 'inputs': exporter.generic_signature({"keys": keys_placeholder, "indexs": sparse_index, "ids": sparse_ids, "values": sparse_values, "shape": sparse_shape}), 'outputs': exporter.generic_signature( {"keys": keys, "softmax": inference_softmax, "prediction": inference_op}) }) model_exporter.export(FLAGS.model_path, tf.constant(FLAGS.export_version), sess) elif mode == "inference": print("Start to run inference") start_time = datetime.datetime.now() inference_result_file_name = "./inference_result.txt" inference_test_file_name = "./data/a8a_test.libsvm" labels = [] feature_ids = [] feature_values = [] feature_index = [] ins_num = 0 for line in open(inference_test_file_name, "r"): tokens = line.split(" ") labels.append(int(tokens[0])) feature_num = 0 for feature in tokens[1:]: feature_id, feature_value = feature.split(":") feature_ids.append(int(feature_id)) feature_values.append(float(feature_value)) feature_index.append([ins_num, feature_num]) feature_num += 1 ins_num += 1 ckpt = tf.train.get_checkpoint_state(checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: print("Use the model {}".format(ckpt.model_checkpoint_path)) saver.restore(sess, ckpt.model_checkpoint_path) else: print("No model found, exit now") exit(1) prediction, prediction_softmax = sess.run( [inference_op, inference_softmax], feed_dict={sparse_index: feature_index, sparse_ids: feature_ids, sparse_values: feature_values, sparse_shape: [ins_num, FEATURE_SIZE]}) end_time = datetime.datetime.now() print("[{}] Inference result: {}".format(end_time - start_time, prediction)) # Compute accuracy label_number = len(labels) correct_label_number = 0 for i in range(label_number): if labels[i] == prediction[i]: correct_label_number += 1 accuracy = float(correct_label_number) / label_number # Compute auc expected_labels = np.array(labels) predict_labels = prediction_softmax[:, 0] fpr, tpr, thresholds = metrics.roc_curve(expected_labels, predict_labels, pos_label=0) auc = metrics.auc(fpr, tpr) print("For inference data, accuracy: {}, auc: {}".format(accuracy, auc)) # Save inference result into file np.savetxt(inference_result_file_name, prediction, delimiter=",") print("Save result to file: {}".format(inference_result_file_name))
def simple_fake_sequence_to_prediction(export_path, eval_export_path): """Trains and exports a fake_sequence_to_prediction model.""" input_feature_spec = { 'values_t1': tf.VarLenFeature(dtype=tf.float32), 'values_t2': tf.VarLenFeature(dtype=tf.float32), 'values_t3': tf.VarLenFeature(dtype=tf.float32) } label_feature_spec = dict(input_feature_spec) label_feature_spec['label'] = tf.FixedLenFeature([1], dtype=tf.float32) def _make_embedding_and_sparse_values(features): """Make "embedding" and "sparse_values" features.""" embedding_dim = 3 sparse_dims = 3 sparse_timesteps = 3 # Create a three-dimensional "embedding" based on the value of the feature # The embedding is simply [1, 1, 1] * feature_value # (or [0, 0, 0] if the feature is missing). batch_size = tf.cast(tf.shape(features['values_t1'])[0], dtype=tf.int64) ones = tf.ones(shape=[embedding_dim]) dense_t1 = tf.sparse_tensor_to_dense(features['values_t1']) dense_t2 = tf.sparse_tensor_to_dense(features['values_t2']) dense_t3 = tf.sparse_tensor_to_dense(features['values_t3']) embedding_t1 = ones * dense_t1 embedding_t2 = ones * dense_t2 embedding_t3 = ones * dense_t3 embeddings = tf.stack([embedding_t1, embedding_t2, embedding_t3], axis=1) features['embedding'] = embeddings del features['values_t1'] del features['values_t2'] del features['values_t3'] # Make the "sparse_values" feature. sparse_values = tf.squeeze( tf.concat( [ dense_t1, dense_t1**2, dense_t1**3, dense_t2, dense_t2**2, dense_t2**3, dense_t3, dense_t3**2, dense_t3**3 ], axis=0)) sparse_total_elems = batch_size * sparse_dims * sparse_timesteps seq = tf.range(0, sparse_total_elems, dtype=tf.int64) batch_num = seq % batch_size timestep = tf.div(seq, batch_size * sparse_dims) offset = tf.div(seq, batch_size) % sparse_dims sparse_indices = tf.stack([batch_num, timestep, offset], axis=1) features['sparse_values'] = tf.SparseTensor( indices=sparse_indices, values=sparse_values, dense_shape=[batch_size, sparse_timesteps, sparse_dims]) def model_fn(features, labels, mode, params): """Model function for custom estimator.""" del params dense_values = tf.sparse_tensor_to_dense( features['sparse_values'], validate_indices=False) a = tf.Variable(1.0, dtype=tf.float32, name='a') b = tf.Variable(2.0, dtype=tf.float32, name='b') c = tf.Variable(3.0, dtype=tf.float32, name='c') d = tf.Variable(4.0, dtype=tf.float32, name='d') e = tf.Variable(5.0, dtype=tf.float32, name='e') f = tf.Variable(6.0, dtype=tf.float32, name='f') predictions = ( a * tf.reduce_sum(features['embedding'][:, 0, :], axis=1) + b * tf.reduce_sum(features['embedding'][:, 1, :], axis=1) + c * tf.reduce_sum(features['embedding'][:, 2, :], axis=1) + d * tf.reduce_sum(dense_values[:, 0, :], axis=1) + e * tf.reduce_sum(dense_values[:, 1, :], axis=1) + f * tf.reduce_sum(dense_values[:, 2, :], axis=1)) if mode == tf.estimator.ModeKeys.PREDICT: return tf.estimator.EstimatorSpec( mode=mode, predictions={'score': predictions}, export_outputs={ 'score': tf.estimator.export.RegressionOutput(predictions) }) loss = tf.losses.mean_squared_error(labels, tf.expand_dims(predictions, axis=-1)) optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001) train_op = optimizer.minimize( loss=loss, global_step=tf.train.get_global_step()) return tf.estimator.EstimatorSpec( mode=mode, loss=loss, train_op=train_op, eval_metric_ops={ 'mean_squared_error': tf.metrics.mean_squared_error( labels, tf.expand_dims(predictions, axis=-1)), 'mean_prediction': tf.metrics.mean(predictions), }, predictions=predictions) def train_input_fn(): """Train input 
function.""" def make_example_with_label(values_t1=None, values_t2=None, values_t3=None): """Make example with label.""" effective_t1 = 0.0 effective_t2 = 0.0 effective_t3 = 0.0 args = {} if values_t1 is not None: args['values_t1'] = float(values_t1) effective_t1 = values_t1 if values_t2 is not None: args['values_t2'] = float(values_t2) effective_t2 = values_t2 if values_t3 is not None: args['values_t3'] = float(values_t3) effective_t3 = values_t3 label = (3 * effective_t1 + 6 * effective_t2 + 9 * effective_t3 + 4 * (effective_t1 + effective_t1**2 + effective_t1**3) + 5 * (effective_t2 + effective_t2**2 + effective_t2**3) + 6 * (effective_t3 + effective_t3**2 + effective_t3**3)) args['label'] = float(label) return util.make_example(**args) examples = [ make_example_with_label(values_t1=1.0), make_example_with_label(values_t2=1.0), make_example_with_label(values_t3=1.0), make_example_with_label(values_t1=2.0, values_t2=3.0), make_example_with_label(values_t1=5.0, values_t3=7.0), make_example_with_label(values_t2=11.0, values_t3=13.0), make_example_with_label(values_t1=2.0, values_t2=3.0, values_t3=5.0), ] serialized_examples = [x.SerializeToString() for x in examples] features = tf.parse_example(serialized_examples, label_feature_spec) _make_embedding_and_sparse_values(features) label = features.pop('label') return features, label def serving_input_receiver_fn(): """Serving input receiver function.""" serialized_tf_example = tf.placeholder( dtype=tf.string, shape=[None], name='input_example_tensor') receiver_tensors = {'examples': serialized_tf_example} features = tf.parse_example(serialized_tf_example, input_feature_spec) _make_embedding_and_sparse_values(features) return tf.estimator.export.ServingInputReceiver(features, receiver_tensors) def eval_input_receiver_fn(): """Eval input receiver function.""" serialized_tf_example = tf.placeholder( dtype=tf.string, shape=[None], name='input_example_tensor') receiver_tensors = {'examples': serialized_tf_example} features = tf.parse_example(serialized_tf_example, label_feature_spec) _make_embedding_and_sparse_values(features) return export.EvalInputReceiver( features=features, receiver_tensors=receiver_tensors, labels=features['label']) estimator = tf.estimator.Estimator(model_fn=model_fn) estimator.train(input_fn=train_input_fn, steps=10) export_dir = None eval_export_dir = None if export_path: export_dir = estimator.export_savedmodel( export_dir_base=export_path, serving_input_receiver_fn=serving_input_receiver_fn) if eval_export_path: eval_export_dir = export.export_eval_savedmodel( estimator=estimator, export_dir_base=eval_export_path, eval_input_receiver_fn=eval_input_receiver_fn) return export_dir, eval_export_dir
def parse_function(serialize_string):
  feature_description = {
      'input': tf.VarLenFeature(dtype=tf.int64),
      'output': tf.VarLenFeature(dtype=tf.int64),
  }
  return tf.io.parse_single_example(serialize_string, feature_description)
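A minimal usage sketch for parse_function (TF 1.x assumed; the file name is hypothetical): map it over a TFRecordDataset and densify the two VarLenFeature outputs.

import tensorflow as tf

dataset = tf.data.TFRecordDataset('train.tfrecord')  # hypothetical file
dataset = dataset.map(parse_function)
dataset = dataset.map(
    lambda feats: {k: tf.sparse_tensor_to_dense(v) for k, v in feats.items()})
iterator = dataset.make_one_shot_iterator()
next_features = iterator.get_next()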
def simple_control_dependency_estimator(export_path, eval_export_path): """Exports a simple estimator with control dependencies.""" def control_dependency_metric(increment, target): """Metric that introduces a control dependency on target. The value is incremented by increment each time the metric is called (so the value can vary depending on how things are batched). This is mainly to verify that the metric was called. Args: increment: Amount to increment the value by each time the metric is called. target: Tensor to introduce the control dependency on. Returns: value_op, update_op for the metric. """ total_value = tf.Variable(initial_value=0.0, dtype=tf.float64, trainable=False, collections=[ tf.GraphKeys.METRIC_VARIABLES, tf.GraphKeys.LOCAL_VARIABLES ], validate_shape=True) with tf.control_dependencies([target]): update_op = tf.assign_add(total_value, increment) value_op = tf.identity(total_value) return value_op, update_op def model_fn(features, labels, mode, params): """Model function for custom estimator.""" del params predictions = features['prediction'] predictions_dict = { prediction_keys.PredictionKeys.PREDICTIONS: predictions, } if mode == tf.estimator.ModeKeys.PREDICT: return tf.estimator.EstimatorSpec( mode=mode, predictions=predictions_dict, export_outputs={ tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.RegressionOutput(predictions) }) loss = tf.losses.mean_squared_error(predictions, labels['actual_label']) train_op = tf.assign_add(tf.train.get_global_step(), 1) eval_metric_ops = {} if mode == tf.estimator.ModeKeys.EVAL: eval_metric_ops = { metric_keys.MetricKeys.LOSS_MEAN: tf.metrics.mean(loss), 'control_dependency_on_fixed_float': control_dependency_metric(1.0, features['fixed_float']), # Introduce a direct dependency on the values Tensor. If we # introduce another intervening op like sparse_tensor_to_dense then # regardless of whether TFMA correctly wrap SparseTensors we will not # encounter the TF bug. 'control_dependency_on_var_float': control_dependency_metric(10.0, features['var_float'].values), 'control_dependency_on_actual_label': control_dependency_metric(100.0, labels['actual_label']), 'control_dependency_on_var_int_label': control_dependency_metric(1000.0, labels['var_int'].values), # Note that TFMA does *not* wrap predictions, so in most cases # if there's a control dependency on predictions they will be # recomputed. 
'control_dependency_on_prediction': control_dependency_metric(10000.0, predictions), } return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op, predictions=predictions_dict, eval_metric_ops=eval_metric_ops) def train_input_fn(): """Train input function.""" return { 'prediction': tf.constant([[1.0], [2.0], [3.0], [4.0]]), }, { 'actual_label': tf.constant([[1.0], [2.0], [3.0], [4.0]]) } feature_spec = {'prediction': tf.FixedLenFeature([1], dtype=tf.float32)} eval_feature_spec = { 'prediction': tf.FixedLenFeature([1], dtype=tf.float32), 'label': tf.FixedLenFeature([1], dtype=tf.float32), 'fixed_float': tf.FixedLenFeature([1], dtype=tf.float32), 'fixed_string': tf.FixedLenFeature([1], dtype=tf.string), 'fixed_int': tf.FixedLenFeature([1], dtype=tf.int64), 'var_float': tf.VarLenFeature(dtype=tf.float32), 'var_string': tf.VarLenFeature(dtype=tf.string), 'var_int': tf.VarLenFeature(dtype=tf.int64), } estimator = tf.estimator.Estimator(model_fn=model_fn) estimator.train(input_fn=train_input_fn, steps=1) def eval_input_receiver_fn(): """An input_fn that expects a serialized tf.Example.""" serialized_tf_example = tf.placeholder(dtype=tf.string, shape=[None], name='input_example_tensor') features = tf.parse_example(serialized_tf_example, eval_feature_spec) labels = { 'actual_label': features['label'], 'var_int': features['var_int'] } return export.EvalInputReceiver( features=features, labels=labels, receiver_tensors={'examples': serialized_tf_example}) return util.export_model_and_eval_model( estimator=estimator, serving_input_receiver_fn=( tf.estimator.export.build_parsing_serving_input_receiver_fn( feature_spec)), eval_input_receiver_fn=eval_input_receiver_fn, export_path=export_path, eval_export_path=eval_export_path)
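The hand-rolled (value_op, update_op) pattern above can be reduced to a minimal sketch (TF 1.x assumed) that simply counts metric calls, without the control dependency:

import tensorflow as tf

def call_count_metric():
    # Local, non-trainable variable so the metric is reset by the local initializer.
    total = tf.Variable(0.0, trainable=False,
                        collections=[tf.GraphKeys.METRIC_VARIABLES,
                                     tf.GraphKeys.LOCAL_VARIABLES])
    update_op = tf.assign_add(total, 1.0)  # incremented once per evaluation batch
    value_op = tf.identity(total)
    return value_op, update_op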
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
  """Gets a dataset tuple with instructions.

  Args:
    split_name: A train/test split name.
    dataset_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/test split.
  """
  if split_name not in SPLITS_TO_SIZES:
    raise ValueError('split name %s was not recognized.' % split_name)
  if not file_pattern:
    file_pattern = FILE_PATTERN
  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

  # Allowing None in the signature so that dataset_factory can use the default.
  if reader is None:
    reader = tf.TFRecordReader

  # # File name format
  # if file_pattern is None:
  #   file_pattern = _get_output_filename('tfrecords','voc_2007_train')#need fix your filename
  #   print(file_pattern)

  # Adapter 1: deserialize each Example back into the format it was stored in; handled by TF.
  keys_to_features = {
      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature((), tf.string, default_value='jpg'),
      'image/height': tf.FixedLenFeature([1], tf.int64),
      'image/width': tf.FixedLenFeature([1], tf.int64),
      'image/channels': tf.FixedLenFeature([1], tf.int64),
      'image/shape': tf.FixedLenFeature([3], tf.int64),
      'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
      'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
      'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64),
  }
  # Adapter 2: assemble the deserialized data into higher-level items; handled by slim.
  items_to_handlers = {
      'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
      'shape': slim.tfexample_decoder.Tensor('image/shape'),
      'object/bbox': slim.tfexample_decoder.BoundingBox(
          ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
      'object/label': slim.tfexample_decoder.Tensor('image/object/bbox/label'),
      'object/difficult': slim.tfexample_decoder.Tensor('image/object/bbox/difficult'),
      'object/truncated': slim.tfexample_decoder.Tensor('image/object/bbox/truncated'),
  }
  # Decoder.
  decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers)

  # The Dataset object records the dataset metadata: file locations, how to decode, and so on.
  dataset = slim.dataset.Dataset(
      data_sources=file_pattern,
      reader=reader,
      num_samples=SPLITS_TO_SIZES['test'],  # three files were generated by hand, each holding a single example
      decoder=decoder,
      items_to_descriptions=ITEMS_TO_DESCRIPTIONS,
      num_classes=NUM_CLASSES)
  return dataset
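A minimal consumption sketch for the Dataset returned above (TF 1.x with tf.contrib.slim assumed; the directory path is hypothetical):

import tensorflow as tf
slim = tf.contrib.slim

dataset = get_split('train', '/path/to/voc_tfrecords')  # hypothetical path
provider = slim.dataset_data_provider.DatasetDataProvider(dataset, num_readers=2)
image, shape, labels, bboxes = provider.get(
    ['image', 'shape', 'object/label', 'object/bbox'])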
def read_labels(data_pattern, cache_path=""): """Read labels from TFRecords. Args: data_pattern: the data pattern to the TFRecords. cache_path: the cache path for the label file. Returns: a Labels object. """ if cache_path: if tf.gfile.Exists(cache_path): tf.logging.info("Reading cached labels from %s..." % cache_path) return Labels.from_file(cache_path) tf.enable_eager_execution() if 'validate' in data_pattern: with tf.name_scope("eval_input"): # randomly chosen 60 validate files # note that validate file names are different on gcloud and locally, due to `curl` download command results = [] for i in range(3844): results.append(str(i).zfill(4)) random.seed(7) random.shuffle(results) validate_file_nums = results[:300] validate_file_list_60 = [data_pattern.split('*')[0]\ + x +'.tfrecord' for x in validate_file_nums] data_paths = validate_file_list_60 else: data_paths = tf.gfile.Glob(data_pattern) ds = tf.data.TFRecordDataset(data_paths, num_parallel_reads=50) context_features = { "id": tf.FixedLenFeature([], tf.string), "segment_labels": tf.VarLenFeature(tf.int64), "segment_start_times": tf.VarLenFeature(tf.int64), "segment_scores": tf.VarLenFeature(tf.float32) } def _parse_se_func(sequence_example): return tf.parse_single_sequence_example( sequence_example, context_features=context_features) ds = ds.map(_parse_se_func) rated_labels = {} tf.logging.info("Reading labels from TFRecords...") last_batch = 0 batch_size = 5000 for cxt_feature_val, _ in ds: video_id = cxt_feature_val["id"].numpy() segment_labels = cxt_feature_val["segment_labels"].values.numpy() segment_start_times = cxt_feature_val[ "segment_start_times"].values.numpy() segment_scores = cxt_feature_val["segment_scores"].values.numpy() for label, start_time, score in zip(segment_labels, segment_start_times, segment_scores): rated_labels[("%s:%d" % (video_id.decode("utf-8"), start_time), label)] = score batch_id = len(rated_labels) // batch_size if batch_id != last_batch: tf.logging.info("%d examples processed.", len(rated_labels)) last_batch = batch_id tf.logging.info("Finish reading labels from TFRecords...") labels_obj = Labels(rated_labels) if cache_path: tf.logging.info("Caching labels to %s..." % cache_path) labels_obj.to_file(cache_path) return labels_obj
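The same eager pattern in isolation: a minimal sketch (TF 1.x with eager execution assumed; the file name is hypothetical) that reads VarLenFeature context fields from SequenceExamples.

import tensorflow as tf
tf.enable_eager_execution()

context_spec = {
    "id": tf.FixedLenFeature([], tf.string),
    "segment_labels": tf.VarLenFeature(tf.int64),
    "segment_scores": tf.VarLenFeature(tf.float32),
}
ds = tf.data.TFRecordDataset('validate0000.tfrecord')  # hypothetical file
for record in ds:
    context, _ = tf.parse_single_sequence_example(record, context_features=context_spec)
    labels = context["segment_labels"].values.numpy()
    scores = context["segment_scores"].values.numpy()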
def get_datasets(data_dir, file_pattern='*.tfrecord'): file_patterns = os.path.join(data_dir, file_pattern) print('file_path: {}'.format(file_patterns)) file_path_list = glob.glob(file_patterns) #num_samples = 0 #only for icdar 2015 dataset #num_samples = 288688 #only for ppt datasets num_samples = 858750 #only for synth datasets for file_path in file_path_list: for _ in tf.python_io.tf_record_iterator(file_path): num_samples += 1 print('num_samples:', num_samples) reader = tf.TFRecordReader keys_to_features = { 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'), 'image/filename': tf.FixedLenFeature((), tf.string, default_value=''), 'image/shape': tf.FixedLenFeature([3], tf.int64), 'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/x1': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/x2': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/x3': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/x4': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/y1': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/y2': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/y3': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/y4': tf.VarLenFeature(dtype=tf.float32), 'image/object/bbox/ignored': tf.VarLenFeature(dtype=tf.int64), 'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64), } items_to_handlers = { 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'), 'shape': slim.tfexample_decoder.Tensor('image/shape'), 'filename': slim.tfexample_decoder.Tensor('image/filename'), 'object/bbox': slim.tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'), 'object/oriented_bbox/x1': slim.tfexample_decoder.Tensor('image/object/bbox/x1'), 'object/oriented_bbox/x2': slim.tfexample_decoder.Tensor('image/object/bbox/x2'), 'object/oriented_bbox/x3': slim.tfexample_decoder.Tensor('image/object/bbox/x3'), 'object/oriented_bbox/x4': slim.tfexample_decoder.Tensor('image/object/bbox/x4'), 'object/oriented_bbox/y1': slim.tfexample_decoder.Tensor('image/object/bbox/y1'), 'object/oriented_bbox/y2': slim.tfexample_decoder.Tensor('image/object/bbox/y2'), 'object/oriented_bbox/y3': slim.tfexample_decoder.Tensor('image/object/bbox/y3'), 'object/oriented_bbox/y4': slim.tfexample_decoder.Tensor('image/object/bbox/y4'), 'object/label': slim.tfexample_decoder.Tensor('image/object/bbox/label'), 'object/ignored': slim.tfexample_decoder.Tensor('image/object/bbox/ignored') } decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers) labels_to_names = {0: 'background', 1: 'text'} return slim.dataset.Dataset(data_sources=file_patterns, reader=reader, decoder=decoder, num_samples=num_samples, items_to_descriptions=ITEMS_TO_DESCRIPTIONS, num_classes=NUM_CLASSES, labels_to_names=labels_to_names)
def _parse_function(self, sequence_example_proto): """Parse a SequenceExample in the AutoDL/TensorFlow format. Args: sequence_example_proto: a SequenceExample with "x_dense_input" or sparse input representation. Returns: An array of tensors. For first edition of AutoDl challenge, returns a pair `(features, labels)` where `features` is a Tensor of shape [sequence_size, row_count, col_count, num_channels] and `labels` a Tensor of shape [output_dim, ] """ sequence_features = {} for i in range(self.metadata_.get_bundle_size()): if self.metadata_.is_sparse(i): sequence_features[self._feature_key( i, "sparse_col_index")] = tf.VarLenFeature(tf.int64) sequence_features[self._feature_key( i, "sparse_row_index")] = tf.VarLenFeature(tf.int64) sequence_features[self._feature_key( i, "sparse_value")] = tf.VarLenFeature(tf.float32) elif self.metadata_.is_compressed(i): sequence_features[self._feature_key( i, "compressed")] = tf.VarLenFeature(tf.string) else: sequence_features[self._feature_key( i, "dense_input")] = tf.FixedLenSequenceFeature( self.metadata_.get_tensor_size(i), dtype=tf.float32) print('sequence_features') print(sequence_features) contexts, features = tf.parse_single_sequence_example( sequence_example_proto, context_features={ "label_index": tf.VarLenFeature(tf.int64), "label_score": tf.VarLenFeature(tf.float32) }, sequence_features=sequence_features) print('features') print(features) sample = [] for i in range(self.metadata_.get_bundle_size()): key_dense = self._feature_key(i, "dense_input") row_count, col_count = self.metadata_.get_matrix_size(i) num_channels = self.metadata_.get_num_channels(i) sequence_size = self.metadata_.get_sequence_size() fixed_matrix_size = row_count > 0 and col_count > 0 row_count = row_count if row_count > 0 else None col_count = col_count if col_count > 0 else None if key_dense in features: f = features[key_dense] if not fixed_matrix_size: raise ValueError("To parse dense data, the tensor shape should " + "be known but got {} instead..." \ .format((sequence_size, row_count, col_count))) f = tf.reshape(f, [sequence_size, row_count, col_count, num_channels]) sample.append(f) sequence_size = sequence_size if sequence_size > 0 else None key_compressed = self._feature_key(i, "compressed") if key_compressed in features: compressed_images = features[key_compressed].values decompress_image_func = \ lambda x: dataset_utils.decompress_image(x, num_channels=num_channels) # `images` here is a 4D-tensor of shape [T, H, W, C], some of which # might be unknown images = tf.map_fn( decompress_image_func, compressed_images, dtype=tf.float32) images.set_shape([sequence_size, row_count, col_count, num_channels]) sample.append(images) key_sparse_val = self._feature_key(i, "sparse_value") if key_sparse_val in features: key_sparse_col = self._feature_key(i, "sparse_col_index") key_sparse_row = self._feature_key(i, "sparse_row_index") sparse_col = features[key_sparse_col].values sparse_row = features[key_sparse_row].values sparse_val = features[key_sparse_val] indices = sparse_val.indices indices = tf.concat([ tf.reshape(indices[:, 0], [-1, 1]), tf.reshape(sparse_row, [-1, 1]), tf.reshape(sparse_col, [-1, 1]) ], 1) sparse_tensor = tf.sparse_reorder( tf.SparseTensor( indices, sparse_val.values, [sequence_size, row_count, col_count])) # TODO: see how we can keep sparse tensors instead of # returning dense ones. 
tensor = tf.sparse_tensor_to_dense(sparse_tensor) tensor = tf.reshape(tensor, [sequence_size, row_count, col_count, 1]) sample.append(tensor) labels = tf.sparse_to_dense( contexts["label_index"].values, (self.metadata_.get_output_size(),), contexts["label_score"].values, validate_indices=False) # sparse_tensor = tf.sparse.SparseTensor(indices=(contexts["label_index"].values,), # values=contexts["label_score"].values, # dense_shape=(self.metadata_.get_output_size(),)) # labels = tf.sparse.to_dense(sparse_tensor, validate_indices=False) sample.append(labels) return sample
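A minimal standalone sketch (TF 1.x assumed; the sizes and values are made up) of the label densification used above, scattering (label_index, label_score) pairs into a fixed-length vector:

import tensorflow as tf

label_index = tf.constant([2, 5], dtype=tf.int64)
label_score = tf.constant([1.0, 0.5])
output_dim = 8  # hypothetical number of classes
labels = tf.sparse_to_dense(label_index, (output_dim,), label_score,
                            validate_indices=False)
# labels == [0, 0, 1.0, 0, 0, 0.5, 0, 0]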
def slim_get_batch(num_classes, batch_size, split_name, file_pattern, num_readers,
                   num_epochs=None, is_training=True):
    """Gets a dataset tuple with instructions for reading the Pascal VOC data.

    Args:
        num_classes: total number of classes in the dataset.
        batch_size: the size of each batch.
        split_name: 'train' or 'val'.
        file_pattern: the file pattern (full path) used to match the dataset sources.
        num_readers: the maximum number of readers used to read the tfrecords.
        num_preprocessing_threads: the maximum number of threads used to run the
            preprocessing function.
        image_preprocessing_fn: the function used for dataset augmentation.
        anchor_encoder: the function used to encode all anchors.
        num_epochs: the total number of epochs used to iterate over this dataset.
        is_training: whether the pipeline is built for training.
    """
    if split_name not in data_splits_num:
        raise ValueError('split name %s was not recognized.' % split_name)

    # Features in Pascal VOC TFRecords.
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/height': tf.FixedLenFeature([1], tf.int64),
        'image/width': tf.FixedLenFeature([1], tf.int64),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
    }
    items_to_handlers = {
        'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'shape': slim.tfexample_decoder.Tensor('image/shape'),
        'object/bbox': slim.tfexample_decoder.BoundingBox(
            ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
        'object/label': slim.tfexample_decoder.Tensor('image/object/bbox/label'),
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers)

    labels_to_names = {}
    for name, pair in classes.items():
        labels_to_names[pair[0]] = name
    # print('label_names', labels_to_names)

    dataset = slim.dataset.Dataset(data_sources=file_pattern,
                                   reader=tf.TFRecordReader,
                                   decoder=decoder,
                                   num_samples=data_splits_num[split_name],
                                   items_to_descriptions=None,
                                   num_classes=num_classes,
                                   labels_to_names=labels_to_names)

    with tf.name_scope('dataset_data_provider'):
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=num_readers,
            common_queue_capacity=32 * batch_size,
            common_queue_min=8 * batch_size,
            shuffle=is_training,
            num_epochs=num_epochs)
    # [image, shape, glabels_raw, gbboxes_raw] = provider.get(['image', 'shape',
    #                                                          'object/label', 'object/bbox'])
    with tf.Session() as sess:
        sess.run([
            tf.local_variables_initializer(),
            tf.global_variables_initializer()
        ])
        tf.train.start_queue_runners()
        for i in range(provider._num_samples):
            [image, labelList, boxList, shape] = provider.get(
                ['image', 'object/label', 'object/bbox', 'shape'])
            img, labels, boxes, shape = sess.run(
                [image, labelList, boxList, shape])
            print(labels)
            # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # no conversion needed; it already displays as RGB
            # print('{}is ,has shape :{}'.format(img, shape))
            # img = cv2.imread(img)
            # img = img / 255.0  # normalizing makes the displayed image turn black
            for j in range(len(labels)):
                print('value:', (boxes[j][0], boxes[j][1]), (boxes[j][2], boxes[j][3]))
                cv2.rectangle(
                    img,
                    (int(boxes[j][0] * shape[0]), int(boxes[j][1] * shape[1])),
                    (int(boxes[j][2] * shape[0]), int(boxes[j][3] * shape[1])),
                    (0, 255, 0), 3)
            plt.imshow(img)
            plt.show()
            cv2.imwrite("./rec.jpg", img, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
            # cv2.waitKey(0)
            # cv2.destroyAllWindows()
            # plt.show()
            break
capacity=capacity, min_after_dequeue=min_after_dequeue, enqueue_many=True) else: serialized_example = read_and_decode(filename_queue) batch_serialized_example = tf.train.shuffle_batch( [serialized_example], batch_size=batch_size, num_threads=thread_number, capacity=capacity, min_after_dequeue=min_after_dequeue) features = tf.parse_example(batch_serialized_example, features={ "label": tf.FixedLenFeature([], tf.float32), "ids": tf.VarLenFeature(tf.int64), "values": tf.VarLenFeature(tf.float32), }) batch_labels = features["label"] batch_ids = features["ids"] batch_values = features["values"] init_op = tf.global_variables_initializer() sess = tf.Session() run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() sess.run(init_op, options=run_options, run_metadata=run_metadata) sess.run(tf.local_variables_initializer(), options=run_options,
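Outside of the queue pipeline, the same batch parsing can be sketched minimally (TF 1.x assumed) with a placeholder of serialized Examples:

import tensorflow as tf

serialized_batch = tf.placeholder(tf.string, shape=[None])
parsed = tf.parse_example(serialized_batch, features={
    "label": tf.FixedLenFeature([], tf.float32),
    "ids": tf.VarLenFeature(tf.int64),
    "values": tf.VarLenFeature(tf.float32),
})
batch_labels = parsed["label"]    # dense tensor of shape [batch]
batch_ids = parsed["ids"]         # SparseTensor
batch_values = parsed["values"]   # SparseTensor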
def parse_example_proto(example_serialized): """Parses an Example proto containing a training example of an image. The output of the build_image_data.py image preprocessing script is a dataset containing serialized Example protocol buffers. Each Example proto contains the following fields: image/height: 462 image/width: 581 image/colorspace: 'RGB' image/channels: 3 image/class/label: 615 image/class/synset: 'n03623198' image/class/text: 'knee pad' image/object/bbox/xmin: 0.1 image/object/bbox/xmax: 0.9 image/object/bbox/ymin: 0.2 image/object/bbox/ymax: 0.6 image/object/bbox/label: 615 image/format: 'JPEG' image/filename: 'ILSVRC2012_val_00041207.JPEG' image/encoded: <JPEG encoded string> Args: example_serialized: scalar Tensor tf.string containing a serialized Example protocol buffer. Returns: filename: Tensor tf.string containing the filename label: Tensor tf.int32 containing the label. bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] where each coordinate is [0, 1) and the coordinates are arranged as [ymin, xmin, ymax, xmax]. text: Tensor tf.string containing the human-readable label. """ # Dense features in Example proto. feature_map = { 'image/filename': tf.FixedLenFeature([], dtype=tf.string, default_value=''), 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, default_value=-1), 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, default_value=''), } sparse_float32 = tf.VarLenFeature(dtype=tf.float32) # Sparse features in Example proto. feature_map.update({ k: sparse_float32 for k in [ 'image/object/bbox/xmin', 'image/object/bbox/ymin', 'image/object/bbox/xmax', 'image/object/bbox/ymax' ] }) features = tf.parse_single_example(example_serialized, feature_map) label = tf.cast(features['image/class/label'], dtype=tf.int32) xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) # Note that we impose an ordering of (y, x) just to make life difficult. bbox = tf.concat(0, [ymin, xmin, ymax, xmax]) # Force the variable number of bounding boxes into the shape # [1, num_boxes, coords]. bbox = tf.expand_dims(bbox, 0) bbox = tf.transpose(bbox, [0, 2, 1]) return features['image/filename'], label, bbox, features[ 'image/class/text']
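The bbox assembly above in isolation, as a minimal sketch (TF 1.x, written with the axis-last tf.concat argument order; the coordinate values are made up):

import tensorflow as tf

# Each coordinate tensor has shape [1, num_boxes] after tf.expand_dims(values, 0).
ymin = tf.constant([[0.2, 0.3]])
xmin = tf.constant([[0.1, 0.2]])
ymax = tf.constant([[0.6, 0.7]])
xmax = tf.constant([[0.9, 0.8]])

bbox = tf.concat([ymin, xmin, ymax, xmax], 0)  # [4, num_boxes]
bbox = tf.expand_dims(bbox, 0)                 # [1, 4, num_boxes]
bbox = tf.transpose(bbox, [0, 2, 1])           # [1, num_boxes, 4] ordered [ymin, xmin, ymax, xmax]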
def get_split(split_name, dataset_dir, data_name='Market1501', file_pattern=None, reader=None): """Gets a dataset tuple with instructions for reading Market1501. Args: split_name: A train/validation split name. dataset_dir: The base directory of the dataset sources. file_pattern: The file pattern to use when matching the dataset sources. It is assumed that the pattern contains a '%s' string so that the split name can be inserted. reader: The TensorFlow reader type. Returns: A `Dataset` namedtuple. Raises: ValueError: if `split_name` is not a valid train/validation split. """ if split_name not in SPLITS_TO_SIZES: raise ValueError('split name %s was not recognized.' % split_name) if not file_pattern: file_pattern = _FILE_PATTERN file_pattern = os.path.join(dataset_dir, file_pattern % (data_name, split_name)) # Allowing None in the signature so that dataset_factory can use the default. if reader is None: reader = tf.TFRecordReader keys_to_features = { 'image_raw_0': tf.FixedLenFeature([], tf.string), 'image_raw_1': tf.FixedLenFeature([], tf.string), 'label': tf.FixedLenFeature([], tf.int64), # For FixedLenFeature, [] means scalar 'id_0': tf.FixedLenFeature([], tf.int64), 'id_1': tf.FixedLenFeature([], tf.int64), 'cam_0': tf.FixedLenFeature([], tf.int64), 'cam_1': tf.FixedLenFeature([], tf.int64), 'image_format': tf.FixedLenFeature([], tf.string, default_value='jpg'), 'image_height': tf.FixedLenFeature([], tf.int64, default_value=128), 'image_width': tf.FixedLenFeature([], tf.int64, default_value=64), 'real_data': tf.FixedLenFeature([], tf.int64, default_value=1), 'pose_peaks_0': tf.FixedLenFeature([16 * 8 * 18], tf.float32), 'pose_peaks_1': tf.FixedLenFeature([16 * 8 * 18], tf.float32), 'pose_mask_r4_0': tf.FixedLenFeature([128 * 64 * 1], tf.int64), 'pose_mask_r4_1': tf.FixedLenFeature([128 * 64 * 1], tf.int64), 'shape': tf.FixedLenFeature([1], tf.int64), 'indices_r4_0': tf.VarLenFeature(dtype=tf.int64), 'values_r4_0': tf.VarLenFeature(dtype=tf.float32), 'indices_r4_1': tf.VarLenFeature(dtype=tf.int64), 'values_r4_1': tf.VarLenFeature(dtype=tf.float32), 'pose_subs_0': tf.FixedLenFeature([20], tf.float32), 'pose_subs_1': tf.FixedLenFeature([20], tf.float32), } items_to_handlers = { 'image_raw_0': slim.tfexample_decoder.Image(image_key='image_raw_0', format_key='image_format'), 'image_raw_1': slim.tfexample_decoder.Image(image_key='image_raw_1', format_key='image_format'), 'label': slim.tfexample_decoder.Tensor('label'), 'id_0': slim.tfexample_decoder.Tensor('id_0'), 'id_1': slim.tfexample_decoder.Tensor('id_1'), 'pose_peaks_0': slim.tfexample_decoder.Tensor('pose_peaks_0', shape=[16 * 8 * 18]), 'pose_peaks_1': slim.tfexample_decoder.Tensor('pose_peaks_1', shape=[16 * 8 * 18]), 'pose_mask_r4_0': slim.tfexample_decoder.Tensor('pose_mask_r4_0', shape=[128 * 64 * 1]), 'pose_mask_r4_1': slim.tfexample_decoder.Tensor('pose_mask_r4_1', shape=[128 * 64 * 1]), 'pose_sparse_r4_0': slim.tfexample_decoder.SparseTensor(indices_key='indices_r4_0', values_key='values_r4_0', shape_key='shape', densify=False), 'pose_sparse_r4_1': slim.tfexample_decoder.SparseTensor(indices_key='indices_r4_1', values_key='values_r4_1', shape_key='shape', densify=False), 'pose_subs_0': slim.tfexample_decoder.Tensor('pose_subs_0', shape=[20]), 'pose_subs_1': slim.tfexample_decoder.Tensor('pose_subs_1', shape=[20]), } decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers) labels_to_names = None if dataset_utils.has_labels(dataset_dir): labels_to_names = dataset_utils.read_label_file(dataset_dir) 
    print('load pn_pairs_num ......')
    fpath = os.path.join(dataset_dir, 'pn_pairs_num_' + split_name + '.p')
    # Open in binary mode so pickle.load also works under Python 3.
    with open(fpath, 'rb') as f:
        pn_pairs_num = pickle.load(f)

    return slim.dataset.Dataset(data_sources=file_pattern,
                                reader=reader,
                                decoder=decoder,
                                num_samples=pn_pairs_num,
                                items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
                                num_classes=_NUM_CLASSES,
                                labels_to_names=labels_to_names)
def example_reading_spec(self): data_fields, _ = super(Seq2editsGec, self).example_reading_spec() data_fields['targets_error_tag'] = tf.VarLenFeature(tf.int64) return data_fields, None
def test(): vocab_size = len(open(FLAGS.vocab_file).readlines()) id_to_label = load_id_to_label() num_label = len(id_to_label) print('#vocab={} #label={}'.format(vocab_size, num_label)) data_sources = [ FLAGS.train_tfrecord, ] is_training = True reader = tf.TFRecordReader keys_to_features = { TEXT_KEY: tf.VarLenFeature(dtype=tf.string), LABELS_KEY: tf.FixedLenFeature([num_label], tf.float32, default_value=tf.zeros([num_label], dtype=tf.float32)), } items_to_handlers = { 'text': slim.tfexample_decoder.Tensor(TEXT_KEY, default_value=DEFAULT_WORD), 'labels': slim.tfexample_decoder.Tensor(LABELS_KEY), } decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers) num_samples = 1 # np.inf items_to_descriptions = { 'text': 'text', 'labels': 'labels', } dataset = slim.dataset.Dataset( data_sources=data_sources, reader=reader, decoder=decoder, num_samples=num_samples, items_to_descriptions=items_to_descriptions, ) provider = slim.dataset_data_provider.DatasetDataProvider( dataset, shuffle=is_training) text_ts, labels_ts, = provider.get(['text', 'labels']) # with tf.Session() as sess: # with slim.queues.QueueRunners(sess): # for i in range(10000): # text_np, labels_np = sess.run([text_ts, labels_ts]) # label_ids = [i for i in range(num_label) if labels_np[i] != 0] # labels = [id_to_label[label_id] for label_id in label_ids] # text = [text_np[i].decode('utf-8') for i in range(text_np.shape[0]) if text_np[i] != b' '] # text = ' '.join(text) # print(str(text), labels) # input() text_bt, labels_bt = tf.train.batch([text_ts, labels_ts], batch_size=FLAGS.batch_size, dynamic_pad=True) with tf.Session() as sess: with slim.queues.QueueRunners(sess): for i in range(10000): text_np, labels_np = sess.run([text_bt, labels_bt]) print(type(text_np), type(labels_np)) print(text_np.shape, labels_np.shape) input()
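A small numpy-side helper (hypothetical, mirroring the commented-out decoding loop above) for turning one padded row of the text batch back into a readable string:

def row_to_text(text_row):
    """text_row: 1-D numpy array of bytes from a dynamically padded batch."""
    tokens = [t.decode('utf-8') for t in text_row if t not in (b'', b' ')]
    return ' '.join(tokens)

# e.g. row_to_text(np.array([b'hello', b'world', b'', b''])) == 'hello world'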