def parse_record(raw_record, is_training):
    """Turn one serialized Example into a preprocessed image and its label.

    The record is split into encoded image bytes and a label by
    `_parse_example_proto`; the image is then decoded, converted to float
    and handed to `vgg_preprocessing.preprocess_image`.

    Args:
      raw_record: scalar Tensor tf.string containing a serialized Example
        protocol buffer.
      is_training: A boolean denoting whether the input is for training;
        forwarded unchanged to the preprocessing function.

    Returns:
      Tuple of the preprocessed image tensor and the label one-hot encoded
      over `_NUM_CLASSES` classes.
    """
    encoded_jpeg, raw_label = _parse_example_proto(raw_record)

    # The decoded image is a 3-D uint8 tensor whose height and width are
    # only known at run time (decode_jpeg sets them dynamically).
    decoded = tf.image.decode_jpeg(encoded_jpeg, channels=_NUM_CHANNELS)
    # convert_image_dtype rescales the integer pixel values into the unit
    # float range.
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)

    preprocessed = vgg_preprocessing.preprocess_image(
        image=as_float,
        output_height=_DEFAULT_IMAGE_SIZE,
        output_width=_DEFAULT_IMAGE_SIZE,
        is_training=is_training)

    # Collapse the label to a scalar int32 before one-hot encoding it.
    class_index = tf.cast(tf.reshape(raw_label, shape=[]), dtype=tf.int32)
    return preprocessed, tf.one_hot(class_index, _NUM_CLASSES)
def parse_record(raw_record, is_training):
    """Parse a serialized ImageNet Example into an (image, one-hot label) pair.

    Args:
      raw_record: serialized Example to parse with `tf.parse_single_example`.
      is_training: forwarded to `vgg_preprocessing.preprocess_image`.

    Returns:
      Tuple of the preprocessed image tensor and the label one-hot encoded
      over `_NUM_CLASSES` classes.
    """
    # Scalar (fixed-length) features, each with a default for missing keys.
    feature_spec = {
        'image/encoded':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
            tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/class/label':
            tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
        'image/class/text':
            tf.FixedLenFeature([], dtype=tf.string, default_value=''),
    }
    # Variable-length bounding-box coordinates (parsed but not used below).
    for corner in ('xmin', 'ymin', 'xmax', 'ymax'):
        feature_spec['image/object/bbox/' + corner] = tf.VarLenFeature(
            dtype=tf.float32)
    feature_spec['image/object/class/label'] = tf.VarLenFeature(
        dtype=tf.int64)

    example = tf.parse_single_example(raw_record, feature_spec)

    encoded = tf.reshape(example['image/encoded'], shape=[])
    pixels = tf.image.decode_image(encoded, _NUM_CHANNELS)
    # Note that tf.image.convert_image_dtype scales the image data to [0, 1).
    pixels = tf.image.convert_image_dtype(pixels, dtype=tf.float32)

    preprocessed = vgg_preprocessing.preprocess_image(
        image=pixels,
        output_height=_DEFAULT_IMAGE_SIZE,
        output_width=_DEFAULT_IMAGE_SIZE,
        is_training=is_training)

    class_index = tf.cast(
        tf.reshape(example['image/class/label'], shape=[]), dtype=tf.int32)
    one_hot_label = tf.one_hot(class_index, _NUM_CLASSES)
    return preprocessed, one_hot_label
def dataset_parser(self, serialized_proto):
    """Parse an ImageNet record into a preprocessed image and its label.

    Args:
      serialized_proto: serialized Example to parse with
        `tf.parse_single_example`.

    Returns:
      Tuple `(image, label)`: the preprocessed image cast to float16 and the
      class label as a scalar int32 tensor.

    Raises:
      ValueError: if `FLAGS.preprocessing` is neither 'vgg' nor 'inception'.
    """
    keys_to_features = {
        'image/encoded':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
            tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/class/label':
            tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
        'image/class/text':
            tf.FixedLenFeature([], dtype=tf.string, default_value=''),
        'image/object/bbox/xmin':
            tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin':
            tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax':
            tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax':
            tf.VarLenFeature(dtype=tf.float32),
        'image/object/class/label':
            tf.VarLenFeature(dtype=tf.int64),
    }
    features = tf.parse_single_example(serialized_proto, keys_to_features)

    # The annotated boxes are only consumed by the inception preprocessing.
    bbox = None
    if FLAGS.use_annotated_bbox:
        xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0)
        ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0)
        xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0)
        ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0)

        # Note that the coordinates are ordered (y, x), not (x, y).
        bbox = tf.concat([ymin, xmin, ymax, xmax], 0)

        # Force the variable number of bounding boxes into the shape
        # [1, num_boxes, coords].
        bbox = tf.expand_dims(bbox, 0)
        bbox = tf.transpose(bbox, [0, 2, 1])

    image = features['image/encoded']
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    if FLAGS.preprocessing == 'vgg':
        image = vgg_preprocessing.preprocess_image(
            image=image,
            output_height=FLAGS.height,
            output_width=FLAGS.width,
            is_training=self.is_training,
            resize_side_min=_RESIZE_SIDE_MIN,
            resize_side_max=_RESIZE_SIDE_MAX)
    elif FLAGS.preprocessing == 'inception':
        image = inception_preprocessing.preprocess_image(
            image=image,
            output_height=FLAGS.height,
            output_width=FLAGS.width,
            is_training=self.is_training,
            bbox=bbox)
    else:
        # A real exception instead of `assert False`: asserts are removed
        # under `python -O`, which would let un-preprocessed images through.
        raise ValueError(
            'Unknown preprocessing type: %s' % FLAGS.preprocessing)

    image = tf.cast(image, dtype=tf.float16)
    label = tf.cast(
        tf.reshape(features['image/class/label'], shape=[]), dtype=tf.int32)
    return image, label
def read_and_decode(filenames, num_epochs, preprocess=False):
    """Read one example from TFRecord files and decode its image and label.

    Args:
      filenames: list of TFRecord file names fed to a string input producer.
      num_epochs: number of passes over `filenames` (forwarded to
        `tf.train.string_input_producer`).
      preprocess: when False the raw image bytes are reshaped to
        [HEIGHT, WIDTH, 3] and scaled to [-0.5, 0.5]. When True the image is
        reshaped using the stored height/width, randomly narrowed, rescaled
        to HEIGHT rows, padded or squashed to WIDTH columns and passed to
        `vgg_preprocessing.preprocess_image`.

    Returns:
      Tuple `(img, label, length, img_width)`:
        img: the decoded (and possibly preprocessed) image tensor.
        label: 'label/value' cast to int32 (a sparse tensor, since the
          feature is variable length).
        length: the 'label/length' feature, shape [1], int64.
        img_width: scalar int32 width of the image content before padding;
          equal to WIDTH when `preprocess` is False.
    """
    filename_queue = tf.train.string_input_producer(
        filenames, num_epochs=num_epochs)
    reader = tf.TFRecordReader()
    # reader.read returns (key, serialized record); only the record is used.
    _, serialized_example = reader.read(filename_queue)

    feature_spec = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpg'),
        'label/value': tf.VarLenFeature(tf.int64),
        'label/length': tf.FixedLenFeature([1], tf.int64),
    }
    if preprocess:
        # The per-image size is only needed when images are not already
        # stored at HEIGHT x WIDTH.
        feature_spec['image/width'] = tf.FixedLenFeature([1], tf.int64)
        feature_spec['image/height'] = tf.FixedLenFeature([1], tf.int64)
    features = tf.parse_single_example(
        serialized_example, features=feature_spec)

    img = tf.decode_raw(features['image/encoded'], tf.uint8)

    if not preprocess:
        img = tf.reshape(img, [HEIGHT, WIDTH, 3])
        img = tf.cast(img, tf.float32) * (1. / 255) - 0.5
        # Previously left undefined on this path, which made the shared
        # return statement fail; the image always spans the full WIDTH here.
        img_width = tf.constant(WIDTH, dtype=tf.int32)
    else:
        width = features['image/width']
        height = features['image/height']
        img = tf.reshape(img, tf.concat([height, width, [3]], 0))

        # Random horizontal shrink: keep a fraction (1 - r^3) of the width
        # with r drawn from [0, 0.9), but never go below 2 pixels.
        shrink = tf.random_uniform([1], maxval=0.9)
        scaled_width = tf.cast(
            tf.cast(width, tf.float32) * (1 - tf.pow(shrink, 3)), tf.int32)
        new_width = tf.cond(
            tf.squeeze(scaled_width) < 2,
            lambda: tf.constant([2]),
            lambda: scaled_width)
        height = tf.cast(height, tf.int32)
        resized = tf.image.resize_images(
            img, tf.concat([height, new_width], 0))

        # Width the image would have once its height is scaled to HEIGHT.
        ratio = tf.cast(HEIGHT, tf.float32) / tf.cast(height, tf.float32)
        actual_width = tf.cast(
            tf.cast(new_width, tf.float32) * ratio, tf.int32)

        def _fit_and_pad():
            # Narrow enough: keep the aspect ratio and pad on the right.
            target = tf.cast(
                tf.concat([[HEIGHT], actual_width], 0), tf.int32)
            fitted = tf.image.resize_images(resized, target)
            padded = tf.image.pad_to_bounding_box(
                fitted, 0, 0, HEIGHT, WIDTH)
            return [padded, tf.squeeze(actual_width)]

        def _squash():
            # Too wide: squash to exactly HEIGHT x WIDTH.
            return [tf.image.resize_images(resized, [HEIGHT, WIDTH]), WIDTH]

        img, img_width = tf.cond(
            tf.squeeze(actual_width <= WIDTH), _fit_and_pad, _squash)

        # Scale to the unit range before the VGG-style preprocessing.
        img = tf.cast(img, tf.float32) / 255.
        img = vgg_preprocessing.preprocess_image(
            image=img, output_height=HEIGHT, output_width=WIDTH)

    label = tf.cast(features['label/value'], tf.int32)
    length = features["label/length"]
    return img, label, length, img_width
def main(_):
    """Extract ResNet-v1-101 pool5 features for every listed image.

    Image names are read from `FLAGS.input_fname`: the first comma-separated
    field of each line with its last four characters removed (presumably a
    file extension -- confirm against the list format). Names that already
    have a `.npy` file in `FLAGS.output_dir` are skipped, so an interrupted
    run can be resumed. Each remaining image is decoded, preprocessed and
    run through the network, and its pool5 activation is saved as float32
    to `<output_dir>/<basename>.npy`.

    Note: this block is Python 2 code (it relies on `xrange`).
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_id
    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    with tf.Graph().as_default() as g:
        with open(FLAGS.input_fname, 'r') as f:
            filenames = [line.split(',')[0][:-4] for line in f]
        # Keep only the images whose features have not been written yet.
        filenames = [
            os.path.join(FLAGS.image_dir, name)
            for name in filenames
            if not os.path.exists(
                os.path.join(FLAGS.output_dir, name + '.npy'))
        ]
        if not filenames:
            # string_input_producer rejects an empty list, so a re-run after
            # everything is extracted would otherwise crash.
            print("Nothing to do -- all features already extracted")
            return

        filename_queue = tf.train.string_input_producer(filenames)
        reader = tf.WholeFileReader()
        # key is the file name, value the raw file contents.
        key, value = reader.read(filename_queue)
        image = tf.image.decode_jpeg(value, channels=3)

        image_size = resnet_v1.resnet_v1.default_image_size
        processed_image = vgg_preprocessing.preprocess_image(
            image, image_size, image_size, is_training=False
        )
        processed_images, keys = tf.train.batch(
            [processed_image, key],
            FLAGS.batch_size,
            num_threads=8,
            capacity=8 * FLAGS.batch_size * 5,
            allow_smaller_final_batch=True
        )

        # Create the model
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_101(
                processed_images, num_classes=1000, is_training=False
            )
        init_fn = slim.assign_from_checkpoint_fn(
            FLAGS.checkpoint_dir, slim.get_model_variables()
        )

        pool5 = g.get_operation_by_name('resnet_v1_101/pool5').outputs[0]
        # Move channels ahead of the spatial dims: (batch_size, 2048, 1, 1)
        pool5 = tf.transpose(pool5, perm=[0, 3, 1, 2])

        with tf.Session() as sess:
            init_fn(sess)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            try:
                # `//` keeps the integer step count regardless of the
                # division semantics in effect.
                num_steps = len(filenames) // FLAGS.batch_size + 1
                for _ in tqdm(xrange(num_steps), ncols=70):
                    if coord.should_stop():
                        break
                    file_names, pool5_value = sess.run([keys, pool5])
                    for file_name, feature in zip(file_names, pool5_value):
                        np.save(
                            os.path.join(
                                FLAGS.output_dir,
                                os.path.basename(file_name) + '.npy'
                            ),
                            feature.astype(np.float32)
                        )
            except tf.errors.OutOfRangeError:
                print("Done feature extraction -- epoch limit reached")
            finally:
                coord.request_stop()
                coord.join(threads)
def _write_feature_batch(sess, features_tensor, images_placeholder,
                         batch_images, dataset, start_idx):
    """Run one batch through the network and store its features.

    Args:
      sess: session in which `features_tensor` is evaluated.
      features_tensor: tensor holding the features to extract.
      images_placeholder: placeholder the image batch is fed into.
      batch_images: non-empty list of arrays, concatenated along axis 0.
      dataset: HDF5 dataset receiving the features.
      start_idx: first dataset row to write.

    Returns:
      The number of rows written.
    """
    batch = np.concatenate(batch_images, 0)
    features = sess.run(features_tensor,
                        feed_dict={images_placeholder: batch})
    end_idx = start_idx + features.shape[0]
    print("batch_start_idx =", start_idx)
    print("batch_end_idx =", end_idx)
    print("features.shape =", features.shape)
    dataset[start_idx:end_idx, :] = features
    return features.shape[0]


def main(args):
    """Extract ResNet-v2-101 block4 features for a list of images into HDF5.

    The image list `<dataset_dir>/image_lists/<image_list_file>` is a
    comma-separated file whose second column is the image path; any further
    columns are taken as the crop box passed to `Image.crop`. Every image is
    optionally cropped, resized to 512x512 and pushed through the network in
    batches of `args.batch_size`. The 'resnet_v2_101/block4' activations are
    written to `<dataset_dir>/resnet_fcn_features/<output_file>` in a dataset
    named after the list file (with '.txt' removed).

    `args.max_images_to_process`, when set, stops processing after the first
    batch that brings the running total to at least that many images.
    """
    image_list_file = os.path.join(
        args.dataset_dir, 'image_lists', args.image_list_file)
    print('image_list_file =', image_list_file)

    with open(image_list_file) as handle:
        rows = [row.split(',') for row in handle.read().splitlines()]
    image_files = [row[1] for row in rows]
    if rows and len(rows[0]) > 2:
        # The columns are read as text; Image.crop needs numeric coordinates.
        crops = [[float(value) for value in row[2:]] for row in rows]
    else:
        crops = [None] * len(image_files)
    print('len(image_files) =', len(image_files))

    output_file = os.path.join(
        args.dataset_dir, "resnet_fcn_features", args.output_file)
    # Escape the dot so only a literal '.txt' is stripped from the name.
    dataset_name = re.sub(r'\.txt', '', args.image_list_file.split('/')[-1])

    images_placeholder = tf.placeholder(tf.float32, shape=(None, 512, 512, 3))
    preprocessed_batch = tf.map_fn(
        lambda img: preprocess_image(img,
                                     output_height=512,
                                     output_width=512,
                                     is_training=False,
                                     resize_side_min=512,
                                     resize_side_max=512),
        images_placeholder)
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        _, all_layers = resnet_v2.resnet_v2_101(preprocessed_batch,
                                                21,
                                                is_training=False,
                                                global_pool=False,
                                                output_stride=16)
    # Fetch only the end point that is stored instead of every activation.
    block4 = all_layers['resnet_v2_101/block4']
    init_fn = get_init_fn(args.ckpt_path)

    # allow_growth was used to work around a CuDNN error
    # (https://github.com/tensorflow/tensorflow/issues/24828); it is
    # currently left disabled.
    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True

    if args.max_images_to_process is not None:
        max_images_to_process = args.max_images_to_process
    else:
        # Set it to a number that won't be hit
        max_images_to_process = len(image_files) + 1
    print('max_images_to_process =', max_images_to_process)

    # The context manager closes the HDF5 file even if extraction fails.
    with h5py.File(output_file, 'w') as output_file_handle, \
            tf.Session(config=config) as sess:
        hpy5_dataset = output_file_handle.create_dataset(
            dataset_name,
            shape=(len(image_files), 32, 32, 2048),
            dtype='f')

        sess.run(tf.global_variables_initializer())
        init_fn(sess)

        batch_images = []
        num_batches_done = 0
        num_images_processed = 0

        for image_file, crop in zip(image_files, crops):
            pillow_image = Image.open(image_file)
            if crop is not None:
                pillow_image = pillow_image.crop(crop)
            pillow_image = pillow_image.resize((512, 512))
            np_image = np.expand_dims(get_numpy_array(pillow_image), 0)
            batch_images.append(np_image)

            if (len(batch_images) >= args.batch_size
                    or num_images_processed >= max_images_to_process):
                # Rows written so far double as the offset of this batch.
                written = _write_feature_batch(
                    sess, block4, images_placeholder, batch_images,
                    hpy5_dataset, num_images_processed)
                print("Completed batch", num_batches_done)
                num_batches_done += 1
                num_images_processed += written
                print('num_images_processed =', num_images_processed)
                batch_images = []

                if num_images_processed >= max_images_to_process:
                    print('Breaking loop: num_images_processed =',
                          num_images_processed,
                          ', max_images_to_process =',
                          max_images_to_process)
                    break

        if batch_images:
            # Final partial batch. The end index used to be one row short of
            # the feature rows, so this write could not succeed as intended.
            print('Completing remaining', len(batch_images), 'images')
            _write_feature_batch(
                sess, block4, images_placeholder, batch_images,
                hpy5_dataset, num_images_processed)
            num_batches_done += 1
with tf.Graph().as_default(): url = ("https://upload.wikimedia.org/wikipedia/commons/d/d9/" "First_Student_IC_school_bus_202076.jpg") # Open specified url and load image as a string image_string = urllib2.urlopen(url).read() # Decode string into matrix with intensity values image = tf.image.decode_jpeg(image_string, channels=3) # Resize the input image, preserving the aspect ratio # and make a central crop of the resulted image. # The crop will be of the size of the default image size of # the network. processed_image = vgg_preprocessing.preprocess_image(image, image_size, image_size, is_training=False) # Networks accept images in batches. # The first dimension usually represents the batch size. # In our case the batch size is one. processed_images = tf.expand_dims(processed_image, 0) # Create the model, use the default arg scope to configure # the batch norm parameters. arg_scope is a very conveniet # feature of slim library -- you can define default # parameters for layers -- like stride, padding etc. with slim.arg_scope(vgg.vgg_arg_scope()): logits, _ = vgg.vgg_16(processed_images, num_classes=1000, is_training=False)