Example No. 1
def parse_record(raw_record, is_training):
  """Parses a record containing a training example of an image.

  The input record is parsed into a label and image, and the image is passed
  through preprocessing steps (cropping, flipping, and so on).

  Args:
    raw_record: scalar Tensor tf.string containing a serialized
      Example protocol buffer.
    is_training: A boolean denoting whether the input is for training.

  Returns:
    Tuple with processed image tensor and one-hot-encoded label tensor.
"""
  image, label = _parse_example_proto(raw_record)

  # Decode the string as an RGB JPEG.
  # Note that the resulting image has an unknown height and width
  # that are set dynamically by decode_jpeg; in other words, the height
  # and width of the image are unknown at graph-construction time.
  # decode_jpeg yields a 3-D uint8 Tensor, which convert_image_dtype then
  # rescales to a float with values in [0, 1).
  image = tf.image.decode_jpeg(image, channels=_NUM_CHANNELS)
  image = tf.image.convert_image_dtype(image, tf.float32)

  image = vgg_preprocessing.preprocess_image(
      image=image,
      output_height=_DEFAULT_IMAGE_SIZE,
      output_width=_DEFAULT_IMAGE_SIZE,
      is_training=is_training)

  label = tf.cast(tf.reshape(label, shape=[]), dtype=tf.int32)
  label = tf.one_hot(label, _NUM_CLASSES)

  return image, label
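
A minimal sketch (not part of the original snippet, and assuming `filenames` and `batch_size` are defined) of how a parse function like this is typically wired into a tf.data input pipeline:

dataset = tf.data.TFRecordDataset(filenames)
dataset = dataset.map(lambda raw_record: parse_record(raw_record, is_training=True))
dataset = dataset.shuffle(buffer_size=10000).batch(batch_size)
images, labels = dataset.make_one_shot_iterator().get_next()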
Example No. 2
def parse_record(raw_record, is_training):
  """Parse an ImageNet record from `value`."""
  keys_to_features = {
      'image/encoded':
          tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format':
          tf.FixedLenFeature((), tf.string, default_value='jpeg'),
      'image/class/label':
          tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
      'image/class/text':
          tf.FixedLenFeature([], dtype=tf.string, default_value=''),
      'image/object/bbox/xmin':
          tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/ymin':
          tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/xmax':
          tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/ymax':
          tf.VarLenFeature(dtype=tf.float32),
      'image/object/class/label':
          tf.VarLenFeature(dtype=tf.int64),
  }

  parsed = tf.parse_single_example(raw_record, keys_to_features)

  image = tf.image.decode_image(
      tf.reshape(parsed['image/encoded'], shape=[]),
      _NUM_CHANNELS)

  # Note that tf.image.convert_image_dtype scales the image data to [0, 1).
  image = tf.image.convert_image_dtype(image, dtype=tf.float32)

  image = vgg_preprocessing.preprocess_image(
      image=image,
      output_height=_DEFAULT_IMAGE_SIZE,
      output_width=_DEFAULT_IMAGE_SIZE,
      is_training=is_training)

  label = tf.cast(
      tf.reshape(parsed['image/class/label'], shape=[]),
      dtype=tf.int32)

  return image, tf.one_hot(label, _NUM_CLASSES)
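
Example No. 1 delegates the protobuf parsing to a `_parse_example_proto` helper that is not shown there. Based on the feature spec above, a hypothetical version of that helper (an assumption, not the original implementation) could look like this:

def _parse_example_proto(raw_record):
  """Hypothetical helper: returns the encoded image string and the int64 label."""
  keys_to_features = {
      'image/encoded':
          tf.FixedLenFeature((), tf.string, default_value=''),
      'image/class/label':
          tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
  }
  parsed = tf.parse_single_example(raw_record, keys_to_features)
  return parsed['image/encoded'], parsed['image/class/label']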
Example No. 3
  def dataset_parser(self, serialized_proto):
    """Parse an Imagenet record from value."""
    keys_to_features = {
        'image/encoded':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
            tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/class/label':
            tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
        'image/class/text':
            tf.FixedLenFeature([], dtype=tf.string, default_value=''),
        'image/object/bbox/xmin':
            tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin':
            tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax':
            tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax':
            tf.VarLenFeature(dtype=tf.float32),
        'image/object/class/label':
            tf.VarLenFeature(dtype=tf.int64),
    }

    features = tf.parse_single_example(serialized_proto, keys_to_features)

    bbox = None
    if FLAGS.use_annotated_bbox:
      xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0)
      ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0)
      xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0)
      ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0)

      # Note that the coordinates are ordered (y, x), giving the
      # [ymin, xmin, ymax, xmax] layout expected by tf.image bounding-box ops.
      bbox = tf.concat([ymin, xmin, ymax, xmax], 0)

      # Force the variable number of bounding boxes into the shape
      # [1, num_boxes, coords].
      bbox = tf.expand_dims(bbox, 0)
      bbox = tf.transpose(bbox, [0, 2, 1])

    image = features['image/encoded']
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    if FLAGS.preprocessing == 'vgg':
      image = vgg_preprocessing.preprocess_image(
          image=image,
          output_height=FLAGS.height,
          output_width=FLAGS.width,
          is_training=self.is_training,
          resize_side_min=_RESIZE_SIDE_MIN,
          resize_side_max=_RESIZE_SIDE_MAX)
    elif FLAGS.preprocessing == 'inception':
      image = inception_preprocessing.preprocess_image(
          image=image,
          output_height=FLAGS.height,
          output_width=FLAGS.width,
          is_training=self.is_training,
          bbox=bbox)
    else:
      raise ValueError('Unknown preprocessing type: %s' % FLAGS.preprocessing)

    image = tf.cast(image, dtype=tf.float16)
    label = tf.cast(
        tf.reshape(features['image/class/label'], shape=[]), dtype=tf.int32)

    return image, label
Example No. 4
def read_and_decode(filenames, num_epochs, preprocess=False):  # read iris_contact.tfrecords
    filename_queue = tf.train.string_input_producer(filenames, num_epochs=num_epochs)
    reader = tf.TFRecordReader()

    _, serialized_example = reader.read(filename_queue)  # return file_name and file

    if not preprocess:    
        features = tf.parse_single_example(serialized_example,
                                           features={
                                               'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
                                               'image/format': tf.FixedLenFeature((), tf.string, default_value='jpg'),
                                               'label/value': tf.VarLenFeature(tf.int64),
                                               'label/length': tf.FixedLenFeature([1], tf.int64)
                                           })  # return image and label

        # Preprocessing Here

        img = tf.decode_raw(features['image/encoded'], tf.uint8)
        img = tf.reshape(img, [HEIGHT, WIDTH, 3])  
        # img = tf.image.rgb_to_grayscale(img)
        img = tf.cast(img, tf.float32) * (1. / 255) - 0.5  # normalize to [-0.5, 0.5)
        label = features['label/value']  # sparse label tensor
        label = tf.cast(label, tf.int32)
        length = features["label/length"]
        img_width = WIDTH  # this branch reshapes to a fixed WIDTH, so report it directly
    else:
        features = tf.parse_single_example(serialized_example,
                                           features={
                                               'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
                                               'image/format': tf.FixedLenFeature((), tf.string, default_value='jpg'),
                                               'label/value': tf.VarLenFeature(tf.int64),
                                               'label/length': tf.FixedLenFeature([1], tf.int64),
                                               'image/width': tf.FixedLenFeature([1], tf.int64),
                                               'image/height': tf.FixedLenFeature([1], tf.int64)
                                           })  # return image and label        
        img = tf.decode_raw(features['image/encoded'], tf.uint8)
        width = features['image/width']
        height = features['image/height']
        shape = tf.concat([height, width, [3]], 0)
        img = tf.reshape(img, shape)

        # random resize
        ratio = tf.random_uniform([1], maxval=0.9)
        width = tf.cast(tf.cast(width, tf.float32) * (1 - tf.pow(ratio, 3)), tf.int32)
        width = tf.cond(tf.squeeze(width) < 2,
                        lambda: tf.constant([2]),
                        lambda: width)
        height = tf.cast(height, tf.int32)
        img = tf.image.resize_images(img, tf.concat([height, width], 0))

        # Process to HEIGHT and WIDTH  
        ratio = tf.cast(HEIGHT, tf.float32) / tf.cast(height, tf.float32)
        actual_width = tf.cast(tf.cast(width, tf.float32) * ratio, tf.int32) 
        # img = tf.Print(img, [tf.shape(img), height, width, ratio, actual_width])

        img, img_width = tf.cond(tf.squeeze(actual_width <= WIDTH),
                      lambda: [tf.image.pad_to_bounding_box(tf.image.resize_images(img, tf.cast(tf.concat([[HEIGHT], actual_width], 0), tf.int32)), 0, 0, HEIGHT, WIDTH),
                               tf.squeeze(actual_width)],
                      lambda: [tf.image.resize_images(img, [HEIGHT, WIDTH]),
                               WIDTH]
                      )
        # img = tf.image.resize_image_with_crop_or_pad(tf.image.resize_images(img, [HEIGHT, WIDTH/2]), HEIGHT, WIDTH)

        # Vanilla
        # img = tf.cast(img, tf.float32) * (1. / 255.) - 0.5  # throw img tensor

        # ResNet
        img = tf.cast(img, tf.float32) / 255.
        img = vgg_preprocessing.preprocess_image(
            image=img,
            output_height=HEIGHT,
            output_width=WIDTH)

        label = features['label/value']  # sparse label tensor
        label = tf.cast(label, tf.int32)
        length = features["label/length"]

    return img, label, length, img_width
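
A minimal sketch (assuming FILENAMES and NUM_EPOCHS are defined) of driving this queue-based reader. Because tf.train.string_input_producer with num_epochs creates local variables, both initializers have to run before the queue runners are started:

img, label, length, img_width = read_and_decode(FILENAMES, NUM_EPOCHS, preprocess=True)
with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            img_value, label_value = sess.run([img, label])  # one example per run call
    except tf.errors.OutOfRangeError:
        pass  # reached num_epochs
    finally:
        coord.request_stop()
        coord.join(threads)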
Example No. 5
def main(_):
  os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_id

  if not tf.gfile.Exists(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)

  with tf.Graph().as_default() as g:
    with open(FLAGS.input_fname, 'r') as f:
      filenames = [line.split(',')[0][:-4] for line in f.readlines()]
      filenames = [
          os.path.join(FLAGS.image_dir, name) for name in filenames \
              if not os.path.exists(os.path.join(FLAGS.output_dir, name + '.npy'))
      ]

    filename_queue = tf.train.string_input_producer(filenames)
    reader = tf.WholeFileReader()
    key, value = reader.read(filename_queue)
    image = tf.image.decode_jpeg(value, channels=3)
    image_size = resnet_v1.resnet_v1.default_image_size
    processed_image = vgg_preprocessing.preprocess_image(
        image, image_size, image_size, is_training=False
    )
    processed_images, keys = tf.train.batch(
        [processed_image, key],
        FLAGS.batch_size,
        num_threads=8, capacity=8*FLAGS.batch_size*5,
        allow_smaller_final_batch=True
    )

    # Create the model
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
      net, end_points = resnet_v1.resnet_v1_101(
          processed_images, num_classes=1000, is_training=False
      )
      init_fn = slim.assign_from_checkpoint_fn(
          FLAGS.checkpoint_dir, slim.get_model_variables()
      )
      pool5 = g.get_operation_by_name('resnet_v1_101/pool5').outputs[0]
      pool5 = tf.transpose(pool5, perm=[0, 3, 1, 2])  # (batch_size, 2048, 1, 1)

      with tf.Session() as sess:
        init_fn(sess)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
          for step in tqdm(
              xrange(len(filenames) / FLAGS.batch_size + 1), ncols=70
          ):
            if coord.should_stop():
              break
            file_names, pool5_value = sess.run([keys, pool5])
            for i in xrange(len(file_names)):
              np.save(
                  os.path.join(
                      FLAGS.output_dir,
                      os.path.basename(file_names[i]) + '.npy'
                  ),
                  pool5_value[i].astype(np.float32)
              )
        except tf.errors.OutOfRangeError:
          print "Done feature extraction -- epoch limit reached"
        finally:
          coord.request_stop()

        coord.join(threads)
Example No. 6
def main(args):
    image_list_file = os.path.join(
        *[args.dataset_dir, 'image_lists', args.image_list_file])
    print('image_list_file =', image_list_file)
    with open(image_list_file) as handle:
        rows = handle.read().splitlines()
        rows = [row.split(',') for row in rows]
        image_files = [row[1] for row in rows]
        if len(rows[0]) > 2:
            crops = [row[2:] for row in rows]
        else:
            crops = [None] * len(image_files)
    print('len(image_files) =', len(image_files))

    output_file = os.path.join(
        *[args.dataset_dir, "resnet_fcn_features", args.output_file])
    output_file_handle = h5py.File(output_file, 'w')
    dataset_name = re.sub('.txt', '', args.image_list_file.split('/')[-1])
    h5py_dataset = output_file_handle.create_dataset(dataset_name,
                                                     shape=(len(image_files),
                                                            32, 32, 2048),
                                                     dtype='f')

    images_placeholder = tf.placeholder(tf.float32, shape=(None, 512, 512, 3))
    preprocessed_batch = tf.map_fn(
        lambda img: preprocess_image(img,
                                     output_height=512,
                                     output_width=512,
                                     is_training=False,
                                     resize_side_min=512,
                                     resize_side_max=512), images_placeholder)
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        net, all_layers = resnet_v2.resnet_v2_101(preprocessed_batch,
                                                  21,
                                                  is_training=False,
                                                  global_pool=False,
                                                  output_stride=16)
    init_fn = get_init_fn(args.ckpt_path)

    # This is to prevent a CuDNN error - https://github.com/tensorflow/tensorflow/issues/24828
    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        init_fn(sess)

        num_images_in_batch = 0
        num_batches_done = 0
        batch_images = list()
        num_images_processed = 0

        if args.max_images_to_process is not None:
            max_images_to_process = args.max_images_to_process
        else:
            # Set it to a number that won't be hit
            max_images_to_process = len(image_files) + 1
        print('max_images_to_process =', max_images_to_process)

        inputs = zip(image_files, crops)
        for image_file, crop in inputs:
            pillow_image = Image.open(image_file)
            if crop is not None:
                # Crop coordinates are read from the CSV as strings; convert
                # them to integers before handing the box to Pillow.
                pillow_image = pillow_image.crop(tuple(int(float(c)) for c in crop))
            pillow_image = pillow_image.resize((512, 512))
            np_image = get_numpy_array(pillow_image)
            np_image = np.expand_dims(np_image, 0)
            batch_images.append(np_image)

            num_images_in_batch += 1

            if num_images_in_batch >= args.batch_size or num_images_processed >= max_images_to_process:
                batch_images = np.concatenate(batch_images, 0)
                feed_dict = {images_placeholder: batch_images}
                output, activations = sess.run([net, all_layers],
                                               feed_dict=feed_dict)
                features = activations['resnet_v2_101/block4']
                batch_start_idx = num_batches_done * args.batch_size
                batch_end_idx = batch_start_idx + features.shape[0]
                print("batch_start_idx =", batch_start_idx)
                print("batch_end_idx =", batch_end_idx)
                print("features.shape =", features.shape)
                h5py_dataset[batch_start_idx:batch_end_idx, :] = features

                print("Completed batch", num_batches_done)
                num_batches_done += 1
                num_images_processed += num_images_in_batch
                num_images_in_batch = 0
                print('num_images_processed =', num_images_processed)

                batch_images = list()
                if num_images_processed >= max_images_to_process:
                    print('Breaking loop: num_images_processed =',
                          num_images_processed, ', max_images_to_process =',
                          max_images_to_process)
                    break

        if len(batch_images) > 0:
            print('Completing remaining', len(batch_images), 'images')
            batch_images = np.concatenate(batch_images, 0)
            feed_dict = {images_placeholder: batch_images}
            output, activations = sess.run([net, all_layers],
                                           feed_dict=feed_dict)
            features = activations['resnet_v2_101/block4']
            batch_start_idx = num_batches_done * args.batch_size
            batch_end_idx = batch_start_idx + features.shape[0]
            h5py_dataset[batch_start_idx:batch_end_idx, :] = features
            num_batches_done += 1

    output_file_handle.close()
Example No. 7
with tf.Graph().as_default():
    url = ("https://upload.wikimedia.org/wikipedia/commons/d/d9/"
           "First_Student_IC_school_bus_202076.jpg")

    # Open the specified URL and read the image data as a string
    image_string = urllib2.urlopen(url).read()

    # Decode string into matrix with intensity values
    image = tf.image.decode_jpeg(image_string, channels=3)

    # Resize the input image, preserving the aspect ratio,
    # and take a central crop of the resulting image.
    # The crop matches the network's default input size.
    processed_image = vgg_preprocessing.preprocess_image(image,
                                                         image_size,
                                                         image_size,
                                                         is_training=False)

    # Networks accept images in batches.
    # The first dimension usually represents the batch size.
    # In our case the batch size is one.
    processed_images = tf.expand_dims(processed_image, 0)

    # Create the model, using the default arg scope to configure
    # weight decay and other layer defaults. arg_scope is a very convenient
    # feature of the slim library -- you can define default
    # parameters for layers, like stride, padding, etc.
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, _ = vgg.vgg_16(processed_images,
                               num_classes=1000,
                               is_training=False)
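
    # A minimal sketch (not part of the original snippet) of how this walkthrough
    # typically continues: restore pretrained weights and run the forward pass.
    # The checkpoint path 'vgg_16.ckpt' is an assumption, and image_size is
    # expected to have been set earlier, e.g. to vgg.vgg_16.default_image_size.
    probabilities = tf.nn.softmax(logits)

    init_fn = slim.assign_from_checkpoint_fn('vgg_16.ckpt',
                                             slim.get_model_variables('vgg_16'))

    with tf.Session() as sess:
        init_fn(sess)
        np_probabilities = sess.run(probabilities)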