import tensorflow as tf  # TF 1.x API used throughout (tf.to_float, tf.contrib, ...)


def preprocess_for_eval(image,
                        label,
                        output_height,
                        output_width,
                        resize_side,
                        speed_mode=False):
    """Resize an image/label pair to output_height x output_width (direct
    resize in speed_mode, otherwise aspect-preserving resize plus central
    crop) and subtract the per-channel image mean."""
    if speed_mode:
        image = tf.image.resize_images(image, [output_height, output_width],
                                       tf.image.ResizeMethod.BILINEAR)
        image = tf.reshape(image, [output_height, output_width, 3])
        image = tf.to_float(image)
        image = vgg_preprocessing._mean_image_subtraction(image)

        label = tf.image.resize_images(label, [output_height, output_width],
                                       tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        label = tf.reshape(label, [output_height, output_width])
    else:
        image = vgg_preprocessing._aspect_preserving_resize(
            image, resize_side, 3, 'bilinear')
        image = vgg_preprocessing._central_crop([image], output_height,
                                                output_width)[0]
        image = tf.reshape(image, [output_height, output_width, 3])
        image = tf.to_float(image)
        image = vgg_preprocessing._mean_image_subtraction(image)

        label = vgg_preprocessing._aspect_preserving_resize(
            label, resize_side, 1, 'nearest')
        label = vgg_preprocessing._central_crop([label], output_height,
                                                output_width)[0]
        label = tf.reshape(label, [output_height, output_width])

    return image, label
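# Note: the vgg_preprocessing module used above and below is not shown on this
# page; its helpers are called with non-TF-slim argument lists, so it is
# presumably a project-local variant. For reference, a minimal sketch of what a
# _mean_image_subtraction helper typically does is given here. The default
# means (the usual VGG RGB means) and the body are assumptions, not the
# project's actual code.
def _mean_image_subtraction_sketch(image, means=(123.68, 116.779, 103.939)):
    """Subtract a per-channel mean from a float [H, W, 3] image (sketch only)."""
    channels = tf.split(image, num_or_size_splits=3, axis=2)
    channels = [channels[i] - means[i] for i in range(3)]
    return tf.concat(channels, axis=2)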
Example #2
def preprocess_for_train(image,
                         output_height,
                         output_width,
                         resize_side_min,
                         resize_side_max,
                         speed_mode=False):
  """Resize an image for training and subtract the per-channel mean: direct
  resize in speed_mode, otherwise an aspect-preserving resize to a random
  side in [resize_side_min, resize_side_max] followed by a central crop."""
  if speed_mode:
    image = tf.image.resize_images(
      image, [output_height, output_width],
      tf.image.ResizeMethod.BILINEAR)
    image = tf.reshape(image, [output_height, output_width, 3])
    image = tf.to_float(image)
    image = vgg_preprocessing._mean_image_subtraction(image)
  else:
    resize_side = tf.random_uniform(
      [],
      minval=resize_side_min,
      maxval=resize_side_max + 1,
      dtype=tf.int32)
    image = vgg_preprocessing._aspect_preserving_resize(
      image, resize_side, 3, 'bilinear')
    image = vgg_preprocessing._central_crop(
      [image], output_height, output_width)[0]
    image = tf.reshape(image, [output_height, output_width, 3])
    image = tf.to_float(image)
    image = vgg_preprocessing._mean_image_subtraction(image)

  return image
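# preprocess_for_eval/preprocess_for_train above also rely on
# _aspect_preserving_resize and _central_crop. Sketches of the assumed
# semantics follow (argument order matches the call sites above; the bodies
# are assumptions, not the project's actual code).
def _aspect_preserving_resize_sketch(image, smallest_side, channels, method):
    """Resize `image` so its smaller side equals `smallest_side` (sketch)."""
    shape = tf.shape(image)
    height = tf.to_float(shape[0])
    width = tf.to_float(shape[1])
    smallest = tf.to_float(smallest_side)
    scale = tf.cond(tf.greater(height, width),
                    lambda: smallest / width,
                    lambda: smallest / height)
    new_height = tf.to_int32(tf.round(height * scale))
    new_width = tf.to_int32(tf.round(width * scale))
    resize_method = (tf.image.ResizeMethod.BILINEAR if method == 'bilinear'
                     else tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    image = tf.image.resize_images(image, [new_height, new_width], resize_method)
    image.set_shape([None, None, channels])
    return image


def _central_crop_sketch(image_list, crop_height, crop_width):
    """Centre-crop each image in `image_list` to crop_height x crop_width (sketch)."""
    outputs = []
    for image in image_list:
        shape = tf.shape(image)
        offset_height = (shape[0] - crop_height) // 2
        offset_width = (shape[1] - crop_width) // 2
        outputs.append(tf.image.crop_to_bounding_box(
            image, offset_height, offset_width, crop_height, crop_width))
    return outputs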
    def input_fn(self, test_samples=[]):
        """Return the model input: a placeholder image in export mode,
        otherwise the next element of a batched tf.data pipeline."""
        if self.config.mode == "export":
            image = tf.placeholder(tf.float32,
                                   shape=(None, None, 3),
                                   name="input_image")
            image = tf.to_float(image)
            image = vgg_preprocessing._mean_image_subtraction(image)
            image = tf.expand_dims(image, 0)
            return image
        else:
            batch_size = (self.config.batch_size_per_gpu *
                          self.config.gpu_count)

            samples = self.get_samples_fn()

            dataset = tf.data.Dataset.from_tensor_slices(samples)

            if self.config.mode == "train":
                dataset = dataset.shuffle(self.get_num_samples())

            dataset = dataset.repeat(self.config.epochs)

            dataset = dataset.map(lambda image: self.parse_fn(image),
                                  num_parallel_calls=4)

            dataset = dataset.apply(
                tf.contrib.data.batch_and_drop_remainder(batch_size))

            dataset = dataset.prefetch(2)

            iterator = dataset.make_one_shot_iterator()
            return iterator.get_next()
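# Note: tf.contrib.data.batch_and_drop_remainder is TF 1.x contrib API. On
# TF versions without tf.contrib, the built-in batching flag is the usual
# substitute (an equivalent, not this project's code):
#
#     dataset = dataset.batch(batch_size, drop_remainder=True)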
Example #4
  def parse_fn(self, image_path, label_path):
    """Parse a single input sample
    """
    image = tf.read_file(image_path)
    image = tf.image.decode_png(image, channels=self.config.image_depth)

    if self.config.mode == "infer":
      image = tf.to_float(image)
      image = vgg_preprocessing._mean_image_subtraction(image)
      label = image[0]
      return image, label
    else:
      label = tf.read_file(label_path)
      label = tf.image.decode_png(label, channels=1)
      label = tf.cast(label, dtype=tf.int64)

      if self.augmenter:
        is_training = (self.config.mode == "train")
        return self.augmenter.augment(image, label,
                                      self.config.output_height,
                                      self.config.output_width,
                                      self.config.resize_side_min,
                                      self.config.resize_side_max,
                                      is_training=is_training,
                                      speed_mode=self.config.augmenter_speed_mode)
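# Hypothetical wiring for the two-argument parse_fn above (the path lists and
# the `input_layer` instance are assumed names, not the project's code): image
# and label paths are zipped into one dataset so parse_fn receives both.
#
#     image_paths = [...]   # list of image file paths
#     label_paths = [...]   # matching list of label file paths
#     dataset = tf.data.Dataset.from_tensor_slices((image_paths, label_paths))
#     dataset = dataset.map(
#         lambda image_path, label_path:
#             input_layer.parse_fn(image_path, label_path),
#         num_parallel_calls=4)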
Example #5
  def compute_style_feature(self):
    """Load the style image and build ops for its target Gram matrices."""
    style_image = tf.read_file(self.config.style_image_path)
    # dct_method="INTEGER_ACCURATE" selects libjpeg's slower but more accurate
    # integer IDCT when decoding the JPEG.
    style_image = tf.image.decode_jpeg(style_image,
                                       channels=self.config.image_depth,
                                       dct_method="INTEGER_ACCURATE")
    style_image = tf.to_float(style_image)
    style_image = vgg_preprocessing._mean_image_subtraction(style_image)
    style_image = tf.expand_dims(style_image, 0)

    (logits, features), self.feature_net_init_flag = self.feature_net(
      style_image, self.config.data_format,
      is_training=False, init_flag=self.feature_net_init_flag,
      ckpt_path=self.config.feature_net_path)

    self.style_features_target_op = {}
    for style_layer in self.style_layers:
      layer = features[style_layer]
      self.style_features_target_op[style_layer] = \
          self.compute_gram(layer, self.config.data_format)

    return self.style_features_target_op
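# compute_gram is referenced above but not shown on this page. A standard
# Gram-matrix computation for style features, written here for NHWC data, is
# sketched below; the real method also takes data_format and may normalise
# differently.
def compute_gram_sketch(feature_map):
    """Gram matrix of an NHWC feature map, normalised by its size (sketch)."""
    shape = tf.shape(feature_map)
    batch, height, width, channels = shape[0], shape[1], shape[2], shape[3]
    flat = tf.reshape(feature_map, [batch, height * width, channels])
    gram = tf.matmul(flat, flat, transpose_a=True)   # [batch, C, C]
    return gram / tf.to_float(height * width * channels)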
    def parse_fn(self, image_path):
        """Parse a single input sample
        """
        image = tf.read_file(image_path)
        image = tf.image.decode_jpeg(image,
                                     channels=self.config.image_depth,
                                     dct_method="INTEGER_ACCURATE")

        if self.config.mode == "infer":
            image = tf.to_float(image)
            image = vgg_preprocessing._mean_image_subtraction(image)
        else:
            if self.augmenter:
                is_training = (self.config.mode == "train")
                image = self.augmenter.augment(
                    image,
                    self.config.image_height,
                    self.config.image_width,
                    self.config.resize_side_min,
                    self.config.resize_side_max,
                    is_training=is_training,
                    speed_mode=self.config.augmenter_speed_mode)
        return (image, )
Example #7
    def create_loss_fn(self, outputs, inputs):
        """Create loss operator

        Returns:
          loss
        """
        self.gether_train_vars()

        (logits,
         vgg_net_target), self.feature_net_init_flag = self.feature_net(
             inputs,
             self.config.data_format,
             is_training=False,
             init_flag=self.feature_net_init_flag,
             ckpt_path=self.config.feature_net_path)
        content_features_target = {}
        content_features_target[self.content_layers] = (
            vgg_net_target[self.content_layers])

        outputs_mean_subtracted = vgg_preprocessing._mean_image_subtraction(
            outputs)

        (logits,
         vgg_net_source), self.feature_net_init_flag = self.feature_net(
             outputs_mean_subtracted,
             self.config.data_format,
             is_training=False,
             init_flag=self.feature_net_init_flag,
             ckpt_path=self.config.feature_net_path)

        content_features_source = {}
        content_features_source[self.content_layers] = (
            vgg_net_source[self.content_layers])

        style_features_source = {}
        for style_layer in self.style_layers:
            layer = vgg_net_source[style_layer]
            style_features_source[style_layer] = \
                self.compute_gram(layer, self.config.data_format)

        # Content loss
        content_size = tf.to_float(
            (self.tensor_size(content_features_source[self.content_layers]) *
             self.config.batch_size_per_gpu))

        loss_content = (
            self.config.content_weight *
            (2 * tf.nn.l2_loss(content_features_source[self.content_layers] -
                               content_features_target[self.content_layers]) /
             content_size))

        # Style loss
        style_loss = []
        for style_layer in self.style_layers:
            style_size = tf.to_float(
                self.tensor_size(self.style_features_target[style_layer]))
            style_loss.append(
                2 * tf.nn.l2_loss(style_features_source[style_layer] -
                                  self.style_features_target[style_layer]) /
                style_size)
        loss_style = (self.config.style_weight * tf.reduce_sum(style_loss) /
                      self.config.batch_size_per_gpu)

        # TV loss
        loss_tv = self.compute_tv_loss(outputs, self.config.data_format,
                                       self.config.tv_weight,
                                       self.config.batch_size_per_gpu)

        # L2 loss
        loss_l2 = self.l2_regularization()

        loss = tf.identity(loss_l2 + loss_content + loss_style + loss_tv,
                           name="loss")
        return loss
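# create_loss_fn also relies on tensor_size and compute_tv_loss, which are not
# shown on this page. Common definitions are sketched below for NHWC data; the
# bodies (and the omitted data_format handling) are assumptions, not the
# project's actual code.
def tensor_size_sketch(tensor):
    """Number of elements per batch item (sketch)."""
    return tf.reduce_prod(tf.shape(tensor)[1:])


def compute_tv_loss_sketch(outputs, tv_weight, batch_size):
    """Total-variation loss over NHWC outputs (sketch)."""
    # Differences between neighbouring pixels along height and width.
    diff_h = outputs[:, 1:, :, :] - outputs[:, :-1, :, :]
    diff_w = outputs[:, :, 1:, :] - outputs[:, :, :-1, :]
    h_size = tf.to_float(tf.reduce_prod(tf.shape(diff_h)[1:]))
    w_size = tf.to_float(tf.reduce_prod(tf.shape(diff_w)[1:]))
    return tv_weight * 2 * (tf.nn.l2_loss(diff_h) / h_size +
                            tf.nn.l2_loss(diff_w) / w_size) / batch_size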