Example #1
def irevnet300(
        x,  # pylint: disable=missing-docstring
        is_training,
        num_classes=1000,
        weight_decay=1e-4,
        activation_fn=tf.nn.relu,
        normalization_fn=batch_norm):

    unit = functools.partial(bottleneck_rev, simple=True)

    end_points = {}

    kernel_regularizer = l2_regularizer(scale=weight_decay)

    # Do inv. pooling first
    x = tf.space_to_depth(x, 2)

    layer_counts = [1, 6, 16, 72, 5]
    params = {
        'activation_fn': activation_fn,
        'normalization_fn': normalization_fn,
        'training': is_training,
        'kernel_regularizer': kernel_regularizer
    }

    for num_block, layer_count in enumerate(layer_counts):
        for _ in range(layer_count):
            x = unit(x, **params)
        end_points['block%i' % (num_block + 1)] = x
        if num_block < (len(layer_counts) - 1):
            x = tf.space_to_depth(x, 2)

    x = normalization_fn(x, training=is_training)
    end_points['last_invertible'] = x

    # Non-invertible part starts here
    x = activation_fn(x)
    x = tf.reduce_mean(x, axis=[1, 2], keepdims=True)

    end_points['pre_logits'] = tf.squeeze(x, [1, 2])
    logits = tf.squeeze(
        tf.layers.conv2d(x,
                         filters=num_classes,
                         kernel_size=1,
                         kernel_regularizer=kernel_regularizer), [1, 2])

    end_points['logits'] = logits

    return logits, end_points
Example #2
def build_graph(parameters):
  input_tensor = tf.compat.v1.placeholder(
      dtype=parameters["dtype"],
      name="input",
      shape=parameters["input_shape"])
  out = tf.space_to_depth(input_tensor, block_size=parameters["block_size"])
  return [input_tensor], [out]
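For reference, `tf.space_to_depth` only rearranges data: every block_size x block_size spatial block is moved into the channel dimension. A minimal shape sketch (illustrative only, assuming the TF 1.x-style API used throughout these snippets):

import tensorflow.compat.v1 as tf

x = tf.zeros([1, 4, 6, 3])              # NHWC input
y = tf.space_to_depth(x, block_size=2)  # -> shape [1, 2, 3, 12]
# Height and width are divided by block_size (both must be divisible by it),
# channels are multiplied by block_size ** 2; values are only reordered, never
# recomputed.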
Example #3
def demosaic(bayer_images):
    """Bilinearly demosaics a batch of RGGB Bayer images."""
    bayer_images.shape.assert_is_compatible_with((None, None, None, 4))

    # This implementation exploits how edges are aligned when upsampling with
    # tf.image.resize_bilinear().

    with tf.name_scope(None, 'demosaic'):
        shape = tf.shape(bayer_images)
        shape = [shape[1] * 2, shape[2] * 2]

        red = bayer_images[Ellipsis, 0:1]
        red = tf.image.resize_bilinear(red, shape)

        green_red = bayer_images[Ellipsis, 1:2]
        green_red = tf.image.flip_left_right(green_red)
        green_red = tf.image.resize_bilinear(green_red, shape)
        green_red = tf.image.flip_left_right(green_red)
        green_red = tf.space_to_depth(green_red, 2)

        green_blue = bayer_images[Ellipsis, 2:3]
        green_blue = tf.image.flip_up_down(green_blue)
        green_blue = tf.image.resize_bilinear(green_blue, shape)
        green_blue = tf.image.flip_up_down(green_blue)
        green_blue = tf.space_to_depth(green_blue, 2)

        green_at_red = (green_red[Ellipsis, 0] + green_blue[Ellipsis, 0]) / 2
        green_at_green_red = green_red[Ellipsis, 1]
        green_at_green_blue = green_blue[Ellipsis, 2]
        green_at_blue = (green_red[Ellipsis, 3] + green_blue[Ellipsis, 3]) / 2

        green_planes = [
            green_at_red, green_at_green_red, green_at_green_blue,
            green_at_blue
        ]
        green = tf.depth_to_space(tf.stack(green_planes, axis=-1), 2)

        blue = bayer_images[Ellipsis, 3:4]
        blue = tf.image.flip_up_down(tf.image.flip_left_right(blue))
        blue = tf.image.resize_bilinear(blue, shape)
        blue = tf.image.flip_up_down(tf.image.flip_left_right(blue))

        rgb_images = tf.concat([red, green, blue], axis=-1)
        return rgb_images
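The green-channel reconstruction above works because `tf.depth_to_space` exactly inverts `tf.space_to_depth` for the same block size, so quarter-resolution planes can be pulled apart, recombined, and reassembled at full resolution. A small illustrative check (not part of the original code):

import numpy as np
import tensorflow.compat.v1 as tf

x = tf.constant(np.arange(16, dtype=np.float32).reshape(1, 4, 4, 1))
packed = tf.space_to_depth(x, 2)         # [1, 2, 2, 4]: each 2x2 block becomes 4 channels
restored = tf.depth_to_space(packed, 2)  # [1, 4, 4, 1], element-for-element equal to x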
Example #4
    def encoder(self, x):
        if self.bf16:
            x = tf.cast(x, tf.bfloat16)

        if self.stack_factor > 1:
            x = tf.space_to_depth(x, self.stack_factor)

        with tf.variable_scope("encoder"):
            for block, (stack, channels) in enumerate(self.convblocks):
                with tf.variable_scope(f"block_{block}"):
                    for i in range(stack):
                        with tf.variable_scope(f"layer_{i}"):
                            if i == 0:
                                # downsample
                                x = self.conv2d(x,
                                                channels, (4, 4), (2, 2),
                                                padding="SAME",
                                                name=f"conv_downsample")
                            else:
                                # normal residual block

                                def encoder_block(x, channels=channels):
                                    out = self.conv2d(x,
                                                      channels, (3, 3), (1, 1),
                                                      padding="SAME",
                                                      name=f"conv_in")
                                    # out = self.norm(out, name=f"bn_in")
                                    out = self.activation(out, name=f"activ")
                                    out = self.conv2d(out,
                                                      channels, (3, 3), (1, 1),
                                                      padding="SAME",
                                                      name=f"conv_out")
                                    # out = self.norm(out, name=f"bn_out")
                                    return out

                                res_out = recompute_grad(
                                    encoder_block, self.bf16
                                )(x) if self.recompute_grad else encoder_block(
                                    x)

                                x = x + res_out

        with tf.variable_scope(f"codebook"):
            self.n_hid = x.shape[-1]
            embedding = tf.get_variable("codebook",
                                        shape=[self.n_hid, self.num_tokens],
                                        dtype=tf.float32)

            if self.bf16:
                x = tf.cast(x, tf.float32)

            output = tf.matmul(x, embedding)

            return output
Example #5
def position_sensitive_crop_regions(image,
                                    boxes,
                                    crop_size,
                                    num_spatial_bins,
                                    global_pool):
  """Position-sensitive crop and pool rectangular regions from a feature grid.

  The output crops are split into `spatial_bins_y` vertical bins
  and `spatial_bins_x` horizontal bins. For each intersection of a vertical
  and a horizontal bin the output values are gathered by performing
  `tf.image.crop_and_resize` (bilinear resampling) on a separate subset of
  channels of the image. This reduces `depth` by a factor of
  `(spatial_bins_y * spatial_bins_x)`.

  When global_pool is True, this function implements a differentiable version
  of position-sensitive RoI pooling used in
  [R-FCN detection system](https://arxiv.org/abs/1605.06409).

  When global_pool is False, this function implements a differentiable version
  of position-sensitive assembling operation used in
  [instance FCN](https://arxiv.org/abs/1603.08678).

  Args:
    image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
      A 3-D tensor of shape `[image_height, image_width, depth]`.
      Both `image_height` and `image_width` need to be positive.
    boxes: A `Tensor` of type `float32`.
      A 2-D tensor of shape `[num_boxes, 4]`. Each box is specified in
      normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value
      of `y` is mapped to the image coordinate at `y * (image_height - 1)`, so
      the `[0, 1]` interval of normalized image height is mapped to
      `[0, image_height - 1]` in image height coordinates. We do allow y1 > y2,
      in which case the sampled crop is an up-down flipped version of the
      original image. The width dimension is treated similarly.
    crop_size: A list of two integers `[crop_height, crop_width]`. All
      cropped image patches are resized to this size. The aspect ratio of the
      image content is not preserved. Both `crop_height` and `crop_width` need
      to be positive.
    num_spatial_bins: A list of two integers `[spatial_bins_y, spatial_bins_x]`.
      Represents the number of position-sensitive bins in y and x directions.
      Both values should be >= 1. `crop_height` should be divisible by
      `spatial_bins_y`, and similarly for width.
      The number of image channels should be divisible by
      (spatial_bins_y * spatial_bins_x).
      Suggested value from R-FCN paper: [3, 3].
    global_pool: A boolean variable.
      If True, we perform average global pooling on the features assembled from
        the position-sensitive score maps.
      If False, we keep the position-pooled features without global pooling
        over the spatial coordinates.
      Note that using global_pool=True is equivalent to but more efficient than
        running the function with global_pool=False and then performing global
        average pooling.

  Returns:
    position_sensitive_features: A 4-D tensor of shape
      `[num_boxes, K, K, crop_channels]`,
      where `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`,
      where K = 1 when global_pool is True (Average-pooled cropped regions),
      and K = crop_size when global_pool is False.
  Raises:
    ValueError: Raised in four situations:
      `num_spatial_bins` is not >= 1;
      `num_spatial_bins` does not divide `crop_size`;
      `(spatial_bins_y*spatial_bins_x)` does not divide `depth`;
      `bin_crop_size` is not square when global_pool=False due to the
        constraint in function space_to_depth.
  """
  total_bins = 1
  bin_crop_size = []

  for (num_bins, crop_dim) in zip(num_spatial_bins, crop_size):
    if num_bins < 1:
      raise ValueError('num_spatial_bins should be >= 1')

    if crop_dim % num_bins != 0:
      raise ValueError('crop_size should be divisible by num_spatial_bins')

    total_bins *= num_bins
    bin_crop_size.append(crop_dim // num_bins)

  if not global_pool and bin_crop_size[0] != bin_crop_size[1]:
    raise ValueError('Only support square bin crop size for now.')

  ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
  spatial_bins_y, spatial_bins_x = num_spatial_bins

  # Split each box into spatial_bins_y * spatial_bins_x bins.
  position_sensitive_boxes = []
  for bin_y in range(spatial_bins_y):
    step_y = (ymax - ymin) / spatial_bins_y
    for bin_x in range(spatial_bins_x):
      step_x = (xmax - xmin) / spatial_bins_x
      box_coordinates = [ymin + bin_y * step_y,
                         xmin + bin_x * step_x,
                         ymin + (bin_y + 1) * step_y,
                         xmin + (bin_x + 1) * step_x,
                        ]
      position_sensitive_boxes.append(tf.stack(box_coordinates, axis=1))

  image_splits = tf.split(value=image, num_or_size_splits=total_bins, axis=2)

  image_crops = []
  for (split, box) in zip(image_splits, position_sensitive_boxes):
    if split.shape.is_fully_defined() and box.shape.is_fully_defined():
      crop = tf.squeeze(
          matmul_crop_and_resize(
              tf.expand_dims(split, axis=0), tf.expand_dims(box, axis=0),
              bin_crop_size),
          axis=0)
    else:
      crop = tf.image.crop_and_resize(
          tf.expand_dims(split, 0), box,
          tf.zeros(tf.shape(boxes)[0], dtype=tf.int32), bin_crop_size)
    image_crops.append(crop)

  if global_pool:
    # Average over all bins.
    position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
    # Then average over spatial positions within the bins.
    position_sensitive_features = tf.reduce_mean(
        position_sensitive_features, [1, 2], keepdims=True)
  else:
    # Reorder height/width to depth channel.
    block_size = bin_crop_size[0]
    if block_size >= 2:
      image_crops = [tf.space_to_depth(
          crop, block_size=block_size) for crop in image_crops]

    # Pack image_crops so that the first dimension is for position-sensitive boxes.
    position_sensitive_features = tf.stack(image_crops, axis=0)

    # Unroll the position-sensitive boxes to spatial positions.
    position_sensitive_features = tf.squeeze(
        tf.batch_to_space_nd(position_sensitive_features,
                             block_shape=[1] + num_spatial_bins,
                             crops=tf.zeros((3, 2), dtype=tf.int32)),
        axis=[0])

    # Reorder back the depth channel.
    if block_size >= 2:
      position_sensitive_features = tf.depth_to_space(
          position_sensitive_features, block_size=block_size)

  return position_sensitive_features
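A hypothetical call to the function above (a sketch only; the shapes are illustrative, and TF 1.x graph mode is assumed because of `tf.placeholder`):

import tensorflow.compat.v1 as tf

# depth = 18 is divisible by spatial_bins_y * spatial_bins_x = 9, and
# crop_size = [9, 9] is divisible by num_spatial_bins = [3, 3].
score_map = tf.placeholder(tf.float32, [None, None, 18])
boxes = tf.constant([[0.1, 0.1, 0.9, 0.9],
                     [0.0, 0.0, 0.5, 0.5]], tf.float32)
feats = position_sensitive_crop_regions(
    score_map, boxes, crop_size=[9, 9], num_spatial_bins=[3, 3],
    global_pool=True)
# Because score_map's shape is not fully defined, the tf.image.crop_and_resize
# branch is taken, so no helpers beyond TF itself are needed.
# feats has shape [2, 1, 1, 2]: one globally pooled 2-channel response per box.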
Example #6
  def input_fn(params):
    """Input function which provides a single batch for train or eval."""
    batch_size = params['batch_size']
    index = params['dataset_index']
    num_hosts = params['dataset_num_shards']
    num_dataset_per_shard = max(
        1,
        int(
            math.ceil(FLAGS.num_eval_images / FLAGS.eval_batch_size) *
            FLAGS.eval_batch_size / num_hosts))
    padded_dataset = tf.data.Dataset.from_tensors(
        tf.constant(
            tf.train.Example(
                features=tf.train.Features(
                    feature={
                        'image/class/label':
                            tf.train.Feature(
                                int64_list=tf.train.Int64List(value=[-1])),
                        'image/encoded':
                            tf.train.Feature(
                                bytes_list=tf.train.BytesList(
                                    value=[str.encode('')]))
                    })).SerializeToString(),
            dtype=tf.string)).repeat(num_dataset_per_shard)

    if data_dir is None:
      dataset = padded_dataset.repeat(batch_size * 100)
    else:
      file_pattern = os.path.join(data_dir,
                                  'train-*' if is_training else 'validation-*')
      dataset = tf.data.Dataset.list_files(file_pattern, shuffle=False)
      dataset = dataset.shard(num_hosts, index)
      dataset = dataset.interleave(tf.data.TFRecordDataset, 64, 1, 64)

      if is_training:
        dataset = dataset.cache().shuffle(shuffle_size).repeat(100)
      else:
        dataset = dataset.concatenate(padded_dataset).take(
            num_dataset_per_shard)

    if cache_decoded_image and is_training:
      dataset = dataset.map(cached_parser,
                            64).repeat().map(crop_image,
                                             64).batch(batch_size, True)
    else:
      dataset = dataset.map(dataset_parser, 64).batch(batch_size, True)

    if FLAGS.use_space_to_depth:
      dataset = dataset.map(
          lambda images, labels: (tf.space_to_depth(images, 2), labels), 64)
    # Transpose for performance on TPU
    if FLAGS.train_batch_size // FLAGS.num_replicas > 8:
      transpose_array = [1, 2, 3, 0]
    else:
      transpose_array = [1, 2, 0, 3]
    dataset = dataset.map(
        lambda imgs, labels: (tf.transpose(imgs, transpose_array), labels), 64)
    dataset = dataset.map(functools.partial(set_shapes, batch_size), 64)
    dataset = dataset.prefetch(10)

    options = tf.data.Options()
    options.experimental_deterministic = False
    options.experimental_threading.max_intra_op_parallelism = 1
    options.experimental_threading.private_threadpool_size = 48
    dataset = dataset.with_options(options)
    return dataset
Example #7
def reorg(x, stride):
    return tf.space_to_depth(x, block_size=stride)
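The name `reorg` suggests a YOLOv2-style passthrough layer, where a higher-resolution feature map is repacked so it can be concatenated with a coarser one. A hedged shape sketch using the helper above (feature-map sizes are illustrative, not taken from any particular model):

import tensorflow.compat.v1 as tf

fine = tf.zeros([1, 26, 26, 64])      # higher-resolution features
coarse = tf.zeros([1, 13, 13, 1024])  # features after one more downsampling step
passthrough = reorg(fine, 2)          # [1, 13, 13, 256]
merged = tf.concat([passthrough, coarse], axis=-1)  # [1, 13, 13, 1280]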
Example #8
def inference(
    input_lr_dir,
    input_hr_dir,
    input_dir_len,
    num_resblock,
    vsr_scale,
    checkpoint_path,
    output_dir,
    output_pre,
    output_name,
    output_ext,
):
  """Main inference function."""
  if checkpoint_path is None:
    raise ValueError('The checkpoint file is needed to perform the test.')

  # Declare the test data reader
  inference_data = inference_data_loader(input_lr_dir, input_hr_dir,
                                         input_dir_len)
  input_shape = [
      1,
  ] + list(inference_data.inputs[0].shape)
  output_shape = [1, input_shape[1] * vsr_scale, input_shape[2] * vsr_scale, 3]
  oh = input_shape[1] - input_shape[1] // 8 * 8
  ow = input_shape[2] - input_shape[2] // 8 * 8
  paddings = tf.constant([[0, 0], [0, oh], [0, ow], [0, 0]])
  print('input shape:', input_shape)
  print('output shape:', output_shape)

  # build the graph
  inputs_raw = tf.placeholder(tf.float32, shape=input_shape, name='inputs_raw')

  pre_inputs = tf.Variable(
      tf.zeros(input_shape), trainable=False, name='pre_inputs')
  pre_gen = tf.Variable(tf.zeros(output_shape), trainable=False, name='pre_gen')
  pre_warp = tf.Variable(
      tf.zeros(output_shape), trainable=False, name='pre_warp')

  transpose_pre = tf.space_to_depth(pre_warp, vsr_scale)
  inputs_all = tf.concat((inputs_raw, transpose_pre), axis=-1)
  with tf.variable_scope('generator'):
    gen_output = generator_f(
        inputs_all, 3, num_resblock, vsr_scale, reuse=False)
    # Deprocess the images output from the model, and assign things for the next
    # frame
    with tf.control_dependencies([tf.assign(pre_inputs, inputs_raw)]):
      outputs = tf.assign(pre_gen, ops.deprocess(gen_output))

  inputs_frames = tf.concat((pre_inputs, inputs_raw), axis=-1)
  with tf.variable_scope('fnet'):
    gen_flow_lr = fnet(inputs_frames, reuse=False)
    gen_flow_lr = tf.pad(gen_flow_lr, paddings, 'SYMMETRIC')

    deconv_flow = gen_flow_lr
    deconv_flow = ops.conv2_tran(
        deconv_flow, 3, 64, 2, scope='deconv_flow_tran1')
    deconv_flow = tf.nn.relu(deconv_flow)
    deconv_flow = ops.conv2_tran(
        deconv_flow, 3, 64, 2, scope='deconv_flow_tran2')
    deconv_flow = tf.nn.relu(deconv_flow)
    deconv_flow = ops.conv2(deconv_flow, 3, 2, 1, scope='deconv_flow_conv')
    gen_flow = ops.upscale_x(gen_flow_lr * 4.0, scale=vsr_scale)
    gen_flow = deconv_flow + gen_flow

    gen_flow.set_shape(output_shape[:-1] + [2])
  pre_warp_hi = tfa.image.dense_image_warp(pre_gen, gen_flow)
  pre_warp_hi = pre_warp_hi + extract_detail_ops(pre_warp_hi)
  before_ops = tf.assign(pre_warp, pre_warp_hi)

  print('Finish building the network')

  if FLAGS.use_ema:
    moving_average_decay = 0.99
    global_step = tf.train.get_or_create_global_step()
    ema = tf.train.ExponentialMovingAverage(moving_average_decay, global_step)
    ema_vars = _get_ema_vars()

  # In inference time, we only need to restore the weight of the generator
  var_list = tf.trainable_variables()

  restore_vars_dict = {}
  if FLAGS.use_ema:
    for v in var_list:
      if re.match('.*global_step.*', v.name):
        restore_vars_dict[v.name[:-2]] = v
      else:
        restore_vars_dict[v.name[:-2] + '/ExponentialMovingAverage'] = v
  else:
    restore_vars_dict = var_list

  weight_initiallizer = tf.train.Saver(restore_vars_dict)

  # Define the initialization operation
  init_op = tf.global_variables_initializer()
  local_init_op = tf.local_variables_initializer()

  config = tf.ConfigProto()
  config.gpu_options.allow_growth = True
  if not gfile.exists(output_dir):
    gfile.mkdir(output_dir)
  if not output_pre:
    image_dir = output_dir
  else:
    image_dir = os.path.join(output_dir, output_pre)
  if not gfile.exists(image_dir):
    gfile.mkdir(image_dir)

  with tf.Session(config=config) as sess:
    # Load the pretrained model
    sess.run(init_op)
    sess.run(local_init_op)

    print('Loading weights from ckpt model')
    weight_initiallizer.restore(sess, checkpoint_path)
    max_iter = len(inference_data.inputs)

    srtime = 0
    print('Frame evaluation starts!!')
    for i in range(max_iter):
      input_im = np.array([inference_data.inputs[i]]).astype(np.float32)
      feed_dict = {inputs_raw: input_im}
      t0 = time.time()
      if i != 0:
        sess.run(before_ops, feed_dict=feed_dict)
      output_frame = sess.run(outputs, feed_dict=feed_dict)
      srtime += time.time() - t0

      if i >= 5:
        name, _ = os.path.splitext(
            os.path.basename(str(inference_data.paths_LR[i])))
        filename = output_name + '_' + name
        out_path = os.path.join(image_dir, '%s.%s' % (filename, output_ext))
        print('saving image %s' % out_path)
        with tf.gfile.Open(out_path, 'wb') as image_file:
          img = np.clip(output_frame[0] * 255.0, 0, 255).astype(np.uint8)
          _, buff = cv2.imencode('.png', img[:, :, ::-1])
          image_file.write(buff.tostring())

      else:
        print('Warming up %d' % (5 - i))
  tf.reset_default_graph()
  print('total time ' + str(srtime) + ', frame number ' + str(max_iter))
Example #9
    def _PDS(self, X, r):
        X = tf.space_to_depth(X, r)
        return X
Example #10
    # build the graph
    inputs_raw = tf.placeholder(tf.float32,
                                shape=input_shape,
                                name='inputs_raw')

    pre_inputs = tf.Variable(tf.zeros(input_shape),
                             trainable=False,
                             name='pre_inputs')
    pre_gen = tf.Variable(tf.zeros(output_shape),
                          trainable=False,
                          name='pre_gen')
    pre_warp = tf.Variable(tf.zeros(output_shape),
                           trainable=False,
                           name='pre_warp')

    transpose_pre = tf.space_to_depth(pre_warp, 4)
    inputs_all = tf.concat((inputs_raw, transpose_pre), axis=-1)
    with tf.variable_scope('generator'):
        gen_output = generator_F(inputs_all, 3, reuse=False, FLAGS=FLAGS)
        # Deprocess the images output from the model, and assign things for the next frame
        with tf.control_dependencies([tf.assign(pre_inputs, inputs_raw)]):
            outputs = tf.assign(pre_gen, deprocess(gen_output))

    inputs_frames = tf.concat((pre_inputs, inputs_raw), axis=-1)
    with tf.variable_scope('fnet'):
        gen_flow_lr = fnet(inputs_frames, reuse=False)
        gen_flow_lr = tf.pad(gen_flow_lr, paddings, "SYMMETRIC")
        gen_flow = upscale_four(gen_flow_lr * 4.0)
        gen_flow.set_shape(output_shape[:-1] + [2])
    pre_warp_hi = tfa.image.dense_image_warp(pre_gen, gen_flow)
    before_ops = tf.assign(pre_warp, pre_warp_hi)