def visualize_image_sequence(est, dataset_name, dataset_parent_dir,
                             input_sequence_name, app_base_path, output_dir):
  """Generates an image sequence as a video and stores it to disk."""
  batch_sz = opts.batch_size
  def input_seq_fn():
    dict_inp = data.provide_data(
        dataset_name=dataset_name, parent_dir=dataset_parent_dir,
        subset=input_sequence_name, batch_size=batch_sz, crop_flag=False,
        seeds=None, use_appearance=False, shuffle=0)
    x_in = dict_inp['conditional_input']
    return x_in

  # Compute appearance embedding only once and use it for all input frames.
  app_rgb_path = app_base_path + '_reference.png'
  app_rendered_path = app_base_path + '_color.png'
  app_depth_path = app_base_path + '_depth.png'
  app_sem_path = app_base_path + '_seg_rgb.png'
  x_app = _load_and_concatenate_image_channels(
      app_rgb_path, app_rendered_path, app_depth_path, app_sem_path)
  def seq_with_single_appearance_inp_fn():
    """input frames with a fixed latent appearance vector."""
    x_in_op = input_seq_fn()
    x_app_op = tf.convert_to_tensor(x_app)
    x_app_tiled_op = tf.tile(x_app_op, [tf.shape(x_in_op)[0], 1, 1, 1])
    return {'conditional_input': x_in_op,
            'peek_input': x_app_tiled_op}

  images = [x for x in est.predict(seq_with_single_appearance_inp_fn)]
  for i, gen_img in enumerate(images):
    output_file_path = osp.join(output_dir, 'out_%04d.png' % i)
    print('Saving frame #%d to %s' % (i, output_file_path))
    with tf.gfile.Open(output_file_path, 'wb') as f:
      f.write(utils.to_png(gen_img))
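
# A minimal NumPy sketch (added for illustration, not part of the original
# pipeline) of the tiling above: a single appearance tensor of shape
# [1, H, W, C] is repeated along the batch axis so that every frame in the
# batch shares the same appearance input. Shapes here are hypothetical.
import numpy as np

x_app_demo = np.zeros((1, 4, 4, 3), np.float32)      # one appearance input
batch_demo = 5                                       # e.g. frames per batch
x_app_tiled = np.tile(x_app_demo, (batch_demo, 1, 1, 1))
assert x_app_tiled.shape == (5, 4, 4, 3)
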
def infer_dir(model_dir, input_dir, output_dir):
  """Runs inference on all frames in input_dir and saves outputs to disk."""
  tf.gfile.MakeDirs(output_dir)
  est = _build_inference_estimator(model_dir)

  def read_image(base_path, is_appearance=False):
    if is_appearance:
      ref_img_path = base_path + '_reference.png'
    else:
      ref_img_path = None
    rendered_img_path = base_path + '_color.png'
    depth_img_path = base_path + '_depth.png'
    seg_img_path = base_path + '_seg_rgb.png'
    img = _load_and_concatenate_image_channels(
        rgb_path=ref_img_path, rendered_path=rendered_img_path,
        depth_path=depth_img_path, seg_path=seg_img_path)
    return img

  def get_inference_input_fn(base_path, app_base_path):
    x_in = read_image(base_path, False)
    x_app_in = read_image(app_base_path, True)
    def infer_input_fn():
      return {'conditional_input': x_in, 'peek_input': x_app_in}
    return infer_input_fn

  file_paths = sorted(glob.glob(osp.join(input_dir, '*_depth.png')))
  base_paths = [x[:-10] for x in file_paths]  # remove the '_depth.png' suffix
  for inp_base_path in base_paths:
    est_inp_fn = get_inference_input_fn(inp_base_path, inp_base_path)
    img = next(est.predict(est_inp_fn))
    basename = osp.basename(inp_base_path)
    output_img_path = osp.join(output_dir, basename + '_out.png')
    print('Saving generated image to %s' % output_img_path)
    with tf.gfile.Open(output_img_path, 'wb') as f:
      f.write(utils.to_png(img))
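
# A self-contained sketch (illustrative paths only) of the suffix stripping
# used above: each '*_depth.png' file maps back to its frame base path, from
# which the sibling '_color.png' and '_seg_rgb.png' channels are derived.
demo_paths = ['/tmp/frames/0001_depth.png', '/tmp/frames/0002_depth.png']
demo_bases = [p[:-len('_depth.png')] for p in demo_paths]
assert demo_bases == ['/tmp/frames/0001', '/tmp/frames/0002']
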
Example #3
def evaluate_image_set(dataset_name,
                       dataset_parent_dir,
                       subset_suffix,
                       output_dir=None,
                       batch_size=6):
    """Runs eval-mode inference on a dataset subset and saves output images."""
    if output_dir is None:
        output_dir = osp.join(opts.train_dir,
                              'validation_output_%s' % subset_suffix)
    tf.gfile.MakeDirs(output_dir)
    model_fn_old = build_model_fn()

    def model_fn_wrapper(features, labels, mode, params):
        del mode
        return model_fn_old(features, labels, 'eval_subset', params)

    model_dir = opts.train_dir
    est = tf.estimator.Estimator(model_fn_wrapper, model_dir)
    est_inp_fn = functools.partial(data.provide_data,
                                   dataset_name=dataset_name,
                                   parent_dir=dataset_parent_dir,
                                   subset=subset_suffix,
                                   batch_size=batch_size,
                                   use_appearance=opts.use_appearance,
                                   shuffle=0)

    print('Evaluating images for subset %s' % subset_suffix)
    images = [x for x in est.predict(est_inp_fn)]
    print('Evaluated %d images' % len(images))
    for i, img in enumerate(images):
        output_file_path = osp.join(output_dir, 'out_%04d.png' % i)
        print('Saving file #%d: %s' % (i, output_file_path))
        with tf.gfile.Open(output_file_path, 'wb') as f:
            f.write(utils.to_png(img))
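
# The model_fn_wrapper above pins the `mode` argument of an existing model_fn
# to a fixed string before handing it to tf.estimator.Estimator. A
# dependency-free sketch of the same pattern (names here are illustrative,
# not from the original source):
def make_fixed_mode_model_fn(model_fn, fixed_mode):
    def wrapper(features, labels, mode, params):
        del mode  # ignore the mode the Estimator passes in
        return model_fn(features, labels, fixed_mode, params)
    return wrapper

# e.g. eval_fn = make_fixed_mode_model_fn(build_model_fn(), 'eval_subset')
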
Example #4
def make_sample_grid_and_save(est, dataset_name, dataset_parent_dir, grid_dims,
                              output_dir, cur_nimg):
    """Evaluate a fixed set of validation images and save output.

    Args:
      est: tf.estimator.Estimator, TF estimator to run the predictions.
      dataset_name: basename for the validation tfrecord from which to load
        validation images.
      dataset_parent_dir: path to a directory containing the validation
        tfrecord.
      grid_dims: 2-tuple of ints for the grid size (1 unit = 1 image).
      output_dir: string, where to save image samples.
      cur_nimg: int, current number of images seen by training.

    Returns:
      None.
    """
    num_examples = grid_dims[0] * grid_dims[1]

    def input_val_fn():
        dict_inp = data.provide_data(dataset_name=dataset_name,
                                     parent_dir=dataset_parent_dir,
                                     subset='val',
                                     batch_size=1,
                                     crop_flag=True,
                                     crop_size=opts.train_resolution,
                                     seeds=[0],
                                     max_examples=num_examples,
                                     use_appearance=opts.use_appearance,
                                     shuffle=0)
        x_in = dict_inp['conditional_input']
        x_gt = dict_inp['expected_output']  # ground truth output
        x_app = dict_inp['peek_input']
        return x_in, x_gt, x_app

    def est_input_val_fn():
        x_in, _, x_app = input_val_fn()
        features = {'conditional_input': x_in, 'peek_input': x_app}
        return features

    images = [x for x in est.predict(est_input_val_fn)]
    images = np.array(images, 'f')
    images = images.reshape(grid_dims + images.shape[1:])
    utils.save_images(utils.to_png(utils.images_to_grid(images)), output_dir,
                      cur_nimg)
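
# A NumPy sketch (illustrative shapes) of the grid reshape above: a flat
# batch of grid_dims[0] * grid_dims[1] predictions becomes a
# [rows, cols, H, W, C] array that utils.images_to_grid can tile into one
# composite image.
import numpy as np

grid_dims_demo = (2, 3)
imgs_demo = np.zeros((6, 8, 8, 3), np.float32)       # 6 predicted images
grid_demo = imgs_demo.reshape(grid_dims_demo + imgs_demo.shape[1:])
assert grid_demo.shape == (2, 3, 8, 8, 3)
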
Example #5
    def __call__(self, image_rois):
        if not self.valid_image(image_rois[0]):
            return

        # TODO: replace with cv2
        img = np.asarray(Image.open(image_rois[0]))
        i = 0
        detection_file = self.classification_path + "/val.txt"
        with open(detection_file, "a+") as label_file:
            for roi in image_rois[1]:
                # TODO: check why?
                try:
                    class_id = ("false_pos" if roi.false_pos
                                else conf.type2class_id[roi.r_type])
                    roi_img = img[roi.tl_row:roi.br_row, roi.tl_col:roi.br_col]

                    r, g, b = roi_img.T
                    roi_img_bgr = np.array((b, g, r)).T

                    roi_img_path = self.classification_path + "/" + str(
                        class_id) + "/"
                    roi_img_path += str(i) + "_" + utils.to_png(image_rois[0])
                    # TODO: replace with cv2
                    Image.fromarray(roi_img_bgr).save(roi_img_path)
                    i = i + 1
                    if class_id == "false_pos":
                        label_id = conf.invalid_id
                    else:
                        label_id = class_id
                    label_file.write(
                        roi_img_path[len(self.classification_path):] + " " +
                        str(label_id))
                    label_file.write("\n")
                except ValueError:
                    print("Rect goes outside image {}.".format(
                        image_rois[0]))
                    continue
                except KeyError:
                    continue
Example #6
    def __call__(self, image_rois):
        # TODO: cv2
        img = Image.open(image_rois[0])
        label = np.zeros((img.size[1], img.size[0]), np.uint8)

        for roi in image_rois[1]:
            # class_id = conf.type2class_id[roi.r_type]
            # For the moment we only support binary segmentation
            class_id = 1
            label[roi.tl_row:roi.br_row, roi.tl_col:roi.br_col] = class_id

        label_img = Image.fromarray(label, 'P')
        label_img.putpalette(conf.palette)
        label_path = self.labels_path + utils.to_png(image_rois[0])
        image.resize_image(label_img,
                           (conf.image_size[0] // 2,
                            conf.image_size[1] // 2))[0].save(label_path)

        label_file_path = self.labels_path + "/val.txt"
        with open(label_file_path, "a+") as label_file:
            label_file.write(label_path[len(self.labels_path):])
            label_file.write("\n")
def interpolate_appearance(model_dir, input_dir, target_img_basename,
                           appearance_img1_basename, appearance_img2_basename):
  # Create output directory
  output_dir = osp.join(model_dir, 'interpolate_appearance_out')
  tf.gfile.MakeDirs(output_dir)

  # Build estimator
  model_fn_old = build_model_fn()
  def model_fn_wrapper(features, labels, mode, params):
    del mode
    return model_fn_old(features, labels, 'interpolate_appearance', params)
  def appearance_model_fn(features, labels, mode, params):
    del mode
    return model_fn_old(features, labels, 'compute_appearance', params)
  config = tf.estimator.RunConfig(
      save_summary_steps=1000, save_checkpoints_steps=50000,
      keep_checkpoint_max=50, log_step_count_steps=1 << 30)
  est = tf.estimator.Estimator(model_fn_wrapper, model_dir, config, params={})
  est_app = tf.estimator.Estimator(appearance_model_fn, model_dir, config,
                                   params={})

  # Compute appearance embeddings for the two input appearance images.
  app_inputs = []
  for app_basename in [appearance_img1_basename, appearance_img2_basename]:
    app_rgb_path = osp.join(input_dir, app_basename + '_reference.png')
    app_rendered_path = osp.join(input_dir, app_basename + '_color.png')
    app_depth_path = osp.join(input_dir, app_basename + '_depth.png')
    app_seg_path = osp.join(input_dir, app_basename + '_seg_rgb.png')
    app_in = _load_and_concatenate_image_channels(
        rgb_path=app_rgb_path, rendered_path=app_rendered_path,
        depth_path=app_depth_path, seg_path=app_seg_path)
    # app_inputs.append(tf.convert_to_tensor(app_in))
    app_inputs.append(app_in)

  embedding1 = next(est_app.predict(
      lambda: {'peek_input': app_inputs[0]}))
  embedding2 = next(est_app.predict(
      lambda: {'peek_input': app_inputs[1]}))
  embedding1 = np.expand_dims(embedding1, axis=0)
  embedding2 = np.expand_dims(embedding2, axis=0)

  # Compute interpolated appearance embeddings
  num_interpolations = 10
  interpolated_embeddings = []
  delta_vec = (embedding2 - embedding1) / num_interpolations
  for delta_iter in range(num_interpolations + 1):
    x_app_embedding = embedding1 + delta_iter * delta_vec
    interpolated_embeddings.append(x_app_embedding)

  # Read in the generator input for the target image to render
  rendered_img_path = osp.join(input_dir, target_img_basename + '_color.png')
  depth_img_path = osp.join(input_dir, target_img_basename + '_depth.png')
  seg_img_path = osp.join(input_dir, target_img_basename + '_seg_rgb.png')
  x_in = _load_and_concatenate_image_channels(
      rgb_path=None, rendered_path=rendered_img_path,
      depth_path=depth_img_path, seg_path=seg_img_path)

  # Generate and save interpolated images
  for interpolate_iter, embedding in enumerate(interpolated_embeddings):
    img = next(est.predict(
        lambda: {'conditional_input': tf.convert_to_tensor(x_in),
                 'appearance_embedding': tf.convert_to_tensor(embedding)}))
    output_img_name = 'interpolate_%s_%s_%s_%03d.png' % (
        target_img_basename, appearance_img1_basename, appearance_img2_basename,
        interpolate_iter)
    output_img_path = osp.join(output_dir, output_img_name)
    print('Saving interpolated image to %s' % output_img_path)
    with tf.gfile.Open(output_img_path, 'wb') as f:
      f.write(utils.to_png(img))
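
# A NumPy sketch of the linear interpolation schedule above (shapes are
# illustrative): stepping from embedding1 to embedding2 in num_interpolations
# equal increments, inclusive of both endpoints.
import numpy as np

e1_demo = np.zeros((1, 8), np.float32)
e2_demo = np.ones((1, 8), np.float32)
n_demo = 10
steps_demo = [e1_demo + i * (e2_demo - e1_demo) / n_demo
              for i in range(n_demo + 1)]
assert np.allclose(steps_demo[0], e1_demo)
assert np.allclose(steps_demo[-1], e2_demo)
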
def joint_interpolation(model_dir, app_input_dir, st_app_basename,
                        end_app_basename, camera_path_dir):
  """
  Interpolates both viewpoint and appearance between two input images.
  """
  # Create output direcotry
  output_dir = osp.join(model_dir, 'joint_interpolation_out')
  tf.gfile.MakeDirs(output_dir)

  # Build estimator
  model_fn_old = build_model_fn()
  def model_fn_wrapper(features, labels, mode, params):
    del mode
    return model_fn_old(features, labels, 'interpolate_appearance', params)
  def appearance_model_fn(features, labels, mode, params):
    del mode
    return model_fn_old(features, labels, 'compute_appearance', params)
  config = tf.estimator.RunConfig(
      save_summary_steps=1000, save_checkpoints_steps=50000,
      keep_checkpoint_max=50, log_step_count_steps=1 << 30)
  est = tf.estimator.Estimator(model_fn_wrapper, model_dir, config, params={})
  est_app = tf.estimator.Estimator(appearance_model_fn, model_dir, config,
                                   params={})

  # Compute appearance embeddings for the two input appearance images.
  app_inputs = []
  for app_basename in [st_app_basename, end_app_basename]:
    app_rgb_path = osp.join(app_input_dir, app_basename + '_reference.png')
    app_rendered_path = osp.join(app_input_dir, app_basename + '_color.png')
    app_depth_path = osp.join(app_input_dir, app_basename + '_depth.png')
    app_seg_path = osp.join(app_input_dir, app_basename + '_seg_rgb.png')
    app_in = _load_and_concatenate_image_channels(
        rgb_path=app_rgb_path, rendered_path=app_rendered_path,
        depth_path=app_depth_path, seg_path=app_seg_path)
    # app_inputs.append(tf.convert_to_tensor(app_in))
    app_inputs.append(app_in)

  embedding1 = next(est_app.predict(
      lambda: {'peek_input': app_inputs[0]}))
  embedding1 = np.expand_dims(embedding1, axis=0)
  embedding2 = next(est_app.predict(
      lambda: {'peek_input': app_inputs[1]}))
  embedding2 = np.expand_dims(embedding2, axis=0)

  file_paths = sorted(glob.glob(osp.join(camera_path_dir, '*_depth.png')))
  base_paths = [x[:-10] for x in file_paths]  # remove the '_depth.png' suffix

  # Compute interpolated appearance embeddings
  num_interpolations = len(base_paths)
  interpolated_embeddings = []
  delta_vec = (embedding2 - embedding1) / (num_interpolations - 1)
  for delta_iter in range(num_interpolations):
    x_app_embedding = embedding1 + delta_iter * delta_vec
    interpolated_embeddings.append(x_app_embedding)

  # Generate and save interpolated images
  for frame_idx, embedding in enumerate(interpolated_embeddings):
    # Read in input frame
    frame_render_path = base_paths[frame_idx] + '_color.png'
    frame_depth_path = base_paths[frame_idx] + '_depth.png'
    frame_seg_path = base_paths[frame_idx] + '_seg_rgb.png'
    x_in = _load_and_concatenate_image_channels(
        rgb_path=None, rendered_path=frame_render_path,
        depth_path=frame_depth_path, seg_path=frame_seg_path)

    img = next(est.predict(
        lambda: {'conditional_input': tf.convert_to_tensor(x_in),
                 'appearance_embedding': tf.convert_to_tensor(embedding)}))
    output_img_name = '%s_%s_%03d.png' % (st_app_basename, end_app_basename,
                                          frame_idx)
    output_img_path = osp.join(output_dir, output_img_name)
    print('Saving interpolated image to %s' % output_img_path)
    with tf.gfile.Open(output_img_path, 'wb') as f:
      f.write(utils.to_png(img))
Example #9
    def make_sample_grid_and_save(self,
                                  ops,
                                  batch_size=16,
                                  random=4,
                                  interpolation=16,
                                  height=16,
                                  save_to_disk=True):
        """

        :param ops: AEops class, including train_op
        :param batch_size:
        :param random: number of reconstructed images = random * height
        :param interpolation: number of interpolation, namely the row number of compositive image
        :param height: number of hight
        :param save_to_disk:
        :return: recon, inter, slerp, samples
        """
        # Gather images
        pool_size = random * height + 2 * height  # 96
        current_size = 0

        with tf.Graph().as_default():
            data_in = self.test_data.make_one_shot_iterator().get_next()
            with tf.Session() as sess_new:
                images = []
                while current_size < pool_size:
                    images.append(sess_new.run(data_in)['x'])
                    current_size += images[-1].shape[0]
                images = np.concatenate(images,
                                        axis=0)[:pool_size]  # [96, 32, 32, 1]

        def batched_op(op, op_input, array):
            return np.concatenate([
                self.tf_sess.run(op,
                                 feed_dict={op_input: array[x:x + batch_size]})
                for x in range(0, array.shape[0], batch_size)
            ],
                                  axis=0)

        # 1. Random reconstructions
        if random:  # not zero
            random_x = images[:random * height]  # [64, 32, 32, 1]
            random_y = batched_op(ops.ae, ops.x, random_x)
            randoms = np.concatenate(
                [random_x, random_y],
                axis=2)  # ae output: [64, 32, 32, 1] => [64, 32, 64, 1]
            image_random = utils.images_to_grid(  # [16, 4, 32, 64, 1] => [512, 256, 1]
                randoms.reshape((height, random) + randoms.shape[1:]))
        else:
            image_random = None

        # 2. Interpolations
        interpolation_x = images[-2 * height:]  # [32, 32, 32, 1]
        latent_x = batched_op(ops.encode, ops.x,
                              interpolation_x)  # [32, 4, 4, 16]
        latents = []
        for x in range(interpolation):
            latents.append((latent_x[:height] *
                            (interpolation - x - 1) + latent_x[height:] * x) /
                           float(interpolation - 1))
        latents = np.concatenate(latents, axis=0)  # [256, 4, 4, 16]
        interpolation_y = batched_op(ops.decode, ops.h,
                                     latents)  # [256, 32, 32, 1]
        interpolation_y = interpolation_y.reshape(  # [16, 16, 32, 32, 1]
            (interpolation, height) + interpolation_y.shape[1:])
        interpolation_y = interpolation_y.transpose(1, 0, 2, 3, 4)
        image_interpolation = utils.images_to_grid(
            interpolation_y)  # [512, 512, 1]

        # 3. Interpolation by slerp
        latents_slerp = []
        dots = np.sum(latent_x[:height] * latent_x[height:],
                      tuple(range(1, len(latent_x.shape))),
                      keepdims=True)  # [16, 1, 1, 1]
        norms = np.sum(latent_x * latent_x,
                       tuple(range(1, len(latent_x.shape))),
                       keepdims=True)  # [32, 1, 1, 1]
        cosine_dist = dots / np.sqrt(
            norms[:height] * norms[height:])  # [16, 1, 1, 1]
        omega = np.arccos(cosine_dist)
        for x in range(interpolation):
            t = x / float(interpolation - 1)
            latents_slerp.append(
                np.sin((1 - t) * omega) / np.sin(omega) * latent_x[:height] +
                np.sin(t * omega) / np.sin(omega) * latent_x[height:])
        latents_slerp = np.concatenate(
            latents_slerp, axis=0)  # 16 of[16, 4, 4, 16] => [256, 4, 4, 16]
        interpolation_y_slerp = batched_op(ops.decode, ops.h,
                                           latents_slerp)  # [256, 32, 32, 1]
        interpolation_y_slerp = interpolation_y_slerp.reshape(
            (interpolation, height) +
            interpolation_y_slerp.shape[1:])  # [16, 16, 32, 32, 1]
        interpolation_y_slerp = interpolation_y_slerp.transpose(1, 0, 2, 3, 4)
        image_interpolation_slerp = utils.images_to_grid(
            interpolation_y_slerp)  # [512, 512, 1]

        # 4. get decoder by random normal dist of hidden h
        random_latents = np.random.standard_normal(
            latents.shape)  # [256, 4, 4, 16]
        samples_y = batched_op(ops.decode, ops.h, random_latents)
        samples_y = samples_y.reshape((interpolation, height) +
                                      samples_y.shape[1:])
        samples_y = samples_y.transpose(1, 0, 2, 3, 4)
        image_samples = utils.images_to_grid(samples_y)  # [512, 512, 1]

        if random:  # [512, 256+512+512+512, 1]
            image = np.concatenate([
                image_random, image_interpolation, image_interpolation_slerp,
                image_samples
            ],
                                   axis=1)
        else:
            image = np.concatenate([
                image_interpolation, image_interpolation_slerp, image_samples
            ],
                                   axis=1)
        if save_to_disk:
            utils.save_images(utils.to_png(image), self.image_dir,
                              self.cur_nimg)

        return image_random, image_interpolation, image_interpolation_slerp, image_samples
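
# A standalone NumPy sketch of the slerp used in step 3 above, on flat unit
# vectors instead of [H, W, C] latents for clarity:
#   slerp(t) = sin((1-t)*w)/sin(w) * a + sin(t*w)/sin(w) * b,
# where w is the angle between a and b.
import numpy as np

a_demo = np.array([1.0, 0.0])
b_demo = np.array([0.0, 1.0])
w = np.arccos(np.dot(a_demo, b_demo) /
              (np.linalg.norm(a_demo) * np.linalg.norm(b_demo)))
t = 0.5
mid = (np.sin((1 - t) * w) / np.sin(w) * a_demo +
       np.sin(t * w) / np.sin(w) * b_demo)
assert np.allclose(np.linalg.norm(mid), 1.0)  # slerp stays on the unit circle
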