def _load_and_concatenate_image_channels(rgb_path=None,
                                         rendered_path=None,
                                         depth_path=None,
                                         seg_path=None,
                                         crop_size=512):
    if (rgb_path is None and rendered_path is None and depth_path is None
            and seg_path is None):
        raise ValueError('At least one of the inputs has to be not None')

    channels = ()
    if rgb_path is not None:
        rgb_img = np.array(Image.open(rgb_path)).astype(np.float32)
        rgb_img = utils.get_central_crop(rgb_img, crop_size, crop_size)
        channels = channels + (rgb_img, )
    if rendered_path is not None:
        rendered_img = np.array(Image.open(rendered_path)).astype(np.float32)
        rendered_img = utils.get_central_crop(rendered_img, crop_size,
                                              crop_size)
        if not opts.use_alpha:
            rendered_img = rendered_img[:, :, :3]  # drop the alpha channel
        channels = channels + (rendered_img, )
    if depth_path is not None:
        depth_img = np.array(Image.open(depth_path))
        depth_img = depth_img.astype(np.float32)
        depth_img = utils.get_central_crop(depth_img, crop_size, crop_size)
        channels = channels + (depth_img, )
    if seg_path is not None:
        seg_img = np.array(Image.open(seg_path)).astype(np.float32)
        channels = channels + (seg_img, )
    # Concatenate and normalize channels
    img = np.dstack(channels)
    img = img * (2.0 / 255) - 1.0
    return img
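
Every example in this listing relies on utils.get_central_crop, whose implementation is not shown. As a point of reference only, here is a minimal NumPy sketch of what such a central crop computes on an HxWxC array; the helper name and behavior are assumptions, not the project's own code.

import numpy as np

def central_crop_sketch(img, crop_height, crop_width):
    # Take a (crop_height, crop_width) window centered on the image.
    h, w = img.shape[:2]
    top = (h - crop_height) // 2
    left = (w - crop_width) // 2
    return img[top:top + crop_height, left:left + crop_width]

# central_crop_sketch(np.zeros((600, 800, 3)), 512, 512).shape == (512, 512, 3)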
  def next(self):
    if self.iter_idx < len(self.filenames):
      rendered_img_name = self.filenames[self.iter_idx]
      basename = rendered_img_name[:-9]  # remove the 'color.png' suffix
      ref_img_name = basename + 'reference.png'
      depth_img_name = basename + 'depth.png'
      # Read the 3D rendered image
      img_rendered = cv2.imread(rendered_img_name, cv2.IMREAD_UNCHANGED)
      # Change BGR (default cv2 format) to RGB
      img_rendered = img_rendered[:, :, [2,1,0,3]]  # it has a 4th alpha channel
      # Read the depth image
      img_depth = cv2.imread(depth_img_name, cv2.IMREAD_UNCHANGED)
      # Workaround as some depth images are read with a different data type!
      img_depth = img_depth.astype(np.uint16)
      # Read reference image if exists, otherwise replace with a zero image.
      if osp.exists(ref_img_name):
        img_ref = cv2.imread(ref_img_name)
        img_ref = img_ref[:, :, ::-1]  # Change BGR to RGB format.
      else:  # use a dummy 3-channel zero image as a placeholder
        print('Warning: no reference image found! Using a dummy placeholder!')
        img_height, img_width = img_depth.shape
        img_ref = np.zeros((img_height, img_width, 3), dtype=np.uint8)

      if self.use_semantic_map:
        semantic_seg_img_name = basename + 'seg_rgb.png'
        img_seg = cv2.imread(semantic_seg_img_name)
        img_seg = img_seg[:, :, ::-1]  # Change from BGR to RGB
        if img_seg.shape[0] == 512 and img_seg.shape[1] == 512:
          img_ref = utils.get_central_crop(img_ref)
          img_rendered = utils.get_central_crop(img_rendered)
          img_depth = utils.get_central_crop(img_depth)

      img_shape = img_depth.shape
      if self.use_semantic_map:
        assert img_seg.shape == (img_shape + (3,)), (
            'error in seg image %s %s' % (basename, str(img_seg.shape)))
      assert img_ref.shape == (img_shape + (3,)), 'error in ref image %s %s' % (
        basename, str(img_ref.shape))
      assert img_rendered.shape == (img_shape + (4,)), ('error in rendered '
        'image %s %s' % (basename, str(img_rendered.shape)))
      assert len(img_depth.shape) == 2, 'error in depth image %s %s' % (
        basename, str(img_depth.shape))

      raw_example = dict()
      raw_example['height'] = img_ref.shape[0]
      raw_example['width'] = img_ref.shape[1]
      raw_example['rendered'] = img_rendered.tostring()
      raw_example['depth'] = img_depth.tostring()
      raw_example['real'] = img_ref.tostring()
      if self.use_semantic_map:
        raw_example['seg'] = img_seg.tostring()
      self.iter_idx += 1
      return raw_example
    else:
      raise StopIteration()
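
The raw_example dict built above stores raw byte strings plus the image height and width, which is the usual shape of data headed for a TFRecord file. A hedged sketch of that serialization step follows; the helper names are assumptions, not part of the original code.

import tensorflow as tf

def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def to_tf_example(raw_example):
    # Wrap the raw byte strings and image dimensions into a tf.train.Example.
    feature = {
        'height': _int64_feature(raw_example['height']),
        'width': _int64_feature(raw_example['width']),
        'rendered': _bytes_feature(raw_example['rendered']),
        'depth': _bytes_feature(raw_example['depth']),
        'real': _bytes_feature(raw_example['real']),
    }
    if 'seg' in raw_example:
        feature['seg'] = _bytes_feature(raw_example['seg'])
    return tf.train.Example(features=tf.train.Features(feature=feature))

# A TFRecord writer would then call writer.write(to_tf_example(raw_example).SerializeToString()).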
Example #3
def compute_pairwise_style_loss_v2(image_paths_list):
    grams_all = [None] * len(image_paths_list)
    crop_height, crop_width = opts.train_resolution, opts.train_resolution
    img_var = tf.placeholder(tf.float32, shape=[1, crop_height, crop_width, 3])
    vgg_layers = ['conv%d_2' % i for i in range(1, 6)]  # conv1 through conv5
    grams_ops = compute_gram_matrices(img_var, vgg_layers)
    with tf.Session() as sess:
        for ii, img_path in enumerate(image_paths_list):
            print('Computing gram matrices for image #%d' % (ii + 1))
            img = np.array(Image.open(img_path), dtype=np.float32)
            img = img * 2. / 255. - 1  # normalize image
            img = utils.get_central_crop(img, crop_height, crop_width)
            img = np.expand_dims(img, axis=0)
            grams_all[ii] = sess.run(grams_ops, feed_dict={img_var: img})
    print('Number of images = %d' % len(grams_all))
    print('Gram matrices per image:')
    for i in range(len(grams_all[0])):
        print('gram_matrix[%d].shape = %s' % (i, grams_all[0][i].shape))
    n_imgs = len(grams_all)
    dist_matrix = np.zeros((n_imgs, n_imgs))
    for i in range(n_imgs):
        print('Computing distances for image #%d' % i)
        for j in range(i + 1, n_imgs):
            loss_style = 0
            # Compute loss using all gram matrices from all layers
            for gram_i, gram_j in zip(grams_all[i], grams_all[j]):
                loss_style += np.mean((gram_i - gram_j)**2, axis=(1, 2))
            dist_matrix[i][j] = dist_matrix[j][i] = loss_style

    return dist_matrix
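
A hedged usage sketch for compute_pairwise_style_loss_v2; the directory below is hypothetical, and opts.train_resolution plus the VGG-based compute_gram_matrices helper are assumed to be importable from the surrounding project.

import glob
import numpy as np

style_paths = sorted(glob.glob('/data/style_images/*.png'))  # hypothetical directory
dist_matrix = compute_pairwise_style_loss_v2(style_paths)

# Pick each image's closest style neighbor, ignoring the zero diagonal.
np.fill_diagonal(dist_matrix, np.inf)
nearest_neighbor = np.argmin(dist_matrix, axis=1)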
Example #4
def segment_images(images_path,
                   xception_frozen_graph_path,
                   save_dir,
                   crop_height=512,
                   crop_width=512):
    if not osp.exists(xception_frozen_graph_path):
        raise OSError('Xception frozen graph not found at %s' %
                      xception_frozen_graph_path)
    with tf.gfile.GFile(xception_frozen_graph_path, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    with tf.Graph().as_default() as graph:
        new_input = tf.placeholder(tf.uint8, [1, crop_height, crop_width, 3],
                                   name="new_input")
        tf.import_graph_def(graph_def,
                            input_map={"ImageTensor:0": new_input},
                            return_elements=None,
                            name="sem_seg",
                            op_dict=None,
                            producer_op_list=None)

    corrupted_dir = osp.join(save_dir, 'corrupted')
    if not osp.exists(corrupted_dir):
        os.makedirs(corrupted_dir)
    with tf.Session(graph=graph) as sess:
        for i, img_path in enumerate(images_path):
            print('Segmenting image %05d / %05d: %s' %
                  (i + 1, len(images_path), img_path))
            img = np.array(Image.open(img_path))
            if len(img.shape) == 2 or img.shape[2] != 3:
                print('Warning! corrupted image %s' % img_path)
                # Remove the '_reference.png' suffix to get the base path.
                img_base_path = img_path[:-14]
                srcs = sorted(glob.glob(img_base_path + '_*'))
                dest = corrupted_dir + '/.'
                for src in srcs:
                    shutil.move(src, dest)
                continue
            img = utils.get_central_crop(img,
                                         crop_height=crop_height,
                                         crop_width=crop_width)
            img = np.expand_dims(img, 0)  # convert to NHWC format
            seg = sess.run("sem_seg/SemanticPredictions:0",
                           feed_dict={new_input: img})
            assert np.max(seg) <= 255, 'segmentation image is not of type uint8!'
            seg = np.squeeze(np.uint8(seg))  # convert to uint8 and squeeze to HxW.
            parent_dir, filename = osp.split(img_path)
            basename, ext = osp.splitext(filename)
            basename = basename[:-10]  # remove the '_reference' suffix
            seg_filename = basename + "_seg.png"
            seg_filepath = osp.join(save_dir, seg_filename)
            # Save segmentation image
            Image.fromarray(seg).save(seg_filepath)
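
A hedged call example for segment_images; every path below is hypothetical, and the frozen graph is assumed to be a DeepLab/Xception export that exposes the ImageTensor and SemanticPredictions tensors the function wires up.

import glob

reference_images = sorted(glob.glob('/data/scenes/*_reference.png'))  # hypothetical paths
segment_images(reference_images,
               xception_frozen_graph_path='/models/xception/frozen_inference_graph.pb',
               save_dir='/data/scenes/segmentations')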
Example #5
    def conv_model(self, inputs, step):
        """
        Construct the CNN
        Args:
            inputs: Tensor with shape [n, num_landmarks, patch_shape, patch_shape, 3]
            step(int): RNN step
        Returns:
            A tuple (inputs, net): the flattened conv features for this step and
            a dict of intermediate layer activations.
        """
        net = {}
        with tf.name_scope('mdm_conv{}'.format(step), values=[inputs]):
            inputs = tf.reshape(
                inputs,
                (self.batch_size * self.num_patches, self.patch_shape[0], self.patch_shape[1], self.num_channels)
            )
            # Convolution 1
            inputs = self.conv2d_bn(inputs, name='conv_1')
            self.visualize_cnn_mean(step, inputs, 'conv_1')
            net['conv_1'] = inputs
            inputs = tf.layers.max_pooling2d(inputs, [2, 2], [2, 2])
            net['pool_1'] = inputs

            # Convolution 2
            inputs = self.conv2d_bn(inputs, name='conv_2')
            self.visualize_cnn_mean(step, inputs, 'conv_2')
            net['conv_2'] = inputs
            inputs = tf.layers.max_pooling2d(inputs, [2, 2], [2, 2])
            net['pool_2'] = inputs

            # Convolution 3
            inputs = self.conv2d_bn(inputs, name='conv_3')
            self.visualize_cnn_mean(step, inputs, 'conv_3')
            net['conv_3'] = inputs
            inputs = tf.layers.max_pooling2d(inputs, [2, 2], [2, 2])
            net['pool_3'] = inputs

            # Crop
            crop_size = inputs.get_shape().as_list()[1:3]
            cropped = utils.get_central_crop(net['conv_3'], box=crop_size)
            net['conv_3_cropped'] = cropped
            inputs = tf.concat([cropped, inputs], 3)

            # Flatten
            inputs = tf.reshape(inputs, (self.batch_size, -1))
        net['concat'] = inputs
        return inputs, net
Example #6
def conv_model(inputs, is_training=True, scope=''):

  # summaries or losses.
  net = {}

  with tf.op_scope([inputs], scope, 'mdm_conv'):
    with scopes.arg_scope([ops.conv2d, ops.fc], is_training=is_training):
      with scopes.arg_scope([ops.conv2d], activation=tf.nn.relu, padding='VALID'):
        net['conv_1'] = ops.conv2d(inputs, 32, [3, 3], scope='conv_1')
        net['pool_1'] = ops.max_pool(net['conv_1'], [2, 2])
        net['conv_2'] = ops.conv2d(net['pool_1'], 32, [3, 3], scope='conv_2')
        net['pool_2'] = ops.max_pool(net['conv_2'], [2, 2])

        crop_size = net['pool_2'].get_shape().as_list()[1:3]
        net['conv_2_cropped'] = utils.get_central_crop(net['conv_2'], box=crop_size)

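        # Note: pre-TF-1.0 argument order (axis first); TF 1.x takes tf.concat(values, axis).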
        net['concat'] = tf.concat(3, [net['conv_2_cropped'], net['pool_2']])
  return net
Example #7
def conv_model(inputs, is_training=True, scope=''):
    # summaries or losses.
    net = {}

    with tf.name_scope(scope, 'mdm_conv', [inputs]):  # prefix the ops below with 'mdm_conv'; the with-block handles scope cleanup
        with scopes.arg_scope([ops.conv2d, ops.fc], is_training=is_training):
            with scopes.arg_scope([ops.conv2d], activation=tf.nn.relu, padding='VALID'):
                net['conv_1'] = ops.conv2d(inputs, 32, [3, 3], scope='conv_1')
                net['pool_1'] = ops.max_pool(net['conv_1'], [2, 2])
                net['conv_2'] = ops.conv2d(net['pool_1'], 32, [3, 3], scope='conv_2')
                net['pool_2'] = ops.max_pool(net['conv_2'], [2, 2])
                # Two conv layers, each with 32 filters and 3x3 kernels.
                # Each conv layer is followed by a 2x2 max-pooling layer.
                crop_size = net['pool_2'].get_shape().as_list()[1:3]
                net['conv_2_cropped'] = utils.get_central_crop(net['conv_2'], box=crop_size)
                # The central crop of the conv_2 activations is concatenated with the
                # pool_2 output via a skip connection, preserving local information
                # that the max-pooling layer would otherwise discard.
                net['concat'] = tf.concat([net['conv_2_cropped'], net['pool_2']], 3)  # axis=3
    return net
Example #8
def conv_model(inputs, is_training=True, scope=''):
    # summaries or losses.
    net = {}

    with tf.name_scope(scope, 'rdn_conv', [inputs]):
        with scopes.arg_scope([ops.conv2d, ops.fc], is_training=is_training):
            with scopes.arg_scope([ops.conv2d],
                                  activation=tf.nn.relu,
                                  padding='VALID'):
                net['conv_1'] = ops.conv2d(inputs, 32, [3, 3], scope='conv_1')
                net['pool_1'] = ops.max_pool(net['conv_1'], [2, 2])
                net['conv_2'] = ops.conv2d(net['pool_1'],
                                           32, [3, 3],
                                           scope='conv_2')
                net['pool_2'] = ops.max_pool(net['conv_2'], [2, 2])

                crop_size = net['pool_2'].get_shape().as_list()[1:3]
                net['conv_2_cropped'] = utils.get_central_crop(net['conv_2'],
                                                               box=crop_size)

                net['concat'] = tf.concat(
                    [net['conv_2_cropped'], net['pool_2']], 3)
    return net
Example #9
  def next(self):
    if self.iter_idx < len(self.filenames):
      rendered_img_name = self.filenames[self.iter_idx]
      basename = rendered_img_name[:-9]  # remove the 'color.png' suffix
      ref_img_name = basename + 'reference.png'
      depth_img_name = basename + 'depth.png'
      normal_img_name = basename + 'normal.png'
      wc_img_name = basename + 'wc.png'
      point_json_name = basename + 'point.txt'
      # Read the 3D rendered image
      img_rendered = cv2.imread(rendered_img_name, cv2.IMREAD_UNCHANGED)
      # Change BGR (default cv2 format) to RGB
      img_rendered = img_rendered[:, :, [2,1,0]]
      # Read the depth image
      img_depth = cv2.imread(depth_img_name, cv2.IMREAD_UNCHANGED)
      # Workaround as some depth images are read with a different data type!
      img_depth = img_depth.astype(np.uint16)
      # Read reference image if exists, otherwise replace with a zero image.
      if osp.exists(ref_img_name):
        img_ref = cv2.imread(ref_img_name)
        img_ref = img_ref[:, :, ::-1]  # Change BGR to RGB format.
      else:  # use a dummy 3-channel zero image as a placeholder
        print('Warning: no reference image found! Using a dummy placeholder!')
        img_height, img_width = img_depth.shape
        img_ref = np.zeros((img_height, img_width, 3), dtype=np.uint8)
      
      if osp.exists(normal_img_name):
        img_normal = cv2.imread(normal_img_name, cv2.IMREAD_UNCHANGED)
      else:
        print('Warning: no normal image found! Using a dummy placeholder!')
        img_height, img_width = img_depth.shape
        img_normal = np.zeros((img_height, img_width, 3), dtype=np.uint8)

      if osp.exists(wc_img_name):
        img_wc = cv2.imread(wc_img_name, cv2.IMREAD_UNCHANGED)
      else:
        print('Warning: no wc image found! Using a dummy placeholder!')
        img_height, img_width = img_depth.shape
        img_wc = np.zeros((img_height, img_width, 3), dtype=np.uint8)
      
      if osp.exists(point_json_name):
        with open(point_json_name) as json_file:
          json_point = json.load(json_file)
        json_point = {ast.literal_eval(k): v for k, v in json_point.items()}
        img_height, img_width = img_depth.shape
        img_point = np.full((img_height, img_width), 0, dtype=np.uint8)
        for x, y in json_point:
          img_point[y][x] = json_point[(x,y)] + 1 # NOTE bug in the zbuffer algo
      else:
        print('Warning: no point json found! Using a dummy placeholder!')
        img_height, img_width = img_depth.shape
        img_point = np.full((img_height, img_width), 0, dtype=np.uint8)
      
      if self.use_semantic_map:
        semantic_seg_img_name = basename + 'seg_rgb.png'
        img_seg = cv2.imread(semantic_seg_img_name)
        img_seg = img_seg[:, :, ::-1]  # Change from BGR to RGB
        if img_seg.shape[0] == 512 and img_seg.shape[1] == 512:
          img_ref = utils.get_central_crop(img_ref)
          img_rendered = utils.get_central_crop(img_rendered)
          img_depth = utils.get_central_crop(img_depth)
          img_normal = utils.get_central_crop(img_normal)
          img_wc = utils.get_central_crop(img_wc)
          img_point = utils.get_central_crop(img_point)

      img_shape = img_depth.shape
      if self.use_semantic_map:
        assert img_seg.shape == (img_shape + (3,)), (
            'error in seg image %s %s' % (basename, str(img_seg.shape)))
      assert img_ref.shape == (img_shape + (3,)), 'error in ref image %s %s' % (
        basename, str(img_ref.shape))
      assert img_rendered.shape == (img_shape + (3,)), ('error in rendered '
        'image %s %s' % (basename, str(img_rendered.shape)))
      assert img_normal.shape == (img_shape + (3,)), ('error in normal '
        'image %s %s' % (basename, str(img_normal.shape)))
      assert img_wc.shape == (img_shape + (3,)), 'error in wc image %s %s' % (
        basename, str(img_wc.shape))
      assert len(img_depth.shape) == 2, 'error in depth image %s %s' % (
        basename, str(img_depth.shape))
      assert len(img_point.shape) == 2, 'error in point image %s %s' % (
        basename, str(img_point.shape))

      raw_example = dict()
      raw_example['height'] = img_ref.shape[0]
      raw_example['width'] = img_ref.shape[1]
      raw_example['rendered'] = img_rendered.tostring()
      raw_example['depth'] = img_depth.tostring()
      raw_example['real'] = img_ref.tostring()
      raw_example['normal'] = img_normal.tostring()
      raw_example['wc'] = img_wc.tostring()
      raw_example['point'] = img_point.tostring()
      if self.use_semantic_map:
        raw_example['seg'] = img_seg.tostring()
      self.iter_idx += 1
      return raw_example
    else:
      raise StopIteration()