Example #1
0
def main():
    """Streamlit entry point: invert an uploaded image with StyleGANInverter.

    Relies on module-level globals defined elsewhere in the file:
    `model_name`, `mode`, `ini_lr`, `step`, `lambda_l2`, `lambda_feat`,
    `lambda_enc`, `lambda_clip`, `description`, and `uploaded_file`
    (a Streamlit file-upload handle) -- none of them are visible here.
    """

    # Build the inverter from the globally-configured hyper-parameters.
    inverter = StyleGANInverter(model_name,
                                mode=mode,
                                learning_rate=ini_lr,
                                iteration=step,
                                reconstruction_loss_weight=lambda_l2,
                                perceptual_loss_weight=lambda_feat,
                                regularization_loss_weight=lambda_enc,
                                clip_loss_weight=lambda_clip,
                                description=description)
    # Generator resolution determines the working image size.
    image_size = inverter.G.resolution

    # NOTE(review): `text_inputs` is computed but never used in this
    # function -- presumably a leftover from CLIP-guided experimentation.
    text_inputs = torch.cat([clip.tokenize(description)]).cuda()

    # Invert images.
    # uploaded_file = uploaded_file.read()
    if uploaded_file is not None:
        image = Image.open(uploaded_file)
        # st.image(image, caption='Uploaded Image.', use_column_width=True)
        # st.write("")
        st.write("Just a second...")

        # Resize to the generator resolution before inversion.
        image = resize_image(np.array(image), (image_size, image_size))
        _, viz_results = inverter.easy_invert(image, 1)
        # In 'man' mode show the original next to the final inversion;
        # otherwise show encoder output next to the final inversion.
        # (viz_results[1] assumed to be the encoder output -- TODO confirm.)
        if mode == 'man':
            final_result = np.hstack([image, viz_results[-1]])
        else:
            final_result = np.hstack([viz_results[1], viz_results[-1]])

        # return final_result
        # NOTE(review): `st.beta_container` is a pre-1.0 Streamlit API;
        # modern Streamlit renames it to `st.container` -- confirm the
        # pinned Streamlit version before changing.
        with st.beta_container():
            st.image(final_result, use_column_width=True)
Example #2
0
File: invert.py  Project: NivC/TediGAN
def main():
    """Invert a single image into the StyleGAN latent space.

    Parses CLI arguments, builds a `StyleGANInverter`, inverts the image at
    `args.image_path`, and writes the encoder output (`*_enc.png`) and the
    final optimized inversion (`*_inv.png`) into `args.output_dir`
    (defaulting to `results/inversion/test`).
    """
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    assert os.path.isfile(args.image_path)
    # Fixed: dropped a needless f-string prefix on a placeholder-free literal.
    output_dir = args.output_dir or 'results/inversion/test'
    # Fixed: `exist_ok=True` replaces the racy exists()/makedirs() pair.
    os.makedirs(output_dir, exist_ok=True)

    inverter = StyleGANInverter(
        args.model_name,
        mode=args.mode,
        learning_rate=args.learning_rate,
        iteration=args.num_iterations,
        reconstruction_loss_weight=1.0,
        perceptual_loss_weight=args.loss_weight_feat,
        regularization_loss_weight=args.loss_weight_enc,
        clip_loss_weight=args.loss_weight_clip,
        description=args.description,
        logger=None)
    image_size = inverter.G.resolution

    # Invert the given image at the generator's native resolution.
    image = resize_image(load_image(args.image_path), (image_size, image_size))
    _, viz_results = inverter.easy_invert(image, num_viz=args.num_results)

    # In 'man' mode keep the source file's base name; otherwise label the
    # output as a generated sample.  NOTE(review): semantics of `mode`
    # inferred from usage -- confirm against StyleGANInverter.
    if args.mode == 'man':
        image_name = os.path.splitext(os.path.basename(args.image_path))[0]
    else:
        image_name = 'gen'
    # viz_results[1] assumed to be the encoder output, [-1] the final
    # optimized inversion -- TODO confirm easy_invert's contract.
    save_image(f'{output_dir}/{image_name}_enc.png', viz_results[1])
    save_image(f'{output_dir}/{image_name}_inv.png', viz_results[-1])
    print(f'save {image_name} in {output_dir}')
def main():
  """Batch-invert every image listed in `args.image_list`.

  For each image: run `StyleGANInverter.easy_invert`, save the original,
  the encoder output and the final inversion as PNGs, collect intermediate
  results into an HTML page, and stack all latent codes into
  `inverted_codes.npy`.
  """
  args = parse_args()
  os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
  assert os.path.exists(args.image_list)
  image_list_name = os.path.splitext(os.path.basename(args.image_list))[0]
  output_dir = args.output_dir or f'results/inversion/{image_list_name}'
  logger = setup_logger(output_dir, 'inversion.log', 'inversion_logger')

  logger.info(f'Loading model.')
  inverter = StyleGANInverter(
      args.model_name,
      learning_rate=args.learning_rate,
      iteration=args.num_iterations,
      reconstruction_loss_weight=1.0,
      perceptual_loss_weight=args.loss_weight_feat,
      regularization_loss_weight=args.loss_weight_enc,
      logger=logger)
  image_size = inverter.G.resolution

  # Load image list (one path per line).
  logger.info(f'Loading image list.')
  image_list = []
  with open(args.image_list, 'r') as f:
    for line in f:
      image_list.append(line.strip())

  # Initialize visualizer: one column per intermediate result that
  # easy_invert will emit (every `save_interval` steps plus the last step).
  save_interval = args.num_iterations // args.num_results
  headers = ['Name', 'Original Image', 'Encoder Output']
  for step in range(1, args.num_iterations + 1):
    if step == args.num_iterations or step % save_interval == 0:
      headers.append(f'Step {step:06d}')
  viz_size = None if args.viz_size == 0 else args.viz_size
  visualizer = HtmlPageVisualizer(
      num_rows=len(image_list), num_cols=len(headers), viz_size=viz_size)
  visualizer.set_headers(headers)

  # Invert images.
  logger.info(f'Start inversion.')
  latent_codes = []
  for img_idx in tqdm(range(len(image_list)), leave=False):
    image_path = image_list[img_idx]
    image_name = os.path.splitext(os.path.basename(image_path))[0]
    image = resize_image(load_image(image_path), (image_size, image_size))
    # viz_results[1] assumed to be the encoder output and [-1] the final
    # optimized inversion -- TODO confirm easy_invert's contract.
    code, viz_results = inverter.easy_invert(image, num_viz=args.num_results)
    latent_codes.append(code)
    save_image(f'{output_dir}/{image_name}_ori.png', image)
    save_image(f'{output_dir}/{image_name}_enc.png', viz_results[1])
    save_image(f'{output_dir}/{image_name}_inv.png', viz_results[-1])
    visualizer.set_cell(img_idx, 0, text=image_name)
    visualizer.set_cell(img_idx, 1, image=image)
    for viz_idx, viz_img in enumerate(viz_results[1:]):
      visualizer.set_cell(img_idx, viz_idx + 2, image=viz_img)

  # Save results (copy of the input list, stacked codes, HTML page).
  os.system(f'cp {args.image_list} {output_dir}/image_list.txt')
  np.save(f'{output_dir}/inverted_codes.npy',
          np.concatenate(latent_codes, axis=0))
  visualizer.save(f'{output_dir}/inversion.html')
Example #4
0
    def predict(self, image, description):
        """Invert `image` guided by `description`; return the result PNG path.

        Stores both inputs on `self.args`, runs a full StyleGAN inversion,
        and writes the final frame into a fresh temporary directory.
        """
        self.args.description = description
        self.args.image_path = image

        inverter = StyleGANInverter(
            self.args.model_name,
            mode=self.args.mode,
            learning_rate=self.args.learning_rate,
            iteration=self.args.num_iterations,
            reconstruction_loss_weight=1.0,
            perceptual_loss_weight=self.args.loss_weight_feat,
            regularization_loss_weight=self.args.loss_weight_enc,
            clip_loss_weight=self.args.loss_weight_clip,
            description=self.args.description,
            logger=None,
        )
        resolution = inverter.G.resolution

        # Load the target and resize it to the generator's native resolution.
        target = load_image(str(self.args.image_path))
        target = resize_image(target, (resolution, resolution))
        _, frames = inverter.easy_invert(target, num_viz=self.args.num_results)

        # Persist only the final inversion frame and hand back its path.
        result_path = Path(tempfile.mkdtemp()) / "out.png"
        save_image(str(result_path), frames[-1])
        return result_path
Example #5
0
def load_test_images(dir, image_size=128):
    """Load every test image under `dir`, resized to a square.

    Args:
        dir: Directory scanned by `get_test_images_paths`.  (Parameter name
            kept for backward compatibility although it shadows the builtin.)
        image_size: Edge length in pixels of the returned square images.

    Returns:
        List of resized images, in the order `get_test_images_paths` yields.
    """
    # A comprehension replaces the manual append loop; the original's
    # enumerate index was never used.
    return [
        resize_image(load_image(image_path), (image_size, image_size))
        for image_path in get_test_images_paths(dir)
    ]
Example #6
0
def main():
    """Batch in-domain inversion with a raw TensorFlow 1.x graph.

    Loads a pickled (encoder E, generator Gs) pair, builds an optimization
    graph over a latent variable `wp`, initializes it either randomly or
    from the encoder output, then per batch: runs the encoder, optimizes
    `wp` with Adam, and saves originals, encoder reconstructions, final
    inversions, stacked latent codes, and an HTML visualization page.
    """
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    assert os.path.exists(args.image_list)
    image_list_name = os.path.splitext(os.path.basename(args.image_list))[0]
    output_dir = args.output_dir or f'results/inversion/{image_list_name}'
    logger = setup_logger(output_dir, 'inversion.log', 'inversion_logger')

    logger.info(f'Loading model.')
    tflib.init_tf({'rnd.np_random_seed': 1000})
    # NOTE(review): unpickling a model file executes arbitrary code --
    # only load trusted checkpoints.
    with open(args.model_path, 'rb') as f:
        E, _, _, Gs = pickle.load(f)

    # Get input size (encoder expects square NCHW inputs).
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]

    # Build graph.
    logger.info(f'Building graph.')
    sess = tf.get_default_session()
    input_shape = E.input_shape
    input_shape[0] = args.batch_size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    # Map [-1, 1] NCHW to [0, 255] NHWC for the perceptual network.
    x_255 = (tf.transpose(x, [0, 2, 3, 1]) + 1) / 2 * 255
    latent_shape = Gs.components.synthesis.input_shape
    latent_shape[0] = args.batch_size
    # `wp` is the optimization variable holding the latent codes.
    wp = tf.get_variable(shape=latent_shape, name='latent_code')
    x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    x_rec_255 = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) / 2 * 255
    if args.random_init:
        logger.info(f'  Use random initialization for optimization.')
        wp_rnd = tf.random.normal(shape=latent_shape, name='latent_code_init')
        setter = tf.assign(wp, wp_rnd)
    else:
        logger.info(
            f'  Use encoder output as the initialization for optimization.')
        w_enc = E.get_output_for(x, is_training=False)
        wp_enc = tf.reshape(w_enc, latent_shape)
        setter = tf.assign(wp, wp_enc)

    # Settings for optimization: pixel + perceptual (+ optional encoder
    # domain-regularization) losses, minimized over `wp` only.
    logger.info(f'Setting configuration for optimization.')
    perceptual_model = PerceptualModel([image_size, image_size], False)
    x_feat = perceptual_model(x_255)
    x_rec_feat = perceptual_model(x_rec_255)
    loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
    loss_pix = tf.reduce_mean(tf.square(x - x_rec), axis=[1, 2, 3])
    if args.domain_regularizer:
        logger.info(f'  Involve encoder for optimization.')
        w_enc_new = E.get_output_for(x_rec, is_training=False)
        wp_enc_new = tf.reshape(w_enc_new, latent_shape)
        loss_enc = tf.reduce_mean(tf.square(wp - wp_enc_new), axis=[1, 2])
    else:
        logger.info(f'  Do NOT involve encoder for optimization.')
        loss_enc = 0
    loss = (loss_pix + args.loss_weight_feat * loss_feat +
            args.loss_weight_enc * loss_enc)
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    train_op = optimizer.minimize(loss, var_list=[wp])
    tflib.init_uninitialized_vars()

    # Load image list (one path per line).
    logger.info(f'Loading image list.')
    image_list = []
    with open(args.image_list, 'r') as f:
        for line in f:
            image_list.append(line.strip())

    # Invert images.  One visualizer column per saved optimization step.
    logger.info(f'Start inversion.')
    save_interval = args.num_iterations // args.num_results
    headers = ['Name', 'Original Image', 'Encoder Output']
    for step in range(1, args.num_iterations + 1):
        if step == args.num_iterations or step % save_interval == 0:
            headers.append(f'Step {step:06d}')
    viz_size = None if args.viz_size == 0 else args.viz_size
    visualizer = HtmlPageVisualizer(num_rows=len(image_list),
                                    num_cols=len(headers),
                                    viz_size=viz_size)
    visualizer.set_headers(headers)

    images = np.zeros(input_shape, np.uint8)
    names = ['' for _ in range(args.batch_size)]
    latent_codes_enc = []
    latent_codes = []
    for img_idx in tqdm(range(0, len(image_list), args.batch_size),
                        leave=False):
        # Load inputs.  NOTE(review): when the last batch is short, stale
        # rows of `images` from the previous batch still feed the graph;
        # only the first len(batch) outputs are kept below.
        batch = image_list[img_idx:img_idx + args.batch_size]
        for i, image_path in enumerate(batch):
            image = resize_image(load_image(image_path),
                                 (image_size, image_size))
            images[i] = np.transpose(image, [2, 0, 1])
            names[i] = os.path.splitext(os.path.basename(image_path))[0]
        # uint8 [0, 255] HWC -> float [-1, 1] expected by the encoder.
        inputs = images.astype(np.float32) / 255 * 2.0 - 1.0
        # Run encoder to (re-)initialize `wp`, then snapshot its output.
        sess.run([setter], {x: inputs})
        outputs = sess.run([wp, x_rec])
        latent_codes_enc.append(outputs[0][0:len(batch)])
        outputs[1] = adjust_pixel_range(outputs[1])
        for i, _ in enumerate(batch):
            image = np.transpose(images[i], [1, 2, 0])
            save_image(f'{output_dir}/{names[i]}_ori.png', image)
            save_image(f'{output_dir}/{names[i]}_enc.png', outputs[1][i])
            visualizer.set_cell(i + img_idx, 0, text=names[i])
            visualizer.set_cell(i + img_idx, 1, image=image)
            visualizer.set_cell(i + img_idx, 2, image=outputs[1][i])
        # Optimize latent codes, logging snapshots every `save_interval`.
        col_idx = 3
        for step in tqdm(range(1, args.num_iterations + 1), leave=False):
            sess.run(train_op, {x: inputs})
            if step == args.num_iterations or step % save_interval == 0:
                outputs = sess.run([wp, x_rec])
                outputs[1] = adjust_pixel_range(outputs[1])
                for i, _ in enumerate(batch):
                    if step == args.num_iterations:
                        save_image(f'{output_dir}/{names[i]}_inv.png',
                                   outputs[1][i])
                    visualizer.set_cell(i + img_idx,
                                        col_idx,
                                        image=outputs[1][i])
                col_idx += 1
        # `outputs` here is from the final snapshot of the inner loop.
        latent_codes.append(outputs[0][0:len(batch)])

    # Save results.
    os.system(f'cp {args.image_list} {output_dir}/image_list.txt')
    np.save(f'{output_dir}/encoded_codes.npy',
            np.concatenate(latent_codes_enc, axis=0))
    np.save(f'{output_dir}/inverted_codes.npy',
            np.concatenate(latent_codes, axis=0))
    visualizer.save(f'{output_dir}/inversion.html')
Example #7
0
def main():
    """Semantic diffusion of target crops into context images (TF 1.x).

    For every (target, context) pair: stitch a center crop of the target
    onto the context image, initialize latent codes from the encoder, and
    optimize them so the generator reproduces the stitched region (masked
    pixel + perceptual losses).  Saves stacked latent codes and an HTML
    visualization page.
    """
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    assert os.path.exists(args.target_list)
    target_list_name = os.path.splitext(os.path.basename(args.target_list))[0]
    assert os.path.exists(args.context_list)
    context_list_name = os.path.splitext(os.path.basename(
        args.context_list))[0]
    output_dir = args.output_dir or f'results/diffusion'
    job_name = f'{target_list_name}_TO_{context_list_name}'
    logger = setup_logger(output_dir, f'{job_name}.log', f'{job_name}_logger')

    logger.info(f'Loading model.')
    tflib.init_tf({'rnd.np_random_seed': 1000})
    # NOTE(review): unpickling executes arbitrary code -- trusted files only.
    with open(args.model_path, 'rb') as f:
        E, _, _, Gs = pickle.load(f)

    # Get input size and build the binary mask selecting the crop region
    # centered at (center_x, center_y).
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]
    crop_size = args.crop_size
    crop_x = args.center_x - crop_size // 2
    crop_y = args.center_y - crop_size // 2
    mask = np.zeros((1, image_size, image_size, 3), dtype=np.float32)
    mask[:, crop_y:crop_y + crop_size, crop_x:crop_x + crop_size, :] = 1.0

    # Build graph.  Losses only see the masked region.
    logger.info(f'Building graph.')
    sess = tf.get_default_session()
    input_shape = E.input_shape
    input_shape[0] = args.batch_size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    x_mask = (tf.transpose(x, [0, 2, 3, 1]) + 1) * mask - 1
    x_mask_255 = (x_mask + 1) / 2 * 255
    latent_shape = Gs.components.synthesis.input_shape
    latent_shape[0] = args.batch_size
    wp = tf.get_variable(shape=latent_shape, name='latent_code')
    x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    x_rec_mask = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) * mask - 1
    x_rec_mask_255 = (x_rec_mask + 1) / 2 * 255

    # Encoder output initializes the latent variable `wp`.
    w_enc = E.get_output_for(x, phase=False)
    wp_enc = tf.reshape(w_enc, latent_shape)
    setter = tf.assign(wp, wp_enc)

    # Settings for optimization (no domain regularizer in diffusion).
    logger.info(f'Setting configuration for optimization.')
    perceptual_model = PerceptualModel([image_size, image_size], False)
    x_feat = perceptual_model(x_mask_255)
    x_rec_feat = perceptual_model(x_rec_mask_255)
    loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
    loss_pix = tf.reduce_mean(tf.square(x_mask - x_rec_mask), axis=[1, 2, 3])

    loss = loss_pix + args.loss_weight_feat * loss_feat
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    train_op = optimizer.minimize(loss, var_list=[wp])
    tflib.init_uninitialized_vars()

    # Load image list (one path per line in each list file).
    logger.info(f'Loading target images and context images.')
    target_list = []
    with open(args.target_list, 'r') as f:
        for line in f:
            target_list.append(line.strip())
    num_targets = len(target_list)
    context_list = []
    with open(args.context_list, 'r') as f:
        for line in f:
            context_list.append(line.strip())
    num_contexts = len(context_list)
    num_pairs = num_targets * num_contexts

    # Invert images.  One visualizer row per (target, context) pair.
    logger.info(f'Start diffusion.')
    save_interval = args.num_iterations // args.num_results
    headers = [
        'Target Image', 'Context Image', 'Stitched Image', 'Encoder Output'
    ]
    for step in range(1, args.num_iterations + 1):
        if step == args.num_iterations or step % save_interval == 0:
            headers.append(f'Step {step:06d}')
    viz_size = None if args.viz_size == 0 else args.viz_size
    visualizer = HtmlPageVisualizer(num_rows=num_pairs,
                                    num_cols=len(headers),
                                    viz_size=viz_size)
    visualizer.set_headers(headers)

    images = np.zeros(input_shape, np.uint8)
    latent_codes_enc = []
    latent_codes = []
    for target_idx in tqdm(range(num_targets), desc='Target ID', leave=False):
        # Load target once per outer iteration.
        target_image = resize_image(load_image(target_list[target_idx]),
                                    (image_size, image_size))
        visualizer.set_cell(target_idx * num_contexts, 0, image=target_image)
        for context_idx in tqdm(range(0, num_contexts, args.batch_size),
                                desc='Context ID',
                                leave=False):
            row_idx = target_idx * num_contexts + context_idx
            batch = context_list[context_idx:context_idx + args.batch_size]
            for i, context_image_path in enumerate(batch):
                context_image = resize_image(load_image(context_image_path),
                                             (image_size, image_size))
                visualizer.set_cell(row_idx + i, 1, image=context_image)
                # Paste the target's crop region into the context image.
                context_image[crop_y:crop_y + crop_size, crop_x:crop_x +
                              crop_size] = (target_image[crop_y:crop_y +
                                                         crop_size,
                                                         crop_x:crop_x +
                                                         crop_size])
                visualizer.set_cell(row_idx + i, 2, image=context_image)
                images[i] = np.transpose(context_image, [2, 0, 1])
            # uint8 [0, 255] -> float [-1, 1] expected by the encoder.
            inputs = images.astype(np.float32) / 255 * 2.0 - 1.0
            # Run encoder to initialize `wp`, then snapshot its output.
            sess.run([setter], {x: inputs})
            outputs = sess.run([wp, x_rec])
            latent_codes_enc.append(outputs[0][0:len(batch)])
            outputs[1] = adjust_pixel_range(outputs[1])
            for i, _ in enumerate(batch):
                visualizer.set_cell(row_idx + i, 3, image=outputs[1][i])
            # Optimize latent codes, logging every `save_interval` steps.
            col_idx = 4
            for step in tqdm(range(1, args.num_iterations + 1), leave=False):
                sess.run(train_op, {x: inputs})
                if step == args.num_iterations or step % save_interval == 0:
                    outputs = sess.run([wp, x_rec])
                    outputs[1] = adjust_pixel_range(outputs[1])
                    for i, _ in enumerate(batch):
                        visualizer.set_cell(row_idx + i,
                                            col_idx,
                                            image=outputs[1][i])
                    col_idx += 1
            latent_codes.append(outputs[0][0:len(batch)])

    # Save results, reshaped to (num_targets, num_contexts, ...).
    code_shape = [num_targets, num_contexts] + list(latent_shape[1:])
    np.save(f'{output_dir}/{job_name}_encoded_codes.npy',
            np.concatenate(latent_codes_enc, axis=0).reshape(code_shape))
    np.save(f'{output_dir}/{job_name}_inverted_codes.npy',
            np.concatenate(latent_codes, axis=0).reshape(code_shape))
    visualizer.save(f'{output_dir}/{job_name}.html')
    # Load the predictor
    predictor = dlib.shape_predictor("./shape_predictor_68_face_landmarks.dat")

    # Convert image into grayscale
    gray = cv2.cvtColor(src=img, code=cv2.COLOR_BGR2GRAY)
    # Use detector to find landmarks
    faces = detector(gray)
    points = np.arange(54).reshape(1, 27, 2)
    for face in faces:
        x1 = face.left()  # left point
        y1 = face.top()  # top point
        x2 = face.right()  # right point
        y2 = face.bottom()  # bottom point
        # Create landmark object
        landmarks = predictor(image=gray, box=face)
        # Loop through all the points
        cnt = 0
        for n in range(0, 27):
            x = landmarks.part(n).x
            y = landmarks.part(n).y
            if n > 16:
                n = 26 - cnt
                cnt += 1
            points[0][n] = [x, y]

    return points

if __name__ == "__main__":
    # Fixed: the original contained a duplicated assignment (`img = img = ...`).
    # NOTE(review): the absolute path below is machine-specific.
    img = resize_image(cv2.imread("/Users/taehoonlee/Desktop/align/100it.png"),
                       (256, 256))
    feature(img)
def main():
  """Diffuse every target image into every context image via the inverter.

  Uses `StyleGANInverter.easy_diffuse` (regularization weight 0) for each
  (target, context) pair, collects intermediate results into an HTML page,
  and stacks all latent codes into `{job_name}_inverted_codes.npy`.
  """
  args = parse_args()
  os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
  assert os.path.exists(args.target_list)
  target_list_name = os.path.splitext(os.path.basename(args.target_list))[0]
  assert os.path.exists(args.context_list)
  context_list_name = os.path.splitext(os.path.basename(args.context_list))[0]
  output_dir = args.output_dir or f'results/diffusion'
  job_name = f'{target_list_name}_TO_{context_list_name}'
  logger = setup_logger(output_dir, f'{job_name}.log', f'{job_name}_logger')

  logger.info(f'Loading model.')
  inverter = StyleGANInverter(
      args.model_name,
      learning_rate=args.learning_rate,
      iteration=args.num_iterations,
      reconstruction_loss_weight=1.0,
      perceptual_loss_weight=args.loss_weight_feat,
      regularization_loss_weight=0.0,
      logger=logger)
  image_size = inverter.G.resolution

  # Load image list (one path per line in each list file).
  logger.info(f'Loading target images and context images.')
  target_list = []
  with open(args.target_list, 'r') as f:
    for line in f:
      target_list.append(line.strip())
  num_targets = len(target_list)
  context_list = []
  with open(args.context_list, 'r') as f:
    for line in f:
      context_list.append(line.strip())
  num_contexts = len(context_list)
  num_pairs = num_targets * num_contexts

  # Initialize visualizer: one row per (target, context) pair, one column
  # per intermediate result emitted by easy_diffuse.
  save_interval = args.num_iterations // args.num_results
  headers = ['Target Image', 'Context Image', 'Stitched Image',
             'Encoder Output']
  for step in range(1, args.num_iterations + 1):
    if step == args.num_iterations or step % save_interval == 0:
      headers.append(f'Step {step:06d}')
  viz_size = None if args.viz_size == 0 else args.viz_size
  visualizer = HtmlPageVisualizer(
      num_rows=num_pairs, num_cols=len(headers), viz_size=viz_size)
  visualizer.set_headers(headers)

  # Diffuse images.
  logger.info(f'Start diffusion.')
  latent_codes = []
  for target_idx in tqdm(range(num_targets), desc='Target ID', leave=False):
    # Load target once per outer iteration.
    target_image = resize_image(load_image(target_list[target_idx]),
                                (image_size, image_size))
    visualizer.set_cell(target_idx * num_contexts, 0, image=target_image)
    for context_idx in tqdm(range(num_contexts), desc='Context ID',
                            leave=False):
      row_idx = target_idx * num_contexts + context_idx
      context_image = resize_image(load_image(context_list[context_idx]),
                                   (image_size, image_size))
      visualizer.set_cell(row_idx, 1, image=context_image)
      # NOTE(review): `crop_x`/`crop_y` are both given `args.crop_size`,
      # which reads like crop *dimensions* rather than coordinates --
      # confirm easy_diffuse's parameter semantics.
      code, viz_results = inverter.easy_diffuse(target=target_image,
                                                context=context_image,
                                                center_x=args.center_x,
                                                center_y=args.center_y,
                                                crop_x=args.crop_size,
                                                crop_y=args.crop_size,
                                                num_viz=args.num_results)
      for viz_idx, viz_img in enumerate(viz_results):
        visualizer.set_cell(row_idx, viz_idx + 2, image=viz_img)
      latent_codes.append(code)

  # Save results (copies of input lists, stacked codes, HTML page).

  os.system(f'cp {args.target_list} {output_dir}/target_list.txt')
  os.system(f'cp {args.context_list} {output_dir}/context_list.txt')
  np.save(f'{output_dir}/{job_name}_inverted_codes.npy',
          np.concatenate(latent_codes, axis=0))
  visualizer.save(f'{output_dir}/{job_name}.html')
Example #10
0
def encode(_target_image, _context_image, _output_dir):
    """Diffuse a target face into a context image with a TF 1.x graph.

    Web-app helper: loads the face model, stitches the (cropped,
    background-removed) target onto the context image, then optimizes a
    latent code so the generator reproduces the masked region.  Writes
    intermediate PNGs under `./static/` and the final result to
    `{output_dir}.png`.

    Args:
        _target_image: Path of the target image, relative to `./static/`.
        _context_image: Identifier resolved by `getContextImage`.
        _output_dir: Output stem; the last 4 characters (an extension,
            presumably '.png' -- TODO confirm) are stripped.
    """
    gpu_id = '0'
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id
    print(_target_image)
    assert os.path.exists('./static/' + _target_image)
    _output_dir = _output_dir[:-4]
    output_dir = './static/' + _output_dir

    tflib.init_tf({'rnd.np_random_seed': 1000})
    model_path = './styleganinv_face_256.pkl'
    # NOTE(review): unpickling executes arbitrary code -- trusted files only.
    with open(model_path, 'rb') as f:
        E, _, _, Gs = pickle.load(f)

    # Get input size (encoder expects square NCHW inputs).
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]

    crop_size = 110  # default crop size.
    center_x = 125
    center_y = 145
    crop_x = center_x - crop_size // 2  # default coordinate-X
    crop_y = center_y - crop_size // 2  # default coordinate-Y

    # Binary mask selecting the crop region for the masked losses.
    mask = np.zeros((1, image_size, image_size, 3), dtype=np.float32)
    mask[:, crop_y:crop_y + crop_size, crop_x:crop_x + crop_size, :] = 1.0

    # Build graph.
    sess = tf.get_default_session()

    batch_size = 4
    input_shape = E.input_shape
    input_shape[0] = batch_size  # default batch size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    x_mask = (tf.transpose(x, [0, 2, 3, 1]) + 1) * mask - 1
    x_mask_255 = (x_mask + 1) / 2 * 255

    latent_shape = Gs.components.synthesis.input_shape
    latent_shape[0] = batch_size  # default batch size
    # `wp` is the optimization variable holding the latent code.
    wp = tf.get_variable(shape=latent_shape, name='latent_code')
    x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    x_rec_mask = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) * mask - 1
    x_rec_mask_255 = (x_rec_mask + 1) / 2 * 255

    # Encoder output initializes `wp`.
    w_enc = E.get_output_for(x, phase=False)
    wp_enc = tf.reshape(w_enc, latent_shape)
    setter = tf.assign(wp, wp_enc)

    # Settings for optimization (masked pixel + perceptual losses).
    print("Diffusion : Settings for Optimization.")
    perceptual_model = PerceptualModel([image_size, image_size], False)
    x_feat = perceptual_model(x_mask_255)
    x_rec_feat = perceptual_model(x_rec_mask_255)
    loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
    loss_pix = tf.reduce_mean(tf.square(x_mask - x_rec_mask), axis=[1, 2, 3])

    loss_weight_feat = 5e-5
    learning_rate = 0.01
    loss = loss_pix + loss_weight_feat * loss_feat  # default The perceptual loss scale for optimization. (default 5e-5)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss, var_list=[wp])
    tflib.init_uninitialized_vars()

    # Invert image
    num_iterations = 100
    num_results = 5
    save_interval = num_iterations // num_results

    images = np.zeros(input_shape, np.uint8)

    print("Load target image.")
    _target_image = './static/' + _target_image
    target_image = resize_image(load_image(_target_image),
                                (image_size, image_size))
    save_image('./' + output_dir + '_tar.png', target_image)

    print("Load context image.")
    context_image = getContextImage(_context_image)
    context_image = resize_image(load_image(context_image),
                                 (image_size, image_size))
    save_image('./' + output_dir + '_cont.png', context_image)

    # Inverting Context Image.
    # context_image = invert(model_path, getContextImage(_context_image), wp, latent_shape)
    save_image('./' + output_dir + '_cont_inv.png', context_image)

    # Create Stitched Image
    # context_image[crop_y:crop_y + crop_size, crop_x:crop_x + crop_size] = (
    #     target_image[crop_y:crop_y + crop_size, crop_x:crop_x + crop_size]
    # )
    # context_image[crop_y:crop_y + 170, crop_x - 70:crop_x + crop_size + 190] = (
    #     target_image[crop_y:crop_y + 170, crop_x - 70:crop_x + crop_size + 190]
    # )
    print("Cropping Image...")
    # context_image = cropImage(target_image, context_image)

    # Remove the target's background before stitching onto the context.
    target_image, rect = cropWithWhite(target_image)
    target_image = fourChannels(target_image)
    target_image = cut(target_image)
    target_image = transBg(target_image)

    context_image = createStitchedImage(context_image, target_image, rect)
    save_image('./' + output_dir + '_sti.png', context_image)
    # Only slot 0 of the batch is used; the remaining rows stay zero.
    images[0] = np.transpose(context_image, [2, 0, 1])

    # NOTE(review): `input` shadows the builtin of the same name.
    input = images.astype(np.float32) / 255 * 2.0 - 1.0

    # Run encoder to initialize `wp`.
    print("Start Diffusion.")
    sess.run([setter], {x: input})
    output = sess.run([wp, x_rec])
    output[1] = adjust_pixel_range(output[1])

    # Optimize the latent code; save only the final reconstruction.
    col_idx = 4
    for step in tqdm(range(1, num_iterations + 1), leave=False):
        sess.run(train_op, {x: input})
        if step == num_iterations or step % save_interval == 0:
            output = sess.run([wp, x_rec])
            output[1] = adjust_pixel_range(output[1])
            if step == num_iterations:
                save_image(f'{output_dir}.png', output[1][0])
            col_idx += 1
    # NOTE(review): `exit()` terminates the whole interpreter -- this makes
    # the function unusable as a library call; confirm it is intentional.
    exit()
Example #11
0
def invert(model_path, _image, _wp, _latent_shape):
    """Invert `_image` by optimizing an externally supplied latent variable.

    Loads a pickled (encoder E, generator Gs) pair from `model_path`,
    builds pixel + perceptual + encoder-regularization losses around the
    caller-provided TF variable `_wp`, and optimizes for 40 steps.

    Args:
        model_path: Path to the pickled model file.
        _image: Path of the image to invert.
        _wp: Existing TF variable holding the latent code (reused, not
            created here).
        _latent_shape: Shape to reshape encoder outputs into.

    Returns:
        The final reconstructed image (pixel-range adjusted).
    """
    print("Inverting")
    tflib.init_tf({'rnd.np_random_seed': 1000})
    # NOTE(review): unpickling executes arbitrary code -- trusted files only.
    with open(model_path, 'rb') as f:
        E, _, _, Gs = pickle.load(f)

    # Get input size (encoder expects square NCHW inputs).
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]

    # Build graph.
    print("Inverting : Build Graph.")
    sess = tf.get_default_session()

    batch_size = 4
    input_shape = E.input_shape
    input_shape[0] = batch_size  # default batch size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    x_255 = (tf.transpose(x, [0, 2, 3, 1]) + 1) / 2 * 255

    wp = _wp
    x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    x_rec_255 = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) / 2 * 255

    # Encoder output initializes the latent variable.
    w_enc = E.get_output_for(x, phase=False)
    wp_enc = tf.reshape(w_enc, _latent_shape)
    setter = tf.assign(wp, wp_enc)

    # Settings for optimization: pixel + perceptual + encoder
    # domain-regularization losses with hard-coded weights (5e-5, 2.0).
    print("Inverting : Settings for Optimization.")
    perceptual_model = PerceptualModel([image_size, image_size], False)
    x_feat = perceptual_model(x_255)
    x_rec_feat = perceptual_model(x_rec_255)
    loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
    loss_pix = tf.reduce_mean(tf.square(x - x_rec), axis=[1, 2, 3])
    w_enc_new = E.get_output_for(x_rec, phase=False)
    wp_enc_new = tf.reshape(w_enc_new, _latent_shape)
    loss_enc = tf.reduce_mean(tf.square(wp - wp_enc_new), axis=[1, 2])
    loss = (loss_pix + 5e-5 * loss_feat + 2.0 * loss_enc)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    train_op = optimizer.minimize(loss, var_list=[wp])
    tflib.init_uninitialized_vars()

    # Invert image
    print("Start Inverting.")
    num_iterations = 40
    num_results = 2
    save_interval = num_iterations // num_results

    # Only slot 0 of the batch is used; other rows stay zero.
    context_images = np.zeros(input_shape, np.uint8)

    context_image = resize_image(load_image(_image), (image_size, image_size))

    # Inverting Context Image.
    context_images[0] = np.transpose(context_image, [2, 0, 1])
    # uint8 [0, 255] -> float [-1, 1] expected by the encoder.
    context_input = context_images.astype(np.float32) / 255 * 2.0 - 1.0

    sess.run([setter], {x: context_input})
    context_output = sess.run([wp, x_rec])
    context_output[1] = adjust_pixel_range(context_output[1])
    context_image = np.transpose(context_images[0], [1, 2, 0])

    # Optimize; keep only the reconstruction from the final step.
    for step in tqdm(range(1, num_iterations + 1), leave=False):
        sess.run(train_op, {x: context_input})
        if step == num_iterations or step % save_interval == 0:
            context_output = sess.run([wp, x_rec])
            context_output[1] = adjust_pixel_range(context_output[1])
            if step == num_iterations: context_image = context_output[1][0]

    return context_image
Example #12
0
def main():
    """Extract GH-Feat features for every image in `args.image_list`.

    Loads a pickled encoder/generator pair, rebuilds a style-modulated
    synthesis network with the generator's weights, runs the encoder over
    batches of images, and saves the features (`ghfeat.npy`), per-image
    reconstructions, and an HTML comparison page.
    """
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    assert os.path.exists(args.image_list)
    image_list_name = os.path.splitext(os.path.basename(args.image_list))[0]
    output_dir = args.output_dir or f'results/ghfeat/{image_list_name}'
    logger = setup_logger(output_dir, 'extract_feature.log',
                          'inversion_logger')

    logger.info(f'Loading model.')
    tflib.init_tf({'rnd.np_random_seed': 1000})
    # NOTE(review): unpickling executes arbitrary code -- trusted files only.
    with open(args.model_path, 'rb') as f:
        E, _, _, Gs = pickle.load(f)

    # Get input size (encoder expects square NCHW inputs).
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]

    # Build a fresh style-modulated synthesis network and copy the
    # pre-trained generator weights into it, matched by variable name.
    G_args = EasyDict(func_name='training.networks_stylegan.G_synthesis')
    G_style_mod = tflib.Network('G_StyleMod',
                                resolution=image_size,
                                label_size=0,
                                **G_args)
    Gs_vars_pairs = {
        name: tflib.run(val)
        for name, val in Gs.components.synthesis.vars.items()
    }
    for g_name, g_val in G_style_mod.vars.items():
        tflib.set_vars({g_val: Gs_vars_pairs[g_name]})

    # Build graph: encoder features feed the style-modulated generator.
    logger.info(f'Building graph.')
    sess = tf.get_default_session()
    input_shape = E.input_shape
    input_shape[0] = args.batch_size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    ghfeat = E.get_output_for(x, is_training=False)
    x_rec = G_style_mod.get_output_for(ghfeat, randomize_noise=False)

    # Load image list (one path per line).
    logger.info(f'Loading image list.')
    image_list = []
    with open(args.image_list, 'r') as f:
        for line in f:
            image_list.append(line.strip())

    # Extract GH-Feat from images (single forward pass, no optimization).
    logger.info(f'Start feature extraction.')
    headers = ['Name', 'Original Image', 'Encoder Output']
    viz_size = None if args.viz_size == 0 else args.viz_size
    visualizer = HtmlPageVisualizer(num_rows=len(image_list),
                                    num_cols=len(headers),
                                    viz_size=viz_size)
    visualizer.set_headers(headers)

    images = np.zeros(input_shape, np.uint8)
    names = ['' for _ in range(args.batch_size)]
    features = []
    for img_idx in tqdm(range(0, len(image_list), args.batch_size),
                        leave=False):
        # Load inputs.  NOTE(review): a short final batch leaves stale rows
        # in `images`; only the first len(batch) outputs are kept below.
        batch = image_list[img_idx:img_idx + args.batch_size]
        for i, image_path in enumerate(batch):
            image = resize_image(load_image(image_path),
                                 (image_size, image_size))
            images[i] = np.transpose(image, [2, 0, 1])
            names[i] = os.path.splitext(os.path.basename(image_path))[0]
        # uint8 [0, 255] -> float [-1, 1] expected by the encoder.
        inputs = images.astype(np.float32) / 255 * 2.0 - 1.0
        # Run encoder and reconstruct in one session call.
        outputs = sess.run([ghfeat, x_rec], {x: inputs})
        features.append(outputs[0][0:len(batch)])
        outputs[1] = adjust_pixel_range(outputs[1])
        for i, _ in enumerate(batch):
            image = np.transpose(images[i], [1, 2, 0])
            save_image(f'{output_dir}/{names[i]}_ori.png', image)
            save_image(f'{output_dir}/{names[i]}_enc.png', outputs[1][i])
            visualizer.set_cell(i + img_idx, 0, text=names[i])
            visualizer.set_cell(i + img_idx, 1, image=image)
            visualizer.set_cell(i + img_idx, 2, image=outputs[1][i])

    # Save results (copy of input list, stacked features, HTML page).
    os.system(f'cp {args.image_list} {output_dir}/image_list.txt')
    np.save(f'{output_dir}/ghfeat.npy', np.concatenate(features, axis=0))
    visualizer.save(f'{output_dir}/reconstruction.html')