def predict(self, image, description):
    """Invert `image` with text guidance from `description`.

    Records both inputs on ``self.args``, runs a fresh StyleGANInverter on
    the resized image, and returns the path of a temporary PNG holding the
    last visualization frame.
    """
    # Record the request on the shared args namespace.
    self.args.description = description
    self.args.image_path = image

    inverter = StyleGANInverter(
        self.args.model_name,
        mode=self.args.mode,
        learning_rate=self.args.learning_rate,
        iteration=self.args.num_iterations,
        reconstruction_loss_weight=1.0,
        perceptual_loss_weight=self.args.loss_weight_feat,
        regularization_loss_weight=self.args.loss_weight_enc,
        clip_loss_weight=self.args.loss_weight_clip,
        description=self.args.description,
        logger=None,
    )

    # Resize the input to the generator's native resolution before inverting.
    resolution = inverter.G.resolution
    target = resize_image(load_image(str(self.args.image_path)),
                          (resolution, resolution))
    _, frames = inverter.easy_invert(target, num_viz=self.args.num_results)

    # Save the last visualization frame into a fresh temp directory.
    result_path = Path(tempfile.mkdtemp()) / "out.png"
    save_image(str(result_path), frames[-1])
    return result_path
def main():
    """Invert a single image and save the encoder/inversion outputs.

    Builds a StyleGANInverter from CLI args, inverts ``args.image_path`` at
    the generator's native resolution, and writes ``<name>_enc.png``
    (encoder output, ``viz_results[1]``) and ``<name>_inv.png`` (last
    visualization frame) into the output directory.
    """
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    # Explicit check instead of `assert` (asserts are stripped under -O).
    if not os.path.isfile(args.image_path):
        raise FileNotFoundError(f'Input image not found: {args.image_path}')
    output_dir = args.output_dir or 'results/inversion/test'
    os.makedirs(output_dir, exist_ok=True)  # idempotent, race-free creation

    inverter = StyleGANInverter(
        args.model_name,
        mode=args.mode,
        learning_rate=args.learning_rate,
        iteration=args.num_iterations,
        reconstruction_loss_weight=1.0,
        perceptual_loss_weight=args.loss_weight_feat,
        regularization_loss_weight=args.loss_weight_enc,
        clip_loss_weight=args.loss_weight_clip,
        description=args.description,
        logger=None)
    image_size = inverter.G.resolution

    # Invert the given image.
    image = resize_image(load_image(args.image_path), (image_size, image_size))
    _, viz_results = inverter.easy_invert(image, num_viz=args.num_results)

    # In 'man' mode the outputs keep the source file's stem; otherwise they
    # are written under the generic name 'gen'.
    if args.mode == 'man':
        image_name = os.path.splitext(os.path.basename(args.image_path))[0]
    else:
        image_name = 'gen'
    save_image(f'{output_dir}/{image_name}_enc.png', viz_results[1])
    save_image(f'{output_dir}/{image_name}_inv.png', viz_results[-1])
    print(f'save {image_name} in {output_dir}')
def main():
    """Invert every image listed in ``args.image_list``.

    For each listed image: runs StyleGANInverter, saves the original,
    encoder output and final inversion PNGs, stacks all latent codes into
    ``inverted_codes.npy``, and renders an HTML page showing intermediate
    optimization snapshots.
    """
    import shutil  # local import: used for the portable list-file copy below

    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    # Explicit check instead of `assert` (asserts are stripped under -O).
    if not os.path.exists(args.image_list):
        raise FileNotFoundError(f'Image list not found: {args.image_list}')
    image_list_name = os.path.splitext(os.path.basename(args.image_list))[0]
    output_dir = args.output_dir or f'results/inversion/{image_list_name}'
    logger = setup_logger(output_dir, 'inversion.log', 'inversion_logger')

    logger.info('Loading model.')
    inverter = StyleGANInverter(
        args.model_name,
        learning_rate=args.learning_rate,
        iteration=args.num_iterations,
        reconstruction_loss_weight=1.0,
        perceptual_loss_weight=args.loss_weight_feat,
        regularization_loss_weight=args.loss_weight_enc,
        logger=logger)
    image_size = inverter.G.resolution

    # Load image list.
    logger.info('Loading image list.')
    with open(args.image_list, 'r') as f:
        image_list = [line.strip() for line in f]

    # Initialize visualizer: one column per saved optimization snapshot.
    save_interval = args.num_iterations // args.num_results
    headers = ['Name', 'Original Image', 'Encoder Output']
    for step in range(1, args.num_iterations + 1):
        if step == args.num_iterations or step % save_interval == 0:
            headers.append(f'Step {step:06d}')
    viz_size = None if args.viz_size == 0 else args.viz_size
    visualizer = HtmlPageVisualizer(num_rows=len(image_list),
                                    num_cols=len(headers),
                                    viz_size=viz_size)
    visualizer.set_headers(headers)

    # Invert images.
    logger.info('Start inversion.')
    latent_codes = []
    for img_idx, image_path in enumerate(tqdm(image_list, leave=False)):
        image_name = os.path.splitext(os.path.basename(image_path))[0]
        image = resize_image(load_image(image_path), (image_size, image_size))
        code, viz_results = inverter.easy_invert(image,
                                                 num_viz=args.num_results)
        latent_codes.append(code)
        save_image(f'{output_dir}/{image_name}_ori.png', image)
        save_image(f'{output_dir}/{image_name}_enc.png', viz_results[1])
        save_image(f'{output_dir}/{image_name}_inv.png', viz_results[-1])
        visualizer.set_cell(img_idx, 0, text=image_name)
        visualizer.set_cell(img_idx, 1, image=image)
        for viz_idx, viz_img in enumerate(viz_results[1:]):
            visualizer.set_cell(img_idx, viz_idx + 2, image=viz_img)

    # Save results. shutil.copy replaces `os.system('cp ...')`: no shell is
    # involved, so paths containing spaces or shell metacharacters work.
    shutil.copy(args.image_list, f'{output_dir}/image_list.txt')
    np.save(f'{output_dir}/inverted_codes.npy',
            np.concatenate(latent_codes, axis=0))
    visualizer.save(f'{output_dir}/inversion.html')
def load_test_images(dir, image_size=128):
    """Load and resize every test image found in *dir*.

    Args:
        dir: Directory handed to ``get_test_images_paths``. (The name
            shadows the ``dir`` builtin; kept unchanged so keyword callers
            keep working.)
        image_size: Side length of the square each image is resized to.
            (default: 128)

    Returns:
        List of resized images, in the order ``get_test_images_paths``
        returns the paths.
    """
    # Comprehension replaces the manual append loop; the old `enumerate`
    # index was never used.
    return [resize_image(load_image(path), (image_size, image_size))
            for path in get_test_images_paths(dir)]
def main():
    """Main function.

    Interpolates between every (source, target) pair of previously inverted
    images: loads both image lists and their latent codes, rebuilds the
    StyleGAN synthesis graph, walks each interpolation path in batches, and
    writes an HTML grid of all intermediate frames.
    """
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    # Both directories must be outputs of a prior inversion run: each needs
    # the image list plus the matching inverted latent codes.
    src_dir = args.src_dir
    src_dir_name = os.path.basename(src_dir.rstrip('/'))
    assert os.path.exists(src_dir)
    assert os.path.exists(f'{src_dir}/image_list.txt')
    assert os.path.exists(f'{src_dir}/inverted_codes.npy')
    dst_dir = args.dst_dir
    dst_dir_name = os.path.basename(dst_dir.rstrip('/'))
    assert os.path.exists(dst_dir)
    assert os.path.exists(f'{dst_dir}/image_list.txt')
    assert os.path.exists(f'{dst_dir}/inverted_codes.npy')
    output_dir = args.output_dir or 'results/interpolation'
    job_name = f'{src_dir_name}_TO_{dst_dir_name}'
    logger = setup_logger(output_dir, f'{job_name}.log', f'{job_name}_logger')
    # Load model (fixed seed for reproducible noise init).
    logger.info(f'Loading generator.')
    tflib.init_tf({'rnd.np_random_seed': 1000})
    with open(args.model_path, 'rb') as f:
        _, _, _, Gs = pickle.load(f)
    # Build graph: a placeholder of w+ codes feeds the synthesis network.
    logger.info(f'Building graph.')
    sess = tf.get_default_session()
    num_layers, latent_dim = Gs.components.synthesis.input_shape[1:3]
    wp = tf.placeholder(tf.float32,
                        [args.batch_size, num_layers, latent_dim],
                        name='latent_code')
    x = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    # Load image names and codes; every listed image must have its `_ori`
    # PNG next to the codes file, and code count must match list length.
    logger.info(f'Loading images and corresponding inverted latent codes.')
    src_list = []
    with open(f'{src_dir}/image_list.txt', 'r') as f:
        for line in f:
            name = os.path.splitext(os.path.basename(line.strip()))[0]
            assert os.path.exists(f'{src_dir}/{name}_ori.png')
            src_list.append(name)
    src_codes = np.load(f'{src_dir}/inverted_codes.npy')
    assert src_codes.shape[0] == len(src_list)
    num_src = src_codes.shape[0]
    dst_list = []
    with open(f'{dst_dir}/image_list.txt', 'r') as f:
        for line in f:
            name = os.path.splitext(os.path.basename(line.strip()))[0]
            assert os.path.exists(f'{dst_dir}/{name}_ori.png')
            dst_list.append(name)
    dst_codes = np.load(f'{dst_dir}/inverted_codes.npy')
    assert dst_codes.shape[0] == len(dst_list)
    num_dst = dst_codes.shape[0]
    # Interpolate images.
    logger.info(f'Start interpolation.')
    # Two extra steps cover the endpoint columns ('Source Inversion' /
    # 'Target Inversion') in the header layout below: 2 + (step-2) + 2
    # interior steps == step + 2 total columns.
    step = args.step + 2
    viz_size = None if args.viz_size == 0 else args.viz_size
    visualizer = HtmlPageVisualizer(num_rows=num_src * num_dst,
                                    num_cols=step + 2,
                                    viz_size=viz_size)
    visualizer.set_headers(['Source', 'Source Inversion'] +
                           [f'Step {i:02d}' for i in range(1, step - 1)] +
                           ['Target Inversion', 'Target'])
    # Fixed-size feed buffer; short tail batches only overwrite a prefix.
    inputs = np.zeros((args.batch_size, num_layers, latent_dim), np.float32)
    for src_idx in tqdm(range(num_src), leave=False):
        src_code = src_codes[src_idx:src_idx + 1]
        src_path = f'{src_dir}/{src_list[src_idx]}_ori.png'
        # One interpolation path from this source to every target.
        codes = interpolate(src_codes=np.repeat(src_code, num_dst, axis=0),
                            dst_codes=dst_codes,
                            step=step)
        for dst_idx in tqdm(range(num_dst), leave=False):
            dst_path = f'{dst_dir}/{dst_list[dst_idx]}_ori.png'
            output_images = []
            # Synthesize the path in batches of `args.batch_size` codes.
            for idx in range(0, step, args.batch_size):
                batch = codes[dst_idx, idx:idx + args.batch_size]
                inputs[0:len(batch)] = batch
                images = sess.run(x, feed_dict={wp: inputs})
                # Keep only the rows that belong to this (possibly short) batch.
                output_images.append(images[0:len(batch)])
            output_images = adjust_pixel_range(
                np.concatenate(output_images, axis=0))
            row_idx = src_idx * num_dst + dst_idx
            visualizer.set_cell(row_idx, 0, image=load_image(src_path))
            visualizer.set_cell(row_idx, step + 1, image=load_image(dst_path))
            for s, output_image in enumerate(output_images):
                # NOTE(review): hard-coded debug dump of one specific cell
                # (frame 5 of row 2) to a fixed path — looks like a leftover;
                # consider removing.
                if s == 5 and row_idx == 2:
                    save_image(f'./results/interpolation/005_int.png',
                               output_image)
                visualizer.set_cell(row_idx, s + 1, image=output_image)
    # Save results.
    visualizer.save(f'{output_dir}/{job_name}.html')
def main():
    """Main function.

    Manipulates previously inverted images along a semantic boundary:
    loads the image list and latent codes from ``args.image_dir``, loads the
    boundary (optionally with per-layer metadata), shifts the codes with
    `manipulate`, synthesizes each step with the generator, and writes an
    HTML grid of the results.
    """
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    # The image directory must be the output of a prior inversion run.
    image_dir = args.image_dir
    image_dir_name = os.path.basename(image_dir.rstrip('/'))
    assert os.path.exists(image_dir)
    assert os.path.exists(f'{image_dir}/image_list.txt')
    assert os.path.exists(f'{image_dir}/inverted_codes.npy')
    boundary_path = args.boundary_path
    assert os.path.exists(boundary_path)
    boundary_name = os.path.splitext(os.path.basename(boundary_path))[0]
    output_dir = args.output_dir or 'results/manipulation'
    job_name = f'{boundary_name.upper()}_{image_dir_name}'
    logger = setup_logger(output_dir, f'{job_name}.log', f'{job_name}_logger')
    # Load model.
    logger.info(f'Loading generator.')
    generator = build_generator(args.model_name)
    # Load image names and codes; each listed image must have both its
    # original and inverted PNGs on disk.
    logger.info(f'Loading images and corresponding inverted latent codes.')
    image_list = []
    with open(f'{image_dir}/image_list.txt', 'r') as f:
        for line in f:
            name = os.path.splitext(os.path.basename(line.strip()))[0]
            assert os.path.exists(f'{image_dir}/{name}_ori.png')
            assert os.path.exists(f'{image_dir}/{name}_inv.png')
            image_list.append(name)
    latent_codes = np.load(f'{image_dir}/inverted_codes.npy')
    assert latent_codes.shape[0] == len(image_list)
    num_images = latent_codes.shape[0]
    logger.info(f'Loading boundary.')
    # `[()]` unwraps the 0-d object array produced by np.save on a dict.
    boundary_file = np.load(boundary_path, allow_pickle=True)[()]
    if isinstance(boundary_file, dict):
        # Boundary files may bundle their own layer selection as metadata;
        # it then takes precedence over the CLI flag.
        boundary = boundary_file['boundary']
        manipulate_layers = boundary_file['meta_data']['manipulate_layers']
    else:
        boundary = boundary_file
        manipulate_layers = args.manipulate_layers
    if manipulate_layers:
        logger.info(f' Manipulating on layers `{manipulate_layers}`.')
    else:
        logger.info(f' Manipulating on ALL layers.')
    # Manipulate images.
    logger.info(f'Start manipulation.')
    step = args.step
    viz_size = None if args.viz_size == 0 else args.viz_size
    # Columns: name, original, inversion, then one column per step.
    visualizer = HtmlPageVisualizer(num_rows=num_images,
                                    num_cols=step + 3,
                                    viz_size=viz_size)
    visualizer.set_headers(['Name', 'Origin', 'Inverted'] +
                           [f'Step {i:02d}' for i in range(1, step + 1)])
    # Fill the static columns first.
    for img_idx, img_name in enumerate(image_list):
        ori_image = load_image(f'{image_dir}/{img_name}_ori.png')
        inv_image = load_image(f'{image_dir}/{img_name}_inv.png')
        visualizer.set_cell(img_idx, 0, text=img_name)
        visualizer.set_cell(img_idx, 1, image=ori_image)
        visualizer.set_cell(img_idx, 2, image=inv_image)
    # Shift all codes along the boundary in one call (layer-wise codes and
    # boundary, per the is_*_layerwise flags).
    codes = manipulate(latent_codes=latent_codes,
                       boundary=boundary,
                       start_distance=args.start_distance,
                       end_distance=args.end_distance,
                       step=step,
                       layerwise_manipulation=True,
                       num_layers=generator.num_layers,
                       manipulate_layers=manipulate_layers,
                       is_code_layerwise=True,
                       is_boundary_layerwise=True)
    for img_idx in tqdm(range(num_images), leave=False):
        output_images = generator.easy_synthesize(
            codes[img_idx], latent_space_type='wp')['image']
        for s, output_image in enumerate(output_images):
            visualizer.set_cell(img_idx, s + 3, image=output_image)
    # Save results.
    visualizer.save(f'{output_dir}/{job_name}.html')
def main():
    """Main function.

    Semantic diffusion with the TF1 graph: for every (target, context) pair,
    pastes a center crop of the target into the context image, encodes the
    stitched image to an initial w+ code, then optimizes that code against a
    masked pixel + perceptual loss. Saves encoded and optimized codes plus an
    HTML page of intermediate reconstructions.
    """
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    assert os.path.exists(args.target_list)
    target_list_name = os.path.splitext(os.path.basename(args.target_list))[0]
    assert os.path.exists(args.context_list)
    context_list_name = os.path.splitext(os.path.basename(
        args.context_list))[0]
    output_dir = args.output_dir or f'results/diffusion'
    job_name = f'{target_list_name}_TO_{context_list_name}'
    logger = setup_logger(output_dir, f'{job_name}.log', f'{job_name}_logger')
    logger.info(f'Loading model.')
    # Fixed seed for reproducible noise initialization.
    tflib.init_tf({'rnd.np_random_seed': 1000})
    with open(args.model_path, 'rb') as f:
        E, _, _, Gs = pickle.load(f)
    # Get input size (encoder expects square inputs).
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]
    # Square region of the target that gets pasted into the context; the
    # mask marks that region (NHWC, broadcast over the batch).
    crop_size = args.crop_size
    crop_x = args.center_x - crop_size // 2
    crop_y = args.center_y - crop_size // 2
    mask = np.zeros((1, image_size, image_size, 3), dtype=np.float32)
    mask[:, crop_y:crop_y + crop_size, crop_x:crop_x + crop_size, :] = 1.0
    # Build graph.
    logger.info(f'Building graph.')
    sess = tf.get_default_session()
    input_shape = E.input_shape
    # NOTE(review): no copy is taken, so this also rewrites the model's own
    # shape list if input_shape is a plain list — confirm that is intended.
    input_shape[0] = args.batch_size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    # Masked views in NHWC; *_255 variants are rescaled from [-1, 1] to
    # [0, 255] for the perceptual network.
    x_mask = (tf.transpose(x, [0, 2, 3, 1]) + 1) * mask - 1
    x_mask_255 = (x_mask + 1) / 2 * 255
    latent_shape = Gs.components.synthesis.input_shape
    latent_shape[0] = args.batch_size
    # The latent code is a *variable* so the optimizer below can update it.
    wp = tf.get_variable(shape=latent_shape, name='latent_code')
    x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    x_rec_mask = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) * mask - 1
    x_rec_mask_255 = (x_rec_mask + 1) / 2 * 255
    # Encoder output reshaped to w+; `setter` initializes the variable from it.
    w_enc = E.get_output_for(x, phase=False)
    wp_enc = tf.reshape(w_enc, latent_shape)
    setter = tf.assign(wp, wp_enc)
    # Settings for optimization: masked pixel loss + weighted perceptual loss,
    # optimized w.r.t. the latent variable only.
    logger.info(f'Setting configuration for optimization.')
    perceptual_model = PerceptualModel([image_size, image_size], False)
    x_feat = perceptual_model(x_mask_255)
    x_rec_feat = perceptual_model(x_rec_mask_255)
    loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
    loss_pix = tf.reduce_mean(tf.square(x_mask - x_rec_mask), axis=[1, 2, 3])
    loss = loss_pix + args.loss_weight_feat * loss_feat
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    train_op = optimizer.minimize(loss, var_list=[wp])
    tflib.init_uninitialized_vars()
    # Load image list.
    logger.info(f'Loading target images and context images.')
    target_list = []
    with open(args.target_list, 'r') as f:
        for line in f:
            target_list.append(line.strip())
    num_targets = len(target_list)
    context_list = []
    with open(args.context_list, 'r') as f:
        for line in f:
            context_list.append(line.strip())
    num_contexts = len(context_list)
    num_pairs = num_targets * num_contexts
    # Invert images: one HTML column per saved optimization snapshot.
    logger.info(f'Start diffusion.')
    save_interval = args.num_iterations // args.num_results
    headers = [
        'Target Image', 'Context Image', 'Stitched Image', 'Encoder Output'
    ]
    for step in range(1, args.num_iterations + 1):
        if step == args.num_iterations or step % save_interval == 0:
            headers.append(f'Step {step:06d}')
    viz_size = None if args.viz_size == 0 else args.viz_size
    visualizer = HtmlPageVisualizer(num_rows=num_pairs,
                                    num_cols=len(headers),
                                    viz_size=viz_size)
    visualizer.set_headers(headers)
    # Reusable uint8 batch buffer.
    # NOTE(review): rows past len(batch) keep images from the previous batch
    # when the final batch is short; those extra rows are later sliced off
    # via `[0:len(batch)]`, so only wasted compute — confirm.
    images = np.zeros(input_shape, np.uint8)
    latent_codes_enc = []
    latent_codes = []
    for target_idx in tqdm(range(num_targets), desc='Target ID', leave=False):
        # Load target.
        target_image = resize_image(load_image(target_list[target_idx]),
                                    (image_size, image_size))
        visualizer.set_cell(target_idx * num_contexts, 0, image=target_image)
        for context_idx in tqdm(range(0, num_contexts, args.batch_size),
                                desc='Context ID', leave=False):
            row_idx = target_idx * num_contexts + context_idx
            batch = context_list[context_idx:context_idx + args.batch_size]
            for i, context_image_path in enumerate(batch):
                context_image = resize_image(load_image(context_image_path),
                                             (image_size, image_size))
                visualizer.set_cell(row_idx + i, 1, image=context_image)
                # Paste the target's crop region into the context image.
                context_image[crop_y:crop_y + crop_size,
                              crop_x:crop_x + crop_size] = (
                    target_image[crop_y:crop_y + crop_size,
                                 crop_x:crop_x + crop_size])
                visualizer.set_cell(row_idx + i, 2, image=context_image)
                images[i] = np.transpose(context_image, [2, 0, 1])  # HWC->CHW
            # Rescale uint8 [0, 255] to float [-1, 1].
            inputs = images.astype(np.float32) / 255 * 2.0 - 1.0
            # Run encoder to seed the latent variable.
            sess.run([setter], {x: inputs})
            outputs = sess.run([wp, x_rec])
            latent_codes_enc.append(outputs[0][0:len(batch)])
            outputs[1] = adjust_pixel_range(outputs[1])
            for i, _ in enumerate(batch):
                visualizer.set_cell(row_idx + i, 3, image=outputs[1][i])
            # Optimize latent codes, snapshotting every `save_interval` steps.
            col_idx = 4
            for step in tqdm(range(1, args.num_iterations + 1), leave=False):
                sess.run(train_op, {x: inputs})
                if step == args.num_iterations or step % save_interval == 0:
                    outputs = sess.run([wp, x_rec])
                    outputs[1] = adjust_pixel_range(outputs[1])
                    for i, _ in enumerate(batch):
                        visualizer.set_cell(row_idx + i, col_idx,
                                            image=outputs[1][i])
                    col_idx += 1
            # `outputs` holds the final-iteration values at this point.
            latent_codes.append(outputs[0][0:len(batch)])
    # Save results, reshaped to (num_targets, num_contexts, *latent dims).
    code_shape = [num_targets, num_contexts] + list(latent_shape[1:])
    np.save(f'{output_dir}/{job_name}_encoded_codes.npy',
            np.concatenate(latent_codes_enc, axis=0).reshape(code_shape))
    np.save(f'{output_dir}/{job_name}_inverted_codes.npy',
            np.concatenate(latent_codes, axis=0).reshape(code_shape))
    visualizer.save(f'{output_dir}/{job_name}.html')
def synthesize(self, num, z=None, html_name=None, save_raw_synthesis=False):
    """Synthesizes images.

    Work is sharded across distributed ranks (each rank renders the indices
    `rank, rank + world_size, ...`); only rank 0 assembles the optional HTML
    page after a barrier.

    Args:
      num: Number of images to synthesize.
      z: Latent codes used for generation. If not specified, this function
        will sample latent codes randomly. (default: None)
      html_name: Name of the output html page for visualization. If not
        specified, no visualization page will be saved. (default: None)
      save_raw_synthesis: Whether to save raw synthesis on the disk.
        (default: False)
    """
    # Nothing to produce if neither output form was requested.
    if not html_name and not save_raw_synthesis:
        return
    self.set_mode('val')

    temp_dir = os.path.join(self.work_dir, 'synthesize_results')
    os.makedirs(temp_dir, exist_ok=True)

    if z is not None:
        assert isinstance(z, np.ndarray)
        assert z.ndim == 2 and z.shape[1] == self.z_space_dim
        # Cannot render more images than codes were provided.
        num = min(num, z.shape[0])
        z = torch.from_numpy(z).type(torch.FloatTensor)
    if not num:
        return
    # TODO: Use same z during the entire training process.

    self.logger.init_pbar()
    task1 = self.logger.add_pbar_task('Synthesize', total=num)
    # This rank's share of the work: every world_size-th index.
    indices = list(range(self.rank, num, self.world_size))
    for batch_idx in range(0, len(indices), self.val_batch_size):
        sub_indices = indices[batch_idx:batch_idx + self.val_batch_size]
        batch_size = len(sub_indices)
        if z is None:
            code = torch.randn(batch_size, self.z_space_dim).cuda()
        else:
            code = z[sub_indices].cuda()
        with torch.no_grad():
            # Prefer the EMA-smoothed generator when available.
            if 'generator_smooth' in self.models:
                G = self.models['generator_smooth']
            else:
                G = self.models['generator']
            images = G(code, **self.G_kwargs_val)['image']
            images = self.postprocess(images)
        for sub_idx, image in zip(sub_indices, images):
            save_image(os.path.join(temp_dir, f'{sub_idx:06d}.jpg'), image)
        self.logger.update_pbar(task1, batch_size * self.world_size)

    # Wait until every rank has written its images before rank 0 reads them.
    dist.barrier()
    if self.rank != 0:
        return

    if html_name:
        task2 = self.logger.add_pbar_task('Visualize', total=num)
        html = HtmlPageVisualizer(grid_size=num)
        for image_idx in range(num):
            image = load_image(
                os.path.join(temp_dir, f'{image_idx:06d}.jpg'))
            row_idx, col_idx = divmod(image_idx, html.num_cols)
            html.set_cell(row_idx, col_idx, image=image,
                          text=f'Sample {image_idx:06d}')
            self.logger.update_pbar(task2, 1)
        html.save(os.path.join(self.work_dir, html_name))
    if not save_raw_synthesis:
        # The per-image JPGs were only needed to build the page.
        shutil.rmtree(temp_dir)
    self.logger.close_pbar()
def main():
    """Main function.

    TF1 variant of boundary manipulation: loads inverted codes and a
    semantic boundary, shifts the codes with `manipulate`, synthesizes every
    step through the raw synthesis graph in batches, and writes an HTML grid.
    """
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    # The image directory must be the output of a prior inversion run.
    image_dir = args.image_dir
    image_dir_name = os.path.basename(image_dir.rstrip('/'))
    assert os.path.exists(image_dir)
    assert os.path.exists(f'{image_dir}/image_list.txt')
    assert os.path.exists(f'{image_dir}/inverted_codes.npy')
    boundary_path = args.boundary_path
    assert os.path.exists(boundary_path)
    boundary_name = os.path.splitext(os.path.basename(boundary_path))[0]
    output_dir = args.output_dir or 'results/manipulation'
    job_name = f'{boundary_name.upper()}_{image_dir_name}'
    logger = setup_logger(output_dir, f'{job_name}.log', f'{job_name}_logger')
    # Load model (fixed seed for reproducible noise init).
    logger.info(f'Loading generator.')
    tflib.init_tf({'rnd.np_random_seed': 1000})
    with open(args.model_path, 'rb') as f:
        _, _, _, Gs = pickle.load(f)
    # Build graph: a placeholder of w+ codes feeds the synthesis network.
    logger.info(f'Building graph.')
    sess = tf.get_default_session()
    num_layers, latent_dim = Gs.components.synthesis.input_shape[1:3]
    wp = tf.placeholder(tf.float32,
                        [args.batch_size, num_layers, latent_dim],
                        name='latent_code')
    x = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    # Load image names and codes; each listed image must have both its
    # original and inverted PNGs on disk.
    logger.info(f'Loading images and corresponding inverted latent codes.')
    image_list = []
    with open(f'{image_dir}/image_list.txt', 'r') as f:
        for line in f:
            name = os.path.splitext(os.path.basename(line.strip()))[0]
            assert os.path.exists(f'{image_dir}/{name}_ori.png')
            assert os.path.exists(f'{image_dir}/{name}_inv.png')
            image_list.append(name)
    latent_codes = np.load(f'{image_dir}/inverted_codes.npy')
    assert latent_codes.shape[0] == len(image_list)
    num_images = latent_codes.shape[0]
    logger.info(f'Loading boundary.')
    # `[()]` unwraps the 0-d object array produced by np.save on a dict.
    boundary_file = np.load(boundary_path, allow_pickle=True)[()]
    if isinstance(boundary_file, dict):
        # Boundary files may bundle their own layer selection as metadata;
        # it then takes precedence over the CLI flag.
        boundary = boundary_file['boundary']
        manipulate_layers = boundary_file['meta_data']['manipulate_layers']
    else:
        boundary = boundary_file
        manipulate_layers = args.manipulate_layers
    if manipulate_layers:
        logger.info(f' Manipulating on layers `{manipulate_layers}`.')
    else:
        logger.info(f' Manipulating on ALL layers.')
    # Manipulate images.
    logger.info(f'Start manipulation.')
    step = args.step
    viz_size = None if args.viz_size == 0 else args.viz_size
    # Columns: name, original, inversion, then one column per step.
    visualizer = HtmlPageVisualizer(num_rows=num_images,
                                    num_cols=step + 3,
                                    viz_size=viz_size)
    visualizer.set_headers(['Name', 'Origin', 'Inverted'] +
                           [f'Step {i:02d}' for i in range(1, step + 1)])
    # Fill the static columns first.
    for img_idx, img_name in enumerate(image_list):
        ori_image = load_image(f'{image_dir}/{img_name}_ori.png')
        inv_image = load_image(f'{image_dir}/{img_name}_inv.png')
        visualizer.set_cell(img_idx, 0, text=img_name)
        visualizer.set_cell(img_idx, 1, image=ori_image)
        visualizer.set_cell(img_idx, 2, image=inv_image)
    # Shift all codes along the boundary in one call (layer-wise codes and
    # boundary, per the is_*_layerwise flags).
    codes = manipulate(latent_codes=latent_codes,
                       boundary=boundary,
                       start_distance=args.start_distance,
                       end_distance=args.end_distance,
                       step=step,
                       layerwise_manipulation=True,
                       num_layers=num_layers,
                       manipulate_layers=manipulate_layers,
                       is_code_layerwise=True,
                       is_boundary_layerwise=True)
    # Fixed-size feed buffer; short tail batches only overwrite a prefix.
    inputs = np.zeros((args.batch_size, num_layers, latent_dim), np.float32)
    for img_idx in tqdm(range(num_images), leave=False):
        output_images = []
        # Synthesize this image's steps in batches of `args.batch_size`.
        for idx in range(0, step, args.batch_size):
            batch = codes[img_idx, idx:idx + args.batch_size]
            inputs[0:len(batch)] = batch
            images = sess.run(x, feed_dict={wp: inputs})
            # Keep only the rows that belong to this (possibly short) batch.
            output_images.append(images[0:len(batch)])
        output_images = adjust_pixel_range(
            np.concatenate(output_images, axis=0))
        for s, output_image in enumerate(output_images):
            visualizer.set_cell(img_idx, s + 3, image=output_image)
    # Save results.
    visualizer.save(f'{output_dir}/{job_name}.html')
def main():
    """Main function: mix style codes into content codes and render a grid.

    Loads image lists and pre-computed latent codes from the style and
    content directories, blends style codes into content codes on the layers
    from ``args.mix_layer_start_idx`` upward, synthesizes the mixed codes,
    and writes an HTML grid (styles as rows, contents as columns).
    """
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id

    # Both directories must be outputs of a prior inversion run.
    styles_root = args.style_dir
    styles_name = os.path.basename(styles_root.rstrip('/'))
    assert os.path.exists(styles_root)
    assert os.path.exists(f'{styles_root}/image_list.txt')
    assert os.path.exists(f'{styles_root}/inverted_codes.npy')
    contents_root = args.content_dir
    contents_name = os.path.basename(contents_root.rstrip('/'))
    assert os.path.exists(contents_root)
    assert os.path.exists(f'{contents_root}/image_list.txt')
    assert os.path.exists(f'{contents_root}/inverted_codes.npy')

    output_dir = args.output_dir or 'results/style_mixing'
    job_name = f'{styles_name}_STYLIZE_{contents_name}'
    logger = setup_logger(output_dir, f'{job_name}.log', f'{job_name}_logger')

    # Load model.
    logger.info(f'Loading generator.')
    generator = build_generator(args.model_name)
    mix_layers = list(range(args.mix_layer_start_idx, generator.num_layers))

    # Load image names and their inverted latent codes.
    logger.info(f'Loading images and corresponding inverted latent codes.')
    style_names = []
    with open(f'{styles_root}/image_list.txt', 'r') as f:
        for raw in f:
            stem = os.path.splitext(os.path.basename(raw.strip()))[0]
            assert os.path.exists(f'{styles_root}/{stem}_ori.png')
            style_names.append(stem)
    logger.info(f'Loading inverted latent codes.')
    style_codes = np.load(f'{styles_root}/inverted_codes.npy')
    assert style_codes.shape[0] == len(style_names)
    num_styles = style_codes.shape[0]

    content_names = []
    with open(f'{contents_root}/image_list.txt', 'r') as f:
        for raw in f:
            stem = os.path.splitext(os.path.basename(raw.strip()))[0]
            assert os.path.exists(f'{contents_root}/{stem}_ori.png')
            content_names.append(stem)
    logger.info(f'Loading inverted latent codes.')
    content_codes = np.load(f'{contents_root}/inverted_codes.npy')
    assert content_codes.shape[0] == len(content_names)
    num_contents = content_codes.shape[0]

    # Mix styles.
    logger.info(f'Start style mixing.')
    viz_size = None if args.viz_size == 0 else args.viz_size
    grid = HtmlPageVisualizer(num_rows=num_styles + 1,
                              num_cols=num_contents + 1,
                              viz_size=viz_size)
    grid.set_headers(['Style'] +
                     [f'Content {i:03d}' for i in range(num_contents)])
    # First column: style reference images (rows offset by the header row).
    for row, stem in enumerate(style_names, start=1):
        grid.set_cell(row, 0, image=load_image(f'{styles_root}/{stem}_ori.png'))
    # First row: content reference images.
    for col, stem in enumerate(content_names, start=1):
        grid.set_cell(0, col,
                      image=load_image(f'{contents_root}/{stem}_ori.png'))

    mixed_codes = mix_style(style_codes=style_codes,
                            content_codes=content_codes,
                            num_layers=generator.num_layers,
                            mix_layers=mix_layers)
    for row in tqdm(range(num_styles), leave=False):
        rendered = generator.easy_synthesize(mixed_codes[row],
                                             latent_space_type='wp')['image']
        for col, img in enumerate(rendered):
            grid.set_cell(row + 1, col + 1, image=img)

    # Save results.
    grid.save(f'{output_dir}/{job_name}.html')
def main():
    """Diffuse each target image into every context image.

    For every (target, context) pair: resizes both images, calls the
    inverter's ``easy_diffuse`` to blend the target's center crop into the
    context, collects the resulting latent codes into one ``.npy`` file, and
    renders an HTML page of intermediate results.
    """
    import shutil  # local import: used for the portable list-file copies below

    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    # Explicit checks instead of `assert` (asserts are stripped under -O).
    if not os.path.exists(args.target_list):
        raise FileNotFoundError(f'Target list not found: {args.target_list}')
    target_list_name = os.path.splitext(os.path.basename(args.target_list))[0]
    if not os.path.exists(args.context_list):
        raise FileNotFoundError(f'Context list not found: {args.context_list}')
    context_list_name = os.path.splitext(
        os.path.basename(args.context_list))[0]
    output_dir = args.output_dir or 'results/diffusion'
    job_name = f'{target_list_name}_TO_{context_list_name}'
    logger = setup_logger(output_dir, f'{job_name}.log', f'{job_name}_logger')

    logger.info('Loading model.')
    inverter = StyleGANInverter(
        args.model_name,
        learning_rate=args.learning_rate,
        iteration=args.num_iterations,
        reconstruction_loss_weight=1.0,
        perceptual_loss_weight=args.loss_weight_feat,
        regularization_loss_weight=0.0,
        logger=logger)
    image_size = inverter.G.resolution

    # Load image lists.
    logger.info('Loading target images and context images.')
    with open(args.target_list, 'r') as f:
        target_list = [line.strip() for line in f]
    num_targets = len(target_list)
    with open(args.context_list, 'r') as f:
        context_list = [line.strip() for line in f]
    num_contexts = len(context_list)
    num_pairs = num_targets * num_contexts

    # Initialize visualizer: one column per saved optimization snapshot.
    save_interval = args.num_iterations // args.num_results
    headers = ['Target Image', 'Context Image', 'Stitched Image',
               'Encoder Output']
    for step in range(1, args.num_iterations + 1):
        if step == args.num_iterations or step % save_interval == 0:
            headers.append(f'Step {step:06d}')
    viz_size = None if args.viz_size == 0 else args.viz_size
    visualizer = HtmlPageVisualizer(
        num_rows=num_pairs, num_cols=len(headers), viz_size=viz_size)
    visualizer.set_headers(headers)

    # Diffuse images.
    logger.info('Start diffusion.')
    latent_codes = []
    for target_idx in tqdm(range(num_targets), desc='Target ID', leave=False):
        # Load target.
        target_image = resize_image(load_image(target_list[target_idx]),
                                    (image_size, image_size))
        visualizer.set_cell(target_idx * num_contexts, 0, image=target_image)
        for context_idx in tqdm(range(num_contexts), desc='Context ID',
                                leave=False):
            row_idx = target_idx * num_contexts + context_idx
            context_image = resize_image(load_image(context_list[context_idx]),
                                         (image_size, image_size))
            visualizer.set_cell(row_idx, 1, image=context_image)
            # NOTE(review): both crop_x and crop_y receive args.crop_size —
            # looks intentional only if they denote crop *sizes*; verify
            # against easy_diffuse's signature.
            code, viz_results = inverter.easy_diffuse(
                target=target_image,
                context=context_image,
                center_x=args.center_x,
                center_y=args.center_y,
                crop_x=args.crop_size,
                crop_y=args.crop_size,
                num_viz=args.num_results)
            for viz_idx, viz_img in enumerate(viz_results):
                visualizer.set_cell(row_idx, viz_idx + 2, image=viz_img)
            latent_codes.append(code)

    # Save results. shutil.copy replaces `os.system('cp ...')`: no shell is
    # involved, so paths containing spaces or shell metacharacters work.
    shutil.copy(args.target_list, f'{output_dir}/target_list.txt')
    shutil.copy(args.context_list, f'{output_dir}/context_list.txt')
    np.save(f'{output_dir}/{job_name}_inverted_codes.npy',
            np.concatenate(latent_codes, axis=0))
    visualizer.save(f'{output_dir}/{job_name}.html')
def encode(_target_image, _context_image, _output_dir):
    """Stitch a (white-background-cropped) target into a context image and
    diffuse it through the StyleGAN encoder + latent optimization.

    Paths are resolved under ``./static/``; intermediate PNGs (`_tar`,
    `_cont`, `_cont_inv`, `_sti`) and the final diffusion result are written
    next to the derived output stem.
    """
    gpu_id = '0'
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id
    print(_target_image)
    assert os.path.exists('./static/' + _target_image)
    # Drop the last four characters — presumably a '.png'-style extension;
    # TODO confirm callers always pass a 4-char extension.
    _output_dir = _output_dir[:-4]
    output_dir = './static/' + _output_dir
    # Fixed seed for reproducible noise initialization.
    tflib.init_tf({'rnd.np_random_seed': 1000})
    model_path = './styleganinv_face_256.pkl'
    with open(model_path, 'rb') as f:
        E, _, _, Gs = pickle.load(f)
    # Get input size (encoder expects square inputs).
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]
    # Hard-coded paste region; the mask marks it (NHWC, broadcast on batch).
    crop_size = 110  # default crop size.
    center_x = 125
    center_y = 145
    crop_x = center_x - crop_size // 2  # default coordinate-X
    crop_y = center_y - crop_size // 2  # default coordinate-Y
    mask = np.zeros((1, image_size, image_size, 3), dtype=np.float32)
    mask[:, crop_y:crop_y + crop_size, crop_x:crop_x + crop_size, :] = 1.0
    # Build graph (same structure as the batch diffusion script).
    sess = tf.get_default_session()
    batch_size = 4
    input_shape = E.input_shape
    input_shape[0] = batch_size  # default batch size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    x_mask = (tf.transpose(x, [0, 2, 3, 1]) + 1) * mask - 1
    x_mask_255 = (x_mask + 1) / 2 * 255
    latent_shape = Gs.components.synthesis.input_shape
    latent_shape[0] = batch_size  # default batch size
    # The latent code is a variable so the optimizer below can update it.
    wp = tf.get_variable(shape=latent_shape, name='latent_code')
    x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    x_rec_mask = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) * mask - 1
    x_rec_mask_255 = (x_rec_mask + 1) / 2 * 255
    # Encoder output reshaped to w+; `setter` seeds the variable from it.
    w_enc = E.get_output_for(x, phase=False)
    wp_enc = tf.reshape(w_enc, latent_shape)
    setter = tf.assign(wp, wp_enc)
    # Settings for optimization: masked pixel + weighted perceptual loss.
    print("Diffusion : Settings for Optimization.")
    perceptual_model = PerceptualModel([image_size, image_size], False)
    x_feat = perceptual_model(x_mask_255)
    x_rec_feat = perceptual_model(x_rec_mask_255)
    loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
    loss_pix = tf.reduce_mean(tf.square(x_mask - x_rec_mask), axis=[1, 2, 3])
    loss_weight_feat = 5e-5
    learning_rate = 0.01
    # default The perceptual loss scale for optimization. (default 5e-5)
    loss = loss_pix + loss_weight_feat * loss_feat
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss, var_list=[wp])
    tflib.init_uninitialized_vars()
    # Invert image
    num_iterations = 100
    num_results = 5
    save_interval = num_iterations // num_results
    images = np.zeros(input_shape, np.uint8)
    print("Load target image.")
    _target_image = './static/' + _target_image
    target_image = resize_image(load_image(_target_image),
                                (image_size, image_size))
    save_image('./' + output_dir + '_tar.png', target_image)
    print("Load context image.")
    context_image = getContextImage(_context_image)
    context_image = resize_image(load_image(context_image),
                                 (image_size, image_size))
    save_image('./' + output_dir + '_cont.png', context_image)
    # Inverting Context Image.
    # NOTE(review): inversion is disabled — `_cont_inv.png` currently saves
    # the *un-inverted* context image; confirm whether that is intended.
    # context_image = invert(model_path, getContextImage(_context_image), wp, latent_shape)
    save_image('./' + output_dir + '_cont_inv.png', context_image)
    # Create Stitched Image
    # context_image[crop_y:crop_y + crop_size, crop_x:crop_x + crop_size] = (
    #     target_image[crop_y:crop_y + crop_size, crop_x:crop_x + crop_size]
    # )
    # context_image[crop_y:crop_y + 170, crop_x - 70:crop_x + crop_size + 190] = (
    #     target_image[crop_y:crop_y + 170, crop_x - 70:crop_x + crop_size + 190]
    # )
    print("Cropping Image...")
    # context_image = cropImage(target_image, context_image)
    # Crop the white background off the target, add an alpha channel, and
    # composite it onto the context at the detected rectangle.
    target_image, rect = cropWithWhite(target_image)
    target_image = fourChannels(target_image)
    target_image = cut(target_image)
    target_image = transBg(target_image)
    context_image = createStitchedImage(context_image, target_image, rect)
    save_image('./' + output_dir + '_sti.png', context_image)
    images[0] = np.transpose(context_image, [2, 0, 1])  # HWC -> CHW
    # Rescale uint8 [0, 255] to float [-1, 1]. NOTE: `input` shadows the
    # builtin of the same name within this function.
    input = images.astype(np.float32) / 255 * 2.0 - 1.0
    # Run encoder
    print("Start Diffusion.")
    sess.run([setter], {x: input})
    output = sess.run([wp, x_rec])
    output[1] = adjust_pixel_range(output[1])
    # NOTE(review): col_idx is incremented but never read — leftover from the
    # visualizer-based variant of this loop.
    col_idx = 4
    for step in tqdm(range(1, num_iterations + 1), leave=False):
        sess.run(train_op, {x: input})
        if step == num_iterations or step % save_interval == 0:
            output = sess.run([wp, x_rec])
            output[1] = adjust_pixel_range(output[1])
            # Only the final iteration's reconstruction is written to disk.
            if step == num_iterations:
                save_image(f'{output_dir}.png', output[1][0])
            col_idx += 1
    # NOTE(review): exit() terminates the whole interpreter, so this function
    # never returns to its caller — confirm this is intended and not debug
    # leftover.
    exit()
def invert(model_path, _image, _wp, _latent_shape):
  """Inverts a single image into the StyleGAN W+ latent space.

  Loads an (encoder E, generator Gs) pickle, initializes the shared latent
  variable ``_wp`` from the encoder's output, then refines it for a fixed
  number of Adam steps against a pixel + perceptual + encoder-domain loss.

  Args:
    model_path: Path to a pickle file unpacking as ``(E, _, _, Gs)``.
    _image: Path of the image to invert (read via ``load_image``).
    _wp: Existing ``tf.Variable`` holding the latent code to be optimized.
    _latent_shape: Shape the encoder output is reshaped to before assignment.

  Returns:
    The reconstructed image from the final optimization step, pixel range
    adjusted by ``adjust_pixel_range`` (NHWC array).
  """
  print("Inverting")
  tflib.init_tf({'rnd.np_random_seed': 1000})
  with open(model_path, 'rb') as f:
    # NOTE(review): pickle.load on a model file — only use trusted checkpoints.
    E, _, _, Gs = pickle.load(f)
  # Get input size (encoder expects square NCHW input).
  image_size = E.input_shape[2]
  assert image_size == E.input_shape[3]
  # Build graph.
  print("Inverting : Build Graph.")
  sess = tf.get_default_session()
  batch_size = 4
  # NOTE(review): this aliases E.input_shape (no copy), so the mutation below
  # also changes E.input_shape[0] — confirm that is intended.
  input_shape = E.input_shape
  input_shape[0] = batch_size  # default batch size
  x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
  # Convert NCHW in [-1, 1] to NHWC in [0, 255] for the perceptual model.
  x_255 = (tf.transpose(x, [0, 2, 3, 1]) + 1) / 2 * 255
  wp = _wp
  x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
  x_rec_255 = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) / 2 * 255
  # Encoder output provides the initialization for the latent variable.
  # NOTE(review): uses `phase=False` here, while the batch-inversion main()
  # calls the same encoder with `is_training=False` — verify which kwarg the
  # deployed encoder network actually accepts.
  w_enc = E.get_output_for(x, phase=False)
  wp_enc = tf.reshape(w_enc, _latent_shape)
  setter = tf.assign(wp, wp_enc)
  # Settings for optimization.
  print("Inverting : Settings for Optimization.")
  perceptual_model = PerceptualModel([image_size, image_size], False)
  x_feat = perceptual_model(x_255)
  x_rec_feat = perceptual_model(x_rec_255)
  # Perceptual (feature) loss, pixel loss, and encoder-domain regularizer.
  loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
  loss_pix = tf.reduce_mean(tf.square(x - x_rec), axis=[1, 2, 3])
  w_enc_new = E.get_output_for(x_rec, phase=False)
  wp_enc_new = tf.reshape(w_enc_new, _latent_shape)
  loss_enc = tf.reduce_mean(tf.square(wp - wp_enc_new), axis=[1, 2])
  # Hard-coded weights: 5e-5 (perceptual), 2.0 (domain regularizer).
  loss = (loss_pix + 5e-5 * loss_feat + 2.0 * loss_enc)
  optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
  train_op = optimizer.minimize(loss, var_list=[wp])
  tflib.init_uninitialized_vars()
  # Invert image
  print("Start Inverting.")
  num_iterations = 40
  num_results = 2
  save_interval = num_iterations // num_results
  # Only slot 0 of the batch buffer is filled; remaining slots stay zero.
  context_images = np.zeros(input_shape, np.uint8)
  context_image = resize_image(load_image(_image), (image_size, image_size))
  # Inverting Context Image.
  context_images[0] = np.transpose(context_image, [2, 0, 1])  # HWC -> CHW
  # Map uint8 [0, 255] to float [-1, 1] expected by the encoder.
  context_input = context_images.astype(np.float32) / 255 * 2.0 - 1.0
  sess.run([setter], {x: context_input})
  context_output = sess.run([wp, x_rec])
  context_output[1] = adjust_pixel_range(context_output[1])
  # Fallback value; overwritten by the final-step reconstruction below
  # whenever num_iterations >= 1.
  context_image = np.transpose(context_images[0], [1, 2, 0])
  for step in tqdm(range(1, num_iterations + 1), leave=False):
    sess.run(train_op, {x: context_input})
    if step == num_iterations or step % save_interval == 0:
      context_output = sess.run([wp, x_rec])
      context_output[1] = adjust_pixel_range(context_output[1])
      if step == num_iterations:
        context_image = context_output[1][0]
  return context_image
def main():
  """Inverts a list of real images into the StyleGAN latent space.

  Reads image paths from ``args.image_list``, initializes each latent code
  (randomly or from the encoder), optimizes it with Adam, and writes per-image
  originals/encodings/inversions, the stacked latent codes (``.npy``), and an
  HTML visualization page into the output directory.
  """
  args = parse_args()
  os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
  assert os.path.exists(args.image_list)
  image_list_name = os.path.splitext(os.path.basename(args.image_list))[0]
  output_dir = args.output_dir or f'results/inversion/{image_list_name}'
  logger = setup_logger(output_dir, 'inversion.log', 'inversion_logger')

  logger.info(f'Loading model.')
  tflib.init_tf({'rnd.np_random_seed': 1000})
  with open(args.model_path, 'rb') as f:
    # NOTE(review): pickle.load on a model file — only use trusted checkpoints.
    E, _, _, Gs = pickle.load(f)

  # Get input size (encoder expects square NCHW input).
  image_size = E.input_shape[2]
  assert image_size == E.input_shape[3]

  # Build graph.
  logger.info(f'Building graph.')
  sess = tf.get_default_session()
  # NOTE(review): aliases E.input_shape (no copy) — the assignment below also
  # mutates E.input_shape[0].
  input_shape = E.input_shape
  input_shape[0] = args.batch_size
  x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
  # Convert NCHW in [-1, 1] to NHWC in [0, 255] for the perceptual model.
  x_255 = (tf.transpose(x, [0, 2, 3, 1]) + 1) / 2 * 255
  latent_shape = Gs.components.synthesis.input_shape
  latent_shape[0] = args.batch_size
  # Trainable latent variable; `setter` re-initializes it per batch.
  wp = tf.get_variable(shape=latent_shape, name='latent_code')
  x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
  x_rec_255 = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) / 2 * 255
  if args.random_init:
    logger.info(f' Use random initialization for optimization.')
    wp_rnd = tf.random.normal(shape=latent_shape, name='latent_code_init')
    setter = tf.assign(wp, wp_rnd)
  else:
    logger.info(
        f' Use encoder output as the initialization for optimization.')
    w_enc = E.get_output_for(x, is_training=False)
    wp_enc = tf.reshape(w_enc, latent_shape)
    setter = tf.assign(wp, wp_enc)

  # Settings for optimization.
  logger.info(f'Setting configuration for optimization.')
  perceptual_model = PerceptualModel([image_size, image_size], False)
  x_feat = perceptual_model(x_255)
  x_rec_feat = perceptual_model(x_rec_255)
  # Perceptual (feature) loss and raw pixel loss, per sample.
  loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
  loss_pix = tf.reduce_mean(tf.square(x - x_rec), axis=[1, 2, 3])
  if args.domain_regularizer:
    logger.info(f' Involve encoder for optimization.')
    # Domain regularizer: keep the optimized code close to what the encoder
    # would produce for the current reconstruction.
    w_enc_new = E.get_output_for(x_rec, is_training=False)
    wp_enc_new = tf.reshape(w_enc_new, latent_shape)
    loss_enc = tf.reduce_mean(tf.square(wp - wp_enc_new), axis=[1, 2])
  else:
    logger.info(f' Do NOT involve encoder for optimization.')
    loss_enc = 0
  loss = (loss_pix + args.loss_weight_feat * loss_feat +
          args.loss_weight_enc * loss_enc)
  optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
  train_op = optimizer.minimize(loss, var_list=[wp])
  tflib.init_uninitialized_vars()

  # Load image list.
  logger.info(f'Loading image list.')
  image_list = []
  with open(args.image_list, 'r') as f:
    for line in f:
      image_list.append(line.strip())

  # Invert images.
  logger.info(f'Start inversion.')
  save_interval = args.num_iterations // args.num_results
  headers = ['Name', 'Original Image', 'Encoder Output']
  # One extra column per snapshot step (plus the final step).
  for step in range(1, args.num_iterations + 1):
    if step == args.num_iterations or step % save_interval == 0:
      headers.append(f'Step {step:06d}')
  viz_size = None if args.viz_size == 0 else args.viz_size
  visualizer = HtmlPageVisualizer(num_rows=len(image_list),
                                  num_cols=len(headers),
                                  viz_size=viz_size)
  visualizer.set_headers(headers)

  # Reusable batch buffers; only the first len(batch) slots are meaningful on
  # the last (possibly partial) batch.
  images = np.zeros(input_shape, np.uint8)
  names = ['' for _ in range(args.batch_size)]
  latent_codes_enc = []
  latent_codes = []
  for img_idx in tqdm(range(0, len(image_list), args.batch_size), leave=False):
    # Load inputs.
    batch = image_list[img_idx:img_idx + args.batch_size]
    for i, image_path in enumerate(batch):
      image = resize_image(load_image(image_path), (image_size, image_size))
      images[i] = np.transpose(image, [2, 0, 1])  # HWC -> CHW
      names[i] = os.path.splitext(os.path.basename(image_path))[0]
    # Map uint8 [0, 255] to float [-1, 1].
    inputs = images.astype(np.float32) / 255 * 2.0 - 1.0
    # Run encoder.
    sess.run([setter], {x: inputs})
    outputs = sess.run([wp, x_rec])
    latent_codes_enc.append(outputs[0][0:len(batch)])
    outputs[1] = adjust_pixel_range(outputs[1])
    for i, _ in enumerate(batch):
      image = np.transpose(images[i], [1, 2, 0])  # CHW -> HWC for saving
      save_image(f'{output_dir}/{names[i]}_ori.png', image)
      save_image(f'{output_dir}/{names[i]}_enc.png', outputs[1][i])
      visualizer.set_cell(i + img_idx, 0, text=names[i])
      visualizer.set_cell(i + img_idx, 1, image=image)
      visualizer.set_cell(i + img_idx, 2, image=outputs[1][i])
    # Optimize latent codes.
    col_idx = 3
    for step in tqdm(range(1, args.num_iterations + 1), leave=False):
      sess.run(train_op, {x: inputs})
      if step == args.num_iterations or step % save_interval == 0:
        outputs = sess.run([wp, x_rec])
        outputs[1] = adjust_pixel_range(outputs[1])
        for i, _ in enumerate(batch):
          if step == args.num_iterations:
            save_image(f'{output_dir}/{names[i]}_inv.png', outputs[1][i])
          visualizer.set_cell(i + img_idx, col_idx, image=outputs[1][i])
        col_idx += 1
    latent_codes.append(outputs[0][0:len(batch)])

  # Save results.
  os.system(f'cp {args.image_list} {output_dir}/image_list.txt')
  np.save(f'{output_dir}/encoded_codes.npy',
          np.concatenate(latent_codes_enc, axis=0))
  np.save(f'{output_dir}/inverted_codes.npy',
          np.concatenate(latent_codes, axis=0))
  visualizer.save(f'{output_dir}/inversion.html')
def main():
  """Interpolates between inverted latent codes of two image sets.

  Validates the two inversion-result directories, loads their image names and
  inverted codes, synthesizes the interpolation sequence for every
  (source, target) pair, and saves an HTML grid of the results.
  """
  args = parse_args()
  os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id

  def _check_inversion_dir(dir_path):
    """Asserts a directory holds inversion results; returns its base name."""
    assert os.path.exists(dir_path)
    assert os.path.exists(f'{dir_path}/image_list.txt')
    assert os.path.exists(f'{dir_path}/inverted_codes.npy')
    return os.path.basename(dir_path.rstrip('/'))

  def _load_names(dir_path):
    """Reads image names from the list file, checking each saved original."""
    names = []
    with open(f'{dir_path}/image_list.txt', 'r') as f:
      for line in f:
        name = os.path.splitext(os.path.basename(line.strip()))[0]
        assert os.path.exists(f'{dir_path}/{name}_ori.png')
        names.append(name)
    return names

  src_dir = args.src_dir
  src_dir_name = _check_inversion_dir(src_dir)
  dst_dir = args.dst_dir
  dst_dir_name = _check_inversion_dir(dst_dir)

  output_dir = args.output_dir or 'results/interpolation'
  job_name = f'{src_dir_name}_TO_{dst_dir_name}'
  logger = setup_logger(output_dir, f'{job_name}.log', f'{job_name}_logger')

  # Load model.
  logger.info('Loading generator.')
  generator = build_generator(args.model_name)

  # Load image names and corresponding inverted latent codes.
  logger.info('Loading images and corresponding inverted latent codes.')
  src_list = _load_names(src_dir)
  src_codes = np.load(f'{src_dir}/inverted_codes.npy')
  assert src_codes.shape[0] == len(src_list)
  num_src = src_codes.shape[0]
  dst_list = _load_names(dst_dir)
  dst_codes = np.load(f'{dst_dir}/inverted_codes.npy')
  assert dst_codes.shape[0] == len(dst_list)
  num_dst = dst_codes.shape[0]

  # Interpolate images.
  logger.info('Start interpolation.')
  # Two extra columns: the source and target inversions bracket the steps.
  step = args.step + 2
  viz_size = None if args.viz_size == 0 else args.viz_size
  visualizer = HtmlPageVisualizer(num_rows=num_src * num_dst,
                                  num_cols=step + 2,
                                  viz_size=viz_size)
  visualizer.set_headers(
      ['Source', 'Source Inversion'] +
      [f'Step {i:02d}' for i in range(1, step - 1)] +
      ['Target Inversion', 'Target'])

  for src_idx in tqdm(range(num_src), leave=False):
    src_code = src_codes[src_idx:src_idx + 1]
    src_path = f'{src_dir}/{src_list[src_idx]}_ori.png'
    # One interpolation path per target, sharing this source code.
    codes = interpolate(src_codes=np.repeat(src_code, num_dst, axis=0),
                        dst_codes=dst_codes,
                        step=step)
    for dst_idx in tqdm(range(num_dst), leave=False):
      dst_path = f'{dst_dir}/{dst_list[dst_idx]}_ori.png'
      output_images = generator.easy_synthesize(
          codes[dst_idx], latent_space_type='wp')['image']
      row_idx = src_idx * num_dst + dst_idx
      visualizer.set_cell(row_idx, 0, image=load_image(src_path))
      visualizer.set_cell(row_idx, step + 1, image=load_image(dst_path))
      for s, output_image in enumerate(output_images):
        visualizer.set_cell(row_idx, s + 1, image=output_image)

  # Save results.
  visualizer.save(f'{output_dir}/{job_name}.html')
def main():
  """Mixes styles between two sets of inverted latent codes.

  Loads inverted W+ codes for a "style" set and a "content" set, replaces the
  layers from ``args.mix_layer_start_idx`` onward in each content code with
  the style code (via ``mix_style``), synthesizes the mixed images in batches,
  and saves an HTML grid (styles as rows, contents as columns).
  """
  args = parse_args()
  os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
  style_dir = args.style_dir
  style_dir_name = os.path.basename(style_dir.rstrip('/'))
  assert os.path.exists(style_dir)
  assert os.path.exists(f'{style_dir}/image_list.txt')
  assert os.path.exists(f'{style_dir}/inverted_codes.npy')
  content_dir = args.content_dir
  content_dir_name = os.path.basename(content_dir.rstrip('/'))
  assert os.path.exists(content_dir)
  assert os.path.exists(f'{content_dir}/image_list.txt')
  assert os.path.exists(f'{content_dir}/inverted_codes.npy')
  output_dir = args.output_dir or 'results/style_mixing'
  job_name = f'{style_dir_name}_STYLIZE_{content_dir_name}'
  logger = setup_logger(output_dir, f'{job_name}.log', f'{job_name}_logger')

  # Load model.
  logger.info(f'Loading generator.')
  tflib.init_tf({'rnd.np_random_seed': 1000})
  with open(args.model_path, 'rb') as f:
    # NOTE(review): pickle.load on a model file — only use trusted checkpoints.
    _, _, _, Gs = pickle.load(f)

  # Build graph.
  logger.info(f'Building graph.')
  sess = tf.get_default_session()
  # W+ space shape: (num_layers, latent_dim) per sample.
  num_layers, latent_dim = Gs.components.synthesis.input_shape[1:3]
  wp = tf.placeholder(
      tf.float32, [args.batch_size, num_layers, latent_dim],
      name='latent_code')
  x = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
  # Layers whose codes are taken from the style image.
  mix_layers = list(range(args.mix_layer_start_idx, num_layers))

  # Load image and codes.
  logger.info(f'Loading images and corresponding inverted latent codes.')
  style_list = []
  with open(f'{style_dir}/image_list.txt', 'r') as f:
    for line in f:
      name = os.path.splitext(os.path.basename(line.strip()))[0]
      assert os.path.exists(f'{style_dir}/{name}_ori.png')
      style_list.append(name)
  logger.info(f'Loading inverted latent codes.')
  style_codes = np.load(f'{style_dir}/inverted_codes.npy')
  assert style_codes.shape[0] == len(style_list)
  num_styles = style_codes.shape[0]
  content_list = []
  with open(f'{content_dir}/image_list.txt', 'r') as f:
    for line in f:
      name = os.path.splitext(os.path.basename(line.strip()))[0]
      assert os.path.exists(f'{content_dir}/{name}_ori.png')
      content_list.append(name)
  logger.info(f'Loading inverted latent codes.')
  content_codes = np.load(f'{content_dir}/inverted_codes.npy')
  assert content_codes.shape[0] == len(content_list)
  num_contents = content_codes.shape[0]

  # Mix styles.
  logger.info(f'Start style mixing.')
  viz_size = None if args.viz_size == 0 else args.viz_size
  # Row/column 0 hold the original style/content images; hence the +1 sizes.
  visualizer = HtmlPageVisualizer(
      num_rows=num_styles + 1, num_cols=num_contents + 1, viz_size=viz_size)
  visualizer.set_headers(
      ['Style'] + [f'Content {i:03d}' for i in range(num_contents)]
  )
  for style_idx, style_name in enumerate(style_list):
    style_image = load_image(f'{style_dir}/{style_name}_ori.png')
    visualizer.set_cell(style_idx + 1, 0, image=style_image)
  for content_idx, content_name in enumerate(content_list):
    content_image = load_image(f'{content_dir}/{content_name}_ori.png')
    visualizer.set_cell(0, content_idx + 1, image=content_image)

  codes = mix_style(style_codes=style_codes,
                    content_codes=content_codes,
                    num_layers=num_layers,
                    mix_layers=mix_layers)
  # Reusable batch buffer; a partial last batch leaves stale values in the
  # unused tail slots, which are discarded by the [0:len(batch)] slice below.
  inputs = np.zeros((args.batch_size, num_layers, latent_dim), np.float32)
  for style_idx in tqdm(range(num_styles), leave=False):
    output_images = []
    for idx in range(0, num_contents, args.batch_size):
      batch = codes[style_idx, idx:idx + args.batch_size]
      inputs[0:len(batch)] = batch
      images = sess.run(x, feed_dict={wp: inputs})
      output_images.append(images[0:len(batch)])
    output_images = adjust_pixel_range(np.concatenate(output_images, axis=0))
    for content_idx, output_image in enumerate(output_images):
      visualizer.set_cell(style_idx + 1, content_idx + 1, image=output_image)

  # Save results.
  visualizer.save(f'{output_dir}/{job_name}.html')
def main():
  """Extracts GH-Feat features from a list of images.

  Clones the pre-trained synthesis weights into a style-modulation generator,
  runs the encoder to produce GH-Feat for each image, reconstructs images from
  the features, and saves per-image outputs, the stacked features (``.npy``),
  and an HTML reconstruction page.
  """
  args = parse_args()
  os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
  assert os.path.exists(args.image_list)
  image_list_name = os.path.splitext(os.path.basename(args.image_list))[0]
  output_dir = args.output_dir or f'results/ghfeat/{image_list_name}'
  logger = setup_logger(output_dir, 'extract_feature.log', 'inversion_logger')

  logger.info(f'Loading model.')
  tflib.init_tf({'rnd.np_random_seed': 1000})
  with open(args.model_path, 'rb') as f:
    # NOTE(review): pickle.load on a model file — only use trusted checkpoints.
    E, _, _, Gs = pickle.load(f)

  # Get input size (encoder expects square NCHW input).
  image_size = E.input_shape[2]
  assert image_size == E.input_shape[3]
  # Build a fresh style-modulated synthesis network and copy the pre-trained
  # synthesis weights into it (names are assumed to match one-to-one).
  G_args = EasyDict(func_name='training.networks_stylegan.G_synthesis')
  G_style_mod = tflib.Network('G_StyleMod', resolution=image_size,
                              label_size=0, **G_args)
  Gs_vars_pairs = {
      name: tflib.run(val)
      for name, val in Gs.components.synthesis.vars.items()
  }
  for g_name, g_val in G_style_mod.vars.items():
    tflib.set_vars({g_val: Gs_vars_pairs[g_name]})

  # Build graph.
  logger.info(f'Building graph.')
  sess = tf.get_default_session()
  # NOTE(review): aliases E.input_shape (no copy) — the assignment below also
  # mutates E.input_shape[0].
  input_shape = E.input_shape
  input_shape[0] = args.batch_size
  x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
  ghfeat = E.get_output_for(x, is_training=False)
  x_rec = G_style_mod.get_output_for(ghfeat, randomize_noise=False)

  # Load image list.
  logger.info(f'Loading image list.')
  image_list = []
  with open(args.image_list, 'r') as f:
    for line in f:
      image_list.append(line.strip())

  # Extract GH-Feat from images.
  logger.info(f'Start feature extraction.')
  headers = ['Name', 'Original Image', 'Encoder Output']
  viz_size = None if args.viz_size == 0 else args.viz_size
  visualizer = HtmlPageVisualizer(num_rows=len(image_list),
                                  num_cols=len(headers),
                                  viz_size=viz_size)
  visualizer.set_headers(headers)

  # Reusable batch buffers; only the first len(batch) slots are meaningful on
  # the last (possibly partial) batch.
  images = np.zeros(input_shape, np.uint8)
  names = ['' for _ in range(args.batch_size)]
  features = []
  for img_idx in tqdm(range(0, len(image_list), args.batch_size), leave=False):
    # Load inputs.
    batch = image_list[img_idx:img_idx + args.batch_size]
    for i, image_path in enumerate(batch):
      image = resize_image(load_image(image_path), (image_size, image_size))
      images[i] = np.transpose(image, [2, 0, 1])  # HWC -> CHW
      names[i] = os.path.splitext(os.path.basename(image_path))[0]
    # Map uint8 [0, 255] to float [-1, 1].
    inputs = images.astype(np.float32) / 255 * 2.0 - 1.0
    # Run encoder.
    outputs = sess.run([ghfeat, x_rec], {x: inputs})
    features.append(outputs[0][0:len(batch)])
    outputs[1] = adjust_pixel_range(outputs[1])
    for i, _ in enumerate(batch):
      image = np.transpose(images[i], [1, 2, 0])  # CHW -> HWC for saving
      save_image(f'{output_dir}/{names[i]}_ori.png', image)
      save_image(f'{output_dir}/{names[i]}_enc.png', outputs[1][i])
      visualizer.set_cell(i + img_idx, 0, text=names[i])
      visualizer.set_cell(i + img_idx, 1, image=image)
      visualizer.set_cell(i + img_idx, 2, image=outputs[1][i])

  # Save results.
  os.system(f'cp {args.image_list} {output_dir}/image_list.txt')
  np.save(f'{output_dir}/ghfeat.npy', np.concatenate(features, axis=0))
  visualizer.save(f'{output_dir}/reconstruction.html')