def main():
    """Streamlit demo: invert the uploaded image with StyleGANInverter.

    NOTE(review): reads module-level globals (model_name, mode, ini_lr,
    step, lambda_l2, lambda_feat, lambda_enc, lambda_clip, description,
    uploaded_file) -- presumably set by the surrounding Streamlit script;
    confirm against the caller.
    """
    inverter = StyleGANInverter(model_name,
                                mode=mode,
                                learning_rate=ini_lr,
                                iteration=step,
                                reconstruction_loss_weight=lambda_l2,
                                perceptual_loss_weight=lambda_feat,
                                regularization_loss_weight=lambda_enc,
                                clip_loss_weight=lambda_clip,
                                description=description)
    image_size = inverter.G.resolution
    # Tokenized text for CLIP guidance; assigned but not referenced again
    # in this function body.
    text_inputs = torch.cat([clip.tokenize(description)]).cuda()

    # Invert images.
    if uploaded_file is not None:
        image = Image.open(uploaded_file)
        st.write("Just a second...")
        image = resize_image(np.array(image), (image_size, image_size))
        # Positional second argument is presumably num_viz -- keep a single
        # intermediate visualization (other call sites pass it by keyword).
        _, viz_results = inverter.easy_invert(image, 1)
        if mode == 'man':
            # Manipulation mode: input image next to the final inversion.
            final_result = np.hstack([image, viz_results[-1]])
        else:
            # Otherwise: encoder output next to the final inversion.
            final_result = np.hstack([viz_results[1], viz_results[-1]])
        # NOTE(review): st.beta_container() was renamed st.container() in
        # later Streamlit releases -- confirm the pinned version.
        with st.beta_container():
            st.image(final_result, use_column_width=True)
def main():
    """Invert a single image with StyleGANInverter and save the results.

    Parses CLI arguments, builds the inverter, inverts ``args.image_path``,
    and writes the encoder output (``*_enc.png``) and the final inversion
    (``*_inv.png``) into the output directory.
    """
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    # Raise instead of `assert`: asserts are stripped under `python -O`,
    # and a named exception is clearer for a bad CLI path.
    if not os.path.isfile(args.image_path):
        raise FileNotFoundError(f'Input image not found: {args.image_path}')
    output_dir = args.output_dir or 'results/inversion/test'
    # exist_ok avoids the check-then-create race of the previous
    # `if not exists: makedirs` pattern.
    os.makedirs(output_dir, exist_ok=True)

    inverter = StyleGANInverter(
        args.model_name,
        mode=args.mode,
        learning_rate=args.learning_rate,
        iteration=args.num_iterations,
        reconstruction_loss_weight=1.0,
        perceptual_loss_weight=args.loss_weight_feat,
        regularization_loss_weight=args.loss_weight_enc,
        clip_loss_weight=args.loss_weight_clip,
        description=args.description,
        logger=None)
    image_size = inverter.G.resolution

    # Invert the given image.
    image = resize_image(load_image(args.image_path), (image_size, image_size))
    _, viz_results = inverter.easy_invert(image, num_viz=args.num_results)

    # Manipulation mode keeps the input image's base name; otherwise the
    # output is treated as a generated sample.
    if args.mode == 'man':
        image_name = os.path.splitext(os.path.basename(args.image_path))[0]
    else:
        image_name = 'gen'
    save_image(f'{output_dir}/{image_name}_enc.png', viz_results[1])
    save_image(f'{output_dir}/{image_name}_inv.png', viz_results[-1])
    print(f'save {image_name} in {output_dir}')
def main():
    """Invert every image in ``args.image_list``; save codes and an HTML page.

    For each listed image: runs ``StyleGANInverter.easy_invert``, saves the
    original, the raw encoder output, and the final inversion, and fills one
    row of an HTML visualization. All latent codes are stacked into
    ``inverted_codes.npy``.
    """
    import shutil  # stdlib; local import keeps this fix self-contained

    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    # Raise instead of `assert` (asserts vanish under `python -O`).
    if not os.path.exists(args.image_list):
        raise FileNotFoundError(f'Image list not found: {args.image_list}')
    image_list_name = os.path.splitext(os.path.basename(args.image_list))[0]
    output_dir = args.output_dir or f'results/inversion/{image_list_name}'
    logger = setup_logger(output_dir, 'inversion.log', 'inversion_logger')

    logger.info('Loading model.')
    inverter = StyleGANInverter(
        args.model_name,
        learning_rate=args.learning_rate,
        iteration=args.num_iterations,
        reconstruction_loss_weight=1.0,
        perceptual_loss_weight=args.loss_weight_feat,
        regularization_loss_weight=args.loss_weight_enc,
        logger=logger)
    image_size = inverter.G.resolution

    # Load image list (one path per line).
    logger.info('Loading image list.')
    with open(args.image_list, 'r') as f:
        image_list = [line.strip() for line in f]

    # Initialize visualizer: one column per saved optimization snapshot.
    save_interval = args.num_iterations // args.num_results
    headers = ['Name', 'Original Image', 'Encoder Output']
    for step in range(1, args.num_iterations + 1):
        if step == args.num_iterations or step % save_interval == 0:
            headers.append(f'Step {step:06d}')
    viz_size = None if args.viz_size == 0 else args.viz_size
    visualizer = HtmlPageVisualizer(
        num_rows=len(image_list), num_cols=len(headers), viz_size=viz_size)
    visualizer.set_headers(headers)

    # Invert images.
    logger.info('Start inversion.')
    latent_codes = []
    for img_idx in tqdm(range(len(image_list)), leave=False):
        image_path = image_list[img_idx]
        image_name = os.path.splitext(os.path.basename(image_path))[0]
        image = resize_image(load_image(image_path), (image_size, image_size))
        code, viz_results = inverter.easy_invert(image,
                                                 num_viz=args.num_results)
        latent_codes.append(code)
        save_image(f'{output_dir}/{image_name}_ori.png', image)
        save_image(f'{output_dir}/{image_name}_enc.png', viz_results[1])
        save_image(f'{output_dir}/{image_name}_inv.png', viz_results[-1])
        visualizer.set_cell(img_idx, 0, text=image_name)
        visualizer.set_cell(img_idx, 1, image=image)
        for viz_idx, viz_img in enumerate(viz_results[1:]):
            visualizer.set_cell(img_idx, viz_idx + 2, image=viz_img)

    # Save results. shutil.copy replaces `os.system('cp ...')`: portable
    # (works on Windows) and immune to shell metacharacters in paths.
    shutil.copy(args.image_list, f'{output_dir}/image_list.txt')
    np.save(f'{output_dir}/inverted_codes.npy',
            np.concatenate(latent_codes, axis=0))
    visualizer.save(f'{output_dir}/inversion.html')
def predict(self, image, description):
    """Invert *image* under CLIP guidance from *description*.

    Stores both inputs on ``self.args``, builds a ``StyleGANInverter`` from
    those settings, runs the inversion, and returns the path of the final
    result image written to a fresh temporary directory.
    """
    self.args.description = description
    self.args.image_path = image
    opts = self.args

    engine = StyleGANInverter(
        opts.model_name,
        mode=opts.mode,
        learning_rate=opts.learning_rate,
        iteration=opts.num_iterations,
        reconstruction_loss_weight=1.0,
        perceptual_loss_weight=opts.loss_weight_feat,
        regularization_loss_weight=opts.loss_weight_enc,
        clip_loss_weight=opts.loss_weight_clip,
        description=opts.description,
        logger=None,
    )
    resolution = engine.G.resolution

    # Square-resize the input to the generator's resolution, then invert.
    target = resize_image(
        load_image(str(opts.image_path)), (resolution, resolution)
    )
    _, frames = engine.easy_invert(target, num_viz=opts.num_results)

    # The last visualization frame is the final inversion result.
    result_file = Path(tempfile.mkdtemp()) / "out.png"
    save_image(str(result_file), frames[-1])
    return result_file
def load_test_images(dir, image_size=128):
    """Load and square-resize all test images found under *dir*.

    Args:
        dir: Directory scanned by ``get_test_images_paths`` (parameter name
            kept for backward compatibility even though it shadows the
            builtin).
        image_size: Target side length in pixels (default 128).

    Returns:
        List of resized images, in the order returned by
        ``get_test_images_paths``.
    """
    # Comprehension replaces the manual append loop whose enumerate index
    # was never used.
    return [resize_image(load_image(path), (image_size, image_size))
            for path in get_test_images_paths(dir)]
def main():
    """Invert a list of images with a pre-trained StyleGAN encoder (TF1).

    Loads a pickled (E, G) model pair, builds an optimization graph that
    refines the encoder's latent estimate under pixel + perceptual
    (+ optional encoder-regularizer) losses, and writes per-image results
    plus an HTML summary page to the output directory.
    """
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    assert os.path.exists(args.image_list)
    image_list_name = os.path.splitext(os.path.basename(args.image_list))[0]
    output_dir = args.output_dir or f'results/inversion/{image_list_name}'
    logger = setup_logger(output_dir, 'inversion.log', 'inversion_logger')

    logger.info(f'Loading model.')
    tflib.init_tf({'rnd.np_random_seed': 1000})
    with open(args.model_path, 'rb') as f:
        # NOTE(review): pickle.load on a model file -- only load trusted
        # checkpoints.
        E, _, _, Gs = pickle.load(f)

    # Get input size.
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]

    # Build graph.
    logger.info(f'Building graph.')
    sess = tf.get_default_session()
    input_shape = E.input_shape
    input_shape[0] = args.batch_size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    # Map images from [-1, 1] NCHW to [0, 255] NHWC for the perceptual net.
    x_255 = (tf.transpose(x, [0, 2, 3, 1]) + 1) / 2 * 255
    latent_shape = Gs.components.synthesis.input_shape
    latent_shape[0] = args.batch_size
    # `wp` is the latent code being optimized (one code per batch item).
    wp = tf.get_variable(shape=latent_shape, name='latent_code')
    x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    x_rec_255 = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) / 2 * 255
    if args.random_init:
        logger.info(f' Use random initialization for optimization.')
        wp_rnd = tf.random.normal(shape=latent_shape, name='latent_code_init')
        setter = tf.assign(wp, wp_rnd)
    else:
        logger.info(
            f' Use encoder output as the initialization for optimization.')
        # Encoder output seeds the optimization.
        w_enc = E.get_output_for(x, is_training=False)
        wp_enc = tf.reshape(w_enc, latent_shape)
        setter = tf.assign(wp, wp_enc)

    # Settings for optimization.
    logger.info(f'Setting configuration for optimization.')
    perceptual_model = PerceptualModel([image_size, image_size], False)
    x_feat = perceptual_model(x_255)
    x_rec_feat = perceptual_model(x_rec_255)
    loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
    loss_pix = tf.reduce_mean(tf.square(x - x_rec), axis=[1, 2, 3])
    if args.domain_regularizer:
        logger.info(f' Involve encoder for optimization.')
        # Domain regularizer: keep the optimized code close to what the
        # encoder predicts for its own reconstruction.
        w_enc_new = E.get_output_for(x_rec, is_training=False)
        wp_enc_new = tf.reshape(w_enc_new, latent_shape)
        loss_enc = tf.reduce_mean(tf.square(wp - wp_enc_new), axis=[1, 2])
    else:
        logger.info(f' Do NOT involve encoder for optimization.')
        loss_enc = 0
    loss = (loss_pix + args.loss_weight_feat * loss_feat +
            args.loss_weight_enc * loss_enc)
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    # Only the latent code is trainable; model weights stay frozen.
    train_op = optimizer.minimize(loss, var_list=[wp])
    tflib.init_uninitialized_vars()

    # Load image list.
    logger.info(f'Loading image list.')
    image_list = []
    with open(args.image_list, 'r') as f:
        for line in f:
            image_list.append(line.strip())

    # Invert images.
    logger.info(f'Start inversion.')
    save_interval = args.num_iterations // args.num_results
    headers = ['Name', 'Original Image', 'Encoder Output']
    for step in range(1, args.num_iterations + 1):
        if step == args.num_iterations or step % save_interval == 0:
            headers.append(f'Step {step:06d}')
    viz_size = None if args.viz_size == 0 else args.viz_size
    visualizer = HtmlPageVisualizer(num_rows=len(image_list),
                                    num_cols=len(headers),
                                    viz_size=viz_size)
    visualizer.set_headers(headers)

    images = np.zeros(input_shape, np.uint8)
    names = ['' for _ in range(args.batch_size)]
    latent_codes_enc = []
    latent_codes = []
    for img_idx in tqdm(range(0, len(image_list), args.batch_size),
                        leave=False):
        # Load inputs.
        batch = image_list[img_idx:img_idx + args.batch_size]
        for i, image_path in enumerate(batch):
            image = resize_image(load_image(image_path),
                                 (image_size, image_size))
            images[i] = np.transpose(image, [2, 0, 1])
            names[i] = os.path.splitext(os.path.basename(image_path))[0]
        # Scale uint8 [0, 255] to float [-1, 1] for the network.
        inputs = images.astype(np.float32) / 255 * 2.0 - 1.0
        # Run encoder.
        sess.run([setter], {x: inputs})
        outputs = sess.run([wp, x_rec])
        # Keep only the first len(batch) rows; the final (short) batch may
        # not fill the fixed-size buffer.
        latent_codes_enc.append(outputs[0][0:len(batch)])
        outputs[1] = adjust_pixel_range(outputs[1])
        for i, _ in enumerate(batch):
            image = np.transpose(images[i], [1, 2, 0])
            save_image(f'{output_dir}/{names[i]}_ori.png', image)
            save_image(f'{output_dir}/{names[i]}_enc.png', outputs[1][i])
            visualizer.set_cell(i + img_idx, 0, text=names[i])
            visualizer.set_cell(i + img_idx, 1, image=image)
            visualizer.set_cell(i + img_idx, 2, image=outputs[1][i])
        # Optimize latent codes.
        col_idx = 3
        for step in tqdm(range(1, num_iterations + 1)
                         if False else range(1, args.num_iterations + 1),
                         leave=False):
            sess.run(train_op, {x: inputs})
            if step == args.num_iterations or step % save_interval == 0:
                outputs = sess.run([wp, x_rec])
                outputs[1] = adjust_pixel_range(outputs[1])
                for i, _ in enumerate(batch):
                    if step == args.num_iterations:
                        save_image(f'{output_dir}/{names[i]}_inv.png',
                                   outputs[1][i])
                    visualizer.set_cell(i + img_idx, col_idx,
                                        image=outputs[1][i])
                col_idx += 1
        latent_codes.append(outputs[0][0:len(batch)])

    # Save results.
    # NOTE(review): os.system('cp ...') is shell-dependent; consider
    # shutil.copy if this script must run outside POSIX shells.
    os.system(f'cp {args.image_list} {output_dir}/image_list.txt')
    np.save(f'{output_dir}/encoded_codes.npy',
            np.concatenate(latent_codes_enc, axis=0))
    np.save(f'{output_dir}/inverted_codes.npy',
            np.concatenate(latent_codes, axis=0))
    visualizer.save(f'{output_dir}/inversion.html')
def main():
    """Diffuse target crops into context images via TF1 latent optimization.

    Loads a pickled (E, G) pair, stitches the center crop of each target
    image into each context image, initializes the latent code from the
    encoder, then optimizes it under a crop-masked pixel + perceptual loss.
    """
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    assert os.path.exists(args.target_list)
    target_list_name = os.path.splitext(os.path.basename(args.target_list))[0]
    assert os.path.exists(args.context_list)
    context_list_name = os.path.splitext(os.path.basename(
        args.context_list))[0]
    output_dir = args.output_dir or f'results/diffusion'
    job_name = f'{target_list_name}_TO_{context_list_name}'
    logger = setup_logger(output_dir, f'{job_name}.log', f'{job_name}_logger')

    logger.info(f'Loading model.')
    tflib.init_tf({'rnd.np_random_seed': 1000})
    with open(args.model_path, 'rb') as f:
        # NOTE(review): pickle.load -- only load trusted checkpoints.
        E, _, _, Gs = pickle.load(f)

    # Get input size.
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]
    # Binary mask selecting the crop_size x crop_size region centered at
    # (center_x, center_y); all losses are restricted to this region.
    crop_size = args.crop_size
    crop_x = args.center_x - crop_size // 2
    crop_y = args.center_y - crop_size // 2
    mask = np.zeros((1, image_size, image_size, 3), dtype=np.float32)
    mask[:, crop_y:crop_y + crop_size, crop_x:crop_x + crop_size, :] = 1.0

    # Build graph.
    logger.info(f'Building graph.')
    sess = tf.get_default_session()
    input_shape = E.input_shape
    input_shape[0] = args.batch_size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    # Masked images in NHWC; the *_255 variants feed the perceptual net.
    x_mask = (tf.transpose(x, [0, 2, 3, 1]) + 1) * mask - 1
    x_mask_255 = (x_mask + 1) / 2 * 255
    latent_shape = Gs.components.synthesis.input_shape
    latent_shape[0] = args.batch_size
    wp = tf.get_variable(shape=latent_shape, name='latent_code')
    x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    x_rec_mask = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) * mask - 1
    x_rec_mask_255 = (x_rec_mask + 1) / 2 * 255
    # Encoder output seeds the optimization.
    w_enc = E.get_output_for(x, phase=False)
    wp_enc = tf.reshape(w_enc, latent_shape)
    setter = tf.assign(wp, wp_enc)

    # Settings for optimization.
    logger.info(f'Setting configuration for optimization.')
    perceptual_model = PerceptualModel([image_size, image_size], False)
    x_feat = perceptual_model(x_mask_255)
    x_rec_feat = perceptual_model(x_rec_mask_255)
    loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
    loss_pix = tf.reduce_mean(tf.square(x_mask - x_rec_mask), axis=[1, 2, 3])
    loss = loss_pix + args.loss_weight_feat * loss_feat
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    # Only the latent code is trainable; model weights stay frozen.
    train_op = optimizer.minimize(loss, var_list=[wp])
    tflib.init_uninitialized_vars()

    # Load image list.
    logger.info(f'Loading target images and context images.')
    target_list = []
    with open(args.target_list, 'r') as f:
        for line in f:
            target_list.append(line.strip())
    num_targets = len(target_list)
    context_list = []
    with open(args.context_list, 'r') as f:
        for line in f:
            context_list.append(line.strip())
    num_contexts = len(context_list)
    num_pairs = num_targets * num_contexts

    # Invert images.
    logger.info(f'Start diffusion.')
    save_interval = args.num_iterations // args.num_results
    headers = [
        'Target Image', 'Context Image', 'Stitched Image', 'Encoder Output'
    ]
    for step in range(1, args.num_iterations + 1):
        if step == args.num_iterations or step % save_interval == 0:
            headers.append(f'Step {step:06d}')
    viz_size = None if args.viz_size == 0 else args.viz_size
    visualizer = HtmlPageVisualizer(num_rows=num_pairs,
                                    num_cols=len(headers),
                                    viz_size=viz_size)
    visualizer.set_headers(headers)

    images = np.zeros(input_shape, np.uint8)
    latent_codes_enc = []
    latent_codes = []
    for target_idx in tqdm(range(num_targets), desc='Target ID', leave=False):
        # Load target.
        target_image = resize_image(load_image(target_list[target_idx]),
                                    (image_size, image_size))
        visualizer.set_cell(target_idx * num_contexts, 0, image=target_image)
        for context_idx in tqdm(range(0, num_contexts, args.batch_size),
                                desc='Context ID', leave=False):
            row_idx = target_idx * num_contexts + context_idx
            batch = context_list[context_idx:context_idx + args.batch_size]
            for i, context_image_path in enumerate(batch):
                context_image = resize_image(load_image(context_image_path),
                                             (image_size, image_size))
                visualizer.set_cell(row_idx + i, 1, image=context_image)
                # Stitch the target's crop region into the context image.
                context_image[crop_y:crop_y + crop_size,
                              crop_x:crop_x + crop_size] = (
                    target_image[crop_y:crop_y + crop_size,
                                 crop_x:crop_x + crop_size])
                visualizer.set_cell(row_idx + i, 2, image=context_image)
                images[i] = np.transpose(context_image, [2, 0, 1])
            # Scale uint8 [0, 255] to float [-1, 1] for the network.
            inputs = images.astype(np.float32) / 255 * 2.0 - 1.0
            # Run encoder.
            sess.run([setter], {x: inputs})
            outputs = sess.run([wp, x_rec])
            # Keep only the first len(batch) rows; a final short batch may
            # not fill the fixed-size buffer.
            latent_codes_enc.append(outputs[0][0:len(batch)])
            outputs[1] = adjust_pixel_range(outputs[1])
            for i, _ in enumerate(batch):
                visualizer.set_cell(row_idx + i, 3, image=outputs[1][i])
            # Optimize latent codes.
            col_idx = 4
            for step in tqdm(range(1, args.num_iterations + 1), leave=False):
                sess.run(train_op, {x: inputs})
                if step == args.num_iterations or step % save_interval == 0:
                    outputs = sess.run([wp, x_rec])
                    outputs[1] = adjust_pixel_range(outputs[1])
                    for i, _ in enumerate(batch):
                        visualizer.set_cell(row_idx + i, col_idx,
                                            image=outputs[1][i])
                    col_idx += 1
            latent_codes.append(outputs[0][0:len(batch)])

    # Save results.
    code_shape = [num_targets, num_contexts] + list(latent_shape[1:])
    np.save(f'{output_dir}/{job_name}_encoded_codes.npy',
            np.concatenate(latent_codes_enc, axis=0).reshape(code_shape))
    np.save(f'{output_dir}/{job_name}_inverted_codes.npy',
            np.concatenate(latent_codes, axis=0).reshape(code_shape))
    visualizer.save(f'{output_dir}/{job_name}.html')
# Load the predictor predictor = dlib.shape_predictor("./shape_predictor_68_face_landmarks.dat") # Convert image into grayscale gray = cv2.cvtColor(src=img, code=cv2.COLOR_BGR2GRAY) # Use detector to find landmarks faces = detector(gray) points = np.arange(54).reshape(1, 27, 2) for face in faces: x1 = face.left() # left point y1 = face.top() # top point x2 = face.right() # right point y2 = face.bottom() # bottom point # Create landmark object landmarks = predictor(image=gray, box=face) # Loop through all the points cnt = 0 for n in range(0, 27): x = landmarks.part(n).x y = landmarks.part(n).y if n > 16: n = 26 - cnt cnt += 1 points[0][n] = [x, y] return points if __name__=="__main__": img = img = resize_image(cv2.imread("/Users/taehoonlee/Desktop/align/100it.png"), (256, 256)) feature(img)
def main():
    """Diffuse target images into context images via latent-code inversion.

    For every (target, context) pair: the target's center region is blended
    into the context image by ``StyleGANInverter.easy_diffuse``, the
    resulting latent codes are stacked into ``*_inverted_codes.npy``, and an
    HTML page summarizes every step.
    """
    import shutil  # stdlib; local import keeps this fix self-contained

    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    # Raise instead of `assert` (asserts vanish under `python -O`).
    if not os.path.exists(args.target_list):
        raise FileNotFoundError(f'Target list not found: {args.target_list}')
    target_list_name = os.path.splitext(os.path.basename(args.target_list))[0]
    if not os.path.exists(args.context_list):
        raise FileNotFoundError(
            f'Context list not found: {args.context_list}')
    context_list_name = os.path.splitext(
        os.path.basename(args.context_list))[0]
    output_dir = args.output_dir or 'results/diffusion'
    job_name = f'{target_list_name}_TO_{context_list_name}'
    logger = setup_logger(output_dir, f'{job_name}.log', f'{job_name}_logger')

    logger.info('Loading model.')
    inverter = StyleGANInverter(
        args.model_name,
        learning_rate=args.learning_rate,
        iteration=args.num_iterations,
        reconstruction_loss_weight=1.0,
        perceptual_loss_weight=args.loss_weight_feat,
        regularization_loss_weight=0.0,
        logger=logger)
    image_size = inverter.G.resolution

    # Load image lists (one path per line).
    logger.info('Loading target images and context images.')
    with open(args.target_list, 'r') as f:
        target_list = [line.strip() for line in f]
    num_targets = len(target_list)
    with open(args.context_list, 'r') as f:
        context_list = [line.strip() for line in f]
    num_contexts = len(context_list)
    num_pairs = num_targets * num_contexts

    # Initialize visualizer: one row per (target, context) pair.
    save_interval = args.num_iterations // args.num_results
    headers = ['Target Image', 'Context Image', 'Stitched Image',
               'Encoder Output']
    for step in range(1, args.num_iterations + 1):
        if step == args.num_iterations or step % save_interval == 0:
            headers.append(f'Step {step:06d}')
    viz_size = None if args.viz_size == 0 else args.viz_size
    visualizer = HtmlPageVisualizer(
        num_rows=num_pairs, num_cols=len(headers), viz_size=viz_size)
    visualizer.set_headers(headers)

    # Diffuse images.
    logger.info('Start diffusion.')
    latent_codes = []
    for target_idx in tqdm(range(num_targets), desc='Target ID', leave=False):
        # Load target.
        target_image = resize_image(load_image(target_list[target_idx]),
                                    (image_size, image_size))
        visualizer.set_cell(target_idx * num_contexts, 0, image=target_image)
        for context_idx in tqdm(range(num_contexts), desc='Context ID',
                                leave=False):
            row_idx = target_idx * num_contexts + context_idx
            context_image = resize_image(
                load_image(context_list[context_idx]),
                (image_size, image_size))
            visualizer.set_cell(row_idx, 1, image=context_image)
            # NOTE(review): crop_x/crop_y receive args.crop_size (a size,
            # not a coordinate) -- kept as in the original call; confirm
            # against easy_diffuse's signature.
            code, viz_results = inverter.easy_diffuse(
                target=target_image,
                context=context_image,
                center_x=args.center_x,
                center_y=args.center_y,
                crop_x=args.crop_size,
                crop_y=args.crop_size,
                num_viz=args.num_results)
            for viz_idx, viz_img in enumerate(viz_results):
                visualizer.set_cell(row_idx, viz_idx + 2, image=viz_img)
            latent_codes.append(code)

    # Save results. shutil.copy replaces `os.system('cp ...')`: portable
    # and immune to shell metacharacters in paths.
    shutil.copy(args.target_list, f'{output_dir}/target_list.txt')
    shutil.copy(args.context_list, f'{output_dir}/context_list.txt')
    np.save(f'{output_dir}/{job_name}_inverted_codes.npy',
            np.concatenate(latent_codes, axis=0))
    visualizer.save(f'{output_dir}/{job_name}.html')
def encode(_target_image, _context_image, _output_dir):
    """Stitch a cropped target face into a context image and diffuse it.

    Builds a TF1 diffusion graph with hard-coded hyperparameters, runs the
    encoder for initialization, optimizes the latent code under a
    crop-masked pixel + perceptual loss, and writes intermediate and final
    images under ./static/.

    NOTE(review): this function ends with `exit()`, which terminates the
    whole process -- presumably intentional for the web-demo worker;
    confirm before reuse.
    """
    gpu_id = '0'
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id
    print(_target_image)
    assert os.path.exists('./static/' + _target_image)
    # Strip the file extension (assumes a 4-char suffix such as '.png' --
    # TODO confirm upstream filenames).
    _output_dir = _output_dir[:-4]
    output_dir = './static/' + _output_dir
    tflib.init_tf({'rnd.np_random_seed': 1000})
    model_path = './styleganinv_face_256.pkl'
    with open(model_path, 'rb') as f:
        # NOTE(review): pickle.load -- only load trusted checkpoints.
        E, _, _, Gs = pickle.load(f)

    # Get input size.
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]
    crop_size = 110  # default crop size.
    center_x = 125
    center_y = 145
    crop_x = center_x - crop_size // 2  # default coordinate-X
    crop_y = center_y - crop_size // 2  # default coordinate-Y
    # Loss mask: only the crop region contributes to the loss.
    mask = np.zeros((1, image_size, image_size, 3), dtype=np.float32)
    mask[:, crop_y:crop_y + crop_size, crop_x:crop_x + crop_size, :] = 1.0

    # Build graph.
    sess = tf.get_default_session()
    batch_size = 4
    input_shape = E.input_shape
    input_shape[0] = batch_size  # default batch size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    # Masked images in NHWC; *_255 variants feed the perceptual network.
    x_mask = (tf.transpose(x, [0, 2, 3, 1]) + 1) * mask - 1
    x_mask_255 = (x_mask + 1) / 2 * 255
    latent_shape = Gs.components.synthesis.input_shape
    latent_shape[0] = batch_size  # default batch size
    wp = tf.get_variable(shape=latent_shape, name='latent_code')
    x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    x_rec_mask = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) * mask - 1
    x_rec_mask_255 = (x_rec_mask + 1) / 2 * 255
    # Encoder output seeds the optimization.
    w_enc = E.get_output_for(x, phase=False)
    wp_enc = tf.reshape(w_enc, latent_shape)
    setter = tf.assign(wp, wp_enc)

    # Settings for optimization.
    print("Diffusion : Settings for Optimization.")
    perceptual_model = PerceptualModel([image_size, image_size], False)
    x_feat = perceptual_model(x_mask_255)
    x_rec_feat = perceptual_model(x_rec_mask_255)
    loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
    loss_pix = tf.reduce_mean(tf.square(x_mask - x_rec_mask), axis=[1, 2, 3])
    loss_weight_feat = 5e-5  # perceptual-loss scale (default 5e-5).
    learning_rate = 0.01
    loss = loss_pix + loss_weight_feat * loss_feat
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    # Only the latent code is trainable; model weights stay frozen.
    train_op = optimizer.minimize(loss, var_list=[wp])
    tflib.init_uninitialized_vars()

    # Invert image
    num_iterations = 100
    num_results = 5
    save_interval = num_iterations // num_results
    images = np.zeros(input_shape, np.uint8)

    print("Load target image.")
    _target_image = './static/' + _target_image
    target_image = resize_image(load_image(_target_image),
                                (image_size, image_size))
    save_image('./' + output_dir + '_tar.png', target_image)

    print("Load context image.")
    context_image = getContextImage(_context_image)
    context_image = resize_image(load_image(context_image),
                                 (image_size, image_size))
    save_image('./' + output_dir + '_cont.png', context_image)

    # Inverting Context Image.
    # NOTE(review): the context image is saved unmodified here -- the
    # actual invert() call appears to have been disabled upstream.
    save_image('./' + output_dir + '_cont_inv.png', context_image)

    print("Cropping Image...")
    # Cut the target down to a transparent-background face patch, then
    # paste it onto the context image.
    target_image, rect = cropWithWhite(target_image)
    target_image = fourChannels(target_image)
    target_image = cut(target_image)
    target_image = transBg(target_image)
    context_image = createStitchedImage(context_image, target_image, rect)
    save_image('./' + output_dir + '_sti.png', context_image)
    images[0] = np.transpose(context_image, [2, 0, 1])
    # Scale uint8 [0, 255] to float [-1, 1]. NOTE: `input` shadows the
    # builtin; kept as-is in this documentation-only pass.
    input = images.astype(np.float32) / 255 * 2.0 - 1.0

    # Run encoder
    print("Start Diffusion.")
    sess.run([setter], {x: input})
    output = sess.run([wp, x_rec])
    output[1] = adjust_pixel_range(output[1])
    col_idx = 4
    for step in tqdm(range(1, num_iterations + 1), leave=False):
        sess.run(train_op, {x: input})
        if step == num_iterations or step % save_interval == 0:
            output = sess.run([wp, x_rec])
            output[1] = adjust_pixel_range(output[1])
            if step == num_iterations:
                # Only the final reconstruction is written to disk.
                save_image(f'{output_dir}.png', output[1][0])
            col_idx += 1
    exit()
def invert(model_path, _image, _wp, _latent_shape):
    """Invert a single image to a latent code via TF1 optimization.

    Reuses the caller-supplied latent variable `_wp` as the optimization
    target and returns the reconstructed image after `num_iterations`
    Adam steps.

    NOTE(review): builds a fresh graph/model on every call and uses
    hard-coded hyperparameters (40 iterations, lr 0.01, loss weights
    5e-5 / 2.0) -- presumably a one-off demo helper; verify before reuse.
    """
    print("Inverting")
    tflib.init_tf({'rnd.np_random_seed': 1000})
    with open(model_path, 'rb') as f:
        # NOTE(review): pickle.load -- only load trusted checkpoints.
        E, _, _, Gs = pickle.load(f)

    # Get input size.
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]

    # Build graph.
    print("Inverting : Build Graph.")
    sess = tf.get_default_session()
    batch_size = 4
    input_shape = E.input_shape
    input_shape[0] = batch_size  # default batch size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    # Map images from [-1, 1] NCHW to [0, 255] NHWC for the perceptual net.
    x_255 = (tf.transpose(x, [0, 2, 3, 1]) + 1) / 2 * 255
    wp = _wp
    x_rec = Gs.components.synthesis.get_output_for(wp, randomize_noise=False)
    x_rec_255 = (tf.transpose(x_rec, [0, 2, 3, 1]) + 1) / 2 * 255
    # Encoder output seeds the optimization.
    w_enc = E.get_output_for(x, phase=False)
    wp_enc = tf.reshape(w_enc, _latent_shape)
    setter = tf.assign(wp, wp_enc)

    # Settings for optimization.
    print("Inverting : Settings for Optimization.")
    perceptual_model = PerceptualModel([image_size, image_size], False)
    x_feat = perceptual_model(x_255)
    x_rec_feat = perceptual_model(x_rec_255)
    loss_feat = tf.reduce_mean(tf.square(x_feat - x_rec_feat), axis=[1])
    loss_pix = tf.reduce_mean(tf.square(x - x_rec), axis=[1, 2, 3])
    # Domain regularizer: keep the optimized code close to what the
    # encoder predicts for the current reconstruction.
    w_enc_new = E.get_output_for(x_rec, phase=False)
    wp_enc_new = tf.reshape(w_enc_new, _latent_shape)
    loss_enc = tf.reduce_mean(tf.square(wp - wp_enc_new), axis=[1, 2])
    # Weights: pixel 1.0, perceptual 5e-5, encoder regularizer 2.0.
    loss = (loss_pix + 5e-5 * loss_feat + 2.0 * loss_enc)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    # Only the latent code is trainable; model weights stay frozen.
    train_op = optimizer.minimize(loss, var_list=[wp])
    tflib.init_uninitialized_vars()

    # Invert image
    print("Start Inverting.")
    num_iterations = 40
    num_results = 2
    save_interval = num_iterations // num_results
    context_images = np.zeros(input_shape, np.uint8)
    context_image = resize_image(load_image(_image),
                                 (image_size, image_size))

    # Inverting Context Image.
    context_images[0] = np.transpose(context_image, [2, 0, 1])
    # Scale uint8 [0, 255] to float [-1, 1] for the network.
    context_input = context_images.astype(np.float32) / 255 * 2.0 - 1.0
    sess.run([setter], {x: context_input})
    context_output = sess.run([wp, x_rec])
    context_output[1] = adjust_pixel_range(context_output[1])
    context_image = np.transpose(context_images[0], [1, 2, 0])
    for step in tqdm(range(1, num_iterations + 1), leave=False):
        sess.run(train_op, {x: context_input})
        if step == num_iterations or step % save_interval == 0:
            context_output = sess.run([wp, x_rec])
            context_output[1] = adjust_pixel_range(context_output[1])
            if step == num_iterations:
                # Keep the final reconstruction as the return value.
                context_image = context_output[1][0]
    return context_image
def main():
    """Extract GH-Feat features from listed images and reconstruct them.

    Loads a pickled encoder E and generator Gs, rebuilds a style-modulated
    synthesis network with the trained weights, feeds each image through
    the encoder, saves the features plus reconstructions, and writes an
    HTML summary page.
    """
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    assert os.path.exists(args.image_list)
    image_list_name = os.path.splitext(os.path.basename(args.image_list))[0]
    output_dir = args.output_dir or f'results/ghfeat/{image_list_name}'
    logger = setup_logger(output_dir, 'extract_feature.log',
                          'inversion_logger')

    logger.info(f'Loading model.')
    tflib.init_tf({'rnd.np_random_seed': 1000})
    with open(args.model_path, 'rb') as f:
        # NOTE(review): pickle.load -- only load trusted checkpoints.
        E, _, _, Gs = pickle.load(f)

    # Get input size.
    image_size = E.input_shape[2]
    assert image_size == E.input_shape[3]
    # Rebuild a style-modulated synthesis network and copy the trained
    # weights over from Gs, variable by variable.
    G_args = EasyDict(func_name='training.networks_stylegan.G_synthesis')
    G_style_mod = tflib.Network('G_StyleMod', resolution=image_size,
                                label_size=0, **G_args)
    Gs_vars_pairs = {
        name: tflib.run(val)
        for name, val in Gs.components.synthesis.vars.items()
    }
    for g_name, g_val in G_style_mod.vars.items():
        tflib.set_vars({g_val: Gs_vars_pairs[g_name]})

    # Build graph.
    logger.info(f'Building graph.')
    sess = tf.get_default_session()
    input_shape = E.input_shape
    input_shape[0] = args.batch_size
    x = tf.placeholder(tf.float32, shape=input_shape, name='real_image')
    # GH-Feat: the encoder output, fed directly into the style-modulated
    # generator for reconstruction.
    ghfeat = E.get_output_for(x, is_training=False)
    x_rec = G_style_mod.get_output_for(ghfeat, randomize_noise=False)

    # Load image list.
    logger.info(f'Loading image list.')
    image_list = []
    with open(args.image_list, 'r') as f:
        for line in f:
            image_list.append(line.strip())

    # Extract GH-Feat from images.
    logger.info(f'Start feature extraction.')
    headers = ['Name', 'Original Image', 'Encoder Output']
    viz_size = None if args.viz_size == 0 else args.viz_size
    visualizer = HtmlPageVisualizer(num_rows=len(image_list),
                                    num_cols=len(headers),
                                    viz_size=viz_size)
    visualizer.set_headers(headers)

    images = np.zeros(input_shape, np.uint8)
    names = ['' for _ in range(args.batch_size)]
    features = []
    for img_idx in tqdm(range(0, len(image_list), args.batch_size),
                        leave=False):
        # Load inputs.
        batch = image_list[img_idx:img_idx + args.batch_size]
        for i, image_path in enumerate(batch):
            image = resize_image(load_image(image_path),
                                 (image_size, image_size))
            images[i] = np.transpose(image, [2, 0, 1])
            names[i] = os.path.splitext(os.path.basename(image_path))[0]
        # Scale uint8 [0, 255] to float [-1, 1] for the network.
        inputs = images.astype(np.float32) / 255 * 2.0 - 1.0
        # Run encoder.
        outputs = sess.run([ghfeat, x_rec], {x: inputs})
        # Keep only the first len(batch) rows; a final short batch may not
        # fill the fixed-size buffer.
        features.append(outputs[0][0:len(batch)])
        outputs[1] = adjust_pixel_range(outputs[1])
        for i, _ in enumerate(batch):
            image = np.transpose(images[i], [1, 2, 0])
            save_image(f'{output_dir}/{names[i]}_ori.png', image)
            save_image(f'{output_dir}/{names[i]}_enc.png', outputs[1][i])
            visualizer.set_cell(i + img_idx, 0, text=names[i])
            visualizer.set_cell(i + img_idx, 1, image=image)
            visualizer.set_cell(i + img_idx, 2, image=outputs[1][i])

    # Save results.
    # NOTE(review): os.system('cp ...') is shell-dependent; consider
    # shutil.copy if this must run outside POSIX shells.
    os.system(f'cp {args.image_list} {output_dir}/image_list.txt')
    np.save(f'{output_dir}/ghfeat.npy', np.concatenate(features, axis=0))
    visualizer.save(f'{output_dir}/reconstruction.html')