def preprocess_celebV(args):
    """
    Preprocess the CelebV dataset and store the results as compressed .npz archives.

    For each celebrity three archives are written into ``args.out_dir``:
    ``<name>_image``, ``<name>_heatmap`` and ``<name>_resized_heatmap``, each keyed
    by image name. When ``args.save_boundary_image`` is set, the boundary images
    returned by the preprocessor are also saved under ``<out_dir>/<name>/Image``.

    Raises:
        ValueError: if the image directory of a requested celebrity does not exist.
    """
    print("preprocessing celebV dataset...")

    src_dir = args.src_dir
    assert os.path.isdir(src_dir)
    out_dir = args.out_dir
    os.makedirs(out_dir, exist_ok=True)

    # Settings forwarded unchanged to every Preprocessor instance.
    prep_settings = (args.resize_size, args.line_thickness,
                     args.gaussian_kernel, args.gaussian_sigma)

    celeb_name_list = args.celeb_name_list
    if celeb_name_list is None:
        celeb_name_list = ['Donald_Trump', 'Emmanuel_Macron',
                           'Jack_Ma', 'Kathleen', 'Theresa_May']

    for celeb_name in celeb_name_list:
        imgs_root_path = os.path.join(src_dir, "CelebV", celeb_name)
        if not os.path.exists(imgs_root_path):
            raise ValueError(f"specified path {imgs_root_path} not found.")

        prep = Preprocessor(imgs_root_path, *prep_settings)
        annotations, _ = prep.get_ant_and_size(imgs_root_path)

        images, heatmaps, resized_heatmaps = {}, {}, {}

        boundary_dir = os.path.join(out_dir, celeb_name, "Image")
        if args.save_boundary_image:
            os.makedirs(boundary_dir, exist_ok=True)

        for annotation in tqdm(annotations):
            img, bod_map, bod_img, img_name, bod_map_resized = prep(annotation)
            images[img_name] = img  # uint8
            heatmaps[img_name] = bod_map  # uint8
            resized_heatmaps[img_name] = bod_map_resized  # uint8
            if args.save_boundary_image:
                imsave(os.path.join(boundary_dir, img_name),
                       bod_img, channel_first=True)

        for suffix, archive in (('_image', images),
                                ('_heatmap', heatmaps),
                                ('_resized_heatmap', resized_heatmaps)):
            np.savez_compressed(os.path.join(
                out_dir, celeb_name + suffix), **archive)

        # Release the per-celebrity buffers before the next iteration.
        del images, heatmaps, resized_heatmaps
def save_generations(rgb_output, filepath, drange=(-1, 1), return_images=False):
    """
    Convert generated images to uint8 and either save them as PNG files or return them.

    Args:
        rgb_output: batch of generated images indexed along its first axis. An
            ``nn.Variable`` is read through ``.d``; anything else through ``.data``.
        filepath (str): prefix of the output files; image ``i`` is written to
            ``{filepath}_{i}.png``. Not used when ``return_images`` is True.
        drange (sequence of two numbers): (min, max) value range of ``rgb_output``
            that is mapped linearly onto [0, 255].
        return_images (bool): if True, collect the uint8 images in a list and
            return it instead of writing files.

    Returns:
        list of uint8 arrays when ``return_images`` is True, otherwise None.
    """
    # The affine mapping does not depend on the image, so compute it once.
    scale = 255 / (drange[1] - drange[0])
    offset = 0.5 - drange[0] * scale

    images = []
    for i in range(rgb_output.shape[0]):
        if isinstance(rgb_output, nn.Variable):
            raw = rgb_output.d[i]
        else:
            raw = rgb_output.data[i]
        image = np.uint8(np.clip(raw * scale + offset, 0, 255))

        if return_images:
            images.append(image)
        else:
            imsave(f'{filepath}_{i}.png', image, channel_first=True)
            print(f'Output saved. Saved {filepath}_{i}.png')

    if return_images:
        return images
def generate_flipped_images(gen, latent_vector, hyper_sphere=True, save_dir=None):
    """
    Generate images from the given latent vectors and save them as JPEG files.

    Args:
        gen : generator, called as ``gen(z_batch, test=True)``
        latent_vector (numpy.ndarray) : latent vectors; the first two axes are used
            as (number of images, latent size)
        hyper_sphere (bool) : if True, apply pixel-wise feature vector
            normalization to the latent vectors first
        save_dir (str) : directory to save the images (created if missing)
    """
    if not path.isdir(save_dir):
        mkdir(save_dir)

    num_images, n_latent = latent_vector.shape[0], latent_vector.shape[1]
    z = nn.Variable.from_numpy_array(
        np.reshape(latent_vector, (num_images, n_latent, 1, 1)))
    if hyper_sphere:
        z = pixel_wise_feature_vector_normalization(z)

    batch_size = 64  # images are generated in batches of 64
    iterations, leftover = divmod(num_images, batch_size)
    if leftover != 0:
        # one extra, shorter batch for the remaining images
        iterations += 1

    count = 0
    for ell in range(iterations):
        begin = ell * batch_size
        y = gen(z[begin:begin + batch_size], test=True)
        images = convert_images_to_uint8(y, drange=[-1, 1])
        for i in range(images.shape[0]):
            out_name = ''.join([save_dir, '/gen_', str(count), '.jpg'])
            imsave(out_name, images[i], channel_first=True)
            count += 1

    print("all paired images generated")
def add(self, name, var):
    """Normalize a batch of images and write them to ``<save_dir>/<name>.png``.

    Args:
        name: value substituted into the ``'{}.png'`` file name.
        var (nn.Variable, nn.NdArray or numpy.ndarray): images whose last three
            axes are treated as (C, H, W). With C == 2 a channel of ones is
            appended; with C > 3 only the first three channels are kept.
    """
    import nnabla as nn
    from nnabla.utils.image_utils import imsave

    # Work on a copy so the caller's buffer is never modified.
    if isinstance(var, nn.Variable):
        source = var.d
    elif isinstance(var, nn.NdArray):
        source = var.data
    else:
        assert isinstance(var, np.ndarray)
        source = var
    batch = source.copy()

    assert batch.ndim > 2
    channels = batch.shape[-3]
    # Collapse all leading axes into a single batch axis.
    batch = batch.reshape((-1,) + batch.shape[-3:])
    limit = min(batch.shape[0], self.num_images)
    batch = self.normalize_method(batch[:limit])

    if channels > 3:
        batch = batch[:, :3]
    elif channels == 2:
        filler = np.ones((batch.shape[0], 1) + batch.shape[-2:])
        batch = np.concatenate((batch, filler), axis=1)

    path_tmpl = os.path.join(self.save_dir, '{}.png')
    # NOTE(review): every image is written to the same path, so only the last
    # one remains on disk - confirm that this is intended.
    for j in range(min(self.num_images, batch.shape[0])):
        img = np.transpose(batch[j], (1, 2, 0))
        if img.shape[-1] == 1:
            # single-channel image: drop the channel axis
            img = img[..., 0]
        imsave(path_tmpl.format(name), img)
def _update_result(args, index, result, values, output_index, type_end_names, output_image):
    """Collect the outputs of one batch, writing image/CSV outputs to files.

    Args:
        args: object providing ``outdir`` and ``result_outdir``, used to build
            output file paths.
        index: offset added to the position of each data item in the batch to
            form its file index.
        result: object with ``names``, ``types`` and ``dims`` sequences indexed by
            output index; extended through ``_set_initial_values`` when it has no
            entry for the current output yet.
        values: one iterable of per-sample data for each output.
        output_index: index of the first output in ``values`` within ``result``.
        type_end_names: per-output value passed to ``_set_initial_values``
            (iterated in lockstep with ``values``).
        output_image: if falsy, every output is flattened into the row instead of
            being written to a file.

    Returns:
        tuple: ``(result, outputs)`` where ``outputs[data_index]`` is the list of
        flattened values and/or relative file paths collected for that sample.
    """
    outputs = []
    for o, type_and_name in zip(values, type_end_names):
        for data_index, d in enumerate(o):
            # First time this output is seen: let the helper register it.
            if len(result.dims) <= output_index:
                result = _set_initial_values(result, type_and_name, d)
            # Make sure there is a row for this sample.
            if len(outputs) <= data_index:
                outputs.append([])
            name = result.names[output_index]
            vtype = result.types[output_index]
            dim = result.dims[output_index]

            # Output data
            if vtype == 'col' or not output_image:
                # Vector type output
                outputs[data_index].extend(np.ndarray.flatten(d))
            else:
                for dim_index in range(dim):
                    file_index = index + data_index
                    # Files are grouped into sub-directories of 1000 samples each.
                    file_name = '{}_{:04d}'.format(
                        output_index, file_index // 1000) + os.path.sep
                    if dim > 1:
                        file_name += str(dim_index) + '_'
                    file_name += '{}{}'.format(file_index, vtype)
                    full_path = os.path.join(args.outdir,
                                             args.result_outdir, file_name)
                    directory = os.path.dirname(full_path)
                    try:
                        os.makedirs(directory)
                    except OSError:
                        pass  # python2 does not support exists_ok arg

                    if vtype in [
                            '.bmp', '.jpeg', '.jpg', '.png', '.gif', '.tif'
                    ]:
                        # Image type: scale by 255 before converting to uint8.
                        x = np.array(d, dtype=np.float32) * 255.
                        while len(x.shape) == 4:
                            x = x[0]
                        if x.shape[0] > 3 or x.shape[0] == 2:
                            # Neither 1 nor 3 planes: save the plane selected by dim_index.
                            x = x[dim_index]
                        elif x.shape[0] == 3:
                            # Three planes: move the first axis to the end.
                            x = x.transpose(1, 2, 0)
                        else:
                            # Single plane: drop the leading axis.
                            x = x.reshape(x.shape[1], x.shape[2])
                        x = x.clip(0, 255).astype(np.uint8)
                        imsave(full_path, x)
                    else:
                        # CSV type
                        with open(full_path, 'w') as f:
                            writer = csv.writer(f, lineterminator='\n')
                            x = np.array(d)
                            writer.writerows(x)
                    # Record the path relative to the result directory's parent.
                    outputs[data_index].append(
                        os.path.join('.', args.result_outdir, file_name))
        output_index += 1

    return result, outputs
def style_mixing(self, test_config, args):
    """Generate a style-mixing grid and save it as ``style_mix.png``.

    Latents A are drawn with seed ``args.seed_1[0]`` and latents B with seed
    ``args.seed_2[0]``. Every (A[i], B[j]) pair is fed to the generator with
    ``mixing_layer_index=test_config['mix_after']``. The un-mixed A and B images
    are concatenated around the mixed ones, and the grid is resized to
    1024x1024 and written into ``self.results_dir``.
    """
    from nnabla.utils.image_utils import imsave, imresize

    print('Testing style mixing of generation...')

    latent_shape_a = (args.batch_size_A, test_config['latent_dim'])
    latent_shape_b = (args.batch_size_B, test_config['latent_dim'])
    z1 = F.randn(shape=latent_shape_a, seed=args.seed_1[0]).data
    z2 = F.randn(shape=latent_shape_b, seed=args.seed_2[0]).data

    nn.set_auto_forward(True)

    # One concatenated strip of mixed images per latent of A.
    strips = []
    for i in range(args.batch_size_A):
        cells = []
        for j in range(args.batch_size_B):
            style_noises = [
                F.reshape(z1[i], (1, 512)),
                F.reshape(z2[j], (1, 512)),
            ]
            rgb_output = self.generator(
                1, style_noises, test_config['truncation_psi'],
                mixing_layer_index=test_config['mix_after'])
            generated = save_generations(rgb_output, None, return_images=True)
            cells.append(generated[0])
        strips.append(np.concatenate(list(cells), axis=1))
    mix_image_stacks = np.concatenate(list(strips), axis=2)

    # Un-mixed images of A.
    rgb_output = self.generator(args.batch_size_A, [z1, z1],
                                test_config['truncation_psi'])
    image_A = np.concatenate(
        list(save_generations(rgb_output, None, return_images=True)), axis=2)

    # Un-mixed images of B.
    rgb_output = self.generator(args.batch_size_B, [z2, z2],
                                test_config['truncation_psi'])
    image_B = np.concatenate(
        list(save_generations(rgb_output, None, return_images=True)), axis=1)

    # White tile for the corner where the A strip and the B strip meet.
    corner = 255 * np.ones(rgb_output[0].shape).astype(np.uint8)
    top_image = np.concatenate((corner, image_A), axis=2)
    body_image = np.concatenate((image_B, mix_image_stacks), axis=2)
    grid_image = np.concatenate((top_image, body_image), axis=1)

    filename = os.path.join(self.results_dir, 'style_mix.png')
    resized = imresize(grid_image, (1024, 1024), channel_first=True)
    imsave(filename, resized, channel_first=True)
    print(f'Output saved as {filename}')
def save_image(image_path, label, color_map=None):
    """Colorize a 2-D label map and save it as an image.

    Args:
        image_path: destination passed to ``imsave``.
        label: 2-D array of class indices (``label.shape`` must be ``(h, w)``).
        color_map: indexable mapping from class index to a 3-component color.
            Defaults to the result of ``get_color()``.

    The colorized image is divided by its maximum value before saving. If the
    maximum is 0 (e.g. every pixel maps to a black color), the division is
    skipped so that an all-zero image is saved instead of NaNs.
    """
    h, w = label.shape
    vis = np.zeros((h, w, 3), dtype=np.int32)
    if color_map is None:
        color_map = get_color()

    for y in range(h):
        for x in range(w):
            vis[y, x] = color_map[int(label[y][x])]

    peak = np.amax(vis)
    if peak != 0:
        vis = vis / peak
    else:
        # Avoid 0/0; keep the same floating-point dtype the division yields.
        vis = vis.astype(np.float64)
    imsave(image_path, vis)
def main():
    '''
    Reconstruct one MNIST test image per digit while tweaking the capsule noise,
    and save the results as ``tweak_digit_<d>.png`` in ``args.monitor_path``.

    For each index along ``noise.shape[2]`` and each offset in
    ``np.arange(-0.25, 0.30, 0.05)``, the offset is added to that component of an
    all-zero noise array, the reconstruction is run, and the output is collected.
    '''
    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Load parameters
    load_parameters(args.monitor_path)

    # Build model
    image, label, noise, recon = model_tweak_digitscaps(10)

    # Pick one test image for each digit from 0 to 9.
    images, labels = load_mnist(train=False)
    batch_images = []
    batch_labels = []
    ind = 123
    for i in range(10):
        class_images = images[labels.flat == i]
        # Clamp to the last valid index; clamping to shape[0] would step one
        # past the end when a class has `ind` images or fewer.
        img = class_images[min(class_images.shape[0] - 1, ind)]
        batch_images.append(img)
        batch_labels.append(i)
    batch_images = np.stack(batch_images, axis=0)
    batch_labels = np.array(batch_labels).reshape(-1, 1)

    # Generate reconstructed images with tweaking capsules
    image.d = batch_images
    label.d = batch_labels
    results = []
    for d in range(noise.shape[2]):  # 16
        for r in np.arange(-0.25, 0.30, 0.05):
            batch_noise = np.zeros(noise.shape)
            batch_noise[..., d] += r
            noise.d = batch_noise
            recon.forward(clear_buffer=True)
            results.append(recon.d.copy())
    # results shape: [16, 11, 10, 1, 28, 28]
    results = np.array(results).reshape((noise.shape[2], -1) + image.shape)

    # Draw tweaked images
    from nnabla.utils.image_utils import imsave
    for i in range(10):
        adigit = (results[:, :, i] * 255).astype(np.uint8)
        drawn = draw_images(adigit)
        imsave(os.path.join(args.monitor_path, 'tweak_digit_%d.png' % i), drawn)
def main():
    """Run the stereo network on one image pair and save the normalized prediction.

    Reads its settings from the module-level ``args``. The prediction is min-max
    normalized and written to ``stereo_depth.png``; when ``args.save_nnp`` is set,
    the network is additionally exported to ``args.nnp``.

    Raises:
        ValueError: if ``args.dataset`` is neither ``'Kitti'`` nor ``'SceneFlow'``.
    """
    ctx = get_extension_context('cudnn', device_id=args.gpus)
    nn.set_default_context(ctx)

    image_left = imread(args.left_image)
    image_right = imread(args.right_image)

    if args.dataset == 'Kitti':
        var_left = nn.Variable((1, 3, args.im_height_kt, args.im_width_kt))
        var_right = nn.Variable((1, 3, args.im_height_kt, args.im_width_kt))
        img_left, img_right = preprocess_kitti(image_left, image_right)
    elif args.dataset == 'SceneFlow':
        var_left = nn.Variable((1, 3, args.im_height_sf, args.im_width_sf))
        var_right = nn.Variable((1, 3, args.im_height_sf, args.im_width_sf))
        img_left, img_right = preprocess_sceneflow(image_left, image_right)
    else:
        # Without this branch the code below would fail on unbound variables.
        raise ValueError(
            "Unsupported dataset '{}'; expected 'Kitti' or 'SceneFlow'.".format(
                args.dataset))

    var_left.d, var_right.d = img_left, img_right

    if args.loadmodel is not None:
        # Loading CNN pretrained parameters.
        nn.load_parameters(args.loadmodel)

    pred_test = psm_net(var_left, var_right, args.maxdisp, False)
    pred_test.forward(clear_buffer=True)
    pred = pred_test.d
    pred = np.squeeze(pred, axis=1)
    pred = pred[0]
    # Normalize to [-1, 1], then map to [0, 1] for saving.
    pred = 2 * (pred - np.min(pred)) / np.ptp(pred) - 1
    imsave('stereo_depth.png', (pred + 1) * 0.5)

    print("Done")

    # Save NNP file (used in C++ inference later.).
    if args.save_nnp:
        runtime_contents = {
            'networks': [{
                'name': 'runtime',
                'batch_size': 1,
                'outputs': {
                    'y0': pred_test
                },
                'names': {
                    'x0': var_left,
                    'x1': var_right
                }
            }],
            'executors': [{
                'name': 'runtime',
                'network': 'runtime',
                'data': ['x0', 'x1'],
                'output': ['y0']
            }]
        }
        import nnabla.utils.save
        nnabla.utils.save.save(args.nnp, runtime_contents)
def generate_csv_png(num_of_data, img_size):
    """Create a temporary CSV dataset of PNG images and yield the CSV file name.

    The CSV has the header ``x:image,y`` followed by one row per image. Image
    ``n`` is an ``img_size`` x ``img_size`` identity matrix multiplied by ``n``,
    saved next to the CSV as ``image_<n>.png``, and labelled ``n % 10``.
    """
    with create_temp_with_dir('test.csv') as csvfilename:
        imgdir = os.path.dirname(csvfilename)
        with open(csvfilename, 'w') as csv_file:
            csv_file.write('x:image,y\n')
            for n in range(num_of_data):
                img_name = 'image_{}.png'.format(n)
                pixels = np.identity(img_size).astype(numpy.uint8) * n
                # Warnings raised while saving are deliberately silenced.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    imsave(os.path.join(imgdir, img_name), pixels)
                csv_file.write('{}, {}\n'.format(img_name, n % 10))
        # The CSV is closed (and flushed) before it is handed to the consumer.
        yield csvfilename
def _save_image(self, file_name, image):
    """Min-max normalize an image and save it under ``<save_path>/html/imgs``.

    Args:
        file_name: name of the file created inside the image directory.
        image (nn.Variable, nn.NdArray or numpy.ndarray): image data.

    The image is rescaled to [0, 1] using its own minimum and maximum. A
    constant image (maximum == minimum) is saved as all zeros rather than
    dividing by zero.
    """
    if isinstance(image, nn.Variable):
        img = image.data.get_data("r")
    elif isinstance(image, nn.NdArray):
        img = image.get_data("r")
    else:
        assert isinstance(image, np.ndarray)
        img = image

    dir_path = os.path.join(self.save_path, "html", "imgs")
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dir_path, exist_ok=True)
    save_path = os.path.join(dir_path, file_name)

    low = img.min()
    span = img.max() - low
    # A zero span would yield NaNs; dividing by 1 leaves (img - low) == 0.
    img = (img - low) / (span if span != 0 else 1)
    imsave(save_path, img)
def add(self, index, var):
    """Add a minibatch of images to the monitor.

    Images are written only when ``index`` is 0 or ``index + 1`` is a multiple
    of ``self.interval``; otherwise the call returns immediately.

    Args:
        index (int): Index.
        var (:obj:`~nnabla.Variable`, :obj:`~nnabla.NdArray`, or :obj:`~numpy.ndarray`):
            A minibatch of images with ``(N, ..., C, H, W)`` format.
            If C == 2, a third channel filled with ones is appended.
            If C > 3, only the first three channels are kept.

    """
    import nnabla as nn
    from nnabla.utils.image_utils import imsave

    due = index == 0 or (index + 1) % self.interval == 0
    if not due:
        return

    # Work on a copy so the caller's buffer is never modified.
    if isinstance(var, nn.Variable):
        source = var.d
    elif isinstance(var, nn.NdArray):
        source = var.data
    else:
        assert isinstance(var, np.ndarray)
        source = var
    batch = source.copy()

    assert batch.ndim > 2
    channels = batch.shape[-3]
    # Collapse all leading axes into a single batch axis.
    batch = batch.reshape((-1,) + batch.shape[-3:])
    limit = min(batch.shape[0], self.num_images)
    batch = self.normalize_method(batch[:limit])

    if channels > 3:
        batch = batch[:, :3]
    elif channels == 2:
        filler = np.ones((batch.shape[0], 1) + batch.shape[-2:])
        batch = np.concatenate((batch, filler), axis=1)

    path_tmpl = os.path.join(self.save_dir, '{:06d}-{}.png')
    for j in range(min(self.num_images, batch.shape[0])):
        img = np.transpose(batch[j], (1, 2, 0))
        if img.shape[-1] == 1:
            # single-channel image: drop the channel axis
            img = img[..., 0]
        imsave(path_tmpl.format(index, format(j, '03d')), img)

    if self.verbose:
        logger.info("iter={} {{{}}} are written to {}.".format(
            index, self.name, path_tmpl.format(index, '*')))
def test_data_iterator(di, output_dir, comm=None, num_iters=100):
    """Pull batches from a data iterator, report statistics and save each batch.

    For ``num_iters`` iterations the mean, std, max and min of the data batch
    are fed to a ``KVReporter`` and the batch is saved as ``<itr>.png`` in
    ``output_dir``. The reporter is dumped once at the end.
    """
    from nnabla.utils.image_utils import imsave
    from neu.reporter import KVReporter

    reporter = KVReporter(comm=comm)
    os.makedirs(output_dir, exist_ok=True)

    stat_names = ("mean", "std", "max", "min")
    for itr in range(num_iters):
        # The label half of the batch is not inspected.
        data, _ = di.next()
        for stat in stat_names:
            reporter.kv_mean(stat, getattr(data, stat)())
        out_file = os.path.join(output_dir, f"{itr}.png")
        imsave(out_file, data, channel_first=True)

    reporter.dump()
def generate_images(gen, num_images, n_latent=512, hyper_sphere=True, save_dir=None, latent_vector=None):
    """
    Sample random latent vectors, pickle them, and save the generated images.

    Args:
        gen : generator, called as ``gen(z_batch, test=True)``
        num_images (int) : number of images to generate
        n_latent (int) : size of each latent vector
        hyper_sphere (bool) : if True, apply pixel-wise feature vector
            normalization to the latent vectors first
        save_dir (str) : directory to save the images (created if missing)
        latent_vector (str) : path of the file the latent vectors are pickled to
    """
    if not path.isdir(save_dir):
        mkdir(save_dir)

    z_data = np.random.randn(num_images, n_latent, 1, 1)

    # Saving latent vectors
    with open(latent_vector, 'wb+') as latent_file:
        pickle.dump(z_data.reshape((num_images, n_latent)), latent_file)

    z = nn.Variable.from_numpy_array(z_data)
    if hyper_sphere:
        z = pixel_wise_feature_vector_normalization(z)

    batch_size = 64
    iterations, leftover = divmod(num_images, batch_size)
    if leftover != 0:
        # one extra, shorter batch for the remaining images
        iterations += 1

    count = 0
    for ell in range(iterations):
        begin = ell * batch_size
        y = gen(z[begin:begin + batch_size], test=True)
        images = convert_images_to_uint8(y, drange=[-1, 1])
        for i in range(images.shape[0]):
            out_name = ''.join([save_dir, '/gen_', str(count), '.jpg'])
            imsave(out_name, images[i], channel_first=True)
            count += 1

    print("images are generated")
def save_tiled_image(img, path, channel_last=False):
    """
    Save given batched images as tiled image.
    The first axis will be handled as batch.

    Args:
        img (np.ndarray): Images to save. The shape should be (B, C, H, W) or (B, H, W, C) depending on `channel_last`.
                          dtype must be np.uint8.
        path (str): Path to save. Missing parent directories are created; a bare
                    file name (no directory part) is saved in the current directory.
        channel_last (bool): If True, the last axis (=3) will be handled as channel.
    """
    tiled_image = get_tiled_image(img, channel_last=channel_last)
    assert tiled_image.dtype == np.uint8

    # create directory if needed; os.makedirs('') raises, so skip it when
    # `path` has no directory component.
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    # save
    imsave(path, tiled_image)
def add(self, name, var):
    """Normalize a batch of images, tile them, and save ``<save_dir>/<name>.png``.

    Args:
        name: value substituted into the ``'{}.png'`` file name.
        var (nn.Variable, nn.NdArray or numpy.ndarray): images whose last three
            axes are treated as (C, H, W). With C == 2 a channel of ones is
            appended; with C > 3 only the first three channels are kept.
    """
    import nnabla as nn
    from nnabla.utils.image_utils import imsave

    # Work on a copy so the caller's buffer is never modified.
    if isinstance(var, nn.Variable):
        source = var.d
    elif isinstance(var, nn.NdArray):
        source = var.data
    else:
        assert isinstance(var, np.ndarray)
        source = var
    batch = source.copy()

    assert batch.ndim > 2
    channels = batch.shape[-3]
    # Collapse all leading axes into a single batch axis.
    batch = batch.reshape((-1,) + batch.shape[-3:])
    limit = min(batch.shape[0], self.num_images)
    batch = self.normalize_method(batch[:limit])

    if channels > 3:
        batch = batch[:, :3]
    elif channels == 2:
        filler = np.ones((batch.shape[0], 1) + batch.shape[-2:])
        batch = np.concatenate((batch, filler), axis=1)

    out_path = os.path.join(self.save_dir, '{}.png'.format(name))
    imsave(out_path, tile_images(batch))
def save_image_function():
    """Save ``img`` to ``img_file`` with the options taken from the surrounding scope."""
    save_options = {
        'channel_first': channel_first,
        'as_uint16': as_uint16,
        'auto_scale': auto_scale,
    }
    image_utils.imsave(img_file, img, **save_options)
def generate():
    """Generate images for the validation split and save them with their inputs.

    For every sample, the generator output is written to
    ``<save_path>/generated/res_<rank>_<n>.png`` and the colorized object-id map
    to ``input_<rank>_<n>.png``. The last batch is trimmed so that no more than
    ``data_iter._size`` samples are written.

    Raises:
        NotImplementedError: if ``conf.dataset`` is not ``"cityscapes"`` or
            ``"ade20k"``.
    """
    rng = np.random.RandomState(803)

    conf = get_config()

    # set context
    comm = init_nnabla(conf)

    # find all test data
    if conf.dataset == "cityscapes":
        data_list = get_cityscape_datalist(conf.cityscapes,
                                           data_type="val",
                                           save_file=comm.rank == 0)
        conf.n_class = conf.cityscapes.n_label_ids
        use_inst = True

        data_iter = create_cityscapes_iterator(conf.batch_size,
                                               data_list,
                                               comm=comm,
                                               image_shape=conf.image_shape,
                                               rng=rng,
                                               flip=False)
    elif conf.dataset == "ade20k":
        data_list = get_ade20k_datalist(conf.ade20k,
                                        data_type="val",
                                        save_file=comm.rank == 0)
        conf.n_class = conf.ade20k.n_label_ids + 1  # class id + unknown
        use_inst = False

        load_shape = tuple(
            x + 30
            for x in conf.image_shape) if conf.use_crop else conf.image_shape
        data_iter = create_ade20k_iterator(conf.batch_size,
                                           data_list,
                                           comm=comm,
                                           load_shape=load_shape,
                                           crop_shape=conf.image_shape,
                                           rng=rng,
                                           flip=False)
    else:
        raise NotImplementedError(
            "Currently dataset {} is not supported.".format(conf.dataset))

    # define generator
    generator = Generator(conf, use_inst)

    # load parameters
    if not os.path.exists(conf.load_params):
        logger.warn(
            "Path to load params is not found."
            " Loading params is skipped and generated result will be unreasonable. ({})"
            .format(conf.load_params))
    else:
        print("load parameters from {}".format(conf.load_params))
        nn.load_parameters(conf.load_params)

    niter = get_iteration_per_epoch(data_iter._size,
                                    conf.batch_size,
                                    round="ceil")

    progress_iterator = trange(niter,
                               desc="[Generating Images]",
                               disable=comm.rank > 0)

    # for label2color
    label2color = Colorize(conf.n_class)

    save_path = os.path.join(conf.save_path, "generated")
    os.makedirs(save_path, exist_ok=True)

    logger.info("Generated images will be saved on '{}'.".format(save_path))

    cnt = 0  # number of samples consumed before the current batch
    for i in progress_iterator:
        if conf.dataset == "cityscapes":
            _, instance_id, object_id = data_iter.next()
        elif conf.dataset == "ade20k":
            _, object_id = data_iter.next()
            instance_id = None
        else:
            # NotImplemented is a constant, not an exception class.
            raise NotImplementedError(
                "Currently dataset {} is not supported.".format(conf.dataset))

        gen = generator(instance_id, object_id)
        id_colorized = label2color(object_id).astype(np.uint8)

        # Only the samples that are still part of the dataset are saved; the
        # final batch may be padded with wrapped-around samples.
        valid = max(0, min(conf.batch_size, data_iter._size - cnt))

        for j in range(valid):
            gen_image_path = os.path.join(
                save_path, "res_{}_{}.png".format(comm.rank, cnt + j))
            input_image_path = os.path.join(
                save_path, "input_{}_{}.png".format(comm.rank, cnt + j))

            imsave(gen_image_path, gen[j], channel_first=True)
            imsave(input_image_path, id_colorized[j])

        cnt += conf.batch_size
def main():
    """Generate images with the pretrained StyleGAN2 generator and save them as PNG.

    Command-line options select the style-noise seed(s), the truncation value,
    the batch size and optional style mixing. The pretrained weights are
    downloaded to ``styleGAN2_G_params.h5`` in the current directory if that file
    is missing. Images are written to
    ``<output-dir>/<output-filename or seed-based name>_<i>.png``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--output-filename', '-o', type=str, default=None,
                        help="name of an output image file.")
    parser.add_argument('--output-dir', '-d', type=str, default="results",
                        help="directory where the generated image is saved.")
    parser.add_argument('--seed', type=int, required=True,
                        help="seed for primal style noise.")
    parser.add_argument('--stochastic-seed', type=int, default=1,
                        help="seed for noises added to intermediate features.")
    parser.add_argument('--truncation-psi', default=0.5, type=float,
                        help="value for truncation trick.")
    parser.add_argument('--batch-size', type=int, default=1,
                        help="Number of images to generate.")
    parser.add_argument(
        '--mixing', action='store_true',
        help="if specified, apply style mixing with additional seed.")
    parser.add_argument('--seed-mix', type=int, default=None,
                        help="seed for another / secondary style noise.")
    parser.add_argument('--mix-after', type=int, default=7,
                        help="after this layer, style mixing is applied.")
    parser.add_argument('--context', '-c', type=str, default="cudnn",
                        help="context. cudnn is recommended.")

    args = parser.parse_args()

    assert 0 < args.mix_after < 17, "specify --mix-after from 1 to 16."

    if not os.path.isfile("styleGAN2_G_params.h5"):
        print("Downloading the pretrained weight. Please wait...")
        url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/stylegan2/styleGAN2_G_params.h5"
        from nnabla.utils.data_source_loader import download
        download(url, url.split('/')[-1], False)

    ctx = get_extension_context(args.context)
    nn.set_default_context(ctx)

    batch_size = args.batch_size
    num_layers = 18

    rnd = np.random.RandomState(args.seed)
    z = rnd.randn(batch_size, 512)

    print("Generation started...")
    print(f"truncation value: {args.truncation_psi}")
    print(f"seed for additional noise: {args.stochastic_seed}")

    # Inference via nn.NdArray utilizes significantly less memory

    if args.mixing:
        # apply style mixing
        # Compare against None explicitly: 0 is a valid seed but is falsy.
        assert args.seed_mix is not None, \
            "specify --seed-mix when --mixing is used."
        print(
            f"using style noise seed {args.seed} for layers 0-{args.mix_after - 1}"
        )
        print(
            f"using style noise seed {args.seed_mix} for layers {args.mix_after}-{num_layers}."
        )
        rnd = np.random.RandomState(args.seed_mix)
        z2 = rnd.randn(batch_size, 512)
        style_noises = [nn.NdArray.from_numpy_array(z)]
        style_noises += [nn.NdArray.from_numpy_array(z2)]
    else:
        # no style mixing (single noise / style is used)
        print(f"using style noise seed {args.seed} for entire layers.")
        style_noises = [nn.NdArray.from_numpy_array(z) for _ in range(2)]

    nn.set_auto_forward(True)
    nn.load_parameters("styleGAN2_G_params.h5")
    rgb_output = generate(batch_size, style_noises,
                          args.stochastic_seed, args.mix_after,
                          args.truncation_psi)

    # convert to uint8 to save an image file
    image = convert_images_to_uint8(rgb_output, drange=[-1, 1])
    if args.output_filename is None:
        if not args.mixing:
            filename = f"seed{args.seed}"
        else:
            filename = f"seed{args.seed}_{args.seed_mix}"
    else:
        filename = args.output_filename

    os.makedirs(args.output_dir, exist_ok=True)

    for i in range(batch_size):
        filepath = os.path.join(args.output_dir, f'{filename}_{i}.png')
        imsave(filepath, image[i], channel_first=True)
        print(f"Genetation completed. Saved {filepath}.")
def convert_image(args):
    """Read one image, resize / recolor it as requested, and save it as PNG.

    Args:
        args (sequence): ``(file_name, source_dir, dest_dir, width, height, mode, ch)``.
            ``mode`` may be ``'trimming'`` (center-crop to the target aspect
            ratio) or ``'padding'`` (zero-pad to the target aspect ratio); any
            other value resizes directly. ``ch`` is the number of output
            channels (1 or 3).

    Any failure is logged as a warning and the image is skipped; nothing is
    raised to the caller.
    """
    file_name, source_dir, dest_dir, width, height, mode, ch = args[:7]

    src_file_name = os.path.join(source_dir, file_name)
    file_name = os.path.splitext(file_name)[0] + ".png"
    dest_file_name = os.path.join(dest_dir, file_name)
    dest_path = os.path.dirname(dest_file_name)

    # open source image
    try:
        im = imread(src_file_name)
        if len(im.shape) < 2 or len(im.shape) > 3:
            logger.warning(
                "Illegal image file format %s." % (src_file_name))
            raise ValueError("illegal image file format")
        elif len(im.shape) == 3:
            # RGB image
            if im.shape[2] != 3:
                logger.warning("The image must be RGB or monochrome.")
                # NOTE(review): `csv_data` and `data` are not defined in this
                # function; unless they are module globals this line raises
                # NameError, which the handler below reports - confirm intent.
                csv_data.remove(data)
                raise ValueError("the image must be RGB or monochrome")

        # resize
        h = im.shape[0]
        w = im.shape[1]
        if w != width or h != height:
            # resize image
            if mode == 'trimming':
                # trimming mode: crop the longer side around the center
                if float(h) / w > float(height) / width:
                    target_h = int(float(w) / width * height)
                    im = im[(h - target_h) // 2:h - (h - target_h) // 2, ::]
                else:
                    target_w = int(float(h) / height * width)
                    im = im[::, (w - target_w) // 2:w - (w - target_w) // 2]
            elif mode == 'padding':
                # padding mode: pad the shorter side symmetrically with zeros
                if float(h) / w < float(height) / width:
                    target_h = int(float(height) / width * w)
                    pad = (((target_h - h) // 2,
                            target_h - (target_h - h) // 2 - h), (0, 0))
                else:
                    target_w = int(float(width) / height * h)
                    pad = ((0, 0), ((target_w - w) // 2,
                                    target_w - (target_w - w) // 2 - w))
                if len(im.shape) == 3:
                    # never pad the channel axis
                    pad = pad + ((0, 0), )
                im = np.pad(im, pad, 'constant')
            im = imresize(im, size=(height, width))

        # change color ch
        if len(im.shape) == 2 and ch == 3:
            # Monochrome to RGB
            im = np.array([im, im, im]).transpose((1, 2, 0))
        elif len(im.shape) == 3 and ch == 1:
            # RGB to monochrome
            im = np.dot(im[..., :3], [0.299, 0.587, 0.114])

        # output
        try:
            os.makedirs(dest_path)
        except OSError:
            pass  # python2 does not support exists_ok arg

        imsave(dest_file_name, im)
    except Exception:
        # Best effort: report and skip, but let KeyboardInterrupt/SystemExit through.
        logger.warning("Failed to convert %s." % (src_file_name))
def generate_data(args):
    """Generate triplets of StyleGAN2 images and save them as PNG files.

    The pretrained weights are downloaded to ``<weights_path>/gen_params.h5`` if
    missing. ``args.num_images`` samples are generated in batches of
    ``args.batch_size``. For sample ``n`` three files are written to
    ``args.save_image_path``: ``image_<n>.png`` (from ``w``),
    ``image_<n>_o.png`` (from ``w_plus``) and ``image_<n>_y.png`` (from
    ``w_minus``). Unless ``args.face_morph`` is set, ``w_plus`` / ``w_minus``
    are the first latent shifted by ``+/- args.coeff`` times the direction
    loaded from ``args.attr_delta_path``.
    """
    weights_file = os.path.join(args.weights_path, 'gen_params.h5')
    if not os.path.isfile(weights_file):
        os.makedirs(args.weights_path, exist_ok=True)
        print(
            "Downloading the pretrained tf-converted weights. Please wait...")
        url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/stylegan2/styleGAN2_G_params.h5"
        from nnabla.utils.data_source_loader import download
        download(url, weights_file, False)

    nn.load_parameters(weights_file)
    print('Loaded pretrained weights from tensorflow!')

    os.makedirs(args.save_image_path, exist_ok=True)

    # Full batches followed by one shorter batch for the remainder, if any.
    n_full, remainder = divmod(args.num_images, args.batch_size)
    batches = [args.batch_size] * n_full
    if remainder != 0:
        batches.append(remainder)

    for idx, batch_size in enumerate(batches):
        z = [
            F.randn(shape=(batch_size, 512)).data,
            F.randn(shape=(batch_size, 512)).data
        ]

        # Divide each latent by the root of its mean square.
        for i in range(len(z)):
            z[i] = F.div2(
                z[i],
                F.pow_scalar(F.add_scalar(
                    F.mean(z[i]**2., axis=1, keepdims=True), 1e-8),
                             0.5,
                             inplace=True))

        # get latent code
        w = [mapping_network(z[0], outmaps=512, num_layers=8)]
        w += [mapping_network(z[1], outmaps=512, num_layers=8)]

        # truncation trick
        dlatent_avg = nn.parameter.get_parameter_or_create(name="dlatent_avg",
                                                           shape=(1, 512))
        w = [lerp(dlatent_avg, _, 0.7) for _ in w]

        # Load direction
        if not args.face_morph:
            attr_delta = nn.NdArray.from_numpy_array(
                np.load(args.attr_delta_path))
            attr_delta = F.reshape(attr_delta[0], (1, -1))
            w_plus = [w[0] + args.coeff * attr_delta, w[1]]
            w_minus = [w[0] - args.coeff * attr_delta, w[1]]
        else:
            w_plus = [w[0], w[0]]  # content
            w_minus = [w[1], w[1]]  # style

        constant_bc = nn.parameter.get_parameter_or_create(
            name="G_synthesis/4x4/Const/const", shape=(1, 512, 4, 4))
        constant_bc = F.broadcast(constant_bc,
                                  (batch_size, ) + constant_bc.shape[1:])

        gen_plus = synthesis(w_plus, constant_bc, noise_seed=100, mix_after=8)
        gen_minus = synthesis(w_minus, constant_bc,
                              noise_seed=100, mix_after=8)
        gen = synthesis(w, constant_bc, noise_seed=100, mix_after=8)

        image_plus = convert_images_to_uint8(gen_plus, drange=[-1, 1])
        image_minus = convert_images_to_uint8(gen_minus, drange=[-1, 1])
        image = convert_images_to_uint8(gen, drange=[-1, 1])

        for j in range(batch_size):
            # Offset by the nominal batch size: using the current (possibly
            # shorter) batch size would make the last batch overwrite files
            # written by earlier batches.
            image_index = idx * args.batch_size + j
            filepath = os.path.join(args.save_image_path,
                                    f'image_{image_index}')
            imsave(f'{filepath}_o.png', image_plus[j], channel_first=True)
            imsave(f'{filepath}_y.png', image_minus[j], channel_first=True)
            imsave(f'{filepath}.png', image[j], channel_first=True)
            print(f"Genetated. Saved {filepath}")
def preprocess_WFLW(args):
    """Preprocess the WFLW dataset and store the results as .npz archives and a CSV.

    Reads the annotation list for ``args.mode`` from ``args.src_dir``, crops
    each face, resizes it to 256x256 and computes its boundary image / heatmap.
    Written to ``args.out_dir``:

    * ``WFLW_cropped_image_<mode>`` and ``WFLW_heatmap_<mode>`` (compressed
      .npz archives keyed by the saved image name),
    * ``<mode>_data.csv`` mapping each saved name to the original image name,
    * when ``args.save_boundary_image`` is set, the boundary and cropped images
      as PNG files under ``WFLW_landmark_images/<mode>`` and
      ``WFLW_cropped_images/<mode>``.
    """
    import csv

    print("preprocessing WFLW dataset...")

    src_dir = args.src_dir
    assert os.path.isdir(src_dir)
    out_dir = args.out_dir
    os.makedirs(out_dir, exist_ok=True)

    resize_size = args.resize_size
    line_thickness = args.line_thickness
    gaussian_kernel = args.gaussian_kernel
    gaussian_sigma = args.gaussian_sigma

    imgs_root_path = src_dir
    assert os.path.exists(
        imgs_root_path), f"specified path {imgs_root_path} not found."

    out_csv = [["saved_name", "real_name"]]

    mode = args.mode
    textname = f"WFLW_annotations/list_98pt_rect_attr_train_test/list_98pt_rect_attr_{mode}.txt"
    with open(os.path.join(src_dir, textname)) as f:
        annotations = f.readlines()
        annotations = [_.split(" ") for _ in annotations]

    # NOTE(review): `prep` is not used below; it is kept in case constructing
    # the Preprocessor has side effects - confirm and remove if not.
    prep = Preprocessor(imgs_root_path, resize_size,
                        line_thickness, gaussian_kernel, gaussian_sigma)

    tmp_hm_dict = dict()
    tmp_img_dict = dict()

    landmark_dir = os.path.join(out_dir, "WFLW_landmark_images", mode)
    cropped_dir = os.path.join(out_dir, "WFLW_cropped_images", mode)
    if args.save_boundary_image:
        os.makedirs(landmark_dir, exist_ok=True)
        os.makedirs(cropped_dir, exist_ok=True)

    for idx, annotation in enumerate(tqdm(annotations)):
        img_name, img, y_list, x_list = get_croped_image(
            annotation, os.path.join(src_dir, "WFLW_images"))

        # Landmarks are rescaled by the same factor as the image width.
        scale_ratio = 256. / img.shape[-1]
        x_list_scaled = [int(_ * scale_ratio) for _ in x_list]
        y_list_scaled = [int(_ * scale_ratio) for _ in y_list]
        img_resized = imresize(img, (256, 256), channel_first=True)

        bod_img = get_bod_img(img_resized, y_list_scaled, x_list_scaled,
                              resize_size, line_thickness, gaussian_kernel,
                              gaussian_sigma)
        bod_map = get_bod_map(img_resized, y_list_scaled, x_list_scaled,
                              resize_size, line_thickness, gaussian_kernel,
                              gaussian_sigma)

        saved_name = f"{mode}_{idx}.png"
        tmp_img_dict[saved_name] = img_resized
        tmp_hm_dict[saved_name] = bod_map  # uint8
        out_csv.append([saved_name, img_name])

        if args.save_boundary_image:
            save_path_bod = os.path.join(landmark_dir, saved_name)
            save_path_cropped = os.path.join(cropped_dir, saved_name)
            imsave(save_path_bod, bod_img, channel_first=True)
            imsave(save_path_cropped, img_resized, channel_first=True)

    np.savez_compressed(os.path.join(out_dir, f'WFLW_cropped_image_{mode}'),
                        **tmp_img_dict)
    np.savez_compressed(os.path.join(out_dir, f'WFLW_heatmap_{mode}'),
                        **tmp_hm_dict)

    # newline='' lets the csv module control line endings itself (otherwise
    # blank rows appear on platforms that translate '\n').
    with open(os.path.join(out_dir, f"{mode}_data.csv"), 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(out_csv)
def saveimage(path, img):
    """Apply ``img * 0.5 + 0.5`` to `img` and save the result channel-first to `path`."""
    halved = img * 0.5
    shifted = halved + 0.5
    imsave(path, shifted, channel_first=True)
def train_nerf(config, comm, model, dataset='blender'):
    """Train a NeRF model and periodically render and score test views.

    Args:
        config: Configuration object. Read through attribute access
            (``config.train``, ``config.solver``, ``config.log``,
            ``config.checkpoint``) and, for the solver state path, through
            ``config['checkpoint']['solver_path']``.
        comm: Communicator used for multi-process training, or ``None``.
        model: Model variant. ``'wild'`` enables both the transient head and
            the appearance embedding, ``'uncertainty'`` only the transient
            head, ``'appearance'`` only the embedding; any other value
            (e.g. ``'vanilla'``) enables neither.
        dataset: Dataset name. ``'phototourism'`` uses data iterators; every
            other value loads all images up front through ``get_data``.

    Side effects:
        Writes parameter/solver checkpoints and rendered test images into
        ``config.log.save_results_dir`` and reports test metrics through a
        ``MonitorManager``.
    """
    use_transient = False
    use_embedding = False

    if model == 'wild':
        use_transient = True
        use_embedding = True
    elif model == 'uncertainty':
        use_transient = True
    elif model == 'appearance':
        use_embedding = True

    save_results_dir = config.log.save_results_dir
    os.makedirs(save_results_dir, exist_ok=True)

    train_loss_dict = {
        'train_coarse_loss': 0.0,
        'train_fine_loss': 0.0,
        'train_total_loss': 0.0,
    }

    test_metric_dict = {'test_loss': 0.0, 'test_psnr': 0.0}

    monitor_manager = MonitorManager(train_loss_dict, test_metric_dict,
                                     save_results_dir)

    if dataset != 'phototourism':
        images, poses, _, hwf, i_test, i_train, near_plane, far_plane = get_data(
            config)
        height, width, focal_length = hwf
    else:
        di = get_photo_tourism_dataiterator(config, 'train', comm)
        val_di = get_photo_tourism_dataiterator(config, 'val', comm)

    if model != 'vanilla':
        if dataset != 'phototourism':
            # One embedding entry per image index seen in train or test.
            config.train.n_vocab = max(np.max(i_train), np.max(i_test)) + 1
            print(
                f'Setting Vocabulary size of embedding as {config.train.n_vocab}')

    if dataset != 'phototourism':
        if model in ['vanilla']:
            # Only three test images are rendered per process.
            if comm is not None:
                i_test = i_test[3 * comm.rank:3 * (comm.rank + 1)]
            else:
                i_test = i_test[:3]
        else:
            # i_test = i_train[0:5]
            # NOTE(review): this dereferences `comm` without a None check,
            # unlike the vanilla branch -- confirm non-vanilla models are
            # always run with a communicator.
            i_test = [i * (comm.rank + 1) for i in range(5)]
    else:
        i_test = [1]

    encode_position_function = get_encoding_function(
        config.train.num_encodings_position, True, True)
    if config.train.use_view_directions:
        encode_direction_function = get_encoding_function(
            config.train.num_encodings_direction, True, True)
    else:
        encode_direction_function = None

    lr = config.solver.lr
    num_decay_steps = config.solver.lr_decay_step * 1000
    lr_decay_factor = config.solver.lr_decay_factor
    solver = S.Adam(alpha=lr)

    load_solver_state = False
    if config.checkpoint.param_path is not None:
        nn.load_parameters(config.checkpoint.param_path)
        load_solver_state = True

    if comm is not None:
        num_decay_steps /= comm.n_procs
        comm_size = comm.n_procs
    else:
        comm_size = 1
    pbar = trange(config.train.num_iterations // comm_size,
                  disable=(comm is not None and comm.rank > 0))

    for i in pbar:

        if dataset != 'phototourism':
            # Pick one training image and a random subset of its rays.
            idx = np.random.choice(i_train)
            image = nn.Variable.from_numpy_array(images[idx][None, :, :, :3])
            pose = nn.Variable.from_numpy_array(poses[idx])

            ray_directions, ray_origins = get_ray_bundle(
                height, width, focal_length, pose)

            grid = get_direction_grid(width, height, focal_length,
                                      return_ij_2d_grid=True)
            grid = F.reshape(grid, (-1, 2))

            select_inds = np.random.choice(grid.shape[0],
                                           size=[config.train.num_rand_points],
                                           replace=False)
            select_inds = F.gather_nd(grid, select_inds[None, :])
            select_inds = F.transpose(select_inds, (1, 0))

            embed_inp = nn.Variable.from_numpy_array(
                np.full((config.train.chunksize_fine, ), idx, dtype=int))

            ray_origins = F.gather_nd(ray_origins, select_inds)
            ray_directions = F.gather_nd(ray_directions, select_inds)

            image = F.gather_nd(image[0], select_inds)

        else:
            # The iterator packs origin, direction, near and far plane into
            # the columns of `rays`.
            rays, embed_inp, image = di.next()
            ray_origins = nn.Variable.from_numpy_array(rays[:, :3])
            ray_directions = nn.Variable.from_numpy_array(rays[:, 3:6])
            near_plane = nn.Variable.from_numpy_array(rays[:, 6])
            far_plane = nn.Variable.from_numpy_array(rays[:, 7])

            embed_inp = nn.Variable.from_numpy_array(embed_inp)
            image = nn.Variable.from_numpy_array(image)

            hwf = None

        app_emb, trans_emb = None, None
        if use_embedding:
            with nn.parameter_scope('embedding_a'):
                app_emb = PF.embed(embed_inp, config.train.n_vocab,
                                   config.train.n_app)

        if use_transient:
            with nn.parameter_scope('embedding_t'):
                trans_emb = PF.embed(embed_inp, config.train.n_vocab,
                                     config.train.n_trans)

        if use_transient:
            rgb_map_course, rgb_map_fine, static_rgb_map_fine, transient_rgb_map_fine, beta, static_sigma, transient_sigma = forward_pass(
                ray_directions,
                ray_origins,
                near_plane,
                far_plane,
                app_emb,
                trans_emb,
                encode_position_function,
                encode_direction_function,
                config,
                use_transient,
                hwf=hwf,
                image=image)

            course_loss = 0.5 * F.mean(F.squared_error(rgb_map_course, image))
            # The fine loss is weighted per ray by 1 / beta^2.
            fine_loss = 0.5 * F.mean(
                F.squared_error(rgb_map_fine, image) /
                F.reshape(F.pow_scalar(beta, 2), beta.shape + (1, )))
            beta_reg_loss = 3 + F.mean(F.log(beta))
            sigma_trans_reg_loss = 0.01 * F.mean(transient_sigma)
            loss = course_loss + fine_loss + beta_reg_loss + sigma_trans_reg_loss
        else:
            rgb_map_course, _, _, _, rgb_map_fine, _, _, _ = forward_pass(
                ray_directions,
                ray_origins,
                near_plane,
                far_plane,
                app_emb,
                trans_emb,
                encode_position_function,
                encode_direction_function,
                config,
                use_transient,
                hwf=hwf)
            course_loss = F.mean(F.squared_error(rgb_map_course, image))
            fine_loss = F.mean(F.squared_error(rgb_map_fine, image))
            loss = course_loss + fine_loss

        pbar.set_description(
            f'Total: {np.around(loss.d, 4)}, Course: {np.around(course_loss.d, 4)}, Fine: {np.around(fine_loss.d, 4)}'
        )

        solver.set_parameters(nn.get_parameters(),
                              reset=False,
                              retain_state=True)
        if load_solver_state:
            # Solver states are restored once, after the parameters have
            # been registered with the solver.
            solver.load_states(config['checkpoint']['solver_path'])
            load_solver_state = False

        solver.zero_grad()

        loss.backward(clear_buffer=True)

        # Exponential LR decay
        if dataset != 'phototourism':
            lr_factor = (lr_decay_factor**((i) / num_decay_steps))
            solver.set_learning_rate(lr * lr_factor)
        else:
            if i % num_decay_steps == 0 and i != 0:
                solver.set_learning_rate(lr * lr_decay_factor)

        if comm is not None:
            params = [x.grad for x in nn.get_parameters().values()]
            comm.all_reduce(params, division=False, inplace=True)
        solver.update()

        # NOTE(review): checkpoints are written only when a communicator is
        # present (rank 0); with comm=None nothing is saved -- confirm this
        # is intended.
        if ((i % config.train.save_interval == 0
             or i == config.train.num_iterations - 1)
                and i != 0) and (comm is not None and comm.rank == 0):
            nn.save_parameters(os.path.join(save_results_dir, f'iter_{i}.h5'))
            solver.save_states(
                os.path.join(save_results_dir, f'solver_iter_{i}.h5'))

        if (i % config.train.test_interval == 0
                or i == config.train.num_iterations - 1) and i != 0:
            avg_psnr, avg_mse = 0.0, 0.0
            for i_t in trange(len(i_test)):

                if dataset != 'phototourism':
                    idx_test = i_test[i_t]
                    image = nn.NdArray.from_numpy_array(
                        images[idx_test][None, :, :, :3])
                    pose = nn.NdArray.from_numpy_array(poses[idx_test])

                    ray_directions, ray_origins = get_ray_bundle(
                        height, width, focal_length, pose)

                    ray_directions = F.reshape(ray_directions, (-1, 3))
                    ray_origins = F.reshape(ray_origins, (-1, 3))

                    embed_inp = nn.NdArray.from_numpy_array(
                        np.full((config.train.chunksize_fine, ),
                                idx_test,
                                dtype=int))

                else:
                    rays, embed_inp, image = val_di.next()
                    ray_origins = nn.NdArray.from_numpy_array(rays[0, :, :3])
                    ray_directions = nn.NdArray.from_numpy_array(
                        rays[0, :, 3:6])
                    near_plane_ = nn.NdArray.from_numpy_array(rays[0, :, 6])
                    far_plane_ = nn.NdArray.from_numpy_array(rays[0, :, 7])

                    embed_inp = nn.NdArray.from_numpy_array(
                        embed_inp[0, :config.train.chunksize_fine])
                    image = nn.NdArray.from_numpy_array(image[0].transpose(
                        1, 2, 0))
                    image = F.reshape(image, (1, ) + image.shape)
                    idx_test = 1

                app_emb, trans_emb = None, None
                if use_embedding:
                    with nn.parameter_scope('embedding_a'):
                        app_emb = PF.embed(embed_inp, config.train.n_vocab,
                                           config.train.n_app)

                if use_transient:
                    with nn.parameter_scope('embedding_t'):
                        trans_emb = PF.embed(embed_inp, config.train.n_vocab,
                                             config.train.n_trans)

                # Rays are rendered in batches; the last batch holds the
                # remainder and is skipped below when it is empty.
                num_ray_batches = ray_directions.shape[
                    0] // config.train.ray_batch_size + 1

                if use_transient:
                    rgb_map_fine_list, static_rgb_map_fine_list, transient_rgb_map_fine_list = [], [], []
                else:
                    rgb_map_fine_list, depth_map_fine_list = [], []

                for r_idx in trange(num_ray_batches):
                    if r_idx != num_ray_batches - 1:
                        ray_d, ray_o = ray_directions[
                            r_idx * config.train.ray_batch_size:(r_idx + 1) *
                            config.train.ray_batch_size], ray_origins[
                                r_idx *
                                config.train.ray_batch_size:(r_idx + 1) *
                                config.train.ray_batch_size]

                        if dataset == 'phototourism':
                            near_plane = near_plane_[
                                r_idx *
                                config.train.ray_batch_size:(r_idx + 1) *
                                config.train.ray_batch_size]
                            far_plane = far_plane_[
                                r_idx *
                                config.train.ray_batch_size:(r_idx + 1) *
                                config.train.ray_batch_size]

                    else:
                        if ray_directions.shape[0] - (
                                num_ray_batches -
                                1) * config.train.ray_batch_size == 0:
                            break
                        ray_d, ray_o = ray_directions[
                            r_idx *
                            config.train.ray_batch_size:, :], ray_origins[
                                r_idx * config.train.ray_batch_size:, :]
                        if dataset == 'phototourism':
                            near_plane = near_plane_[
                                r_idx * config.train.ray_batch_size:]
                            far_plane = far_plane_[
                                r_idx * config.train.ray_batch_size:]

                    if use_transient:
                        rgb_map_course, rgb_map_fine, static_rgb_map_fine, transient_rgb_map_fine, beta, static_sigma, transient_sigma = forward_pass(
                            ray_d,
                            ray_o,
                            near_plane,
                            far_plane,
                            app_emb,
                            trans_emb,
                            encode_position_function,
                            encode_direction_function,
                            config,
                            use_transient,
                            hwf=hwf)

                        rgb_map_fine_list.append(rgb_map_fine)
                        static_rgb_map_fine_list.append(static_rgb_map_fine)
                        transient_rgb_map_fine_list.append(
                            transient_rgb_map_fine)

                    else:
                        _, _, _, _, rgb_map_fine, depth_map_fine, _, _ = \
                            forward_pass(ray_d, ray_o, near_plane, far_plane,
                                         app_emb, trans_emb,
                                         encode_position_function,
                                         encode_direction_function,
                                         config, use_transient, hwf=hwf)

                        rgb_map_fine_list.append(rgb_map_fine)
                        depth_map_fine_list.append(depth_map_fine)

                if use_transient:
                    rgb_map_fine = F.concatenate(*rgb_map_fine_list, axis=0)
                    static_rgb_map_fine = F.concatenate(
                        *static_rgb_map_fine_list, axis=0)
                    transient_rgb_map_fine = F.concatenate(
                        *transient_rgb_map_fine_list, axis=0)

                    rgb_map_fine = F.reshape(rgb_map_fine, image[0].shape)
                    static_rgb_map_fine = F.reshape(static_rgb_map_fine,
                                                    image[0].shape)
                    transient_rgb_map_fine = F.reshape(transient_rgb_map_fine,
                                                       image[0].shape)
                    # Side-by-side images separated by a 5-pixel white strip.
                    static_trans_img_to_save = np.concatenate(
                        (static_rgb_map_fine.data,
                         np.ones((image[0].shape[0], 5, 3)),
                         transient_rgb_map_fine.data),
                        axis=1)
                    img_to_save = np.concatenate(
                        (image[0].data, np.ones(
                            (image[0].shape[0], 5, 3)), rgb_map_fine.data),
                        axis=1)
                else:
                    rgb_map_fine = F.concatenate(*rgb_map_fine_list, axis=0)
                    depth_map_fine = F.concatenate(*depth_map_fine_list,
                                                   axis=0)

                    rgb_map_fine = F.reshape(rgb_map_fine, image[0].shape)
                    depth_map_fine = F.reshape(depth_map_fine,
                                               image[0].shape[:-1])
                    img_to_save = np.concatenate(
                        (image[0].data, np.ones(
                            (image[0].shape[0], 5, 3)), rgb_map_fine.data),
                        axis=1)

                filename = os.path.join(save_results_dir,
                                        f'{i}_{idx_test}.png')
                # Saving previews is best-effort: a failure must not abort
                # training, but it is reported instead of silently dropped.
                # Catching Exception (not a bare except) lets
                # KeyboardInterrupt / SystemExit propagate.
                try:
                    imsave(filename,
                           np.clip(img_to_save, 0, 1),
                           channel_first=False)
                    print(f'Saved generation at {filename}')

                    if use_transient:
                        filename_static_trans = os.path.join(
                            save_results_dir, f's_t_{i}_{idx_test}.png')
                        imsave(filename_static_trans,
                               np.clip(static_trans_img_to_save, 0, 1),
                               channel_first=False)
                    else:
                        filename_dm = os.path.join(save_results_dir,
                                                   f'dm_{i}_{idx_test}.png')
                        # Min-max normalise the depth map before saving.
                        depth_map_fine = (depth_map_fine.data -
                                          depth_map_fine.data.min()) / (
                                              depth_map_fine.data.max() -
                                              depth_map_fine.data.min())
                        imsave(filename_dm,
                               depth_map_fine[:, :, None],
                               channel_first=False)
                        plt.imshow(depth_map_fine.data)
                        plt.savefig(filename_dm)
                        plt.close()
                except Exception as err:
                    print(
                        f'Failed to save test images for iteration {i}: {err}'
                    )

                # Compute the test MSE once and derive the PSNR from it.
                mse = F.mean(F.squared_error(rgb_map_fine, image[0])).data
                avg_mse += mse
                avg_psnr += (-10. * np.log10(mse))

            test_metric_dict['test_loss'] = avg_mse / len(i_test)
            test_metric_dict['test_psnr'] = avg_psnr / len(i_test)
            monitor_manager.add(i, test_metric_dict)
            print(
                f'Saved generations after {i} training iterations! Average PSNR: {avg_psnr/len(i_test)}. Average MSE: {avg_mse/len(i_test)}'
            )
def generate():
    """Generate images for the Cityscapes validation split.

    Reads the configuration through ``get_config``, builds the generator and
    runs it on every sample assigned to this process. For each sample the
    generated image and the colorized label map are written as PNG files
    into ``<conf.train.save_path>/generated``. Rank 0 additionally writes
    ``in_out_pairs.txt`` listing the input paths and output paths of the
    samples it processed.

    Raises:
        NotImplementedError: If ``conf.train.data_set`` is not
            ``"cityscapes"``.
    """
    conf = get_config()

    # batch_size is forced to be 1
    conf.train.batch_size = 1

    image_shape = (conf.train.batch_size,) + \
        tuple(x * conf.model.g_n_scales for x in [512, 1024])

    # set context
    comm = init_nnabla(conf.nnabla_context)

    # find all test data
    if conf.train.data_set == "cityscapes":
        data_list = get_cityscape_datalist(conf.cityscapes,
                                           data_type="val",
                                           save_file=comm.rank == 0)
        conf.model.n_label_ids = conf.cityscapes.n_label_ids
    else:
        # Report the value that was actually tested above (the message used
        # to read a different attribute, `conf.dataset`).
        raise NotImplementedError(
            "Currently dataset {} is not supported.".format(
                conf.train.data_set))

    if comm.n_procs > 1:
        data_list = get_data_lists_for_each_process(data_list,
                                                    comm.n_procs)[comm.rank]

    # define generator
    generator = Generator(image_shape=image_shape, mconf=conf.model)

    # load parameters
    if os.path.exists(conf.load_path):
        nn.load_parameters(conf.load_path)
    else:
        # Loading is really skipped now, as the message has always stated.
        logger.warning(
            "Path to load params is not found."
            " Loading params is skipped and generated result will be unreasonable. ({})"
            .format(conf.load_path))

    progress_iterator = trange(len(data_list) // conf.train.batch_size,
                               desc="[Generating Images]",
                               disable=comm.rank > 0)

    # for label2color
    label2color = Colorize(conf.model.n_label_ids)

    save_path = os.path.join(conf.train.save_path, "generated")
    os.makedirs(save_path, exist_ok=True)

    output_str = []
    for i in progress_iterator:
        paths = data_list[i]
        image, instance_id, object_id = cityscapes_load_function(
            paths[0], paths[1], paths[2], image_shape[1:])
        gen = generator(instance_id, object_id)
        # Min-max normalise the generator output before saving.
        gen = (gen - gen.min()) / (gen.max() - gen.min())
        id_colorized = label2color(object_id).astype(np.uint8)

        gen_image_path = os.path.join(save_path,
                                      "res{}_{}.png".format(comm.rank, i))
        input_image_path = os.path.join(
            save_path, "input_{}_{}.png".format(comm.rank, i))

        imsave(gen_image_path, gen[0], channel_first=True)
        imsave(input_image_path, id_colorized)
        output_str.append(" ".join(paths +
                                   [gen_image_path, input_image_path]))

    if comm.rank == 0:
        with open(os.path.join(save_path, "in_out_pairs.txt"), "w") as f:
            f.write("\n".join(output_str))
def generate():
    """Run the generator on every file in ``config.test_input_dir``.

    Reads the configuration through ``get_config``, builds the generator
    graph once and then, for each input file, writes the min-max normalised
    generated image and the rescaled input image as PNG files into
    ``config.test_output_dir``.
    """
    config = get_config()

    # batch_size is forced to be 1
    config.train.batch_size = 1

    image_shape = (config.train.batch_size, 3) + \
        tuple(x * config.model.g_n_scales for x in [512, 512])

    # set context
    comm = init_nnabla(config.nnabla_context)

    img_path_list = [
        os.path.join(config.test_input_dir, path)
        for path in os.listdir(config.test_input_dir)
    ]

    test_image = nn.Variable(shape=image_shape)

    # define generator
    generator = LocalGenerator()
    generated_image, _, = generator(
        test_image,
        lg_channels=config.model.lg_channels,
        gg_channels=config.model.gg_channels,
        n_scales=config.model.g_n_scales,
        lg_n_residual_layers=config.model.lg_num_residual_loop,
        gg_n_residual_layers=config.model.gg_num_residual_loop)

    # load parameters
    if os.path.exists(config.load_path):
        nn.load_parameters(config.load_path)
    else:
        # Loading is really skipped now, as the message has always stated.
        logger.warning(
            "Path to load params is not found."
            " Loading params is skipped and generated result will be unreasonable. ({})"
            .format(config.load_path))

    progress_iterator = trange(len(img_path_list) // config.train.batch_size,
                               desc="[Generating Images]",
                               disable=comm.rank > 0)

    save_path = os.path.join(config.test_output_dir)
    os.makedirs(save_path, exist_ok=True)

    for i in progress_iterator:
        path = img_path_list[i]
        test_image_data = get_var(path, image_shape[2:])
        test_image.d = test_image_data
        generated_image.forward(clear_buffer=True)

        # Read the output buffer once, then min-max normalise it.
        generated = generated_image.d
        generated_image_data = (generated - generated.min()) / \
            (generated.max() - generated.min())
        test_image_data = test_image_data * 0.5 + 0.5

        gen_image_path = os.path.join(save_path,
                                      "res{}_{}.png".format(comm.rank, i))
        input_image_path = os.path.join(
            save_path, "input_{}_{}.png".format(comm.rank, i))

        imsave(gen_image_path, generated_image_data[0], channel_first=True)
        imsave(input_image_path, test_image_data[0], channel_first=True)
def convert_image(args):
    """Convert one image and its label file into dataset files.

    The source image is resized to ``width`` x ``height`` (optionally after
    centre-trimming or zero-padding), converted to the requested channel
    count and saved as PNG. The labels are turned into a per-anchor grid of
    class ids and a per-anchor grid of box regions, both saved as CSV.

    Args:
        args: Sequence of ten positional values:
            ``(file_name, source_dir, dest_dir, width, height, mode, ch,
            num_class, grid_size, anchors)``. ``mode`` selects
            ``'trimming'`` or ``'padding'``; any other value resizes
            directly. ``num_class`` is unpacked but not used here.

    Raises:
        ValueError: If the image is neither 2-D nor 3-D, or is 3-D with a
            channel count other than 3.
        Exception: Any error raised while reading, converting or saving the
            image is logged and re-raised.
    """
    file_name = args[0]
    source_dir = args[1]
    dest_dir = args[2]
    width = args[3]
    height = args[4]
    mode = args[5]
    ch = args[6]
    num_class = args[7]
    grid_size = args[8]
    anchors = args[9]

    base_name = os.path.splitext(file_name)[0]
    src_file_name = os.path.join(source_dir, file_name)
    src_label_file_name = os.path.join(source_dir, base_name + ".txt")
    image_file_name = os.path.join(dest_dir, 'data', base_name + ".png")
    label_file_name = os.path.join(dest_dir, 'data',
                                   base_name + "_label.csv")
    region_file_name = os.path.join(dest_dir, 'data',
                                    base_name + "_region.csv")
    os.makedirs(os.path.dirname(image_file_name), exist_ok=True)

    # open source image
    labels = load_label(src_label_file_name)

    warp_func = None
    try:
        im = imread(src_file_name)
        if len(im.shape) < 2 or len(im.shape) > 3:
            message = "Illegal image file format {}.".format(src_file_name)
            logger.warning(message)
            raise ValueError(message)
        elif len(im.shape) == 3:
            # RGB image
            if im.shape[2] != 3:
                message = "The image must be RGB or monochrome."
                logger.warning(message)
                raise ValueError(message)

        # resize
        h = im.shape[0]
        w = im.shape[1]
        input_size = (w, h)
        if w != width or h != height:
            # resize image
            if mode == 'trimming':
                # trimming mode: centre-crop to the target aspect ratio
                if float(h) / w > float(height) / width:
                    target_h = int(float(w) / width * height)
                    im = im[(h - target_h) // 2:h - (h - target_h) // 2, ::]
                else:
                    target_w = int(float(h) / height * width)
                    im = im[::, (w - target_w) // 2:w - (w - target_w) // 2]

                # NOTE(review): this helper rewrites label[0] and label[1],
                # while the code below reads label[0] as the class id and
                # label[1:] as XYWH -- confirm the index layout against
                # load_label.
                def trim_warp(label, input_size, output_size):
                    w_scale = input_size[0] * 1.0 / output_size[0]
                    h_scale = input_size[1] * 1.0 / output_size[1]
                    label[0] = (label[0] - (1.0 - 1.0 / w_scale)
                                * 0.5) * w_scale
                    label[1] = (label[1] - (1.0 - 1.0 / h_scale)
                                * 0.5) * h_scale
                    label[3] *= w_scale
                    label[4] *= h_scale
                    return label
                warp_func = trim_warp
            elif mode == 'padding':
                # padding mode: zero-pad to the target aspect ratio
                if float(h) / w < float(height) / width:
                    target_h = int(float(height) / width * w)
                    pad = (((target_h - h) // 2, target_h -
                            (target_h - h) // 2 - h), (0, 0))
                else:
                    target_w = int(float(width) / height * h)
                    pad = ((0, 0), ((target_w - w) // 2,
                                    target_w - (target_w - w) // 2 - w))
                if len(im.shape) == 3:
                    pad = pad + ((0, 0),)
                im = np.pad(im, pad, 'constant')

                # NOTE(review): same label index question as trim_warp.
                def pad_warp(label, input_size, output_size):
                    w_scale = input_size[0] * 1.0 / output_size[0]
                    h_scale = input_size[1] * 1.0 / output_size[1]
                    label[0] = (label[0] * w_scale + (1.0 - w_scale) * 0.5)
                    label[1] = (label[1] * h_scale + (1.0 - h_scale) * 0.5)
                    label[3] *= w_scale
                    label[4] *= h_scale
                    return label
                warp_func = pad_warp
            im = imresize(im, size=(width, height))
            output_size = (width, height)

        # change color ch
        if len(im.shape) == 2 and ch == 3:
            # Monochrome to RGB
            im = np.array([im, im, im]).transpose((1, 2, 0))
        elif len(im.shape) == 3 and ch == 1:
            # RGB to monochrome
            im = np.dot(im[..., :3], [0.299, 0.587, 0.114]).astype(np.uint8)

        # output image
        imsave(image_file_name, im)
    except Exception:
        logger.warning(
            "Failed to convert %s." % (src_file_name))
        raise

    # create label and region file
    if warp_func is not None:
        labels = [warp_func(label, input_size, output_size)
                  for label in labels]
    grid_w = width // grid_size
    grid_h = height // grid_size
    # The np.int / np.float aliases were removed in NumPy 1.24; the builtin
    # types select the same dtypes.
    label_array = np.full((len(anchors), grid_h, grid_w), -1, dtype=int)
    region_array = np.full(
        (len(anchors), grid_h, grid_w, 4), 0.0, dtype=float)

    for label in labels:
        label_rect = ObjectRect(XYWH=label[1:]).clip()

        if label_rect.width() > 0.0 and label_rect.height() > 0.0:
            gx, gy = int(label_rect.centerx() *
                         grid_w), int(label_rect.centery() * grid_h)
            # Pick the anchor whose box, centred on this grid cell, has the
            # highest IoU with the label box.
            max_iou = 0
            anchor_index = 0
            for i, anchor in enumerate(anchors):
                anchor_rect = ObjectRect(
                    XYWH=[(gx + 0.5) / grid_w, (gy + 0.5) / grid_h,
                          anchor[0], anchor[1]])
                iou = label_rect.iou(anchor_rect)
                if iou > max_iou:
                    anchor_index = i
                    max_iou = iou
            label_array[anchor_index][gy][gx] = int(label[0])
            # `anchor_rect` is the rect of the LAST anchor here; only its
            # centre is read, and every anchor rect is built with the same
            # centre arguments.
            region_array[anchor_index][gy][gx] = [
                (label_rect.centerx() - anchor_rect.centerx()) * grid_w + 0.5,
                (label_rect.centery() - anchor_rect.centery()) * grid_h + 0.5,
                np.log(label_rect.width() * grid_w),
                np.log(label_rect.height() * grid_h)]

    np.savetxt(label_file_name, label_array.reshape(
        (label_array.shape[0] * label_array.shape[1], -1)),
        fmt='%i', delimiter=',')
    np.savetxt(region_file_name, region_array.reshape(
        (region_array.shape[0] * region_array.shape[1], -1)),
        fmt='%f', delimiter=',')
def main(**kwargs):
    """Sample images from a trained diffusion model and save them.

    Keyword Args:
        config: Path to the YAML model configuration.
        h5: Path to the parameter file to load.
        device_id: Device id passed to ``init_nnabla``.
        sampling_interval: Stride between the diffusion timesteps that are
            used; ``None`` means 1. The last timestep is always included.
        batch_size: Number of samples generated per iteration per process.
        samples: Total number of samples requested across all processes.
        output_dir: Directory that receives the generated files.
        tiled: If true, save each batch as one tiled image instead of one
            file per sample.
        ema, ddim: Forwarded to ``model.sample`` as ``use_ema`` and
            ``use_ddim``.
        save_xstart: If true, also write a video of the predicted x0 along
            the sampling trajectory for every iteration.

    Raises:
        AssertionError: If the config file or the h5 file does not exist.
    """
    # set training args
    args = AttrDict(kwargs)

    assert os.path.exists(
        args.config
    ), f"{args.config} is not found. Please make sure the config file exists."
    conf = read_yaml(args.config)

    comm = init_nnabla(ext_name="cudnn",
                       device_id=args.device_id,
                       type_config="float",
                       random_pseed=True)
    if args.sampling_interval is None:
        args.sampling_interval = 1

    use_timesteps = list(
        range(0, conf.num_diffusion_timesteps, args.sampling_interval))
    if use_timesteps[-1] != conf.num_diffusion_timesteps - 1:
        # The last step should be included always.
        use_timesteps.append(conf.num_diffusion_timesteps - 1)

    # setup model variance type
    model_var_type = ModelVarType.FIXED_SMALL
    if "model_var_type" in conf:
        model_var_type = ModelVarType.get_vartype_from_key(conf.model_var_type)

    model = Model(beta_strategy=conf.beta_strategy,
                  use_timesteps=use_timesteps,
                  model_var_type=model_var_type,
                  num_diffusion_timesteps=conf.num_diffusion_timesteps,
                  attention_num_heads=conf.num_attention_heads,
                  attention_resolutions=conf.attention_resolutions,
                  scale_shift_norm=conf.ssn,
                  base_channels=conf.base_channels,
                  channel_mult=conf.channel_mult,
                  num_res_blocks=conf.num_res_blocks)

    # load parameters
    assert os.path.exists(
        args.h5
    ), f"{args.h5} is not found. Please make sure the h5 file exists."
    nn.parameter.load_parameters(args.h5)

    # Make sure the destination exists before anything is written to it.
    os.makedirs(args.output_dir, exist_ok=True)

    # Generate
    # sampling
    B = args.batch_size
    num_samples_per_iter = B * comm.n_procs
    # Ceiling division: enough iterations to cover the requested samples.
    num_iter = (args.samples + num_samples_per_iter -
                1) // num_samples_per_iter

    local_saved_cnt = 0
    for i in range(num_iter):
        logger.info(f"Generate samples {i + 1} / {num_iter}.")
        sample_out, _, x_starts = model.sample(shape=(B, ) +
                                               conf.image_shape[1:],
                                               dump_interval=1,
                                               use_ema=args.ema,
                                               progress=comm.rank == 0,
                                               use_ddim=args.ddim)

        # scale back to [0, 255]; clip first so that values slightly
        # outside the range do not wrap around in the uint8 cast.
        sample_out = np.clip((sample_out + 1) * 127.5, 0, 255)

        if args.tiled:
            save_path = os.path.join(
                args.output_dir, f"gen_{local_saved_cnt}_{comm.rank}.png")
            save_tiled_image(sample_out.astype(np.uint8), save_path)
            local_saved_cnt += 1
        else:
            for b in range(B):
                save_path = os.path.join(
                    args.output_dir, f"gen_{local_saved_cnt}_{comm.rank}.png")
                imsave(save_path,
                       sample_out[b].astype(np.uint8),
                       channel_first=True)
                local_saved_cnt += 1

        # create video for x_starts
        if args.save_xstart:
            clips = []
            for step in x_starts:
                # The predicted x0 is the second element of each entry.
                xstart = step[1]
                assert isinstance(xstart, np.ndarray)
                im = get_tiled_image(np.clip((xstart + 1) * 127.5, 0, 255),
                                     channel_last=False).astype(np.uint8)
                clips.append(im)

            clip = mp.ImageSequenceClip(clips, fps=5)
            clip.write_videofile(
                os.path.join(
                    args.output_dir,
                    f"pred_x0_along_time_{local_saved_cnt}_{comm.rank}.mp4"))
def saveimage(path, img):
    """Normalize ``img`` with ``img * 0.5 + 0.5`` and write it to ``path``.

    The result is handed to ``imsave`` with ``channel_first=True``.
    """
    normalized = (img * 0.5) + 0.5  # Normalize.
    imsave(path, normalized, channel_first=True)
def main():
    """Run YOLO v2 detection on one image, save the result and time it.

    Command-line arguments come from ``get_args``. The input image is
    letterboxed into a square of side ``args.width``, passed through the
    network, the detections are drawn on the original image and saved to
    ``args.output``. Ten more forward passes are then timed and the mean
    time per image is printed.
    """
    args = get_args()
    names = np.genfromtxt(args.class_names, dtype=str, delimiter='?')

    # Fixed seed so that every class always gets the same colour.
    rng = np.random.RandomState(1223)
    color_table = rng.randint(0, 256, (args.classes, 3)).astype(np.uint8)
    colors = [tuple(row.tolist()) for row in color_table]

    # Set context
    from nnabla.ext_utils import get_extension_context
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Load parameter
    nn.load_parameters(args.weights)

    # Build a YOLO v2 network
    side = args.width
    feature_dict = {}
    x = nn.Variable((1, 3, side, side))
    y = yolov2.yolov2(x,
                      args.num_anchors,
                      args.classes,
                      test=True,
                      feature_dict=feature_dict)
    y = yolov2.yolov2_activate(y, args.num_anchors, args.anchors)
    y = F.nms_detection2d(y, args.thresh, args.nms, args.nms_per_class)

    # Read image
    img_orig = imread(args.input, num_channels=3)
    im_h, im_w, _ = img_orig.shape

    # letterbox: scale along the tighter axis, keep the aspect ratio
    w = side
    h = side
    width_is_tighter = (w * 1.0 / im_w) < (h * 1. / im_h)
    if width_is_tighter:
        new_w, new_h = w, int((im_h * w) / im_w)
    else:
        new_w, new_h = int((im_w * h) / im_h), h

    patch = imresize(img_orig, (new_w, new_h)) / 255.
    # Grey canvas with the resized image pasted in its centre.
    img = np.full((h, w, 3), 0.5, np.float32)
    x0 = int((w - new_w) / 2)
    y0 = int((h - new_h) / 2)
    img[y0:y0 + new_h, x0:x0 + new_w] = patch

    # Execute YOLO v2
    print("forward")
    in_img = img.transpose(2, 0, 1).reshape(1, 3, side, side)
    x.d = in_img
    y.forward(clear_buffer=True)
    print("done")

    bboxes = y.d[0]
    img_draw = draw_bounding_boxes(img_orig, bboxes, im_w, im_h, names,
                                   colors, new_w * 1.0 / w, new_h * 1.0 / h,
                                   args.thresh)
    imsave(args.output, img_draw)

    # Timing
    n_time = 10
    started = time.time()
    for _ in range(n_time):
        x.d = in_img
        y.forward(clear_buffer=True)
        # Invoking device-to-host copy if CUDA
        # so that time contains data transfer.
        _ = y.d
    elapsed = time.time() - started
    print("Processing time: {:.1f} [ms/image]".format(
        elapsed / n_time * 1000))