def encode_scene(images, viewpoints):
    """Encode one batch of scenes into a GQN representation plus a query pair.

    Draws a random subset of views as observations, encodes them with the
    module-level ``model``, then independently draws one query view whose
    image/viewpoint are moved to the GPU (when configured) and returned.

    Returns:
        (representation, query_images, query_viewpoints)
    """
    # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
    images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)

    view_count = images.shape[1]
    # How many views to observe: uniform over 1..view_count.
    sampled_view_count = random.choice(range(1, view_count + 1))

    # Pick that many distinct view indices in random order.
    shuffled_indices = list(range(view_count))
    random.shuffle(shuffled_indices)
    chosen_indices = shuffled_indices[:sampled_view_count]

    observation_images = preprocess_images(images[:, chosen_indices])
    observation_query = viewpoints[:, chosen_indices]
    representation = model.compute_observation_representation(
        observation_images, observation_query)

    # The query view is drawn independently of the observed ones.
    query_index = random.choice(range(view_count))
    query_images = preprocess_images(images[:, query_index])
    query_viewpoints = viewpoints[:, query_index]

    # Transfer to gpu if necessary
    query_images = to_device(query_images, gpu_device)
    query_viewpoints = to_device(query_viewpoints, gpu_device)

    return representation, query_images, query_viewpoints
def main():
    """Render GQN predictions (per-pixel mean and variance) for every scene in
    an HDF5 dataset and save one animated GIF per scene.

    Loads scenes from ``args.dataset_directory``, restores the model from
    ``args.snapshot_directory`` / ``args.snapshot_file`` and writes
    ``observations_<n>.gif`` files into ``args.figure_directory``.
    Requires a GPU: arrays are unconditionally moved with ``cuda.to_gpu``.
    """
    try:
        os.makedirs(args.figure_directory)
    except:
        # Best-effort: directory probably exists already.
        # NOTE(review): bare `except` also hides real errors (e.g. permission
        # denied); `os.makedirs(..., exist_ok=True)` would be safer.
        pass

    #==============================================================================
    # Utilities
    #==============================================================================
    def read_files(directory):
        """Load every *.h5 file in `directory` and return a list of
        {'image': ..., 'viewpoint': ...} records, one per scene."""
        filenames = []
        files = os.listdir(directory)
        # ipdb.set_trace()
        for filename in files:
            if filename.endswith(".h5"):
                filenames.append(filename)
        filenames.sort()
        dataset_images = []
        dataset_viewpoints = []
        for i in range(len(filenames)):
            F = h5py.File(os.path.join(directory, filenames[i]))
            tmp_images = list(F["images"])
            tmp_viewpoints = list(F["viewpoints"])
            dataset_images.extend(tmp_images)
            dataset_viewpoints.extend(tmp_viewpoints)
        # for i in range(len(filenames)):
        #     images_npy_path = os.path.join(directory, "images", filenames[i])
        #     viewpoints_npy_path = os.path.join(directory, "viewpoints", filenames[i])
        #     tmp_images = np.load(images_npy_path)
        #     tmp_viewpoints = np.load(viewpoints_npy_path)
        #     assert tmp_images.shape[0] == tmp_viewpoints.shape[0]
        #     dataset_images.extend(tmp_images)
        #     dataset_viewpoints.extend(tmp_viewpoints)
        dataset_images = np.array(dataset_images)
        dataset_viewpoints = np.array(dataset_viewpoints)
        dataset = list()
        for i in range(len(dataset_images)):
            item = {
                'image': dataset_images[i],
                'viewpoint': dataset_viewpoints[i]
            }
            dataset.append(item)
        return dataset

    def to_device(array):
        # if using_gpu:
        # NOTE(review): the guard above is commented out, so the copy to GPU
        # always happens -- this script therefore requires a GPU.
        array = cuda.to_gpu(array)
        return array

    def fill_observations_axis(observation_images):
        """Tile the given observation images horizontally, centered on a black
        canvas wide enough for `total_observations_per_scene` images."""
        axis_observations_image = np.full(
            (3, image_shape[1], total_observations_per_scene * image_shape[2]),
            black_color,
            dtype=np.float32)
        num_current_obs = len(observation_images)
        total_obs = total_observations_per_scene
        width = image_shape[2]
        # Left margin that centers the filled strip on the canvas.
        x_start = width * (total_obs - num_current_obs) // 2
        for obs_image in observation_images:
            x_end = x_start + width
            axis_observations_image[:, :, x_start:x_end] = obs_image
            x_start += width
        return axis_observations_image

    def compute_camera_angle_at_frame(t):
        # One full revolution every `fps * 2` frames.
        return t * 2 * math.pi / (fps * 2)

    def rotate_query_viewpoint(horizontal_angle_rad, camera_distance,
                               camera_position_y):
        """Build a (1, 7) query viewpoint (x, y, z, cos/sin yaw, cos/sin pitch)
        for a camera orbiting the scene origin at fixed height and distance."""
        camera_position = np.array([
            camera_distance * math.sin(horizontal_angle_rad),  # x
            camera_position_y,
            camera_distance * math.cos(horizontal_angle_rad),  # z
        ])
        center = np.array((0, camera_position_y, 0))
        camera_direction = camera_position - center
        yaw, pitch = compute_yaw_and_pitch(camera_direction)
        query_viewpoints = xp.array(
            (
                camera_position[0],
                camera_position[1],
                camera_position[2],
                math.cos(yaw),
                math.sin(yaw),
                math.cos(pitch),
                math.sin(pitch),
            ),
            dtype=np.float32,
        )
        query_viewpoints = xp.broadcast_to(query_viewpoints,
                                           (1, ) + query_viewpoints.shape)
        return query_viewpoints

    def render(representation,
               camera_distance,
               camera_position_y,
               total_frames,
               animation_frame_array,
               rotate_camera=True):
        """Append `total_frames` frames (observation strip + one generated
        prediction) to `animation_frame_array`; orbits the camera unless
        `rotate_camera` is False."""
        # viewpoint_file = open('viewpoints.txt','w')
        for t in range(0, total_frames):
            artist_array = [
                axis_observations.imshow(cv2.cvtColor(
                    make_uint8(axis_observations_image), cv2.COLOR_BGR2RGB),
                                         interpolation="none",
                                         animated=True)
            ]
            horizontal_angle_rad = compute_camera_angle_at_frame(t)
            if rotate_camera == False:
                # Hold the camera at the frame-0 angle.
                horizontal_angle_rad = compute_camera_angle_at_frame(0)
            query_viewpoints = rotate_query_viewpoint(horizontal_angle_rad,
                                                      camera_distance,
                                                      camera_position_y)
            generated_images = model.generate_image(query_viewpoints,
                                                    representation)[0]
            generated_images = chainer.backends.cuda.to_cpu(generated_images)
            generated_images = make_uint8(generated_images)
            generated_images = cv2.cvtColor(generated_images,
                                            cv2.COLOR_BGR2RGB)
            artist_array.append(
                axis_generation.imshow(generated_images,
                                       interpolation="none",
                                       animated=True))
            animation_frame_array.append(artist_array)

    def render_wVar(representation,
                    camera_distance,
                    camera_position_y,
                    total_frames,
                    animation_frame_array,
                    no_of_samples,
                    rotate_camera=True,
                    wVariance=True):
        """Like `render`, but draws `no_of_samples` samples per viewpoint and
        shows the per-pixel mean image and a grayscale variance map.

        NOTE(review): `wVariance` is only referenced by commented-out logging
        code, so the flag currently has no runtime effect.
        """
        # highest_var = 0.0
        # with open("queries.txt",'w') as file_wviews, open("variance.txt",'w') as file_wvar:
        for t in range(0, total_frames):
            artist_array = [
                axis_observations.imshow(cv2.cvtColor(
                    make_uint8(axis_observations_image), cv2.COLOR_BGR2RGB),
                                         interpolation="none",
                                         animated=True)
            ]
            horizontal_angle_rad = compute_camera_angle_at_frame(t)
            if rotate_camera == False:
                horizontal_angle_rad = compute_camera_angle_at_frame(0)
            query_viewpoints = rotate_query_viewpoint(horizontal_angle_rad,
                                                      camera_distance,
                                                      camera_position_y)
            # q_x, q_y, q_z, _, _, _, _ = query_viewpoints[0]
            # file_wviews.writelines("".join(str(q_x))+", "+
            #                        "".join(str(q_y))+", "+
            #                        "".join(str(q_z))+"\n")

            # Sample several renderings of the same viewpoint to estimate the
            # model's predictive mean and variance.
            generated_images = cp.squeeze(
                cp.array(
                    model.generate_images(query_viewpoints, representation,
                                          no_of_samples)))
            # ipdb.set_trace()
            var_image = cp.var(generated_images, axis=0)
            mean_image = cp.mean(generated_images, axis=0)
            mean_image = make_uint8(
                np.squeeze(chainer.backends.cuda.to_cpu(mean_image)))
            mean_image_rgb = cv2.cvtColor(mean_image, cv2.COLOR_BGR2RGB)
            var_image = chainer.backends.cuda.to_cpu(var_image)

            # grayscale (ITU-R BT.601 luma weights)
            r, g, b = var_image
            gray_var_image = 0.2989 * r + 0.5870 * g + 0.1140 * b

            # thresholding Otsu's method
            # thresh = threshold_otsu(gray_var_image)
            # var_binary = gray_var_image > thresh

            ## hill climb algorthm for searching highest variance
            # cur_var = np.mean(gray_var_image)
            # if cur_var>highest_var:
            #     highest_var = cur_var
            #     if wVariance==True:
            #         print('highest variance: '+str(highest_var)+', viewpoint: '+str(query_viewpoints[0]))
            #         highest_var_vp = query_viewpoints[0]
            #         file_wvar.writelines('highest variance: '+str(highest_var)+', viewpoint: '+str(highest_var_vp)+'\n')
            #     else:
            #         pass

            artist_array.append(
                axis_generation_var.imshow(gray_var_image,
                                           cmap=plt.cm.gray,
                                           interpolation="none",
                                           animated=True))
            artist_array.append(
                axis_generation_mean.imshow(mean_image_rgb,
                                            interpolation="none",
                                            animated=True))
            animation_frame_array.append(artist_array)
        # if wVariance==True:
        #     print('final highest variance: '+str(highest_var)+', viewpoint: '+str(highest_var_vp))
        #     file_wvar.writelines('final highest variance: '+str(highest_var)+', viewpoint: '+str(highest_var_vp)+'\n')
        # else:
        #     pass
        # file_wviews.close()
        # file_wvar.close()

    # loading dataset & model
    cuda.get_device(args.gpu_device).use()
    xp = cp
    hyperparams = HyperParameters()
    assert hyperparams.load(args.snapshot_directory)

    model = Model(hyperparams)
    chainer.serializers.load_hdf5(args.snapshot_file, model)
    model.to_gpu()

    total_observations_per_scene = 4
    fps = 30
    # Images are preprocessed into a zero-centered range; -0.5 renders as black.
    black_color = -0.5
    image_shape = (3, ) + hyperparams.image_size
    axis_observations_image = np.zeros(
        (3, image_shape[1], total_observations_per_scene * image_shape[2]),
        dtype=np.float32)

    #==============================================================================
    # Visualization
    #==============================================================================
    plt.style.use("dark_background")
    fig = plt.figure(figsize=(6, 7))
    plt.subplots_adjust(left=0.1, right=0.95, bottom=0.1, top=0.95)
    # fig.suptitle("GQN")
    axis_observations = fig.add_subplot(2, 1, 1)
    axis_observations.axis("off")
    axis_observations.set_title("observations")
    axis_generation = fig.add_subplot(2, 1, 2)
    axis_generation.axis("off")
    axis_generation.set_title("Rendered Predictions")
    # NOTE(review): these two half-width axes share the lower area with
    # `axis_generation` above -- presumably intentional layering; confirm.
    axis_generation_var = fig.add_subplot(2, 2, 3)
    axis_generation_var.axis("off")
    axis_generation_var.set_title("Variance Render")
    axis_generation_mean = fig.add_subplot(2, 2, 4)
    axis_generation_mean.axis("off")
    axis_generation_mean.set_title("Mean Render")

    # iterator
    dataset = read_files(args.dataset_directory)
    file_number = 1
    with chainer.no_backprop_mode():
        iterator = chainer.iterators.SerialIterator(dataset, batch_size=1)
        # ipdb.set_trace()
        for i in tqdm(range(len(iterator.dataset))):
            animation_frame_array = []
            images, viewpoints = np.array([
                iterator.dataset[i]["image"]
            ]), np.array([iterator.dataset[i]["viewpoint"]])
            # Orbit parameters estimated from this scene's recorded viewpoints.
            camera_distance = np.mean(
                np.linalg.norm(viewpoints[:, :, :3], axis=2))
            camera_position_y = np.mean(viewpoints[:, :, 1])
            # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
            images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
            images = preprocess_images(images)
            batch_index = 0
            total_views = images.shape[1]
            # Randomly choose which views act as observations for this scene.
            random_observation_view_indices = list(range(total_views))
            random.shuffle(random_observation_view_indices)
            random_observation_view_indices = random_observation_view_indices[:total_observations_per_scene]
            observed_images = images[batch_index,
                                     random_observation_view_indices]
            observed_viewpoints = viewpoints[batch_index,
                                             random_observation_view_indices]
            observed_images = to_device(observed_images)
            observed_viewpoints = to_device(observed_viewpoints)

            # Scene encoder (single observation first)
            representation = model.compute_observation_representation(
                observed_images[None, :1], observed_viewpoints[None, :1])

            # Update figure
            observation_index = random_observation_view_indices[0]
            observed_image = images[batch_index, observation_index]
            axis_observations_image = fill_observations_axis([observed_image])

            # Neural rendering
            # render(representation, camera_distance, camera_position_y,
            #        fps * 2, animation_frame_array)
            render_wVar(representation, camera_distance, camera_position_y,
                        fps * 2, animation_frame_array, 100)

            # Add observations one at a time; camera held fixed for these.
            for n in range(total_observations_per_scene):
                observation_indices = random_observation_view_indices[:n + 1]
                axis_observations_image = fill_observations_axis(
                    images[batch_index, observation_indices])

                # Scene encoder
                representation = model.compute_observation_representation(
                    observed_images[None, :n + 1],
                    observed_viewpoints[None, :n + 1])

                # Neural rendering
                # render(representation, camera_distance, camera_position_y,
                #        fps // 2, animation_frame_array,rotate_camera=False)
                render_wVar(representation, camera_distance,
                            camera_position_y, fps // 2,
                            animation_frame_array, 100,
                            rotate_camera=False,
                            wVariance=False)

            # Scene encoder with all given observations
            # NOTE(review): `:total_observations_per_scene + 1` over-runs the
            # view axis by one; numpy clamps slices, so this is equivalent to
            # using all `total_observations_per_scene` observations.
            representation = model.compute_observation_representation(
                observed_images[None, :total_observations_per_scene + 1],
                observed_viewpoints[None, :total_observations_per_scene + 1])

            # Neural rendering
            # render(representation, camera_distance, camera_position_y,
            #        fps * 6, animation_frame_array)
            render_wVar(representation, camera_distance, camera_position_y,
                        fps * 6, animation_frame_array, 100)

            anim = animation.ArtistAnimation(
                fig,
                animation_frame_array,
                interval=1 / fps,  # originally 1/fps
                blit=True,
                repeat_delay=0)
            anim.save("{}/observations_{}.gif".format(args.figure_directory,
                                                      file_number),
                      writer="imagemagick",
                      fps=10)
            # ipdb.set_trace()
            # anim.save(
            #     "{}/rooms_ring_camera_observations_{}.mp4".format(
            #         args.figure_directory, file_number),
            #     writer='ffmpeg',
            #     fps=10)
            file_number += 1
def main():
    """Train a GQN model.

    Builds model, pixel-variance scheduler and Adam optimizer from the
    command-line ``args``, persists the hyperparameters next to the snapshot,
    then runs the ELBO training loop over the dataset, serializing the model
    after every subset and at the end of every iteration.
    """
    try:
        os.mkdir(args.snapshot_directory)
    except:
        # Best-effort: the directory probably exists already.
        # NOTE(review): bare `except` also hides real OS errors.
        pass

    np.random.seed(0)

    xp = np
    device_gpu = args.gpu_device
    device_cpu = -1  # NOTE(review): unused below.
    using_gpu = device_gpu >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cupy

    dataset = gqn.data.Dataset(args.dataset_directory)

    # Hyperparameters come from the command line and are saved alongside the
    # snapshot so inference scripts can restore the same configuration.
    hyperparams = HyperParameters()
    hyperparams.generator_share_core = args.generator_share_core
    hyperparams.generator_share_prior = args.generator_share_prior
    hyperparams.generator_generation_steps = args.generation_steps
    hyperparams.generator_share_upsampler = args.generator_share_upsampler
    hyperparams.inference_share_core = args.inference_share_core
    hyperparams.inference_share_posterior = args.inference_share_posterior
    hyperparams.h_channels = args.h_channels
    hyperparams.z_channels = args.z_channels
    hyperparams.u_channels = args.u_channels
    hyperparams.image_size = (args.image_size, args.image_size)
    hyperparams.representation_channels = args.representation_channels
    hyperparams.representation_architecture = args.representation_architecture
    hyperparams.pixel_n = args.pixel_n
    hyperparams.pixel_sigma_i = args.initial_pixel_variance
    hyperparams.pixel_sigma_f = args.final_pixel_variance
    hyperparams.save(args.snapshot_directory)
    print(hyperparams)

    model = Model(hyperparams,
                  snapshot_directory=args.snapshot_directory,
                  optimized=args.optimized)
    if using_gpu:
        model.to_gpu()

    # Anneals the output pixel variance from sigma_i to sigma_f over pixel_n updates.
    scheduler = Scheduler(sigma_start=args.initial_pixel_variance,
                          sigma_end=args.final_pixel_variance,
                          final_num_updates=args.pixel_n,
                          snapshot_directory=args.snapshot_directory)
    print(scheduler)

    optimizer = AdamOptimizer(model.parameters,
                              mu_i=args.initial_lr,
                              mu_f=args.final_lr,
                              initial_training_step=scheduler.num_updates)
    print(optimizer)

    # Per-pixel variance buffers; refreshed in place from the scheduler after
    # every update (see `pixel_var[...] = ...` below).
    pixel_var = xp.full((args.batch_size, 3) + hyperparams.image_size,
                        scheduler.pixel_variance**2,
                        dtype="float32")
    pixel_ln_var = xp.full((args.batch_size, 3) + hyperparams.image_size,
                           math.log(scheduler.pixel_variance**2),
                           dtype="float32")

    # Representation is 1/4 of the image resolution per spatial axis.
    representation_shape = (args.batch_size,
                            hyperparams.representation_channels,
                            args.image_size // 4, args.image_size // 4)

    fig = plt.figure(figsize=(9, 3))
    axis_data = fig.add_subplot(1, 3, 1)
    axis_data.set_title("Data")
    axis_data.axis("off")
    axis_reconstruction = fig.add_subplot(1, 3, 2)
    axis_reconstruction.set_title("Reconstruction")
    axis_reconstruction.axis("off")
    axis_generation = fig.add_subplot(1, 3, 3)
    axis_generation.set_title("Generation")
    axis_generation.axis("off")

    current_training_step = 0
    for iteration in range(args.training_iterations):
        mean_kld = 0
        mean_nll = 0
        mean_mse = 0
        mean_elbo = 0
        total_num_batch = 0
        start_time = time.time()

        for subset_index, subset in enumerate(dataset):
            iterator = gqn.data.Iterator(subset, batch_size=args.batch_size)

            for batch_index, data_indices in enumerate(iterator):
                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]

                # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)

                total_views = images.shape[1]

                # Sample number of views: uniform over 1..total_views.
                num_views = random.choice(range(1, total_views + 1))

                observation_view_indices = list(range(total_views))
                random.shuffle(observation_view_indices)
                observation_view_indices = observation_view_indices[:num_views]

                query_index = random.choice(range(total_views))
                # NOTE(review): `query_index` is re-sampled below before use;
                # this first draw only advances the RNG.

                if num_views > 0:
                    # Always true here: num_views is drawn from range(1, ...).
                    observation_images = preprocess_images(
                        images[:, observation_view_indices])
                    observation_query = viewpoints[:, observation_view_indices]
                    representation = model.compute_observation_representation(
                        observation_images, observation_query)
                else:
                    # Zero representation for the "no observation" case.
                    representation = xp.zeros(representation_shape,
                                              dtype="float32")
                    representation = chainer.Variable(representation)

                # Sample query
                query_index = random.choice(range(total_views))
                query_images = preprocess_images(images[:, query_index])
                query_viewpoints = viewpoints[:, query_index]

                # Transfer to gpu if necessary
                query_images = to_device(query_images, device_gpu)
                query_viewpoints = to_device(query_viewpoints, device_gpu)

                z_t_param_array, mean_x = model.sample_z_and_x_params_from_posterior(
                    query_images, query_viewpoints, representation)

                # Compute loss
                ## KL Divergence, summed over all generation steps.
                loss_kld = 0
                for params in z_t_param_array:
                    mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p = params
                    kld = gqn.functions.gaussian_kl_divergence(
                        mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p)
                    loss_kld += cf.sum(kld)

                ## Negative log-likelihood of generated image
                loss_nll = cf.sum(
                    gqn.functions.gaussian_negative_log_likelihood(
                        query_images, mean_x, pixel_var, pixel_ln_var))

                # Calculate the average loss value
                loss_nll = loss_nll / args.batch_size
                loss_kld = loss_kld / args.batch_size

                loss = loss_nll / scheduler.pixel_variance + loss_kld

                model.cleargrads()
                loss.backward()
                optimizer.update(current_training_step)

                # math.log(256.0): reporting offset for 8-bit image densities.
                loss_nll = float(loss_nll.data) + math.log(256.0)
                loss_kld = float(loss_kld.data)

                elbo = -(loss_nll + loss_kld)

                loss_mse = float(
                    cf.mean_squared_error(query_images, mean_x).data)

                printr(
                    "Iteration {}: Subset {} / {}: Batch {} / {} - elbo: {:.2f} - loss: nll: {:.2f} mse: {:.6e} kld: {:.5f} - lr: {:.4e} - pixel_variance: {:.5f} - step: {} "
                    .format(iteration + 1, subset_index + 1, len(dataset),
                            batch_index + 1, len(iterator), elbo, loss_nll,
                            loss_mse, loss_kld, optimizer.learning_rate,
                            scheduler.pixel_variance, current_training_step))

                # Advance annealing schedule and refresh variance buffers in place.
                scheduler.step(iteration, current_training_step)
                pixel_var[...] = scheduler.pixel_variance**2
                pixel_ln_var[...] = math.log(scheduler.pixel_variance**2)

                total_num_batch += 1
                current_training_step += 1
                mean_kld += loss_kld
                mean_nll += loss_nll
                mean_mse += loss_mse
                mean_elbo += elbo

            model.serialize(args.snapshot_directory)

            # Visualize
            if args.with_visualization:
                axis_data.imshow(make_uint8(query_images[0]),
                                 interpolation="none")
                axis_reconstruction.imshow(make_uint8(mean_x.data[0]),
                                           interpolation="none")
                with chainer.no_backprop_mode():
                    generated_x = model.generate_image(
                        query_viewpoints[None, 0], representation[None, 0])
                    axis_generation.imshow(make_uint8(generated_x[0]),
                                           interpolation="none")
                plt.pause(1e-8)

        elapsed_time = time.time() - start_time
        print(
            "\033[2KIteration {} - elbo: {:.2f} - loss: nll: {:.2f} mse: {:.6e} kld: {:.5f} - lr: {:.4e} - pixel_variance: {:.5f} - step: {} - time: {:.3f} min"
            .format(iteration + 1, mean_elbo / total_num_batch,
                    mean_nll / total_num_batch, mean_mse / total_num_batch,
                    mean_kld / total_num_batch, optimizer.learning_rate,
                    scheduler.pixel_variance, current_training_step,
                    elapsed_time / 60))
        model.serialize(args.snapshot_directory)
def main():
    """Render a 2x4-panel animation per scene (4 observations, 4 generations)
    and save it as both GIF and MP4 into ``args.output_directory``.

    First renders a full camera rotation with an all-zero representation
    ("no observations"), then repeats the rotation while adding one observed
    view at a time.
    """
    try:
        os.mkdir(args.output_directory)
    except:
        # Best-effort: directory probably exists already (bare except hides
        # other OS errors too).
        pass

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cupy

    dataset = gqn.data.Dataset(args.dataset_path)

    hyperparams = HyperParameters(snapshot_directory=args.snapshot_path)
    model = Model(hyperparams, snapshot_directory=args.snapshot_path)
    if using_gpu:
        model.to_gpu()

    plt.style.use("dark_background")
    fig = plt.figure(figsize=(10, 5))

    # Left 2x2 grid: observed views.
    axis_observation_array = []
    axis_observation_array.append(fig.add_subplot(2, 4, 1))
    axis_observation_array.append(fig.add_subplot(2, 4, 2))
    axis_observation_array.append(fig.add_subplot(2, 4, 5))
    axis_observation_array.append(fig.add_subplot(2, 4, 6))
    for axis in axis_observation_array:
        axis.axis("off")

    # Right 2x2 grid: generated samples.
    axis_generation_array = []
    axis_generation_array.append(fig.add_subplot(2, 4, 3))
    axis_generation_array.append(fig.add_subplot(2, 4, 4))
    axis_generation_array.append(fig.add_subplot(2, 4, 7))
    axis_generation_array.append(fig.add_subplot(2, 4, 8))
    for axis in axis_generation_array:
        axis.axis("off")

    num_views_per_scene = 4
    num_generation = 4
    total_frames_per_rotation = 24

    image_shape = (3, ) + hyperparams.image_size
    # -0.5 renders as black in the preprocessed value range.
    blank_image = np.full(image_shape, -0.5)
    file_number = 1

    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)

            for data_indices in iterator:
                artist_frame_array = []

                observed_image_array = xp.zeros(
                    (num_views_per_scene, ) + image_shape, dtype=np.float32)
                observed_viewpoint_array = xp.zeros((num_views_per_scene, 7),
                                                    dtype=np.float32)

                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]

                # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)

                batch_index = 0

                # Generate images without observations (zero representation).
                r = xp.zeros((
                    num_generation,
                    hyperparams.representation_channels,
                ) + hyperparams.chrz_size,
                             dtype=np.float32)
                angle_rad = 0
                for t in range(total_frames_per_rotation):
                    artist_array = []

                    # Observation panels stay blank in this phase.
                    for axis in axis_observation_array:
                        axis_image = axis.imshow(make_uint8(blank_image),
                                                 interpolation="none",
                                                 animated=True)
                        artist_array.append(axis_image)

                    query_viewpoints = rotate_query_viewpoint(
                        angle_rad, num_generation, xp)
                    generated_images = model.generate_image(
                        query_viewpoints, r)

                    for j, axis in enumerate(axis_generation_array):
                        image = make_uint8(generated_images[j])
                        axis_image = axis.imshow(image,
                                                 interpolation="none",
                                                 animated=True)
                        artist_array.append(axis_image)

                    angle_rad += 2 * math.pi / total_frames_per_rotation
                    # plt.pause(1e-8)
                    axis = axis_generation_array[-1]
                    add_annotation(axis, artist_array)
                    artist_frame_array.append(artist_array)

                # Generate images with observations added one at a time.
                for m in range(num_views_per_scene):
                    observed_image = images[batch_index, m]
                    observed_viewpoint = viewpoints[batch_index, m]

                    observed_image_array[m] = to_gpu(observed_image)
                    observed_viewpoint_array[m] = to_gpu(observed_viewpoint)

                    r = model.compute_observation_representation(
                        observed_image_array[None, :m + 1],
                        observed_viewpoint_array[None, :m + 1])

                    # One representation row per generated sample.
                    r = cf.broadcast_to(r, (num_generation, ) + r.shape[1:])

                    angle_rad = 0
                    for t in range(total_frames_per_rotation):
                        artist_array = []

                        for axis, observed_image in zip(
                                axis_observation_array, observed_image_array):
                            axis_image = axis.imshow(
                                make_uint8(observed_image),
                                interpolation="none",
                                animated=True)
                            artist_array.append(axis_image)

                        query_viewpoints = rotate_query_viewpoint(
                            angle_rad, num_generation, xp)
                        generated_images = model.generate_image(
                            query_viewpoints, r)

                        for j in range(num_generation):
                            axis = axis_generation_array[j]
                            axis_image = axis.imshow(make_uint8(
                                generated_images[j]),
                                                     interpolation="none",
                                                     animated=True)
                            artist_array.append(axis_image)

                        angle_rad += 2 * math.pi / total_frames_per_rotation
                        # plt.pause(1e-8)
                        axis = axis_generation_array[-1]
                        add_annotation(axis, artist_array)
                        artist_frame_array.append(artist_array)

                # plt.tight_layout()
                # plt.subplots_adjust(
                #     left=None,
                #     bottom=None,
                #     right=None,
                #     top=None,
                #     wspace=0,
                #     hspace=0)

                anim = animation.ArtistAnimation(fig,
                                                 artist_frame_array,
                                                 interval=1 / 24,
                                                 blit=True,
                                                 repeat_delay=0)

                anim.save("{}/rooms_ring_camera_{}.gif".format(
                    args.output_directory, file_number),
                          writer="imagemagick")
                anim.save("{}/rooms_ring_camera_{}.mp4".format(
                    args.output_directory, file_number),
                          writer="ffmpeg",
                          fps=12)
                file_number += 1
def main():
    """Render per-scene MP4 animations of GQN predictions with an orbiting,
    tilting camera (shepard_matzler dataset naming).

    Restores model/meter/hyperparameters from ``args.snapshot_directory`` and
    writes ``shepard_matzler_observations_<n>.mp4`` files into
    ``args.figure_directory``.
    """
    try:
        os.makedirs(args.figure_directory)
    except:
        # Best-effort: directory probably exists already (bare except hides
        # other OS errors too).
        pass

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cp

    dataset = gqn.data.Dataset(args.dataset_directory)

    meter = Meter()
    assert meter.load(args.snapshot_directory)

    hyperparams = HyperParameters()
    assert hyperparams.load(args.snapshot_directory)

    model = Model(hyperparams)
    assert model.load(args.snapshot_directory, meter.epoch)

    if using_gpu:
        model.to_gpu()

    total_observations_per_scene = 4
    fps = 30
    # -0.5 renders as black in the preprocessed value range.
    black_color = -0.5
    image_shape = (3, ) + hyperparams.image_size
    axis_observations_image = np.zeros(
        (3, image_shape[1], total_observations_per_scene * image_shape[2]),
        dtype=np.float32)

    #==============================================================================
    # Utilities
    #==============================================================================
    def to_device(array):
        if using_gpu:
            array = cuda.to_gpu(array)
        return array

    def fill_observations_axis(observation_images):
        """Tile the given observation images horizontally, centered on a black
        canvas wide enough for `total_observations_per_scene` images."""
        axis_observations_image = np.full(
            (3, image_shape[1], total_observations_per_scene * image_shape[2]),
            black_color,
            dtype=np.float32)
        num_current_obs = len(observation_images)
        total_obs = total_observations_per_scene
        width = image_shape[2]
        # Left margin that centers the filled strip on the canvas.
        x_start = width * (total_obs - num_current_obs) // 2
        for obs_image in observation_images:
            x_end = x_start + width
            axis_observations_image[:, :, x_start:x_end] = obs_image
            x_start += width
        return axis_observations_image

    def compute_camera_angle_at_frame(t):
        """Return (horizontal, vertical) camera angles for frame `t`:
        continuous horizontal orbit plus a piecewise vertical sweep between
        +pi/3 and -pi/3 over a fps*5 frame cycle."""
        horizontal_angle_rad = 2 * t * math.pi / (fps * 2) + math.pi / 4
        y_rad_top = math.pi / 3
        y_rad_bottom = -math.pi / 3
        y_rad_range = y_rad_bottom - y_rad_top
        if t < fps * 1.5:
            # Hold at the top.
            vertical_angle_rad = y_rad_top
        elif fps * 1.5 <= t and t < fps * 2.5:
            # Sweep top -> bottom over one second.
            interp = (t - fps * 1.5) / fps
            vertical_angle_rad = y_rad_top + interp * y_rad_range
        elif fps * 2.5 <= t and t < fps * 4:
            # Hold at the bottom.
            vertical_angle_rad = y_rad_bottom
        elif fps * 4.0 <= t and t < fps * 5:
            # Sweep back bottom -> top.
            interp = (t - fps * 4.0) / fps
            vertical_angle_rad = y_rad_bottom - interp * y_rad_range
        else:
            vertical_angle_rad = y_rad_top
        return horizontal_angle_rad, vertical_angle_rad

    def rotate_query_viewpoint(horizontal_angle_rad, vertical_angle_rad):
        """Build a (1, 7) query viewpoint (x, y, z, cos/sin yaw, cos/sin
        pitch) for a camera at `args.camera_distance` from the origin."""
        camera_direction = np.array([
            math.sin(horizontal_angle_rad),  # x
            math.sin(vertical_angle_rad),  # y
            math.cos(horizontal_angle_rad),  # z
        ])
        camera_direction = args.camera_distance * camera_direction / np.linalg.norm(
            camera_direction)
        yaw, pitch = compute_yaw_and_pitch(camera_direction)
        query_viewpoints = xp.array(
            (
                camera_direction[0],
                camera_direction[1],
                camera_direction[2],
                math.cos(yaw),
                math.sin(yaw),
                math.cos(pitch),
                math.sin(pitch),
            ),
            dtype=np.float32,
        )
        query_viewpoints = xp.broadcast_to(query_viewpoints,
                                           (1, ) + query_viewpoints.shape)
        return query_viewpoints

    #==============================================================================
    # Visualization
    #==============================================================================
    plt.style.use("dark_background")
    fig = plt.figure(figsize=(6, 7))
    plt.subplots_adjust(left=0.1, right=0.95, bottom=0.1, top=0.95)
    # fig.suptitle("GQN")
    axis_observations = fig.add_subplot(2, 1, 1)
    axis_observations.axis("off")
    axis_observations.set_title("observations")
    axis_generation = fig.add_subplot(2, 1, 2)
    axis_generation.axis("off")
    axis_generation.set_title("neural rendering")

    #==============================================================================
    # Generating animation
    #==============================================================================
    file_number = 1
    random.seed(0)
    np.random.seed(0)

    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)

            for data_indices in iterator:
                animation_frame_array = []

                observed_image_array = xp.full(
                    (total_observations_per_scene, ) + image_shape,
                    black_color,
                    dtype=np.float32)
                observed_viewpoint_array = xp.zeros(
                    (total_observations_per_scene, 7), dtype=np.float32)

                # shape: (batch, views, height, width, channels)
                images, viewpoints = subset[data_indices]

                # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)

                batch_index = 0

                #------------------------------------------------------------------------------
                # Generate images with a single observation
                #------------------------------------------------------------------------------
                observation_index = 0

                # Scene encoder
                observed_image = images[batch_index, observation_index]
                observed_viewpoint = viewpoints[batch_index,
                                                observation_index]
                observed_image_array[observation_index] = to_device(
                    observed_image)
                observed_viewpoint_array[observation_index] = to_device(
                    observed_viewpoint)
                representation = model.compute_observation_representation(
                    observed_image_array[None, :observation_index + 1],
                    observed_viewpoint_array[None, :observation_index + 1])

                # Update figure
                axis_observations_image = fill_observations_axis(
                    [observed_image])

                # Rotate camera
                for t in range(fps, fps * 6):
                    artist_array = [
                        axis_observations.imshow(
                            make_uint8(axis_observations_image),
                            interpolation="none",
                            animated=True)
                    ]

                    horizontal_angle_rad, vertical_angle_rad = compute_camera_angle_at_frame(
                        t)
                    query_viewpoints = rotate_query_viewpoint(
                        horizontal_angle_rad, vertical_angle_rad)
                    generated_images = model.generate_image(
                        query_viewpoints, representation)[0]

                    artist_array.append(
                        axis_generation.imshow(make_uint8(generated_images),
                                               interpolation="none",
                                               animated=True))

                    animation_frame_array.append(artist_array)

                #------------------------------------------------------------------------------
                # Add observations
                #------------------------------------------------------------------------------
                # NOTE(review): only index 0 of observed_image_array /
                # observed_viewpoint_array was filled above; the loop below
                # slices `:n + 1` but never assigns indices 1..3, so those
                # entries are still the black/zero initial values. The figure,
                # however, shows the true images[..., :n + 1]. Looks like a
                # bug -- confirm against the intended behavior.
                for n in range(total_observations_per_scene):
                    axis_observations_image = fill_observations_axis(
                        images[batch_index, :n + 1])

                    # Scene encoder
                    representation = model.compute_observation_representation(
                        observed_image_array[None, :n + 1],
                        observed_viewpoint_array[None, :n + 1])

                    # Hold the camera at the frame-0 angle for fps//2 frames.
                    for t in range(fps // 2):
                        artist_array = [
                            axis_observations.imshow(
                                make_uint8(axis_observations_image),
                                interpolation="none",
                                animated=True)
                        ]

                        horizontal_angle_rad, vertical_angle_rad = compute_camera_angle_at_frame(
                            0)
                        query_viewpoints = rotate_query_viewpoint(
                            horizontal_angle_rad, vertical_angle_rad)
                        generated_images = model.generate_image(
                            query_viewpoints, representation)[0]

                        artist_array.append(
                            axis_generation.imshow(
                                make_uint8(generated_images),
                                interpolation="none",
                                animated=True))

                        animation_frame_array.append(artist_array)

                #------------------------------------------------------------------------------
                # Generate images with all observations
                #------------------------------------------------------------------------------
                # Scene encoder
                # NOTE(review): `:total_observations_per_scene + 1` over-runs
                # the array by one; numpy clamps the slice, so all
                # `total_observations_per_scene` entries are used.
                representation = model.compute_observation_representation(
                    observed_image_array[None, :total_observations_per_scene +
                                         1],
                    observed_viewpoint_array[
                        None, :total_observations_per_scene + 1])

                # Rotate camera
                for t in range(0, fps * 6):
                    artist_array = [
                        axis_observations.imshow(
                            make_uint8(axis_observations_image),
                            interpolation="none",
                            animated=True)
                    ]

                    horizontal_angle_rad, vertical_angle_rad = compute_camera_angle_at_frame(
                        t)
                    query_viewpoints = rotate_query_viewpoint(
                        horizontal_angle_rad, vertical_angle_rad)
                    generated_images = model.generate_image(
                        query_viewpoints, representation)[0]

                    artist_array.append(
                        axis_generation.imshow(make_uint8(generated_images),
                                               interpolation="none",
                                               animated=True))

                    animation_frame_array.append(artist_array)

                #------------------------------------------------------------------------------
                # Write to file
                #------------------------------------------------------------------------------
                anim = animation.ArtistAnimation(fig,
                                                 animation_frame_array,
                                                 interval=1 / fps,
                                                 blit=True,
                                                 repeat_delay=0)

                # anim.save(
                #     "{}/shepard_matzler_observations_{}.gif".format(
                #         args.figure_directory, file_number),
                #     writer="imagemagick",
                #     fps=fps)
                anim.save("{}/shepard_matzler_observations_{}.mp4".format(
                    args.figure_directory, file_number),
                          writer="ffmpeg",
                          fps=fps)

                file_number += 1
def gqn_process():
    """Worker process: receive one observation over `data_recv`, scan candidate
    viewpoints for the highest predictive variance, and send the chosen next
    viewpoint back over `data_send`.

    Relies on module-level `args`, the `data_recv` / `data_send` queues and the
    helpers `compute_camera_angle_at_frame`, `rotate_query_viewpoint` and
    `compute_yaw_and_pitch` (none of which are defined in this block --
    presumably module-level; verify).
    """
    # load model
    my_gpu = args.gpu_device
    if my_gpu < 0:
        xp = np
    else:
        cuda.get_device(args.gpu_device).use()
        xp = cp
    hyperparams = HyperParameters()
    assert hyperparams.load(args.snapshot_directory)

    model = Model(hyperparams)
    chainer.serializers.load_hdf5(args.snapshot_file, model)
    if my_gpu > -1:
        model.to_gpu()

    chainer.print_runtime_info()

    # Single (viewpoint, image, offset) observation from the peer process.
    observed_viewpoint, observed_image, offset = data_recv.get()

    # Add batch and view axes: -> (1, 1, ...).
    observed_viewpoint = np.expand_dims(np.expand_dims(
        np.asarray(observed_viewpoint).astype(np.float32), axis=0),
                                        axis=0)
    observed_image = np.expand_dims(np.expand_dims(
        np.asarray(observed_image).astype(np.float32), axis=0),
                                    axis=0)
    offset = np.asarray(offset)

    camera_distance = np.mean(
        np.linalg.norm(observed_viewpoint[:, :, :3], axis=2))
    # NOTE(review): component 1 is the y coordinate in the sibling scripts
    # here, yet this name says "z" -- confirm the viewpoint layout.
    camera_position_z = np.mean(observed_viewpoint[:, :, 1])

    # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
    observed_image = observed_image.transpose(
        (0, 1, 4, 2, 3)).astype(np.float32)
    observed_image = preprocess_images(observed_image)

    # create representation and generate uncertainty map of environment [1000 viewpoints?]
    total_frames = 10
    representation = model.compute_observation_representation(
        observed_image, observed_viewpoint)

    # get predictions
    highest_var = 0.0
    no_of_samples = 20
    # NOTE(review): integer sentinel; replaced by a (7,) viewpoint row on the
    # first loop pass. The unpacking below would fail if the loop never ran.
    highest_var_vp = 0
    try:
        for i in range(0, total_frames):
            horizontal_angle_rad = compute_camera_angle_at_frame(
                i, total_frames)

            query_viewpoints = rotate_query_viewpoint(horizontal_angle_rad,
                                                      camera_distance,
                                                      camera_position_z, xp)

            # Draw several samples at the same viewpoint to estimate the
            # model's predictive variance there.
            generated_images = xp.squeeze(
                xp.array(
                    model.generate_images(query_viewpoints, representation,
                                          no_of_samples)))
            var_image = xp.var(generated_images, axis=0)
            # var_image = chainer.backends.cuda.to_cpu(var_image)

            # grayscale
            # r,g,b = var_image
            # gray_var_image = 0.2989*r+0.5870*g+0.1140*b

            current_var = xp.mean(var_image)

            # Track the viewpoint with the highest mean variance (greedy scan).
            if highest_var == 0:
                highest_var = current_var
                highest_var_vp = query_viewpoints[0]
            elif current_var > highest_var:
                highest_var = current_var
                highest_var_vp = query_viewpoints[0]
    except KeyboardInterrupt:
        logging.warning('interrupt')

    # return next viewpoint and unit vector of end effector based on highest uncertainty found in the uncertainty map
    _x, _y, _z, _, _, _, _ = highest_var_vp
    _yaw, _pitch = compute_yaw_and_pitch([_x, _y, _z])
    next_viewpoint = [_x, _y, _z, _yaw, _pitch]

    # Move any GPU scalars back to host and hand the result to the peer.
    next_viewpoint = [chainer.backends.cuda.to_cpu(x) for x in next_viewpoint]
    next_viewpoint = [float(x) for x in next_viewpoint]
    data_send.put(next_viewpoint)
def main():
    """Render a rotating-camera animation: first with an empty (zero)
    representation, then with 1..N observations added incrementally; saves
    one GIF and one MP4 per scene into ``args.figure_directory``.

    Uses module-level ``args`` plus helpers ``rotate_query_viewpoint``,
    ``preprocess_images``, ``make_uint8`` and ``to_gpu``.
    """
    try:
        os.mkdir(args.figure_directory)
    except:  # NOTE(review): bare except silently hides errors other than "exists"
        pass

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cupy

    dataset = gqn.data.Dataset(args.dataset_path)

    hyperparams = HyperParameters(snapshot_directory=args.snapshot_path)
    model = Model(hyperparams, snapshot_directory=args.snapshot_path)
    if using_gpu:
        model.to_gpu()

    # Figure: observations tile on the left, generated view on the right.
    plt.style.use("dark_background")
    fig = plt.figure(figsize=(10, 5))
    fig.suptitle("GQN")
    axis_observations = fig.add_subplot(1, 2, 1)
    axis_observations.axis("off")
    axis_observations.set_title("Observations")
    axis_generation = fig.add_subplot(1, 2, 2)
    axis_generation.axis("off")
    axis_generation.set_title("Generation")

    total_observations_per_scene = 2**2
    num_observations_per_column = int(math.sqrt(total_observations_per_scene))
    num_generation = 1
    total_frames_per_rotation = 48
    black_color = -0.5  # pixel value for "empty" slots, image range is [-1, 1]
    image_shape = (3, ) + hyperparams.image_size

    # Square mosaic holding up to total_observations_per_scene observations.
    axis_observations_image = np.full(
        (3, num_observations_per_column * image_shape[1],
         num_observations_per_column * image_shape[2]),
        black_color,
        dtype=np.float32)

    file_number = 1
    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)
            for data_indices in iterator:
                animation_frame_array = []
                # Reset per-scene buffers.
                axis_observations_image[...] = black_color
                observed_image_array = xp.full(
                    (total_observations_per_scene, ) + image_shape,
                    black_color,
                    dtype=np.float32)
                observed_viewpoint_array = xp.zeros(
                    (total_observations_per_scene, 7), dtype=np.float32)

                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]

                # -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)

                batch_index = 0

                # Generate images without observations: all-zero representation
                # at 1/4 spatial resolution of the input images.
                representation = xp.zeros((
                    num_generation,
                    hyperparams.representation_channels,
                ) + (hyperparams.image_size[0] // 4,
                     hyperparams.image_size[1] // 4),
                                          dtype=np.float32)
                angle_rad = 0
                for t in range(total_frames_per_rotation):
                    artist_array = [
                        axis_observations.imshow(
                            make_uint8(axis_observations_image),
                            interpolation="none",
                            animated=True)
                    ]
                    query_viewpoints = rotate_query_viewpoint(
                        angle_rad, num_generation, xp)
                    generated_image = model.generate_image_from_zero_z(
                        query_viewpoints, representation)[0]
                    artist_array.append(
                        axis_generation.imshow(make_uint8(generated_image),
                                               interpolation="none",
                                               animated=True))
                    angle_rad += 2 * math.pi / total_frames_per_rotation
                    animation_frame_array.append(artist_array)

                # Generate images with observations, adding one view at a time.
                for observation_index in range(total_observations_per_scene):
                    observed_image = images[batch_index, observation_index]
                    observed_viewpoint = viewpoints[batch_index,
                                                    observation_index]

                    observed_image_array[observation_index] = to_gpu(
                        observed_image)
                    observed_viewpoint_array[observation_index] = to_gpu(
                        observed_viewpoint)

                    # Encode only the views observed so far.
                    representation = model.compute_observation_representation(
                        observed_image_array[None, :observation_index + 1],
                        observed_viewpoint_array[None, :observation_index + 1])

                    representation = cf.broadcast_to(
                        representation,
                        (num_generation, ) + representation.shape[1:])

                    # Update figure: place the new observation into its mosaic cell.
                    x_start = image_shape[1] * (
                        observation_index % num_observations_per_column)
                    x_end = x_start + image_shape[1]
                    y_start = image_shape[2] * (
                        observation_index // num_observations_per_column)
                    y_end = y_start + image_shape[2]
                    axis_observations_image[:, y_start:y_end,
                                            x_start:x_end] = observed_image

                    # One full camera rotation per newly-added observation.
                    angle_rad = 0
                    for t in range(total_frames_per_rotation):
                        artist_array = [
                            axis_observations.imshow(
                                make_uint8(axis_observations_image),
                                interpolation="none",
                                animated=True)
                        ]
                        query_viewpoints = rotate_query_viewpoint(
                            angle_rad, num_generation, xp)
                        generated_images = model.generate_image_from_zero_z(
                            query_viewpoints, representation)[0]
                        artist_array.append(
                            axis_generation.imshow(
                                make_uint8(generated_images),
                                interpolation="none",
                                animated=True))
                        angle_rad += 2 * math.pi / total_frames_per_rotation
                        animation_frame_array.append(artist_array)

                # Write the accumulated frames as GIF and MP4.
                anim = animation.ArtistAnimation(fig,
                                                 animation_frame_array,
                                                 interval=1 / 24,
                                                 blit=True,
                                                 repeat_delay=0)
                anim.save("{}/shepard_matzler_observations_{}.gif".format(
                    args.figure_directory, file_number),
                          writer="imagemagick")
                anim.save("{}/shepard_matzler_observations_{}.mp4".format(
                    args.figure_directory, file_number),
                          writer="ffmpeg",
                          fps=12)
                file_number += 1
def main():
    """Render per-scene MP4 animations for an HDF5 dataset: a rotating
    neural rendering driven first by one observation, then by incrementally
    added observations, then by all of them. Output goes to
    ``args.figure_directory``. GPU-only (unconditionally moves data/model
    to the GPU).
    """
    try:
        os.makedirs(args.figure_directory)
    except:  # NOTE(review): bare except silently hides errors other than "exists"
        pass

    # loading dataset & model
    cuda.get_device(args.gpu_device).use()
    xp = cp
    hyperparams = HyperParameters()
    assert hyperparams.load(args.snapshot_directory)

    model = Model(hyperparams)
    chainer.serializers.load_hdf5(args.snapshot_file, model)
    model.to_gpu()

    total_observations_per_scene = 4
    fps = 30
    black_color = -0.5  # "empty" pixel value, image range is [-1, 1]
    image_shape = (3, ) + hyperparams.image_size
    axis_observations_image = np.zeros(
        (3, image_shape[1], total_observations_per_scene * image_shape[2]),
        dtype=np.float32)

    #==============================================================================
    # Utilities
    #==============================================================================
    def read_files(directory):
        """Load every *.h5 file in `directory` into a list of
        {'image', 'viewpoint'} dicts (one entry per scene)."""
        filenames = []
        files = os.listdir(directory)
        # ipdb.set_trace()
        for filename in files:
            if filename.endswith(".h5"):
                filenames.append(filename)
        filenames.sort()
        dataset_images = []
        dataset_viewpoints = []
        for i in range(len(filenames)):
            F = h5py.File(os.path.join(directory, filenames[i]))
            tmp_images = list(F["images"])
            tmp_viewpoints = list(F["viewpoints"])
            dataset_images.extend(tmp_images)
            dataset_viewpoints.extend(tmp_viewpoints)
        # for i in range(len(filenames)):
        #     images_npy_path = os.path.join(directory, "images", filenames[i])
        #     viewpoints_npy_path = os.path.join(directory, "viewpoints", filenames[i])
        #     tmp_images = np.load(images_npy_path)
        #     tmp_viewpoints = np.load(viewpoints_npy_path)
        #     assert tmp_images.shape[0] == tmp_viewpoints.shape[0]
        #     dataset_images.extend(tmp_images)
        #     dataset_viewpoints.extend(tmp_viewpoints)
        dataset_images = np.array(dataset_images)
        dataset_viewpoints = np.array(dataset_viewpoints)
        dataset = list()
        for i in range(len(dataset_images)):
            item = {
                'image': dataset_images[i],
                'viewpoint': dataset_viewpoints[i]
            }
            dataset.append(item)
        return dataset

    def to_device(array):
        """Move `array` to the GPU (unconditional in this script variant)."""
        # if using_gpu:
        array = cuda.to_gpu(array)
        return array

    def fill_observations_axis(observation_images):
        """Lay out up to `total_observations_per_scene` observation images
        side by side, centered horizontally, on a black strip."""
        axis_observations_image = np.full(
            (3, image_shape[1],
             total_observations_per_scene * image_shape[2]),
            black_color,
            dtype=np.float32)
        num_current_obs = len(observation_images)
        total_obs = total_observations_per_scene
        width = image_shape[2]
        # Center the filled cells within the strip.
        x_start = width * (total_obs - num_current_obs) // 2
        for obs_image in observation_images:
            x_end = x_start + width
            axis_observations_image[:, :, x_start:x_end] = obs_image
            x_start += width
        return axis_observations_image

    def compute_camera_angle_at_frame(t):
        # One full rotation every fps*2 frames (i.e. every 2 seconds).
        return t * 2 * math.pi / (fps * 2)

    def rotate_query_viewpoint(horizontal_angle_rad, camera_distance,
                               camera_position_y):
        """Build a (1, 7) query viewpoint on a horizontal circle of radius
        `camera_distance` at height `camera_position_y`, looking at the
        vertical axis: (x, y, z, cos/sin yaw, cos/sin pitch)."""
        camera_position = np.array([
            camera_distance * math.sin(horizontal_angle_rad),  # x
            camera_position_y,
            camera_distance * math.cos(horizontal_angle_rad),  # z
        ])
        center = np.array((0, camera_position_y, 0))
        camera_direction = camera_position - center
        yaw, pitch = compute_yaw_and_pitch(camera_direction)
        query_viewpoints = xp.array(
            (
                camera_position[0],
                camera_position[1],
                camera_position[2],
                math.cos(yaw),
                math.sin(yaw),
                math.cos(pitch),
                math.sin(pitch),
            ),
            dtype=np.float32,
        )
        query_viewpoints = xp.broadcast_to(query_viewpoints,
                                           (1, ) + query_viewpoints.shape)
        return query_viewpoints

    def render(representation,
               camera_distance,
               camera_position_y,
               total_frames,
               animation_frame_array,
               rotate_camera=True):
        """Append `total_frames` (observations, generation) frame pairs to
        `animation_frame_array`; with rotate_camera=False the camera stays
        at the t=0 angle. Reads `axis_observations_image` and the two
        matplotlib axes from the enclosing scope at call time."""
        for t in range(0, total_frames):
            artist_array = [
                axis_observations.imshow(make_uint8(axis_observations_image),
                                         interpolation="none",
                                         animated=True)
            ]
            horizontal_angle_rad = compute_camera_angle_at_frame(t)
            if rotate_camera == False:
                horizontal_angle_rad = compute_camera_angle_at_frame(0)
            query_viewpoints = rotate_query_viewpoint(
                horizontal_angle_rad, camera_distance, camera_position_y)
            generated_images = model.generate_image(query_viewpoints,
                                                    representation)[0]
            artist_array.append(
                axis_generation.imshow(make_uint8(generated_images),
                                       interpolation="none",
                                       animated=True))
            animation_frame_array.append(artist_array)

    #==============================================================================
    # Visualization
    #==============================================================================
    plt.style.use("dark_background")
    fig = plt.figure(figsize=(6, 7))
    plt.subplots_adjust(left=0.1, right=0.95, bottom=0.1, top=0.95)
    # fig.suptitle("GQN")
    axis_observations = fig.add_subplot(2, 1, 1)
    axis_observations.axis("off")
    axis_observations.set_title("observations")
    axis_generation = fig.add_subplot(2, 1, 2)
    axis_generation.axis("off")
    axis_generation.set_title("neural rendering")

    #==============================================================================
    # Generating animation
    #==============================================================================
    dataset = read_files(args.dataset_directory)
    file_number = 1
    random.seed(0)
    np.random.seed(0)

    with chainer.no_backprop_mode():
        iterator = chainer.iterators.SerialIterator(dataset, batch_size=1)
        for i in range(len(iterator.dataset)):
            animation_frame_array = []

            # shape: (batch, views, height, width, channels)
            images, viewpoints = np.array([
                iterator.dataset[i]["image"]
            ]), np.array([iterator.dataset[i]["viewpoint"]])

            # Camera geometry inferred from the scene's viewpoints.
            camera_distance = np.mean(
                np.linalg.norm(viewpoints[:, :, :3], axis=2))
            camera_position_y = np.mean(viewpoints[:, :, 1])

            # -> (batch, views, channels, height, width)
            images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
            images = preprocess_images(images)

            batch_index = 0

            total_views = images.shape[1]
            # Random subset of views used as observations for this scene.
            random_observation_view_indices = list(range(total_views))
            random.shuffle(random_observation_view_indices)
            random_observation_view_indices = random_observation_view_indices[:
                                                                              total_observations_per_scene]

            #------------------------------------------------------------------------------
            # Observations
            #------------------------------------------------------------------------------
            observed_images = images[batch_index,
                                     random_observation_view_indices]
            observed_viewpoints = viewpoints[batch_index,
                                             random_observation_view_indices]

            observed_images = to_device(observed_images)
            observed_viewpoints = to_device(observed_viewpoints)

            #------------------------------------------------------------------------------
            # Generate images with a single observation
            #------------------------------------------------------------------------------
            # Scene encoder
            representation = model.compute_observation_representation(
                observed_images[None, :1], observed_viewpoints[None, :1])

            # Update figure
            observation_index = random_observation_view_indices[0]
            observed_image = images[batch_index, observation_index]
            axis_observations_image = fill_observations_axis([observed_image])

            # Neural rendering: 2 seconds of rotation.
            render(representation, camera_distance, camera_position_y,
                   fps * 2, animation_frame_array)

            #------------------------------------------------------------------------------
            # Add observations one at a time (camera held still)
            #------------------------------------------------------------------------------
            for n in range(total_observations_per_scene):
                observation_indices = random_observation_view_indices[:n + 1]
                axis_observations_image = fill_observations_axis(
                    images[batch_index, observation_indices])

                # Scene encoder
                representation = model.compute_observation_representation(
                    observed_images[None, :n + 1],
                    observed_viewpoints[None, :n + 1])

                # Neural rendering
                render(representation,
                       camera_distance,
                       camera_position_y,
                       fps // 2,
                       animation_frame_array,
                       rotate_camera=False)

            #------------------------------------------------------------------------------
            # Generate images with all observations
            #------------------------------------------------------------------------------
            # Scene encoder
            # NOTE(review): the `+ 1` slices one past the number of stored
            # observations; numpy clamps the slice so this is effectively
            # "all observations", but the intent should be confirmed.
            representation = model.compute_observation_representation(
                observed_images[None, :total_observations_per_scene + 1],
                observed_viewpoints[None, :total_observations_per_scene + 1])

            # Neural rendering: 4 seconds of rotation.
            render(representation, camera_distance, camera_position_y,
                   fps * 4, animation_frame_array)

            #------------------------------------------------------------------------------
            # Write to file
            #------------------------------------------------------------------------------
            anim = animation.ArtistAnimation(fig,
                                             animation_frame_array,
                                             interval=1 / fps,
                                             blit=True,
                                             repeat_delay=0)
            # anim.save(
            #     "{}/shepard_matzler_observations_{}.gif".format(
            #         args.figure_directory, file_number),
            #     writer="imagemagick",
            #     fps=fps)
            anim.save("{}/rooms_ring_camera_observations_{}.mp4".format(
                args.figure_directory, file_number),
                      writer="ffmpeg",
                      fps=fps)
            file_number += 1
def main():
    """Interactive viewer: for each scene, show the accumulated observations,
    a held-out ground-truth view, and the model's reconstruction of that view
    as observations are added one by one (``plt.pause`` between steps).
    """
    try:
        os.mkdir(args.figure_directory)
    except:  # NOTE(review): bare except silently hides errors other than "exists"
        pass

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cupy

    dataset = gqn.data.Dataset(args.dataset_path)

    hyperparams = HyperParameters(snapshot_directory=args.snapshot_path)
    model = Model(hyperparams, snapshot_directory=args.snapshot_path)
    if using_gpu:
        model.to_gpu()

    # Figure: observations | ground truth | reconstruction, side by side.
    plt.style.use("dark_background")
    fig = plt.figure(figsize=(15, 5))
    fig.suptitle("GQN")
    axis_observations = fig.add_subplot(1, 3, 1)
    axis_observations.axis("off")
    axis_observations.set_title("Observations")
    axis_ground_truth = fig.add_subplot(1, 3, 2)
    axis_ground_truth.axis("off")
    axis_ground_truth.set_title("Ground Truth")
    axis_reconstruction = fig.add_subplot(1, 3, 3)
    axis_reconstruction.axis("off")
    axis_reconstruction.set_title("Reconstruction")

    total_observations_per_scene = 2**2
    num_observations_per_column = int(math.sqrt(total_observations_per_scene))
    black_color = -0.5  # "empty" pixel value, image range is [-1, 1]
    image_shape = (3, ) + hyperparams.image_size

    # Square mosaic holding up to total_observations_per_scene observations.
    axis_observations_image = np.full(
        (3, num_observations_per_column * image_shape[1],
         num_observations_per_column * image_shape[2]),
        black_color,
        dtype=np.float32)

    file_number = 1
    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)
            for data_indices in iterator:
                animation_frame_array = []  # NOTE(review): assigned but unused
                # Reset per-scene buffers.
                axis_observations_image[...] = black_color
                observed_image_array = xp.full(
                    (total_observations_per_scene, ) + image_shape,
                    black_color,
                    dtype=np.float32)
                observed_viewpoint_array = xp.zeros(
                    (total_observations_per_scene, 7), dtype=np.float32)

                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]

                # -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)

                batch_index = 0

                # The query view is the first view NOT used as an observation.
                query_index = total_observations_per_scene
                query_image = images[batch_index, query_index]
                query_viewpoint = to_gpu(viewpoints[None, batch_index,
                                                    query_index])
                axis_ground_truth.imshow(make_uint8(query_image),
                                         interpolation="none")

                for observation_index in range(total_observations_per_scene):
                    observed_image = images[batch_index, observation_index]
                    observed_viewpoint = viewpoints[batch_index,
                                                    observation_index]

                    observed_image_array[observation_index] = to_gpu(
                        observed_image)
                    observed_viewpoint_array[observation_index] = to_gpu(
                        observed_viewpoint)

                    # Encode only the views observed so far.
                    representation = model.compute_observation_representation(
                        observed_image_array[None, :observation_index + 1],
                        observed_viewpoint_array[None, :observation_index + 1])

                    representation = cf.broadcast_to(
                        representation, (1, ) + representation.shape[1:])

                    # Update figure: place the new observation into its mosaic cell.
                    x_start = image_shape[1] * (
                        observation_index % num_observations_per_column)
                    x_end = x_start + image_shape[1]
                    y_start = image_shape[2] * (
                        observation_index // num_observations_per_column)
                    y_end = y_start + image_shape[2]
                    axis_observations_image[:, y_start:y_end,
                                            x_start:x_end] = observed_image

                    axis_observations.imshow(
                        make_uint8(axis_observations_image),
                        interpolation="none",
                        animated=True)

                    # Reconstruct the held-out query view and display it.
                    generated_images = model.generate_image(
                        query_viewpoint, representation)[0]
                    axis_reconstruction.imshow(make_uint8(generated_images),
                                               interpolation="none")
                    plt.pause(1)
def main():
    """Render a free-camera fly-through for each scene: after encoding 4
    observations, the camera travels three straight segments (forward along
    one wall, across, and back along the opposite wall), producing a GIF
    and MP4 per scene in ``args.output_directory``.

    Uses module-level helpers ``interpolate``, ``make_query_viewpoint``,
    ``make_uint8``, ``add_annotation``, ``preprocess_images`` and ``to_gpu``.
    """
    try:
        os.mkdir(args.output_directory)
    except:  # NOTE(review): bare except silently hides errors other than "exists"
        pass

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cupy

    dataset = gqn.data.Dataset(args.dataset_path)

    hyperparams = HyperParameters(snapshot_directory=args.snapshot_path)
    model = Model(hyperparams, snapshot_directory=args.snapshot_path)
    if using_gpu:
        model.to_gpu()

    # Figure: 2x2 grid of observations on the left, generation on the right.
    plt.style.use("dark_background")
    fig = plt.figure(figsize=(10, 5))
    axis_observation_array = []
    axis_observation_array.append(plt.subplot2grid((2, 4), (0, 0)))
    axis_observation_array.append(plt.subplot2grid((2, 4), (0, 1)))
    axis_observation_array.append(plt.subplot2grid((2, 4), (1, 0)))
    axis_observation_array.append(plt.subplot2grid((2, 4), (1, 1)))

    for axis in axis_observation_array:
        axis.axis("off")

    axis_generation = plt.subplot2grid((2, 4), (0, 2), rowspan=2, colspan=2)
    axis_generation.axis("off")

    num_views_per_scene = 4
    num_generation = 1
    total_frames_per_movement = 72
    image_shape = (3, ) + hyperparams.image_size
    blank_image = np.full(image_shape, -0.5)  # NOTE(review): unused below

    file_number = 1
    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)
            for data_indices in iterator:
                artist_frame_array = []

                observed_image_array = xp.full(
                    (num_views_per_scene, ) + image_shape,
                    -0.5,
                    dtype=np.float32)
                observed_viewpoint_array = xp.zeros((num_views_per_scene, 7),
                                                    dtype=np.float32)

                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]

                # -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)

                batch_index = 0

                # Representation placeholder (would be used for
                # observation-free generation); overwritten below.
                r = xp.zeros((
                    num_generation,
                    hyperparams.representation_channels,
                ) + hyperparams.chrz_size,
                             dtype=np.float32)

                # Encode the scene from num_views_per_scene observations.
                for m in range(num_views_per_scene):
                    observed_image = images[batch_index, m]
                    observed_viewpoint = viewpoints[batch_index, m]

                    observed_image_array[m] = to_gpu(observed_image)
                    observed_viewpoint_array[m] = to_gpu(observed_viewpoint)

                    r = model.compute_observation_representation(
                        observed_image_array[None, :m + 1],
                        observed_viewpoint_array[None, :m + 1])

                    r = cf.broadcast_to(r, (num_generation, ) + r.shape[1:])

                # Camera path geometry: three straight segments inside an
                # 8-unit grid, all at fixed height -0.125.
                grid_size = 8
                trajectory_length = grid_size / 3

                # Segment 1: move along the left wall, front to back.
                eye_start = (-trajectory_length, -0.125, trajectory_length)
                eye_end = (-trajectory_length, -0.125, -trajectory_length)
                center_start = (-trajectory_length, -0.125, grid_size / 2)
                center_end = (-trajectory_length, -0.125, 0)
                for t in range(total_frames_per_movement):
                    artist_array = []

                    for axis, observed_image in zip(axis_observation_array,
                                                    observed_image_array):
                        axis_image = axis.imshow(make_uint8(observed_image),
                                                 interpolation="none",
                                                 animated=True)
                        artist_array.append(axis_image)

                    # Linear interpolation of eye and look-at point.
                    interp = t / (total_frames_per_movement - 1)
                    eye = interpolate(eye_start, eye_end, interp)
                    center = interpolate(center_start, center_end, interp)

                    query_viewpoints = make_query_viewpoint(
                        eye, center, num_generation, xp)
                    generated_images = model.generate_image(
                        query_viewpoints, r)
                    image = make_uint8(generated_images[0])
                    axis_image = axis_generation.imshow(image,
                                                        interpolation="none",
                                                        animated=True)
                    artist_array.append(axis_image)
                    # plt.pause(1e-8)

                    add_annotation(axis_generation, artist_array)
                    artist_frame_array.append(artist_array)

                # Segment 2: traverse from the left wall to the right wall.
                eye_start = (-trajectory_length, -0.125, -trajectory_length)
                eye_end = (trajectory_length, -0.125, -trajectory_length)
                center_start = (-trajectory_length, -0.125, 0)
                center_end = (trajectory_length, -0.125, 0)
                for t in range(total_frames_per_movement):
                    artist_array = []

                    for axis, observed_image in zip(axis_observation_array,
                                                    observed_image_array):
                        axis_image = axis.imshow(make_uint8(observed_image),
                                                 interpolation="none",
                                                 animated=True)
                        artist_array.append(axis_image)

                    interp = t / (total_frames_per_movement - 1)
                    eye = interpolate(eye_start, eye_end, interp)
                    center = interpolate(center_start, center_end, interp)

                    query_viewpoints = make_query_viewpoint(
                        eye, center, num_generation, xp)
                    generated_images = model.generate_image(
                        query_viewpoints, r)
                    image = make_uint8(generated_images[0])
                    axis_image = axis_generation.imshow(image,
                                                        interpolation="none",
                                                        animated=True)
                    artist_array.append(axis_image)
                    # plt.pause(1e-8)

                    add_annotation(axis_generation, artist_array)
                    artist_frame_array.append(artist_array)

                # Segment 3: move along the right wall, back to front.
                eye_start = (trajectory_length, -0.125, -trajectory_length)
                eye_end = (trajectory_length, -0.125, trajectory_length)
                center_start = (trajectory_length, -0.125, 0)
                center_end = (trajectory_length, -0.125, grid_size / 2)
                for t in range(total_frames_per_movement):
                    artist_array = []

                    for axis, observed_image in zip(axis_observation_array,
                                                    observed_image_array):
                        axis_image = axis.imshow(make_uint8(observed_image),
                                                 interpolation="none",
                                                 animated=True)
                        artist_array.append(axis_image)

                    interp = t / (total_frames_per_movement - 1)
                    eye = interpolate(eye_start, eye_end, interp)
                    center = interpolate(center_start, center_end, interp)

                    query_viewpoints = make_query_viewpoint(
                        eye, center, num_generation, xp)
                    generated_images = model.generate_image(
                        query_viewpoints, r)
                    image = make_uint8(generated_images[0])
                    axis_image = axis_generation.imshow(image,
                                                        interpolation="none",
                                                        animated=True)
                    artist_array.append(axis_image)
                    # plt.pause(1e-8)

                    add_annotation(axis_generation, artist_array)
                    artist_frame_array.append(artist_array)

                # Write the accumulated frames as GIF and MP4.
                plt.tight_layout()
                plt.subplots_adjust(left=None,
                                    bottom=None,
                                    right=None,
                                    top=None,
                                    wspace=0,
                                    hspace=0)
                anim = animation.ArtistAnimation(fig,
                                                 artist_frame_array,
                                                 interval=1 / 24,
                                                 blit=True,
                                                 repeat_delay=0)
                anim.save("{}/rooms_free_camera_{}.gif".format(
                    args.output_directory, file_number),
                          writer="imagemagick")
                anim.save("{}/rooms_free_camera_{}.mp4".format(
                    args.output_directory, file_number),
                          writer="ffmpeg",
                          fps=12)
                file_number += 1
def main():
    """For each scene, encode 3 random observations and render a
    num_yaw_pitch_steps x num_yaw_pitch_steps grid of predictions sweeping
    the camera over yaw and pitch; save each grid as a PNG in
    ``args.figure_directory``.
    """
    try:
        os.makedirs(args.figure_directory)
    except:  # NOTE(review): bare except silently hides errors other than "exists"
        pass

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cp

    dataset = gqn.data.Dataset(args.dataset_directory)

    meter = Meter()
    assert meter.load(args.snapshot_directory)

    hyperparams = HyperParameters()
    assert hyperparams.load(args.snapshot_directory)

    model = Model(hyperparams)
    assert model.load(args.snapshot_directory, meter.epoch)
    if using_gpu:
        model.to_gpu()

    #==============================================================================
    # Visualization: 3 observation thumbnails on top, prediction grid below.
    #==============================================================================
    plt.figure(figsize=(12, 16))
    axis_observation_1 = plt.subplot2grid((4, 3), (0, 0))
    axis_observation_2 = plt.subplot2grid((4, 3), (0, 1))
    axis_observation_3 = plt.subplot2grid((4, 3), (0, 2))
    axis_predictions = plt.subplot2grid((4, 3), (1, 0), rowspan=3, colspan=3)

    axis_observation_1.axis("off")
    axis_observation_2.axis("off")
    axis_observation_3.axis("off")
    axis_predictions.set_xticks([], [])
    axis_predictions.set_yticks([], [])

    axis_observation_1.set_title("Observation 1", fontsize=22)
    axis_observation_2.set_title("Observation 2", fontsize=22)
    axis_observation_3.set_title("Observation 3", fontsize=22)
    axis_predictions.set_title("Neural Rendering", fontsize=22)
    axis_predictions.set_xlabel("Yaw", fontsize=22)
    axis_predictions.set_ylabel("Pitch", fontsize=22)

    #==============================================================================
    # Generating images
    #==============================================================================
    num_views_per_scene = 3
    num_yaw_pitch_steps = 10
    image_width, image_height = hyperparams.image_size
    # Mosaic canvas for the yaw/pitch prediction grid (uint8, HWC).
    prediction_images = make_uint8(
        np.full((num_yaw_pitch_steps * image_width,
                 num_yaw_pitch_steps * image_height, 3), 0))
    file_number = 1
    random.seed(0)
    np.random.seed(0)

    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)
            for data_indices in iterator:
                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]

                # Camera radius inferred from the scene's viewpoints.
                camera_distance = np.mean(
                    np.linalg.norm(viewpoints[:, :, :3], axis=2))

                # -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)

                batch_index = 0

                #------------------------------------------------------------------------------
                # Observations: a random subset of num_views_per_scene views.
                #------------------------------------------------------------------------------
                total_views = images.shape[1]
                random_observation_view_indices = list(range(total_views))
                random.shuffle(random_observation_view_indices)
                random_observation_view_indices = random_observation_view_indices[:
                                                                                  num_views_per_scene]

                observed_images = images[:, random_observation_view_indices]
                observed_viewpoints = viewpoints[:,
                                                 random_observation_view_indices]

                representation = model.compute_observation_representation(
                    observed_images, observed_viewpoints)

                axis_observation_1.imshow(
                    make_uint8(observed_images[batch_index, 0]))
                axis_observation_2.imshow(
                    make_uint8(observed_images[batch_index, 1]))
                axis_observation_3.imshow(
                    make_uint8(observed_images[batch_index, 2]))

                # Sweep pitch from +pi/2 downward (rows) and yaw over a full
                # turn (columns), rendering one prediction per grid cell.
                y_angle_rad = math.pi / 2
                for pitch_loop in range(num_yaw_pitch_steps):
                    camera_y = math.sin(y_angle_rad)
                    x_angle_rad = math.pi
                    for yaw_loop in range(num_yaw_pitch_steps):
                        camera_direction = np.array([
                            math.sin(x_angle_rad), camera_y,
                            math.cos(x_angle_rad)
                        ])
                        # Normalize, then scale to the scene's camera radius.
                        camera_direction = camera_distance * camera_direction / np.linalg.norm(
                            camera_direction)
                        yaw, pitch = compute_yaw_and_pitch(camera_direction)
                        query_viewpoints = xp.array(
                            (
                                camera_direction[0],
                                camera_direction[1],
                                camera_direction[2],
                                math.cos(yaw),
                                math.sin(yaw),
                                math.cos(pitch),
                                math.sin(pitch),
                            ),
                            dtype=np.float32,
                        )
                        query_viewpoints = xp.broadcast_to(
                            query_viewpoints, (1, ) + query_viewpoints.shape)

                        generated_images = model.generate_image(
                            query_viewpoints, representation)[0]

                        # Paste the rendered view into its mosaic cell.
                        yi_start = pitch_loop * image_height
                        yi_end = (pitch_loop + 1) * image_height
                        xi_start = yaw_loop * image_width
                        xi_end = (yaw_loop + 1) * image_width
                        prediction_images[yi_start:yi_end,
                                          xi_start:xi_end] = make_uint8(
                                              generated_images)

                        x_angle_rad -= 2 * math.pi / num_yaw_pitch_steps
                    y_angle_rad -= math.pi / num_yaw_pitch_steps

                axis_predictions.imshow(prediction_images)

                plt.savefig("{}/shepard_metzler_predictions_{}.png".format(
                    args.figure_directory, file_number))
                file_number += 1
def main():
    """Distributed (ChainerMN) GQN training loop: build hyperparameters from
    CLI args, train with an annealed pixel-variance ELBO objective, and have
    rank 0 print per-iteration statistics and serialize snapshots.
    """
    ##############################################
    # To avoid OpenMPI bug: fork a dummy process once with the forkserver
    # start method before any CUDA/MPI work.
    multiprocessing.set_start_method("forkserver")
    p = multiprocessing.Process(target=print, args=("", ))
    p.start()
    p.join()
    ##############################################
    try:
        os.mkdir(args.snapshot_directory)
    except:  # NOTE(review): bare except silently hides errors other than "exists"
        pass

    comm = chainermn.create_communicator()
    device = comm.intra_rank
    print("device", device, "/", comm.size)
    cuda.get_device(device).use()
    xp = cupy

    dataset = gqn.data.Dataset(args.dataset_directory)

    # Mirror CLI arguments into the hyperparameter object.
    hyperparams = HyperParameters()
    hyperparams.generator_share_core = args.generator_share_core
    hyperparams.generator_share_prior = args.generator_share_prior
    hyperparams.generator_generation_steps = args.generation_steps
    hyperparams.generator_share_upsampler = args.generator_share_upsampler
    hyperparams.inference_share_core = args.inference_share_core
    hyperparams.inference_share_posterior = args.inference_share_posterior
    hyperparams.h_channels = args.h_channels
    hyperparams.z_channels = args.z_channels
    hyperparams.u_channels = args.u_channels
    hyperparams.image_size = (args.image_size, args.image_size)
    hyperparams.representation_channels = args.representation_channels
    hyperparams.representation_architecture = args.representation_architecture
    hyperparams.pixel_n = args.pixel_n
    hyperparams.pixel_sigma_i = args.initial_pixel_variance
    hyperparams.pixel_sigma_f = args.final_pixel_variance
    if comm.rank == 0:
        hyperparams.save(args.snapshot_directory)
        ## Debug ##
        hyperparams.save("results")
        print(hyperparams)

    model = Model(hyperparams,
                  snapshot_directory=args.snapshot_directory,
                  optimized=args.optimized)
    model.to_gpu()

    optimizer = optimizer_by_name(args.optimizer,
                                  model.parameters,
                                  communicator=comm,
                                  mu_i=args.initial_lr,
                                  mu_f=args.final_lr)
    if comm.rank == 0:
        print(optimizer)

    # Anneals the pixel-observation variance from sigma_i to sigma_f over
    # pixel_n updates.
    scheduler = Scheduler(sigma_start=args.initial_pixel_variance,
                          sigma_end=args.final_pixel_variance,
                          final_num_updates=args.pixel_n,
                          snapshot_directory=args.snapshot_directory)
    if comm.rank == 0:
        print(scheduler)

    # Per-pixel variance buffers for the Gaussian likelihood; updated in
    # place as the scheduler anneals.
    pixel_var = xp.full((args.batch_size, 3) + hyperparams.image_size,
                        scheduler.pixel_variance**2,
                        dtype="float32")
    pixel_ln_var = xp.full((args.batch_size, 3) + hyperparams.image_size,
                           math.log(scheduler.pixel_variance**2),
                           dtype="float32")

    random.seed(0)
    subset_indices = list(range(len(dataset.subset_filenames)))

    # Representation tensor shape for the zero-observation fallback:
    # spatial size is 1/4 of the input image.
    representation_shape = (args.batch_size,
                            hyperparams.representation_channels,
                            args.image_size // 4, args.image_size // 4)

    current_training_step = scheduler.num_updates
    for iteration in range(scheduler.iteration, args.training_iterations):
        mean_kld = 0
        mean_nll = 0
        mean_mse = 0
        mean_elbo = 0
        total_num_batch = 0
        # Ceil-divide the subsets across ranks.
        subset_size_per_gpu = len(subset_indices) // comm.size
        if len(subset_indices) % comm.size != 0:
            subset_size_per_gpu += 1
        start_time = time.time()

        for subset_loop in range(subset_size_per_gpu):
            # Each rank picks the subset at its own rank position after a
            # fresh shuffle.
            random.shuffle(subset_indices)
            subset_index = subset_indices[comm.rank]
            subset = dataset.read(subset_index)
            iterator = gqn.data.Iterator(subset, batch_size=args.batch_size)

            for batch_index, data_indices in enumerate(iterator):
                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]

                # -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)

                total_views = images.shape[1]

                # Sample a random number of observation views (>= 1).
                num_views = random.choice(range(1, total_views + 1))
                observation_view_indices = list(range(total_views))
                random.shuffle(observation_view_indices)
                observation_view_indices = observation_view_indices[:num_views]

                # NOTE(review): num_views is always >= 1 here (sampled from
                # range(1, ...)), so the else branch is currently dead code.
                if num_views > 0:
                    observation_images = preprocess_images(
                        images[:, observation_view_indices])
                    observation_query = viewpoints[:, observation_view_indices]
                    representation = model.compute_observation_representation(
                        observation_images, observation_query)
                else:
                    representation = xp.zeros(representation_shape,
                                              dtype="float32")
                    representation = chainer.Variable(representation)

                # Sample query view (may coincide with an observation view).
                query_index = random.choice(range(total_views))
                query_images = preprocess_images(images[:, query_index])
                query_viewpoints = viewpoints[:, query_index]

                # Transfer to gpu
                query_images = to_gpu(query_images)
                query_viewpoints = to_gpu(query_viewpoints)

                z_t_param_array, mean_x = model.sample_z_and_x_params_from_posterior(
                    query_images, query_viewpoints, representation)

                # Compute loss
                ## KL Divergence: summed over all generation steps.
                loss_kld = chainer.Variable(xp.zeros((), dtype=xp.float32))
                for params in z_t_param_array:
                    mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p = params
                    kld = gqn.functions.gaussian_kl_divergence(
                        mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p)
                    loss_kld += cf.sum(kld)

                ## Negative log-likelihood of generated image
                loss_nll = cf.sum(
                    gqn.functions.gaussian_negative_log_likelihood(
                        query_images, mean_x, pixel_var, pixel_ln_var))

                # Calculate the average loss value
                loss_nll = loss_nll / args.batch_size
                loss_kld = loss_kld / args.batch_size

                # NLL is rescaled by the current pixel variance before being
                # combined with the KLD term.
                loss = (loss_nll / scheduler.pixel_variance) + loss_kld

                model.cleargrads()
                loss.backward()
                optimizer.update(current_training_step)

                loss_nll = float(loss_nll.data)
                loss_kld = float(loss_kld.data)

                elbo = -(loss_nll + loss_kld)

                loss_mse = float(
                    cf.mean_squared_error(query_images, mean_x).data)

                # if comm.rank == 0:
                #     printr(
                #         "Iteration {}: Subset {} / {}: Batch {} / {} - elbo: {:.2f} - loss: nll: {:.2f} mse: {:.5f} kld: {:.5f} - lr: {:.4e} - pixel_variance: {:.5f} - step: {} ".
                #         format(iteration, subset_loop + 1,
                #                subset_size_per_gpu, batch_index + 1,
                #                len(iterator), elbo, loss_nll, loss_mse,
                #                loss_kld, optimizer.learning_rate,
                #                scheduler.pixel_variance,
                #                current_training_step))

                total_num_batch += 1
                current_training_step += 1
                mean_kld += loss_kld
                mean_nll += loss_nll
                mean_mse += loss_mse
                mean_elbo += elbo

                # Anneal the pixel variance and refresh the likelihood
                # buffers after every parameter update.
                # NOTE(review): nesting reconstructed from a collapsed
                # source — confirm this runs per batch rather than per
                # subset.
                scheduler.step(iteration, current_training_step)
                pixel_var[...] = scheduler.pixel_variance**2
                pixel_ln_var[...] = math.log(scheduler.pixel_variance**2)

            # keys = ("name", "memory.total", "memory.free", "memory.used",
            #         "utilization.gpu", "utilization.memory")
            # cmd = "nvidia-smi --query-gpu={} --format=csv".format(
            #     ",".join(keys))
            # output = str(subprocess.check_output(cmd, shell=True))
            # if comm.rank == 0:
            #     print(output)

            # if comm.rank == 0:
            #     model.serialize(args.snapshot_directory)

        # Rank 0 reports averaged statistics and writes the snapshot.
        if comm.rank == 0:
            elapsed_time = time.time() - start_time
            mean_elbo /= total_num_batch
            mean_nll /= total_num_batch
            mean_mse /= total_num_batch
            mean_kld /= total_num_batch
            print(
                "\033[2KIteration {} - elbo: {:.2f} - loss: nll: {:.2f} mse: {} kld: {:.6f} - lr: {:.4e} - pixel_variance: {:.5f} - step: {} - time: {:.3f} min".
                format(iteration, mean_elbo, mean_nll, mean_mse, mean_kld,
                       optimizer.learning_rate, scheduler.pixel_variance,
                       current_training_step, elapsed_time / 60))
            model.serialize(args.snapshot_directory)
            scheduler.save(args.snapshot_directory)
            ## Debug ##
            model.serialize("results")
def main():
    """Analyze per-viewpoint uncertainty of a trained GQN.

    For each scene: condition the model on a single observation, rotate a
    query camera around the scene, draw 100 samples per frame, and dump the
    variance statistics (generated-image variance, pre-Gaussian variance,
    latent-z variance, generator core state c) into one HDF5 file per scene.

    Relies on module-level names: ``args`` (CLI options), ``gqn``, ``Meter``,
    ``HyperParameters``, ``Model``, ``SummaryWriter``, ``cuda``/``cp``,
    ``chainer``, ``h5py``, ``plt``, ``preprocess_images``, ``make_uint8``
    and ``compute_yaw_and_pitch``.
    """
    start_time = time.time()
    # Kept for its side effect of creating the TensorBoard log directory.
    writer = SummaryWriter('/GQN/chainer-gqn/tensor-log')
    try:
        os.makedirs(args.figure_directory)
    except OSError:
        # Best-effort: directory may already exist (was a bare `except:`).
        pass

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cp

    dataset = gqn.data.Dataset(args.dataset_directory)

    # Restore training meter, hyperparameters and model weights.
    meter = Meter()
    assert meter.load(args.snapshot_directory)
    hyperparams = HyperParameters()
    assert hyperparams.load(args.snapshot_directory)
    model = Model(hyperparams)
    assert model.load(args.snapshot_directory, meter.epoch)
    if using_gpu:
        model.to_gpu()

    total_observations_per_scene = 4
    fps = 30
    black_color = -0.5  # padding value in the preprocessed pixel space
    image_shape = (3, ) + hyperparams.image_size
    axis_observations_image = np.zeros(
        (3, image_shape[1], total_observations_per_scene * image_shape[2]),
        dtype=np.float32)

    #==========================================================================
    # Utilities
    #==========================================================================
    def to_device(array):
        # Move the array to the GPU when one is configured.
        if using_gpu:
            array = cuda.to_gpu(array)
        return array

    def fill_observations_axis(observation_images):
        # Tile the given observations horizontally, centered within a strip
        # wide enough for `total_observations_per_scene` images.
        axis_observations_image = np.full(
            (3, image_shape[1],
             total_observations_per_scene * image_shape[2]),
            black_color,
            dtype=np.float32)
        num_current_obs = len(observation_images)
        total_obs = total_observations_per_scene
        width = image_shape[2]
        x_start = width * (total_obs - num_current_obs) // 2
        for obs_image in observation_images:
            x_end = x_start + width
            axis_observations_image[:, :, x_start:x_end] = obs_image
            x_start += width
        return axis_observations_image

    def compute_camera_angle_at_frame(t):
        # Continuous horizontal sweep; elevation stepped between +pi/3 and
        # -pi/3 over fixed frame windows (t is a frame index).
        horizontal_angle_rad = 2 * t * math.pi / (fps * 2) + math.pi / 4
        y_rad_top = math.pi / 3
        y_rad_bottom = -math.pi / 3
        y_rad_range = y_rad_bottom - y_rad_top
        if t < fps * 1.5:
            vertical_angle_rad = y_rad_top
        elif fps * 1.5 <= t and t < fps * 2.5:
            interp = (t - fps * 1.5) / fps
            vertical_angle_rad = y_rad_top + interp * y_rad_range
        elif fps * 2.5 <= t and t < fps * 4:
            vertical_angle_rad = y_rad_bottom
        elif fps * 4.0 <= t and t < fps * 5:
            interp = (t - fps * 4.0) / fps
            vertical_angle_rad = y_rad_bottom - interp * y_rad_range
        else:
            vertical_angle_rad = y_rad_top
        return horizontal_angle_rad, vertical_angle_rad

    def compute_vertical_rotation_at_frame(horizontal, vertical, t):
        # Move the horizontal view only; elevation stays fixed.
        horizontal_angle_rad = horizontal + (t - fps) * (math.pi / 64)
        vertical_angle_rad = vertical + 0
        return horizontal_angle_rad, vertical_angle_rad

    def rotate_query_viewpoint(horizontal_angle_rad, vertical_angle_rad,
                               camera_distance):
        # Build a (1, 7) query viewpoint: camera xyz plus cos/sin of yaw and
        # pitch. NOTE: the direction vector is deliberately NOT normalized
        # here (unlike the sibling scripts) for observation purposes.
        camera_direction = np.array([
            math.sin(horizontal_angle_rad),  # x
            math.sin(vertical_angle_rad),  # y
            math.cos(horizontal_angle_rad),  # z
        ])
        camera_direction = camera_distance * camera_direction
        yaw, pitch = compute_yaw_and_pitch(camera_direction)
        query_viewpoints = xp.array(
            (
                camera_direction[0],
                camera_direction[1],
                camera_direction[2],
                math.cos(yaw),
                math.sin(yaw),
                math.cos(pitch),
                math.sin(pitch),
            ),
            dtype=np.float32,
        )
        query_viewpoints = xp.broadcast_to(query_viewpoints,
                                           (1, ) + query_viewpoints.shape)
        return query_viewpoints

    def render(representation,
               camera_distance,
               obs_viewpoint,
               start_t,
               end_t,
               animation_frame_array,
               savename=None,
               rotate_camera=True):
        # For each frame t: sample 100 renderings from the model at a rotated
        # query viewpoint, reduce to per-frame variances, then append all
        # collected statistics to the HDF5 file `savename`.
        all_var_bg = []
        all_var = []
        all_var_z = []
        all_q_view = []
        all_c = []
        for t in range(start_t, end_t):
            artist_array = [
                axis_observations.imshow(make_uint8(axis_observations_image),
                                         interpolation="none",
                                         animated=True)
            ]
            # Recover the observation's angles from its xyz position
            # (inverse of rotate_query_viewpoint, sans normalization).
            horizontal_angle_rad = np.arctan2(obs_viewpoint[0],
                                              obs_viewpoint[2])
            vertical_angle_rad = np.arcsin(obs_viewpoint[1] / camera_distance)
            horizontal_angle_rad, vertical_angle_rad = compute_vertical_rotation_at_frame(
                horizontal_angle_rad, vertical_angle_rad, t)
            if not rotate_camera:
                horizontal_angle_rad, vertical_angle_rad = compute_camera_angle_at_frame(
                    0)
            query_viewpoints = rotate_query_viewpoint(horizontal_angle_rad,
                                                      vertical_angle_rad,
                                                      camera_distance)

            # 100 samples: generated images plus variance before the
            # Gaussian, latent z, and the generator core state c.
            generated_images, var_bg, latent_z, ct = model.generate_multi_image(
                query_viewpoints, representation, 100)
            logging.info("retrieved variables, time elapsed: " +
                         str(time.time() - start_time))
            generated_images = np.squeeze(generated_images)
            latent_z = np.squeeze(latent_z)
            ct = np.squeeze(ct)

            # Reduce over the sample axis.
            var_bg = cp.mean(var_bg, axis=0)
            logging.info("variance of bg, time elapsed: " +
                         str(time.time() - start_time))
            var_z = cp.var(latent_z, axis=0)
            logging.info("variance of z, time elapsed: " +
                         str(time.time() - start_time))
            var_c = cp.var(ct, axis=0)
            logging.info("variance of c, time elapsed: " +
                         str(time.time() - start_time))
            gen_img_var = np.var(generated_images, axis=0)
            logging.info("calculated variance of gen images, time elapsed: " +
                         str(time.time() - start_time))

            all_var_bg.append((var_bg)[None])
            all_var.append((gen_img_var)[None])
            all_var_z.append((var_z)[None])
            all_q_view.append(
                chainer.backends.cuda.to_cpu(horizontal_angle_rad)[None] *
                180 / math.pi)  # stored in degrees
            all_c.append((var_c)[None])
            logging.info("appending, time elapsed: " +
                         str(time.time() - start_time))
            pred_mean = cp.mean(generated_images, axis=0)

        all_var_bg = np.concatenate(chainer.backends.cuda.to_cpu(all_var_bg),
                                    axis=0)
        all_var = np.concatenate(chainer.backends.cuda.to_cpu(all_var),
                                 axis=0)
        all_var_z = np.concatenate(chainer.backends.cuda.to_cpu(all_var_z),
                                   axis=0)
        all_c = np.concatenate(chainer.backends.cuda.to_cpu(all_c), axis=0)
        logging.info("concatenating, time elapsed: " +
                     str(time.time() - start_time))
        with h5py.File(savename, "a") as f:
            f.create_dataset("variance_all_viewpoints", data=all_var)
            f.create_dataset("query_viewpoints",
                             data=np.squeeze(np.asarray(all_q_view)))
            f.create_dataset("variance_b4_gaussian", data=all_var_bg)
            f.create_dataset("variance_of_z", data=all_var_z)
            f.create_dataset("c", data=all_c)
        logging.info("saving, time elapsed: " + str(time.time() - start_time))

    #==========================================================================
    # Visualization
    #==========================================================================
    plt.style.use("dark_background")
    fig = plt.figure(figsize=(6, 7))
    plt.subplots_adjust(left=0.1, right=0.95, bottom=0.1, top=0.95)
    axis_observations = fig.add_subplot(2, 1, 1)
    axis_observations.axis("off")
    axis_observations.set_title("observations")
    axis_generation = fig.add_subplot(2, 1, 2)
    axis_generation.axis("off")
    axis_generation.set_title("neural rendering")

    #==========================================================================
    # Generating animation
    #==========================================================================
    file_number = 1
    random.seed(0)
    np.random.seed(0)
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s'
    )
    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)
            for data_indices in iterator:
                animation_frame_array = []
                # shape: (batch, views, height, width, channels)
                images, viewpoints = subset[data_indices]
                camera_distance = np.mean(
                    np.linalg.norm(viewpoints[:, :, :3], axis=2))
                # -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)
                logging.info('preprocess ' + str(time.time() - start_time))
                batch_index = 0
                total_views = images.shape[1]
                random_observation_view_indices = list(range(total_views))
                random.shuffle(random_observation_view_indices)
                random_observation_view_indices = random_observation_view_indices[:
                                                                                  total_observations_per_scene]

                #--------------------------------------------------------------
                # Observations
                #--------------------------------------------------------------
                observed_images = images[batch_index,
                                         random_observation_view_indices]
                observed_viewpoints = viewpoints[
                    batch_index, random_observation_view_indices]
                observed_images = to_device(observed_images)
                observed_viewpoints = to_device(observed_viewpoints)

                #--------------------------------------------------------------
                # Generate images with a single observation
                #--------------------------------------------------------------
                # Scene encoder, conditioned on the first observation only.
                representation = model.compute_observation_representation(
                    observed_images[None, :1], observed_viewpoints[None, :1])

                # Update figure with that observation.
                observation_index = random_observation_view_indices[0]
                observed_image = images[batch_index, observation_index]
                axis_observations_image = fill_observations_axis(
                    [observed_image])

                # Save the observed viewpoint (fresh HDF5 file per scene).
                filename = "{}/variance_{}.hdf5".format(
                    args.figure_directory, file_number)
                if os.path.exists(filename):
                    os.remove(filename)
                with h5py.File(filename, "a") as f:
                    f.create_dataset("observed_viewpoint",
                                     data=chainer.backends.cuda.to_cpu(
                                         observed_viewpoints[0]))
                    f.create_dataset(
                        "obs_viewpoint_horizontal_angle",
                        data=np.arcsin(
                            chainer.backends.cuda.to_cpu(
                                observed_viewpoints[0][0]) / camera_distance)
                        * 180 / math.pi)
                logging.info('write 2 variables to hdf5 file, time elapsed: ' +
                             str(time.time() - start_time))
                obs_viewpoint = np.squeeze(observed_viewpoints[0])

                # Neural rendering + variance dump.
                render(representation,
                       camera_distance,
                       observed_viewpoints[0],
                       fps,
                       fps * 6,
                       animation_frame_array,
                       savename=filename)
                logging.info(
                    'write 4 other variables to hdf5 file, time elapsed: ' +
                    str(time.time() - start_time))

                # Stop after 20 scenes.
                if file_number == 20:
                    break
                else:
                    file_number += 1
def main():
    """Render per-scene mp4 animations comparing the model against ground
    truth: MSE image, Otsu-thresholded sample variance, sample mean and a
    single rendering, for 1..4 conditioning observations.

    Relies on module-level names: ``args``, ``gqn``, ``Meter``,
    ``HyperParameters``, ``Model``, ``cuda``/``cp``, ``chainer``, ``plt``,
    ``animation``, ``operator``, ``threshold_otsu``, ``preprocess_images``,
    ``make_uint8`` and ``compute_yaw_and_pitch``.
    """
    try:
        os.makedirs(args.figure_directory)
    except OSError:
        # Best-effort: directory may already exist (was a bare `except:`).
        pass

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cp

    dataset = gqn.data.Dataset(args.dataset_directory)

    # Restore training meter, hyperparameters and model weights.
    meter = Meter()
    assert meter.load(args.snapshot_directory)
    hyperparams = HyperParameters()
    assert hyperparams.load(args.snapshot_directory)
    model = Model(hyperparams)
    assert model.load(args.snapshot_directory, meter.epoch)
    if using_gpu:
        model.to_gpu()

    total_observations_per_scene = 4
    fps = 30
    black_color = -0.5  # padding value in the preprocessed pixel space
    image_shape = (3, ) + hyperparams.image_size
    axis_observations_image = np.zeros(
        (3, image_shape[1], total_observations_per_scene * image_shape[2]),
        dtype=np.float32)

    #==========================================================================
    # Utilities
    #==========================================================================
    def to_device(array):
        # Move the array to the GPU when one is configured.
        if using_gpu:
            array = cuda.to_gpu(array)
        return array

    def fill_observations_axis(observation_images):
        # Tile the given observations horizontally, centered within a strip
        # wide enough for `total_observations_per_scene` images.
        axis_observations_image = np.full(
            (3, image_shape[1],
             total_observations_per_scene * image_shape[2]),
            black_color,
            dtype=np.float32)
        num_current_obs = len(observation_images)
        total_obs = total_observations_per_scene
        width = image_shape[2]
        x_start = width * (total_obs - num_current_obs) // 2
        for obs_image in observation_images:
            x_end = x_start + width
            axis_observations_image[:, :, x_start:x_end] = obs_image
            x_start += width
        return axis_observations_image

    def compute_camera_angle_at_frame(t):
        # Continuous horizontal sweep; elevation stepped between +pi/3 and
        # -pi/3 over fixed frame windows (t is a frame index).
        horizontal_angle_rad = 2 * t * math.pi / (fps * 2) + math.pi / 4
        y_rad_top = math.pi / 3
        y_rad_bottom = -math.pi / 3
        y_rad_range = y_rad_bottom - y_rad_top
        if t < fps * 1.5:
            vertical_angle_rad = y_rad_top
        elif fps * 1.5 <= t and t < fps * 2.5:
            interp = (t - fps * 1.5) / fps
            vertical_angle_rad = y_rad_top + interp * y_rad_range
        elif fps * 2.5 <= t and t < fps * 4:
            vertical_angle_rad = y_rad_bottom
        elif fps * 4.0 <= t and t < fps * 5:
            interp = (t - fps * 4.0) / fps
            vertical_angle_rad = y_rad_bottom - interp * y_rad_range
        else:
            vertical_angle_rad = y_rad_top
        return horizontal_angle_rad, vertical_angle_rad

    def rotate_query_viewpoint(horizontal_angle_rad, vertical_angle_rad,
                               camera_distance):
        # Build a (1, 7) query viewpoint at `camera_distance` from origin:
        # normalized direction scaled to the camera distance, plus cos/sin
        # of yaw and pitch.
        camera_direction = np.array([
            math.sin(horizontal_angle_rad),  # x
            math.sin(vertical_angle_rad),  # y
            math.cos(horizontal_angle_rad),  # z
        ])
        camera_direction = camera_distance * camera_direction / np.linalg.norm(
            camera_direction)
        yaw, pitch = compute_yaw_and_pitch(camera_direction)
        query_viewpoints = xp.array(
            (
                camera_direction[0],
                camera_direction[1],
                camera_direction[2],
                math.cos(yaw),
                math.sin(yaw),
                math.cos(pitch),
                math.sin(pitch),
            ),
            dtype=np.float32,
        )
        query_viewpoints = xp.broadcast_to(query_viewpoints,
                                           (1, ) + query_viewpoints.shape)
        return query_viewpoints

    # added/modified
    def compute_horizontal_rotation_at_frame(t):
        '''This rotates the scene horizontally.'''
        horizontal_angle_rad = 2 * t * math.pi / (fps * 2) + math.pi / 4
        vertical_angle_rad = 0
        return horizontal_angle_rad, vertical_angle_rad

    def get_mse_image(ground_truth, predicted):
        '''Calculates MSE between ground truth and predicted observation, and returns an image.'''
        assert ground_truth.shape == predicted.shape
        mse_image = np.square(ground_truth - predicted) * 0.5
        mse_image = np.concatenate(mse_image).astype(np.float32)
        mse_image = np.reshape(mse_image, (3, 64, 64))
        return mse_image.transpose(1, 2, 0)

    def render(representation,
               camera_distance,
               start_t,
               end_t,
               gt_images,
               gt_viewpoints,
               animation_frame_array,
               rotate_camera=True):
        '''shows variance and mean images of 100 samples from the Gaussian.'''
        gt_images = np.squeeze(gt_images)
        # Sort ground-truth views by their x coordinate so `count` walks
        # through them in a stable order. (-1, 1, 7) generalizes the
        # original hard-coded 15-view reshape to any view count.
        gt_viewpoints = cp.reshape(cp.asarray(gt_viewpoints), (-1, 1, 7))
        idx = cp.argsort(cp.squeeze(gt_viewpoints)[:, 0])
        gt_images = [
            i for i, v in sorted(zip(gt_images, idx),
                                 key=operator.itemgetter(1))
        ]
        gt_viewpoints = [
            i for i, v in sorted(zip(gt_viewpoints, idx),
                                 key=operator.itemgetter(1))
        ]
        count = 0
        for t in range(start_t, end_t):
            artist_array = [
                axis_observations.imshow(make_uint8(axis_observations_image),
                                         interpolation="none",
                                         animated=True)
            ]
            horizontal_angle_rad, vertical_angle_rad = compute_camera_angle_at_frame(
                t)
            if not rotate_camera:
                horizontal_angle_rad, vertical_angle_rad = compute_camera_angle_at_frame(
                    0)
            query_viewpoints = rotate_query_viewpoint(horizontal_angle_rad,
                                                      vertical_angle_rad,
                                                      camera_distance)
            # 100 samples at the rotating query viewpoint.
            generated_images = model.generate_image(query_viewpoints,
                                                    representation, 100)
            # One prediction at the current ground-truth viewpoint for MSE.
            predicted_images = model.generate_image(gt_viewpoints[count],
                                                    representation, 1)
            predicted_images = np.squeeze(predicted_images)
            image_mse = get_mse_image(gt_images[count], predicted_images)

            cpu_generated_images = chainer.backends.cuda.to_cpu(
                generated_images)
            generated_images = np.squeeze(cpu_generated_images)
            image_mean = cp.mean(cp.squeeze(generated_images), axis=0)
            image_var = cp.var(cp.squeeze(generated_images), axis=0)
            # Grayscale conversion of the variance image (ITU-R weights),
            # then binarize with Otsu's threshold.
            r, g, b = image_var
            gray_image_var = 0.2989 * r + 0.5870 * g + 0.1140 * b
            thresh = threshold_otsu(gray_image_var)
            var_binary = gray_image_var > thresh
            sample_image = np.squeeze(generated_images[0])

            # Advance through the sorted ground-truth views every 10 frames,
            # wrapping at the last view (generalizes the hard-coded 14).
            if count == len(gt_viewpoints) - 1:
                count = 0
            elif (t - fps) % 10 == 0:
                count += 1
            print("computed an image. Count =", count)

            artist_array.append(
                axis_generation_variance.imshow(var_binary,
                                                cmap=plt.cm.gray,
                                                interpolation="none",
                                                animated=True))
            artist_array.append(
                axis_generation_mean.imshow(make_uint8(image_mean),
                                            interpolation="none",
                                            animated=True))
            artist_array.append(
                axis_generation_sample.imshow(make_uint8(sample_image),
                                              interpolation="none",
                                              animated=True))
            artist_array.append(
                axis_generation_mse.imshow(make_uint8(image_mse),
                                           cmap='gray',
                                           interpolation="none",
                                           animated=True))
            animation_frame_array.append(artist_array)

    #==========================================================================
    # Visualization
    #==========================================================================
    plt.style.use("dark_background")
    fig = plt.figure(figsize=(6, 7))
    plt.subplots_adjust(left=0.1, right=0.95, bottom=0.1, top=0.95)
    axis_observations = fig.add_subplot(3, 1, 1)
    axis_observations.axis("off")
    axis_observations.set_title("observations")
    axis_generation_mse = fig.add_subplot(3, 2, 3)
    axis_generation_mse.axis("off")
    axis_generation_mse.set_title("MSE")
    axis_generation_variance = fig.add_subplot(3, 2, 4)
    axis_generation_variance.axis("off")
    axis_generation_variance.set_title("Variance")
    axis_generation_mean = fig.add_subplot(3, 2, 5)
    axis_generation_mean.axis("off")
    axis_generation_mean.set_title("Mean")
    axis_generation_sample = fig.add_subplot(3, 2, 6)
    axis_generation_sample.axis("off")
    axis_generation_sample.set_title("Normal Rendering")

    #==========================================================================
    # Generating animation
    #==========================================================================
    file_number = 1
    random.seed(0)
    np.random.seed(0)
    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)
            for data_indices in iterator:
                animation_frame_array = []
                # shape: (batch, views, height, width, channels)
                images, viewpoints = subset[data_indices]
                camera_distance = np.mean(
                    np.linalg.norm(viewpoints[:, :, :3], axis=2))
                # -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)
                batch_index = 0
                total_views = images.shape[1]
                random_observation_view_indices = list(range(total_views))
                random.shuffle(random_observation_view_indices)
                random_viewed_observation_indices = random_observation_view_indices[:
                                                                                    total_observations_per_scene]

                #--------------------------------------------------------------
                # Ground Truth
                #--------------------------------------------------------------
                gt_images = images
                gt_viewpoints = viewpoints

                #--------------------------------------------------------------
                # Observations
                #--------------------------------------------------------------
                observed_images = images[batch_index,
                                         random_viewed_observation_indices]
                observed_viewpoints = viewpoints[
                    batch_index, random_viewed_observation_indices]
                observed_images = to_device(observed_images)
                observed_viewpoints = to_device(observed_viewpoints)

                #--------------------------------------------------------------
                # Generate images with a single observation
                #--------------------------------------------------------------
                representation = model.compute_observation_representation(
                    observed_images[None, :1], observed_viewpoints[None, :1])
                observation_index = random_viewed_observation_indices[0]
                observed_image = images[batch_index, observation_index]
                axis_observations_image = fill_observations_axis(
                    [observed_image])

                # Neural rendering
                render(representation, camera_distance, fps, fps * 6,
                       gt_images, gt_viewpoints, animation_frame_array)

                #--------------------------------------------------------------
                # Add observations
                #--------------------------------------------------------------
                for n in range(1, total_observations_per_scene):
                    observation_indices = random_viewed_observation_indices[:
                                                                            n + 1]
                    axis_observations_image = fill_observations_axis(
                        images[batch_index, observation_indices])
                    representation = model.compute_observation_representation(
                        observed_images[None, :n + 1],
                        observed_viewpoints[None, :n + 1])
                    render(representation,
                           camera_distance,
                           0,
                           fps // 2,
                           gt_images,
                           gt_viewpoints,
                           animation_frame_array,
                           rotate_camera=False)

                #--------------------------------------------------------------
                # Generate images with all observations
                #--------------------------------------------------------------
                # Slice end exceeds the array length by one, but numpy clamps
                # slices, so all 4 observations are used.
                representation = model.compute_observation_representation(
                    observed_images[None, :total_observations_per_scene + 1],
                    observed_viewpoints[None, :total_observations_per_scene +
                                        1])
                render(representation, camera_distance, 0, fps * 6, gt_images,
                       gt_viewpoints, animation_frame_array)

                #--------------------------------------------------------------
                # Write to file
                #--------------------------------------------------------------
                # NOTE(review): ArtistAnimation's interval is in milliseconds;
                # the saved mp4 frame rate is governed by fps= below.
                anim = animation.ArtistAnimation(fig,
                                                 animation_frame_array,
                                                 interval=1 / fps,
                                                 blit=True,
                                                 repeat_delay=0)
                anim.save("{}/shepard_metzler_observations_{}.mp4".format(
                    args.figure_directory, file_number),
                          writer="ffmpeg",
                          fps=fps)
                print("video saved")
                file_number += 1
def main():
    """Render, for each scene, a yaw x pitch grid of GQN predictions
    conditioned on three observations, and save the figure as a PNG.

    Relies on module-level names: ``args``, ``gqn``, ``HyperParameters``,
    ``Model``, ``cuda``/``cupy``, ``chainer``, ``plt``,
    ``preprocess_images`` and ``make_uint8``.
    """
    try:
        # makedirs (was os.mkdir): also creates missing parents, matching
        # the other entry points in this file.
        os.makedirs(args.figure_directory)
    except OSError:
        # Best-effort: directory may already exist (was a bare `except:`).
        pass

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cupy
    dataset = gqn.data.Dataset(args.dataset_path)
    hyperparams = HyperParameters(snapshot_directory=args.snapshot_path)
    model = Model(hyperparams, snapshot_directory=args.snapshot_path)
    if using_gpu:
        model.to_gpu()

    # Figure layout: three observations on top, big prediction grid below.
    fig = plt.figure(figsize=(12, 16))
    axis_observation_1 = plt.subplot2grid((4, 3), (0, 0))
    axis_observation_2 = plt.subplot2grid((4, 3), (0, 1))
    axis_observation_3 = plt.subplot2grid((4, 3), (0, 2))
    axis_predictions = plt.subplot2grid((4, 3), (1, 0), rowspan=3, colspan=3)
    axis_observation_1.axis("off")
    axis_observation_2.axis("off")
    axis_observation_3.axis("off")
    axis_predictions.set_xticks([], [])
    axis_predictions.set_yticks([], [])
    axis_observation_1.set_title("Observation 1", fontsize=22)
    axis_observation_2.set_title("Observation 2", fontsize=22)
    axis_observation_3.set_title("Observation 3", fontsize=22)
    axis_predictions.set_title("Neural Rendering", fontsize=22)
    axis_predictions.set_xlabel("Yaw", fontsize=22)
    axis_predictions.set_ylabel("Pitch", fontsize=22)

    num_views_per_scene = 3
    num_yaw_pitch_steps = 10
    image_width, image_height = hyperparams.image_size
    image_shape = (3, ) + hyperparams.image_size
    # uint8 canvas holding the num_yaw_pitch_steps^2 grid of predictions.
    prediction_images = make_uint8(
        np.full((num_yaw_pitch_steps * image_width,
                 num_yaw_pitch_steps * image_height, 3), 0))
    file_number = 1

    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)
            for data_indices in iterator:
                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]
                # -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)
                batch_index = 0
                total_views = images.shape[1]
                observation_view_indices = list(range(total_views))
                random.shuffle(observation_view_indices)
                observation_view_indices = observation_view_indices[:
                                                                    num_views_per_scene]
                observed_image_array = images[:, observation_view_indices]
                representation = model.compute_observation_representation(
                    observed_image_array,
                    viewpoints[:, observation_view_indices])

                axis_observation_1.imshow(
                    make_uint8(observed_image_array[batch_index, 0]))
                axis_observation_2.imshow(
                    make_uint8(observed_image_array[batch_index, 1]))
                axis_observation_3.imshow(
                    make_uint8(observed_image_array[batch_index, 2]))

                # Sweep pitch from pi/2 downward, yaw over a full circle.
                x_angle_rad = math.pi / 2
                for pitch_loop in range(num_yaw_pitch_steps):
                    y_angle_rad = math.pi
                    for yaw_loop in range(num_yaw_pitch_steps):
                        eye_norm = 3
                        eye_y = eye_norm * math.sin(x_angle_rad)
                        # NOTE(review): radius is not scaled by eye_norm, so
                        # the eye does not lie on a sphere of radius 3 —
                        # confirm this matches the training camera setup.
                        radius = math.cos(x_angle_rad)
                        eye = (radius * math.sin(y_angle_rad), eye_y,
                               radius * math.cos(y_angle_rad))
                        center = (0, 0, 0)
                        yaw = gqn.math.yaw(eye, center)
                        pitch = gqn.math.pitch(eye, center)
                        query_viewpoints = xp.array(
                            (eye[0], eye[1], eye[2], math.cos(yaw),
                             math.sin(yaw), math.cos(pitch), math.sin(pitch)),
                            dtype=np.float32)
                        query_viewpoints = xp.broadcast_to(
                            query_viewpoints, (1, ) + query_viewpoints.shape)
                        generated_images = model.generate_image(
                            query_viewpoints, representation)[0]
                        # Paste the prediction into its grid cell.
                        yi_start = pitch_loop * image_height
                        yi_end = (pitch_loop + 1) * image_height
                        xi_start = yaw_loop * image_width
                        xi_end = (yaw_loop + 1) * image_width
                        prediction_images[yi_start:yi_end,
                                          xi_start:xi_end] = make_uint8(
                                              generated_images)
                        y_angle_rad -= 2 * math.pi / num_yaw_pitch_steps
                    x_angle_rad -= math.pi / num_yaw_pitch_steps

                axis_predictions.imshow(prediction_images)
                plt.savefig("{}/shepard_matzler_predictions_{}.png".format(
                    args.figure_directory, file_number))
                file_number += 1
def main():
    """Ray-trace observations of procedurally generated scenes, condition a
    trained GQN on each one, and save a rotating neural-rendering mp4 per
    scene.

    Relies on module-level names: ``args``, ``gqn``, ``HyperParameters``,
    ``Model``, ``cuda``/``cp``, ``rtx``, ``cv2``, ``plt``, ``animation``,
    ``to_gpu``, ``build_scene``, ``preprocess_images``, ``make_uint8`` and
    ``rotate_query_viewpoint``.
    """
    try:
        # makedirs (was os.mkdir): also creates missing parents, matching
        # the other entry points in this file.
        os.makedirs(args.figure_directory)
    except OSError:
        # Best-effort: directory may already exist (was a bare `except:`).
        pass

    #### Model ####
    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cp
    hyperparams = HyperParameters(snapshot_directory=args.snapshot_path)
    model = Model(hyperparams, snapshot_directory=args.snapshot_path)
    if using_gpu:
        model.to_gpu()
    print(hyperparams)

    #### Renderer ####
    # Set GPU device
    rtx.set_device(args.gpu_device)

    # Initialize colors: evenly spaced hues -> opaque RGBA object colors.
    color_array = []
    for n in range(args.num_colors):
        hue = n / (args.num_colors - 1)
        saturation = 0.9
        lightness = 1  # passed as the V channel of HSV despite the name
        red, green, blue = colorsys.hsv_to_rgb(hue, saturation, lightness)
        color_array.append((red, green, blue, 1))

    screen_width = args.image_size
    screen_height = args.image_size

    # Setting up a raytracer
    rt_args = rtx.RayTracingArguments()
    rt_args.num_rays_per_pixel = 2048
    rt_args.max_bounce = 4
    rt_args.supersampling_enabled = False
    cuda_args = rtx.CUDAKernelLaunchArguments()
    cuda_args.num_threads = 64
    cuda_args.num_rays_per_thread = 32
    renderer = rtx.Renderer()
    render_buffer = np.zeros((screen_height, screen_width, 3),
                             dtype=np.float32)
    camera = rtx.OrthographicCamera()

    #### Figure ####
    plt.style.use("dark_background")
    fig = plt.figure(figsize=(8, 4))
    fig.suptitle("GQN")
    axis_observation = fig.add_subplot(1, 2, 1)
    axis_observation.axis("off")
    axis_observation.set_title("Observation")
    axis_generation = fig.add_subplot(1, 2, 2)
    axis_generation.axis("off")
    axis_generation.set_title("Generation")

    for scene_index in range(1, 100):
        scene = build_scene(color_array)
        eye_scale = 3
        total_frames_per_rotation = 48
        artist_frame_array = []
        observation_viewpoint_angle_rad = 0
        for k in range(5):
            # Ray-trace one observation from the current angle on the
            # horizontal circle of radius eye_scale.
            eye = tuple(p * eye_scale for p in [
                math.cos(observation_viewpoint_angle_rad),
                math.sin(observation_viewpoint_angle_rad), 0
            ])
            center = (0, 0, 0)
            camera.look_at(eye, center, up=(0, 1, 0))
            renderer.render(scene, camera, rt_args, cuda_args, render_buffer)
            # Convert to sRGB
            frame = np.power(np.clip(render_buffer, 0, 1), 1.0 / 2.2)
            frame = np.uint8(frame * 255)
            frame = cv2.bilateralFilter(frame, 3, 25, 25)
            observation_viewpoint_angle_rad += math.pi / 20

            # Pack position + cos/sin of yaw and pitch into a (1, 1, 7)
            # viewpoint. (Local typo `ovserved_viewpoint` fixed.)
            yaw = gqn.math.yaw(eye, center)
            pitch = gqn.math.pitch(eye, center)
            observed_viewpoint = np.array(
                eye + (math.cos(yaw), math.sin(yaw), math.cos(pitch),
                       math.sin(pitch)),
                dtype=np.float32)
            observed_viewpoint = observed_viewpoint[None, None, ...]
            observed_image = frame.astype(np.float32)
            observed_image = preprocess_images(observed_image,
                                               add_noise=False)
            observed_image = observed_image[None, None, ...]
            observed_image = observed_image.transpose((0, 1, 4, 2, 3))
            if using_gpu:
                observed_viewpoint = to_gpu(observed_viewpoint)
                observed_image = to_gpu(observed_image)
            representation = model.compute_observation_representation(
                observed_image, observed_viewpoint)

            # One full camera rotation of neural renderings per observation.
            query_viewpoint_angle_rad = 0
            for t in range(total_frames_per_rotation):
                artist_array = []
                query_viewpoint = rotate_query_viewpoint(
                    query_viewpoint_angle_rad, 1, xp)
                generated_image = model.generate_image(
                    query_viewpoint, representation)
                generated_image = make_uint8(generated_image[0])
                artist_array.append(
                    axis_observation.imshow(frame,
                                            interpolation="none",
                                            animated=True))
                artist_array.append(
                    axis_generation.imshow(generated_image, animated=True))
                query_viewpoint_angle_rad += 2 * math.pi / total_frames_per_rotation
                artist_frame_array.append(artist_array)

        # NOTE(review): ArtistAnimation's interval is in milliseconds; the
        # saved file's frame rate is governed by fps= below.
        anim = animation.ArtistAnimation(fig,
                                         artist_frame_array,
                                         interval=1 / 24,
                                         blit=True,
                                         repeat_delay=0)
        anim.save("{}/shepard_matzler_uncertainty_{}.mp4".format(
            args.figure_directory, scene_index),
                  writer="ffmpeg",
                  fps=12)