def process_single_frame(prev_outputs, inputs):
  """Process a single frame of the video."""
  cur_image, input_reward, action = inputs
  time_step, prev_image, prev_reward, frame_buf, lstm_states = prev_outputs

  generated_items = [prev_image]
  groundtruth_items = [cur_image]
  done_warm_start = tf.greater(time_step, context_frames - 1)
  input_image, = self.get_scheduled_sample_inputs(
      done_warm_start, groundtruth_items, generated_items, ss_func)

  # Prediction
  pred_image, lstm_states = self.construct_predictive_tower(
      input_image, None, action, lstm_states, latent)

  if self.hparams.reward_prediction:
    reward_input_image = pred_image
    if self.hparams.reward_prediction_stop_gradient:
      reward_input_image = tf.stop_gradient(reward_input_image)
    with tf.control_dependencies([time_step]):
      frame_buf = [reward_input_image] + frame_buf[:-1]
    pred_reward = self.reward_prediction(frame_buf, None, action, latent)
    pred_reward = common_video.decode_to_shape(
        pred_reward, common_layers.shape_list(input_reward), "reward_dec")
  else:
    pred_reward = prev_reward

  time_step += 1
  outputs = (time_step, pred_image, pred_reward, frame_buf, lstm_states)
  return outputs
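# A minimal, standalone sketch (plain Python, not part of the model) of the
# frame-buffer update above: frame_buf is a fixed-length list of the most
# recent frames, and each step pushes the newest frame to the front while
# dropping the oldest, i.e. a bounded FIFO over time.
buf = ["f0", "f1", "f2"]   # newest first, oldest last
buf = ["f3"] + buf[:-1]    # after one step: newest prepended, oldest dropped
assert buf == ["f3", "f0", "f1"]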
def reward_prediction(self, input_image, input_reward, action, latent):
  """Builds a reward prediction network."""
  del action
  del latent

  conv_size = self.tinyify([32, 32, 16, 4])
  with tf.variable_scope("reward_pred", reuse=tf.AUTO_REUSE):
    x = input_image
    # Three batch-norm + stride-2 conv stages progressively downsample the
    # input before it is decoded to the reward shape.
    x = tfcl.batch_norm(x, updates_collections=None,
                        is_training=self.is_training, scope="reward_bn0")
    x = tfl.conv2d(x, conv_size[1], [3, 3], strides=(2, 2), padding="SAME",
                   activation=tf.nn.relu, name="reward_conv1")
    x = tfcl.batch_norm(x, updates_collections=None,
                        is_training=self.is_training, scope="reward_bn1")
    x = tfl.conv2d(x, conv_size[2], [3, 3], strides=(2, 2), padding="SAME",
                   activation=tf.nn.relu, name="reward_conv2")
    x = tfcl.batch_norm(x, updates_collections=None,
                        is_training=self.is_training, scope="reward_bn2")
    x = tfl.conv2d(x, conv_size[3], [3, 3], strides=(2, 2), padding="SAME",
                   activation=tf.nn.relu, name="reward_conv3")

    pred_reward = common_video.decode_to_shape(
        x, input_reward.shape, "reward_dec")
    return pred_reward
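# A standalone shape sketch (assuming TF1 graph mode and 64x64 frames; the
# filter counts mirror tinyify([32, 32, 16, 4]) but are otherwise
# illustrative): three stride-2 3x3 convolutions halve the spatial size each
# time, 64 -> 32 -> 16 -> 8, before decode_to_shape maps the result to the
# reward shape.
import tensorflow as tf

frames = tf.placeholder(tf.float32, [None, 64, 64, 3])
x = frames
for i, filters in enumerate([32, 16, 4]):
  x = tf.layers.conv2d(x, filters, [3, 3], strides=(2, 2), padding="SAME",
                       activation=tf.nn.relu, name="sketch_conv%d" % i)
print(x.shape)  # (?, 8, 8, 4): each stride-2 conv halves the spatial size.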
def process_single_frame(prev_outputs, inputs):
  """Process a single frame of the video."""
  cur_image, input_reward, action = inputs
  time_step, prev_image, prev_reward, frame_buf, lstm_states = prev_outputs

  # Sample from softmax (by argmax). This is a no-op for non-softmax losses.
  prev_image = self.get_sampled_frame(prev_image)

  generated_items = [prev_image]
  groundtruth_items = [cur_image]
  done_warm_start = tf.greater(time_step, context_frames - 1)
  input_image, = self.get_scheduled_sample_inputs(
      done_warm_start, groundtruth_items, generated_items, ss_func)

  # Prediction
  pred_image, lstm_states, _ = self.construct_predictive_tower(
      input_image, None, action, lstm_states, latent)

  if self.hparams.reward_prediction:
    reward_input_image = self.get_sampled_frame(pred_image)
    if self.hparams.reward_prediction_stop_gradient:
      reward_input_image = tf.stop_gradient(reward_input_image)
    with tf.control_dependencies([time_step]):
      frame_buf = [reward_input_image] + frame_buf[:-1]
    pred_reward = self.reward_prediction(frame_buf, None, action, latent)
    pred_reward = common_video.decode_to_shape(
        pred_reward, common_layers.shape_list(input_reward), "reward_dec")
  else:
    pred_reward = prev_reward

  time_step += 1
  outputs = (time_step, pred_image, pred_reward, frame_buf, lstm_states)
  return outputs
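# A minimal, self-contained illustration (assuming TF1 graph mode) of the
# tf.scan control flow that step functions shaped like process_single_frame
# are written for: prev_outputs threads recurrent state across time and
# inputs supplies one frame per step. The toy step below only counts steps
# and echoes the frame; it is not the model, just the calling pattern.
import numpy as np
import tensorflow as tf

def toy_step(prev_outputs, inputs):
  time_step, _ = prev_outputs
  cur_image = inputs
  return (time_step + 1, cur_image)

frames = tf.constant(np.random.rand(10, 4, 8, 8, 3), tf.float32)  # T,B,H,W,C
init = (tf.constant(0), tf.zeros([4, 8, 8, 3]))
time_steps, echoed = tf.scan(toy_step, frames, initializer=init)

with tf.Session() as sess:
  print(sess.run(time_steps))  # [1 2 ... 10]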
def construct_model(self, images, actions, rewards):
  images = tf.unstack(images, axis=0)
  actions = tf.unstack(actions, axis=0)
  rewards = tf.unstack(rewards, axis=0)

  batch_size = common_layers.shape_list(images[0])[0]
  context_frames = self.hparams.video_num_input_frames

  # Predicted images and rewards.
  gen_rewards, gen_images, latent_means, latent_stds = [], [], [], []

  # LSTM states.
  lstm_state = [None] * 7

  # Create scheduled sampling function.
  ss_func = self.get_scheduled_sample_func(batch_size)

  pred_image = tf.zeros_like(images[0])
  pred_reward = tf.zeros_like(rewards[0])
  latent = None
  for timestep, image, action, reward in zip(
      range(len(images) - 1), images[:-1], actions[:-1], rewards[:-1]):
    # Scheduled Sampling
    done_warm_start = timestep > context_frames - 1
    groundtruth_items = [image, reward]
    generated_items = [pred_image, pred_reward]
    input_image, input_reward = self.get_scheduled_sample_inputs(
        done_warm_start, groundtruth_items, generated_items, ss_func)

    # Latent
    # TODO(mbz): should we use input_image instead of image?
    latent_images = tf.stack([image, images[timestep + 1]], axis=0)
    latent_mean, latent_std = self.construct_latent_tower(
        latent_images, time_axis=0)
    latent = common_video.get_gaussian_tensor(latent_mean, latent_std)
    latent_means.append(latent_mean)
    latent_stds.append(latent_std)

    # Prediction
    pred_image, lstm_state = self.construct_predictive_tower(
        input_image, input_reward, action, lstm_state, latent)

    if self.hparams.reward_prediction:
      pred_reward = self.reward_prediction(
          pred_image, input_reward, action, latent)
      pred_reward = common_video.decode_to_shape(
          pred_reward, common_layers.shape_list(input_reward), "reward_dec")
    else:
      pred_reward = input_reward

    gen_images.append(pred_image)
    gen_rewards.append(pred_reward)

  gen_images = tf.stack(gen_images, axis=0)
  gen_rewards = tf.stack(gen_rewards, axis=0)

  return gen_images, gen_rewards, latent_means, latent_stds
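# A standalone sketch (TF1 assumed) of the per-batch-element choice that
# get_scheduled_sample_inputs makes above: each element takes either the
# ground-truth item or the model's previous prediction. The real ss_func
# anneals the ground-truth probability over training; the fixed 0.5 below is
# purely illustrative.
import tensorflow as tf

batch_size = 4
groundtruth = tf.ones([batch_size, 8, 8, 3])
generated = tf.zeros([batch_size, 8, 8, 3])
use_groundtruth = tf.random_uniform([batch_size]) < 0.5
mixed = tf.where(use_groundtruth, groundtruth, generated)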
def construct_model(self, images, actions, rewards):
  images = tf.unstack(images, axis=0)
  actions = tf.unstack(actions, axis=0)
  rewards = tf.unstack(rewards, axis=0)

  batch_size = common_layers.shape_list(images[0])[0]
  context_frames = self.hparams.video_num_input_frames

  # Predicted images and rewards.
  gen_rewards, gen_images, latent_means, latent_stds = [], [], [], []

  # LSTM states.
  lstm_state = [None] * 7

  # Create scheduled sampling function.
  ss_func = self.get_scheduled_sample_func(batch_size)

  pred_image = tf.zeros_like(images[0])
  pred_reward = tf.zeros_like(rewards[0])
  latent = None
  for timestep, image, action, reward in zip(
      range(len(images) - 1), images[:-1], actions[:-1], rewards[:-1]):
    # Scheduled Sampling
    done_warm_start = timestep > context_frames - 1
    groundtruth_items = [image, reward]
    generated_items = [pred_image, pred_reward]
    input_image, input_reward = self.get_scheduled_sample_inputs(
        done_warm_start, groundtruth_items, generated_items, ss_func)

    # Latent
    # TODO(mbz): should we use input_image instead of image?
    latent_images = tf.stack([image, images[timestep + 1]], axis=0)
    latent_mean, latent_std = self.construct_latent_tower(
        latent_images, time_axis=0)
    latent = common_video.get_gaussian_tensor(latent_mean, latent_std)
    latent_means.append(latent_mean)
    latent_stds.append(latent_std)

    # Prediction
    pred_image, lstm_state, _ = self.construct_predictive_tower(
        input_image, input_reward, action, lstm_state, latent)

    if self.hparams.reward_prediction:
      pred_reward = self.reward_prediction(
          pred_image, input_reward, action, latent)
      pred_reward = common_video.decode_to_shape(
          pred_reward, common_layers.shape_list(input_reward), "reward_dec")
    else:
      pred_reward = input_reward

    gen_images.append(pred_image)
    gen_rewards.append(pred_reward)

  gen_images = tf.stack(gen_images, axis=0)
  gen_rewards = tf.stack(gen_rewards, axis=0)

  return gen_images, gen_rewards, latent_means, latent_stds
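# A standalone sketch of the reparameterization step behind
# common_video.get_gaussian_tensor above: sample latent = mean + std * eps
# with eps ~ N(0, I), so gradients flow through mean and std. Whether the
# second tensor is a raw std or a log-variance depends on
# construct_latent_tower; this sketch assumes a raw, positive std, and the
# latent shape below is hypothetical.
import tensorflow as tf

latent_mean = tf.zeros([4, 8, 8, 16])
latent_std = tf.ones([4, 8, 8, 16])
eps = tf.random_normal(tf.shape(latent_mean))
latent = latent_mean + latent_std * eps  # differentiable stochastic sample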