def rollout_states(state, config):
  hidden_states = []
  states = [state]
  with tf.variable_scope('states'):
    for i in range(1, config.predictron_depth + 1):
      if config.shared_core:
        # A single core is applied at every depth; reuse its variables after
        # the first step.
        scope = 'shared-core'
        reuse = i > 1
      else:
        scope = 'core-%d' % i
        reuse = False
      with tf.variable_scope(scope, reuse=reuse):
        hidden_state, state = model_network(state, config, reuse)
        states.append(state)
        hidden_states.append(hidden_state)

  # Drop the final rolled-out state so that states[i] is the input to core
  # i + 1, giving predictron_depth entries in both tensors.
  states = tf.reshape(
      tf.stack(states[:-1], 1),
      [config.batch_size, config.predictron_depth, state_size(config)])
  hidden_states = tf.reshape(
      tf.stack(hidden_states, 1),
      [config.batch_size, config.predictron_depth, state_size(config)])

  util.activation_summary(states)
  return states, hidden_states
def state_representation(inputs, config):
  with tf.variable_scope('state_representation'):
    with tf.variable_scope('layer-1') as scope:
      kernel_1 = util.variable_with_weight_decay(
          'weights', [3, 3, config.input_channels, config.state_kernels])
      biases_1 = util.variable_on_cpu('biases', [config.state_kernels],
                                      tf.constant_initializer(0.1))
      conv_1 = tf.nn.conv2d(inputs, kernel_1, [1, 1, 1, 1], padding='SAME')
      bias_1 = tf.nn.bias_add(conv_1, biases_1)
      hidden_1 = tf.nn.relu(bias_1, name=scope.name)
      util.activation_summary(hidden_1)

    with tf.variable_scope('layer-2') as scope:
      kernel_2 = util.variable_with_weight_decay(
          'weights', [3, 3, config.state_kernels, config.state_kernels])
      biases_2 = util.variable_on_cpu('biases', [config.state_kernels],
                                      tf.constant_initializer(0.1))
      conv_2 = tf.nn.conv2d(hidden_1, kernel_2, [1, 1, 1, 1], padding='SAME')
      bias_2 = tf.nn.bias_add(conv_2, biases_2)
      state_representation = tf.nn.relu(bias_2, name=scope.name)
      util.activation_summary(state_representation)

  return state_representation
def sample_graph(self, reuse=False):
  self.sampled_codes = ops.sample_gaussian(self.prior_codes_mu,
                                           self.prior_codes_sigma,
                                           self.codes_noise, 'sample_codes',
                                           1.0)
  self.sampled_recs_mu = self.decode_codes(self.sampled_codes, reuse=reuse)
  util.activation_summary(self.sampled_codes, 'sampled_img_codes')
def lambda_preturn_network(preturns, lambdas):
  # Final lambda must be zero
  final_lambda = tf.Assert(
      tf.reduce_all(tf.equal(lambdas[:, -1, :], 0.0)), [lambdas[:, -1, :]])

  with tf.control_dependencies([final_lambda]):
    with tf.variable_scope('lambda_preturn'):
      accum_lambda = tf.cumprod(lambdas, axis=1, exclusive=True)
      lambda_bar = (1 - lambdas) * accum_lambda  # This should always sum to 1
      lambda_preturn = tf.reduce_sum(lambda_bar * preturns, axis=1)
      util.activation_summary(lambda_preturn)
      return lambda_preturn
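# A minimal NumPy sanity check of the weighting above (an illustrative sketch,
# not part of the TF graph; the values below are made up). With the final
# lambda pinned to zero, the weights (1 - lambda_k) * prod_{j<k} lambda_j sum
# to one, so the lambda-preturn is a convex combination of the k-step preturns.
def _lambda_bar_sketch():
  import numpy as np
  lambdas = np.array([0.5, 0.5, 0.0])  # lambdas[-1] must be zero
  # Exclusive cumulative product, matching tf.cumprod(..., exclusive=True)
  accum_lambda = np.concatenate(([1.0], np.cumprod(lambdas[:-1])))
  lambda_bar = (1 - lambdas) * accum_lambda  # -> [0.5, 0.25, 0.25]
  assert np.isclose(lambda_bar.sum(), 1.0)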
def reward_network(hidden_states, config):
  with tf.variable_scope('reward'):
    rewards = output_network(hidden_states, config)

    # Insert rewards[0] as zero
    rewards = tf.slice(
        rewards,
        begin=[0, 0, 0],
        size=[
            config.batch_size, config.predictron_depth - 1, config.reward_size
        ])
    rewards = tf.concat(
        axis=1,
        values=[tf.zeros([config.batch_size, 1, config.reward_size]), rewards])

    util.activation_summary(rewards)
    return rewards
def lambda_network(hidden_states, config):
  with tf.variable_scope('lambda') as scope:
    logits = output_network(hidden_states, config)
    lambdas = tf.nn.sigmoid(logits, name=scope.name)

    # Set lambdas[-1] to zero
    lambdas = tf.slice(
        lambdas,
        begin=[0, 0, 0],
        size=[
            config.batch_size, config.predictron_depth - 1, config.reward_size
        ])
    lambdas = tf.concat(
        axis=1,
        values=[lambdas, tf.zeros([config.batch_size, 1, config.reward_size])])

    util.activation_summary(lambdas)
    return lambdas
def discount_network(hidden_states, config):
  with tf.variable_scope('discount'):
    logits = output_network(hidden_states, config)
    discounts = tf.nn.sigmoid(logits)

    # Insert discounts[0] as one
    discounts = tf.slice(
        discounts,
        begin=[0, 0, 0],
        size=[
            config.batch_size, config.predictron_depth - 1, config.reward_size
        ])
    discounts = tf.concat(
        axis=1,
        values=[
            tf.ones([config.batch_size, 1, config.reward_size]), discounts
        ])

    util.activation_summary(discounts)
    return discounts
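# The three output heads above all pin a boundary step with the same
# slice-and-concat shift. A NumPy sketch of the pattern (illustrative values;
# single batch element, reward_size == 1): drop one step along the depth axis
# and splice in the constant that the preturn recursion expects.
def _boundary_shift_sketch():
  import numpy as np
  raw = np.array([[0.7], [0.2], [0.9]])  # hypothetical per-depth head outputs
  # rewards/discounts: prepend the fixed first step, dropping the last entry
  rewards = np.concatenate((np.zeros((1, 1)), raw[:-1]), axis=0)
  discounts = np.concatenate((np.ones((1, 1)), raw[:-1]), axis=0)
  # lambdas: append the fixed final step, dropping the last entry
  lambdas = np.concatenate((raw[:-1], np.zeros((1, 1))), axis=0)
  assert rewards[0, 0] == 0.0 and discounts[0, 0] == 1.0 and lambdas[-1, 0] == 0.0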
def preturn_network(rewards, discounts, values):
  # First reward must be zero, first discount must be one
  first_reward = tf.Assert(
      tf.reduce_all(tf.equal(rewards[:, 0, :], 0.0)), [rewards[:, 0, :]])
  first_discount = tf.Assert(
      tf.reduce_all(tf.equal(discounts[:, 0, :], 1.0)), [discounts[:, 0, :]])

  with tf.control_dependencies([first_reward, first_discount]):
    with tf.variable_scope('preturn'):
      accum_value_discounts = tf.cumprod(discounts, axis=1, exclusive=False)
      accum_reward_discounts = tf.cumprod(discounts, axis=1, exclusive=True)
      discounted_values = values * accum_value_discounts
      discounted_rewards = rewards * accum_reward_discounts
      cumulative_rewards = tf.cumsum(discounted_rewards, axis=1)
      preturns = cumulative_rewards + discounted_values
      util.activation_summary(preturns)
      return preturns
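# A NumPy sketch (illustrative values only, not part of the graph) showing
# that the cumprod/cumsum trick above reproduces the k-step preturn
#   g^k = r_1 + gamma_1 * r_2 + ... + (gamma_1 ... gamma_{k-1}) * r_k
#         + (gamma_1 ... gamma_k) * v_k,
# given the conventions rewards[0] == 0 and discounts[0] == 1.
def _preturn_sketch():
  import numpy as np
  rewards = np.array([0.0, 1.0, 2.0])    # r_0 is the zero placeholder
  discounts = np.array([1.0, 0.9, 0.8])  # gamma_0 is the one placeholder
  values = np.array([5.0, 4.0, 3.0])
  accum_value_discounts = np.cumprod(discounts)  # inclusive cumprod
  accum_reward_discounts = np.concatenate(
      ([1.0], np.cumprod(discounts)[:-1]))       # exclusive cumprod
  preturns = (np.cumsum(rewards * accum_reward_discounts)
              + values * accum_value_discounts)
  # g^0 = v_0; g^1 = r_1 + 0.9 * v_1; g^2 = r_1 + 0.9 * r_2 + 0.72 * v_2
  assert np.allclose(preturns, [5.0, 4.6, 4.96])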
def train_graph(self, reuse=False):
  self.codes_mu, self.codes_sigma = self.encode(self.images, reuse=reuse)
  self.codes = ops.sample_gaussian(self.codes_mu, self.codes_sigma,
                                   self.codes_noise, 'sample_codes',
                                   self.stocha[0])
  self.recs_mu = self.decode_codes(self.codes, reuse=reuse)
  util.activation_summary(self.codes_mu, 'img_codes_mu')
  util.activation_summary(self.codes_sigma, 'img_codes_sigma')
  util.activation_summary(self.codes, 'img_codes')
def value_network(states, config):
  with tf.variable_scope('value'):
    values = output_network(states, config)
    util.activation_summary(values)
    return values
def model_network(state, config, reuse):
  with tf.variable_scope('model', reuse=reuse):
    with tf.variable_scope('layer-1', reuse=reuse) as scope:
      kernel_1 = util.variable_with_weight_decay(
          'weights', [3, 3, config.state_kernels, config.state_kernels])
      biases_1 = util.variable_on_cpu('biases', [config.state_kernels],
                                      tf.constant_initializer(0.1))
      conv_1 = tf.nn.conv2d(state, kernel_1, [1, 1, 1, 1], padding='SAME')
      bias_1 = tf.nn.bias_add(conv_1, biases_1)
      normalized_1 = tf.contrib.layers.batch_norm(
          bias_1,
          decay=0.99,
          center=False,
          scale=False,
          is_training=config.is_training,
          scope=scope,
          reuse=reuse)
      hidden_layer_1 = tf.nn.relu(normalized_1, name=scope.name)
      util.activation_summary(hidden_layer_1)

    with tf.variable_scope('layer-2', reuse=reuse) as scope:
      kernel_2 = util.variable_with_weight_decay(
          'weights', [3, 3, config.state_kernels, config.state_kernels])
      biases_2 = util.variable_on_cpu('biases', [config.state_kernels],
                                      tf.constant_initializer(0.1))
      conv_2 = tf.nn.conv2d(hidden_layer_1, kernel_2, [1, 1, 1, 1],
                            padding='SAME')
      bias_2 = tf.nn.bias_add(conv_2, biases_2)
      normalized_2 = tf.contrib.layers.batch_norm(
          bias_2,
          decay=0.99,
          center=False,
          scale=False,
          is_training=config.is_training,
          scope=scope,
          reuse=reuse)
      hidden_layer_2 = tf.nn.relu(normalized_2, name=scope.name)
      util.activation_summary(hidden_layer_2)

    with tf.variable_scope('layer-3', reuse=reuse) as scope:
      kernel_3 = util.variable_with_weight_decay(
          'weights', [3, 3, config.state_kernels, config.state_kernels])
      biases_3 = util.variable_on_cpu('biases', [config.state_kernels],
                                      tf.constant_initializer(0.1))
      conv_3 = tf.nn.conv2d(hidden_layer_2, kernel_3, [1, 1, 1, 1],
                            padding='SAME')
      bias_3 = tf.nn.bias_add(conv_3, biases_3)
      normalized_3 = tf.contrib.layers.batch_norm(
          bias_3,
          decay=0.99,
          center=False,
          scale=False,
          is_training=config.is_training,
          scope=scope,
          reuse=reuse)
      next_state = tf.nn.relu(normalized_3, name=scope.name)

  # hidden_layer_1 feeds the output heads; next_state continues the rollout.
  return hidden_layer_1, next_state