Example #1
def rollout_states(state, config):
    hidden_states = []
    states = [state]

    with tf.variable_scope('states'):
        for i in range(1, config.predictron_depth + 1):
            if config.shared_core:
                # A single core is reused at every rollout step, so variables
                # are created on the first step and reused afterwards.
                scope = 'shared-core'
                reuse = i > 1
            else:
                # Each rollout step gets its own core with separate variables.
                scope = 'core-%d' % i
                reuse = False

            with tf.variable_scope(scope, reuse=reuse):
                hidden_state, state = model_network(state, config, reuse)
                states.append(state)
                hidden_states.append(hidden_state)

        # Stack the per-step tensors along a new depth axis and flatten the
        # spatial dimensions to [batch_size, predictron_depth, state_size].
        # The final state is dropped so that states and hidden_states line up.
        states = tf.reshape(
            tf.stack(states[:-1], 1),
            [config.batch_size, config.predictron_depth,
             state_size(config)])
        hidden_states = tf.reshape(
            tf.stack(hidden_states, 1),
            [config.batch_size, config.predictron_depth,
             state_size(config)])

        util.activation_summary(states)
        return states, hidden_states
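Every example here ends with a call to util.activation_summary. The helper itself is not shown, so the following is only a minimal sketch of what it might look like, modeled on the common TensorFlow 1.x pattern of logging a histogram of the activations plus a scalar sparsity summary; the optional name argument matches the two-argument calls in examples #3 and #9.

import tensorflow as tf

def activation_summary(x, name=None):
    # Use the tensor's op name as the summary tag unless one is given.
    tag = name or x.op.name
    tf.summary.histogram(tag + '/activations', x)
    tf.summary.scalar(tag + '/sparsity', tf.nn.zero_fraction(x))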
Example #2
def state_representation(inputs, config):
    with tf.variable_scope('state_representation') as scope:
        with tf.variable_scope('layer-1') as scope:
            kernel_1 = util.variable_with_weight_decay(
                'weights', [3, 3, config.input_channels, config.state_kernels])
            biases_1 = util.variable_on_cpu('biases', [config.state_kernels],
                                            tf.constant_initializer(0.1))
            conv_1 = tf.nn.conv2d(inputs,
                                  kernel_1, [1, 1, 1, 1],
                                  padding='SAME')
            bias_1 = tf.nn.bias_add(conv_1, biases_1)
            hidden_1 = tf.nn.relu(bias_1, name=scope.name)
            util.activation_summary(hidden_1)

        with tf.variable_scope('layer-2') as scope:
            kernel_2 = util.variable_with_weight_decay(
                'weights', [3, 3, config.state_kernels, config.state_kernels])
            biases_2 = util.variable_on_cpu('biases', [config.state_kernels],
                                            tf.constant_initializer(0.1))
            conv_2 = tf.nn.conv2d(hidden_1,
                                  kernel_2, [1, 1, 1, 1],
                                  padding='SAME')
            bias_2 = tf.nn.bias_add(conv_2, biases_2)
            state_representation = tf.nn.relu(bias_2, name=scope.name)
            util.activation_summary(state_representation)

        return state_representation
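The variable helpers used here (util.variable_with_weight_decay and util.variable_on_cpu) are also not shown. A plausible sketch, assuming they follow the TensorFlow CIFAR-10 tutorial pattern, is given below; the stddev and wd defaults are illustrative, not the project's actual values.

import tensorflow as tf

def variable_on_cpu(name, shape, initializer):
    # Keep variable storage on the CPU so it can be shared across GPU towers.
    with tf.device('/cpu:0'):
        return tf.get_variable(name, shape, initializer=initializer)

def variable_with_weight_decay(name, shape, stddev=0.1, wd=1e-4):
    # Truncated-normal initialisation plus an optional L2 penalty collected
    # into the 'losses' collection for later addition to the total loss.
    var = variable_on_cpu(
        name, shape, tf.truncated_normal_initializer(stddev=stddev))
    if wd is not None:
        weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
        tf.add_to_collection('losses', weight_decay)
    return var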
Example #3
    def sample_graph(self, reuse=False):
        self.sampled_codes = ops.sample_gaussian(self.prior_codes_mu,
                                                 self.prior_codes_sigma,
                                                 self.codes_noise,
                                                 'sample_codes', 1.0)

        self.sampled_recs_mu = self.decode_codes(self.sampled_codes,
                                                 reuse=reuse)
        util.activation_summary(self.sampled_codes, 'sampled_img_codes')
Example #4
def lambda_preturn_network(preturns, lambdas):
    # Final lambda must be zero
    final_lambda = tf.Assert(tf.reduce_all(tf.equal(lambdas[:, -1, :], 0.0)),
                             [lambdas[:, -1, :]])

    with tf.control_dependencies([final_lambda]):
        with tf.variable_scope('lambda_preturn'):
            accum_lambda = tf.cumprod(lambdas, axis=1, exclusive=True)
            lambda_bar = (
                1 - lambdas) * accum_lambda  # This should always sum to 1
            lambda_preturn = tf.reduce_sum(lambda_bar * preturns, axis=1)

            util.activation_summary(lambda_preturn)
            return lambda_preturn
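The comment "This should always sum to 1" can be checked directly: with an exclusive cumulative product, lambda_bar telescopes to 1 minus the product of all lambdas, which is exactly 1 when the final lambda is zero. A small NumPy illustration (not part of the original code):

import numpy as np

lambdas = np.array([0.9, 0.7, 0.5, 0.0])                    # final lambda is zero
accum = np.cumprod(np.concatenate([[1.0], lambdas[:-1]]))   # exclusive cumprod
lambda_bar = (1.0 - lambdas) * accum
print(lambda_bar.sum())                                      # 1.0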
Example #5
def reward_network(hidden_states, config):
    with tf.variable_scope('reward') as scope:
        rewards = output_network(hidden_states, config)

        # Insert rewards[0] as zero
        rewards = tf.slice(rewards,
                           begin=[0, 0, 0],
                           size=[
                               config.batch_size, config.predictron_depth - 1,
                               config.reward_size
                           ])
        rewards = tf.concat(
            axis=1,
            values=[
                tf.zeros([config.batch_size, 1, config.reward_size]), rewards
            ])

        util.activation_summary(rewards)
        return rewards
Example #6
def lambda_network(hidden_states, config):
    with tf.variable_scope('lambda') as scope:
        logits = output_network(hidden_states, config)
        lambdas = tf.nn.sigmoid(logits, name=scope.name)

        # Set lambdas[-1] to zero
        lambdas = tf.slice(lambdas,
                           begin=[0, 0, 0],
                           size=[
                               config.batch_size, config.predictron_depth - 1,
                               config.reward_size
                           ])
        lambdas = tf.concat(axis=1,
                            values=[
                                lambdas,
                                tf.zeros(
                                    [config.batch_size, 1, config.reward_size])
                            ])

        util.activation_summary(lambdas)
        return lambdas
Example #7
def discount_network(hidden_states, config):
    with tf.variable_scope('discount') as scope:
        logits = output_network(hidden_states, config)
        discounts = tf.nn.sigmoid(logits)

        # Insert discounts[0] as one
        discounts = tf.slice(discounts,
                             begin=[0, 0, 0],
                             size=[
                                 config.batch_size,
                                 config.predictron_depth - 1,
                                 config.reward_size
                             ])
        discounts = tf.concat(
            axis=1,
            values=[
                tf.ones([config.batch_size, 1, config.reward_size]), discounts
            ])

        util.activation_summary(discounts)
        return discounts
Example #8
def preturn_network(rewards, discounts, values):
    # First reward must be zero, first discount must be one
    first_reward = tf.Assert(tf.reduce_all(tf.equal(rewards[:, 0, :], 0.0)),
                             [rewards[:, 0, :]])
    first_discount = tf.Assert(
        tf.reduce_all(tf.equal(discounts[:, 0, :], 1.0)), [discounts[:, 0, :]])

    with tf.control_dependencies([first_reward, first_discount]):
        with tf.variable_scope('preturn'):
            accum_value_discounts = tf.cumprod(discounts,
                                               axis=1,
                                               exclusive=False)
            accum_reward_discounts = tf.cumprod(discounts,
                                                axis=1,
                                                exclusive=True)
            discounted_values = values * accum_value_discounts
            discounted_rewards = rewards * accum_reward_discounts
            cumulative_rewards = tf.cumsum(discounted_rewards, axis=1)
            preturns = cumulative_rewards + discounted_values

            util.activation_summary(preturns)
            return preturns
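As a sanity check (illustrative only, not part of the original code), the cumprod/cumsum form above reproduces the k-step preturn g_k = r_1 + gamma_1 * r_2 + ... + (gamma_1 ... gamma_(k-1)) * r_k + (gamma_1 ... gamma_k) * v_k once the networks above have forced r_0 = 0 and gamma_0 = 1:

import numpy as np

rewards = np.array([0.0, 1.0, 0.5, 2.0])     # r_0 forced to zero
discounts = np.array([1.0, 0.9, 0.8, 0.7])   # gamma_0 forced to one
values = np.array([3.0, 2.5, 2.0, 1.5])

accum_value = np.cumprod(discounts)                                  # inclusive
accum_reward = np.cumprod(np.concatenate([[1.0], discounts[:-1]]))   # exclusive
preturns = np.cumsum(rewards * accum_reward) + values * accum_value

# Reference computation with an explicit loop over rollout steps.
for k in range(len(rewards)):
    g = sum(np.prod(discounts[:i]) * rewards[i] for i in range(k + 1))
    g += np.prod(discounts[:k + 1]) * values[k]
    assert np.isclose(g, preturns[k])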
Example #9
    def train_graph(self, reuse=False):
        self.codes_mu, self.codes_sigma = self.encode(self.images, reuse=reuse)
        self.codes = ops.sample_gaussian(self.codes_mu, self.codes_sigma,
                                         self.codes_noise, 'sample_codes',
                                         self.stocha[0])

        self.recs_mu = self.decode_codes(self.codes, reuse=reuse)

        util.activation_summary(self.codes_mu, 'img_codes_mu')
        util.activation_summary(self.codes_sigma, 'img_codes_sigma')
        util.activation_summary(self.codes, 'img_codes')
Example #10
def value_network(states, config):
    with tf.variable_scope('value') as scope:
        values = output_network(states, config)
        util.activation_summary(values)
        return values
Example #11
def model_network(state, config, reuse):
    with tf.variable_scope('model', reuse=reuse):
        with tf.variable_scope('layer-1', reuse=reuse) as scope:
            kernel_1 = util.variable_with_weight_decay(
                'weights', [3, 3, config.state_kernels, config.state_kernels])
            biases_1 = util.variable_on_cpu('biases', [config.state_kernels],
                                            tf.constant_initializer(0.1))
            conv_1 = tf.nn.conv2d(state,
                                  kernel_1, [1, 1, 1, 1],
                                  padding='SAME')
            bias_1 = tf.nn.bias_add(conv_1, biases_1)
            normalized_1 = tf.contrib.layers.batch_norm(
                bias_1,
                decay=0.99,
                center=False,
                scale=False,
                is_training=config.is_training,
                scope=scope,
                reuse=reuse)
            hidden_layer_1 = tf.nn.relu(normalized_1, name=scope.name)
            util.activation_summary(hidden_layer_1)

        with tf.variable_scope('layer-2', reuse=reuse) as scope:
            kernel_2 = util.variable_with_weight_decay(
                'weights', [3, 3, config.state_kernels, config.state_kernels])
            biases_2 = util.variable_on_cpu('biases', [config.state_kernels],
                                            tf.constant_initializer(0.1))
            conv_2 = tf.nn.conv2d(hidden_layer_1,
                                  kernel_2, [1, 1, 1, 1],
                                  padding='SAME')
            bias_2 = tf.nn.bias_add(conv_2, biases_2)
            normalized_2 = tf.contrib.layers.batch_norm(
                bias_2,
                decay=0.99,
                center=False,
                scale=False,
                is_training=config.is_training,
                scope=scope,
                reuse=reuse)
            hidden_layer_2 = tf.nn.relu(normalized_2, name=scope.name)
            util.activation_summary(hidden_layer_2)

        with tf.variable_scope('layer-3', reuse=reuse) as scope:
            kernel_3 = util.variable_with_weight_decay(
                'weights', [3, 3, config.state_kernels, config.state_kernels])
            biases_3 = util.variable_on_cpu('biases', [config.state_kernels],
                                            tf.constant_initializer(0.1))
            conv_3 = tf.nn.conv2d(hidden_layer_2,
                                  kernel_3, [1, 1, 1, 1],
                                  padding='SAME')
            bias_3 = tf.nn.bias_add(conv_3, biases_3)
            normalized_3 = tf.contrib.layers.batch_norm(
                bias_3,
                decay=0.99,
                center=False,
                scale=False,
                is_training=config.is_training,
                scope=scope,
                reuse=reuse)
            next_state = tf.nn.relu(normalized_3, name=scope.name)

        return hidden_layer_1, next_state
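For context, the functions above plug together roughly as follows. This is only a hedged sketch of the wiring; the build_predictron name and its signature are illustrative and do not appear in the examples.

def build_predictron(inputs, config):
    # Encode the observation, roll the model forward, and read out the
    # per-step rewards, discounts, lambdas and values.
    state = state_representation(inputs, config)
    states, hidden_states = rollout_states(state, config)
    rewards = reward_network(hidden_states, config)
    discounts = discount_network(hidden_states, config)
    lambdas = lambda_network(hidden_states, config)
    values = value_network(states, config)
    # Combine them into k-step preturns and the lambda-weighted preturn.
    preturns = preturn_network(rewards, discounts, values)
    return preturns, lambda_preturn_network(preturns, lambdas)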