def _encode_state(self, state, reuse):
        """Run `state` through the single layer of the 'state_encoder' scope.

        Both `_critic` and `_actor` call this method, so they go through the
        same variable scope name.

        Args:
            state: Input handed to `_dense_norm_activation`.
            reuse: Forwarded as `reuse` to `tf.variable_scope`.

        Returns:
            The output of a 512-unit `_dense_norm_activation` layer that uses
            `tf.nn.relu` and the initializer returned by
            `tf_utils.kaiming_initializer()`.
        """
        with tf.variable_scope('state_encoder', reuse=reuse):
            # Collect the keyword options first; the initializer is still
            # created inside the scope, exactly once per call.
            layer_options = dict(
                kernel_initializer=tf_utils.kaiming_initializer(),
                activation=tf.nn.relu)
            encoded = self._dense_norm_activation(state, 512, **layer_options)

        return encoded
    def _critic(self, state, action, reuse):
        """Build the critic output from a state and an action.

        Args:
            state: Input that is first passed through `_encode_state`.
            action: Tensor concatenated with the encoded state along axis 1.
            reuse: Forwarded as `reuse` to the 'critic' `tf.variable_scope`.

        Returns:
            The output of the final `_dense` layer with a single unit.
        """
        # The encoder scope is always opened with reuse=True here, regardless
        # of `reuse`. NOTE(review): presumably this relies on the encoder
        # variables having been created earlier (e.g. by `_actor`) - confirm.
        encoded = self._encode_state(state, True)

        with tf.variable_scope('critic', reuse=reuse):
            joint = tf.concat([encoded, action], 1)
            hidden = self._dense_norm_activation(
                joint,
                256,
                kernel_initializer=tf_utils.kaiming_initializer(),
                activation=tf.nn.relu)
            value = self._dense(hidden, 1)

        return value
    def _actor(self, state):
        """Build the actor output for `state`, clipped to [-1, 1].

        `self.reuse` is used both for the state encoder and for the 'actor'
        variable scope.

        Args:
            state: Input that is first passed through `_encode_state`.

        Returns:
            The result of `tf.clip_by_value(..., -1, 1, name='action')`
            applied to a `self.action_size`-unit layer.
        """
        features = self._encode_state(state, self.reuse)

        with tf.variable_scope('actor', reuse=self.reuse):
            hidden = self._dense_norm_activation(
                features,
                256,
                kernel_initializer=tf_utils.kaiming_initializer(),
                activation=tf.nn.relu)
            # The output layer relies on the defaults of
            # `_dense_norm_activation` (no initializer/activation given).
            raw_action = self._dense_norm_activation(hidden, self.action_size)
            clipped_action = tf.clip_by_value(raw_action, -1, 1, name='action')

        return clipped_action
예제 #4
0
    def _dense_ln_relu(self,
                       x,
                       units,
                       kernel_initializer=tf_utils.kaiming_initializer(),
                       trainable=True,
                       name=None,
                       reuse=None):
        """Apply `_dense` followed by `tf_utils.ln_relu`.

        Args:
            x: Input handed to `_dense`.
            units: Number of units handed to `_dense`.
            kernel_initializer: Forwarded to `_dense`. The default is the
                object returned by `tf_utils.kaiming_initializer()`, created
                once when this method is defined.
            trainable: Forwarded to `_dense`.
            name: Forwarded to `_dense`.
            reuse: Forwarded to `_dense`.

        Returns:
            The result of `tf_utils.ln_relu` on the dense output
            (presumably layer normalization + ReLU, judging by the helper's
            name - confirm in `tf_utils`).
        """
        dense_out = self._dense(
            x,
            units,
            kernel_initializer=kernel_initializer,
            trainable=trainable,
            name=name,
            reuse=reuse,
        )
        return tf_utils.ln_relu(dense_out)
예제 #5
0
    def _convtrans_bn_relu(self,
                           x,
                           filters,
                           kernel_size,
                           strides=1,
                           padding='same',
                           kernel_initializer=tf_utils.kaiming_initializer(),
                           trainable=True,
                           name=None,
                           reuse=None):
        """Apply `_convtrans` followed by `tf_utils.bn_relu`.

        Args:
            x: Input handed to `_convtrans`.
            filters: Forwarded to `_convtrans`.
            kernel_size: Forwarded to `_convtrans`.
            strides: Forwarded to `_convtrans`; defaults to 1.
            padding: Forwarded to `_convtrans`; defaults to 'same'.
            kernel_initializer: Forwarded to `_convtrans`. The default is the
                object returned by `tf_utils.kaiming_initializer()`, created
                once when this method is defined.
            trainable: Forwarded to `_convtrans`.
            name: Forwarded to `_convtrans`.
            reuse: Forwarded to `_convtrans`.

        Returns:
            The result of `tf_utils.bn_relu(conv_out, self._training)`
            (presumably batch normalization + ReLU, judging by the helper's
            name - confirm in `tf_utils`).
        """
        conv_out = self._convtrans(
            x,
            filters,
            kernel_size,
            strides=strides,
            padding=padding,
            kernel_initializer=kernel_initializer,
            trainable=trainable,
            name=name,
            reuse=reuse,
        )
        # `self._training` is read only after the convolution has been built,
        # matching the original statement order.
        activated = tf_utils.bn_relu(conv_out, self._training)
        return activated