def _encode_state(self, state, reuse):
    """Build the state-encoder layer shared by the networks in this class.

    Args:
        state: Input tensor fed to the encoder layer.
        reuse: Forwarded to `tf.variable_scope` as its `reuse` argument.

    Returns:
        The output of a 512-unit `_dense_norm_activation` layer with ReLU
        activation, created under the 'state_encoder' variable scope.
    """
    with tf.variable_scope('state_encoder', reuse=reuse):
        # The initializer is constructed inside the scope, as in the
        # original call expression.
        initializer = tf_utils.kaiming_initializer()
        return self._dense_norm_activation(
            state,
            512,
            kernel_initializer=initializer,
            activation=tf.nn.relu,
        )
def _critic(self, state, action, reuse):
    """Build the critic head on top of the encoded state and the action.

    Args:
        state: Input tensor passed to `_encode_state`.
        action: Tensor concatenated with the encoded state along axis 1.
        reuse: Forwarded to the 'critic' variable scope only.

    Returns:
        The output of the final single-unit `_dense` layer.
    """
    # The encoder is always requested with reuse=True, independent of the
    # `reuse` argument. NOTE(review): presumably the encoder variables are
    # expected to exist already (e.g. created by the actor) -- confirm.
    encoded = self._encode_state(state, True)

    with tf.variable_scope('critic', reuse=reuse):
        joint = tf.concat([encoded, action], 1)
        hidden = self._dense_norm_activation(
            joint,
            256,
            kernel_initializer=tf_utils.kaiming_initializer(),
            activation=tf.nn.relu,
        )
        return self._dense(hidden, 1)
def _actor(self, state):
    """Build the actor head that maps a state to a clipped action tensor.

    Both the state encoder and the 'actor' variable scope use `self.reuse`
    as their `reuse` flag.

    Args:
        state: Input tensor passed to `_encode_state`.

    Returns:
        A tensor with `self.action_size` units, clipped to [-1, 1] by an op
        named 'action'.
    """
    encoded = self._encode_state(state, self.reuse)

    with tf.variable_scope('actor', reuse=self.reuse):
        hidden = self._dense_norm_activation(
            encoded,
            256,
            kernel_initializer=tf_utils.kaiming_initializer(),
            activation=tf.nn.relu,
        )
        # The output layer relies on `_dense_norm_activation`'s own defaults
        # for initializer and activation.
        raw_action = self._dense_norm_activation(hidden, self.action_size)
        return tf.clip_by_value(raw_action, -1, 1, name='action')
def _dense_ln_relu(self, x, units, kernel_initializer=tf_utils.kaiming_initializer(),
                   trainable=True, name=None, reuse=None):
    """Apply `self._dense` followed by `tf_utils.ln_relu`.

    Args:
        x: Input tensor.
        units: Passed to `_dense` as the second positional argument.
        kernel_initializer: Forwarded to `_dense`. The default is created
            once, when the method is defined.
        trainable: Forwarded to `_dense`.
        name: Forwarded to `_dense`.
        reuse: Forwarded to `_dense`.

    Returns:
        The result of `tf_utils.ln_relu` applied to the dense output.
    """
    dense_kwargs = {
        'kernel_initializer': kernel_initializer,
        'trainable': trainable,
        'name': name,
        'reuse': reuse,
    }
    dense_out = self._dense(x, units, **dense_kwargs)
    return tf_utils.ln_relu(dense_out)
def _convtrans_bn_relu(self, x, filters, kernel_size, strides=1, padding='same',
                       kernel_initializer=tf_utils.kaiming_initializer(),
                       trainable=True, name=None, reuse=None):
    """Apply `self._convtrans` followed by `tf_utils.bn_relu`.

    Args:
        x: Input tensor.
        filters: Passed to `_convtrans` as the second positional argument.
        kernel_size: Passed to `_convtrans` as the third positional argument.
        strides: Forwarded to `_convtrans`.
        padding: Forwarded to `_convtrans`.
        kernel_initializer: Forwarded to `_convtrans`. The default is created
            once, when the method is defined.
        trainable: Forwarded to `_convtrans`.
        name: Forwarded to `_convtrans`.
        reuse: Forwarded to `_convtrans`.

    Returns:
        The result of `tf_utils.bn_relu` applied to the `_convtrans` output,
        with `self._training` as its second argument.
    """
    conv_kwargs = {
        'strides': strides,
        'padding': padding,
        'kernel_initializer': kernel_initializer,
        'trainable': trainable,
        'name': name,
        'reuse': reuse,
    }
    conv_out = self._convtrans(x, filters, kernel_size, **conv_kwargs)
    # `self._training` is read after the layer is built, as in the original.
    return tf_utils.bn_relu(conv_out, self._training)