Exemplo n.º 1
0
    def _log_loss(self):
        """Register tensorboard summaries for the training loss and the Q head.

        No-op unless `self.log_tensorboard` is set.
        """
        if not self.log_tensorboard:
            return

        with tf.name_scope('loss'):
            tf.compat.v1.summary.scalar('loss_', self.loss)

        with tf.name_scope('networks'):
            stats_summary('Q', self.Qnets.Q)
Exemplo n.º 2
0
 def attention(self, q, k, v, mask=None):
     """Dot-product attention: softmax(QK^T) V.

     Args:
         q, k, v: query/key/value tensors; per the inline shape notes the
             scores are [B, H, N, N] and the output is [B, H, N, V].
         mask: optional tensor multiplied into the raw scores before the
             softmax (None disables masking).

     Returns:
         The attention output tensor, [B, H, N, V].
     """
     # softmax(QK^T/)V
     dot_product = tf.matmul(q, k, transpose_b=True) # [B, H, N, N]
     # BUG FIX: `if mask:` evaluates the truth value of a tf.Tensor, which
     # raises at graph-build/eager time; test for the None sentinel instead.
     if mask is not None:
         dot_product *= mask
     weights = tf.nn.softmax(dot_product)            # [B, H, N, N]
     x = tf.matmul(weights, v)                       # [B, H, N, V]
     # Test code to monitor saturation of softmax
     if hasattr(self, 'log_tensorboard') and self.log_tensorboard:
         with tf.name_scope('attention'):
             tf_utils.stats_summary('softmax', weights, hist=True)
             tf_utils.stats_summary('output', x)

     return x
Exemplo n.º 3
0
    def noisy(self, x, units, kernel_initializer=tc.layers.xavier_initializer(), 
               name=None, sigma=.4, factorized=True, return_noise=False):
        """ noisy layer using factorized Gaussian noise

        Builds a NoisyNet-style layer: a plain dense projection `y` plus a
        noisy projection `o = x @ (noisy_w * eps_w) + noisy_b * eps_b`, where
        the epsilon tensors are freshly sampled noise ops in the graph.

        Args:
            x: input tensor; its last dimension is used as the feature size.
            units: output dimensionality of the layer.
            kernel_initializer: initializer for both the dense weights and
                `noisy_w`.
            name: optional variable-scope name; resolved via
                `self.get_name(name, 'noisy')`.
            sigma: stddev of the sampled noise and scale of the `noisy_b`
                initializer.
            factorized: if True, use factorized noise (rank-1 outer product
                of per-input and per-output noise vectors); otherwise sample
                independent noise for every weight and bias.
            return_noise: if True, return `(o, y)` separately; otherwise
                return their sum `o + y`.
        """
        name = self.get_name(name, 'noisy')
        
        # Deterministic (noise-free) dense path, added to the noisy path below.
        y = self.dense(x, units, kernel_initializer=kernel_initializer)
            
        with tf.variable_scope(name):
            # params for the noisy layer
            features = x.shape.as_list()[-1]

            if factorized:
                # Factorized noise: eps_w = f(eps_in) f(eps_out)^T with
                # f(v) = sign(v) * sqrt(|v|), so only features+units samples
                # are drawn instead of features*units.
                w_in_dim = [features, 1]
                w_out_dim = [1, units]
                w_shape = [features, units]
                b_shape = [units]
                epsilon_w_in = tf.random.truncated_normal(w_in_dim, stddev=sigma)
                epsilon_w_in = tf.math.sign(epsilon_w_in) * tf.math.sqrt(tf.math.abs(epsilon_w_in))
                epsilon_w_out = tf.random.truncated_normal(w_out_dim, stddev=sigma)
                epsilon_w_out = tf.math.sign(epsilon_w_out) * tf.math.sqrt(tf.math.abs(epsilon_w_out))
                epsilon_w = tf.matmul(epsilon_w_in, epsilon_w_out, name='epsilon_w')
                # Bias noise reuses the per-output factor.
                epsilon_b = tf.reshape(epsilon_w_out, b_shape)
            else:
                # Independent noise: one sample per weight and per bias.
                w_shape = [features, units]
                b_shape = [units]
                epsilon_w = tf.random.truncated_normal(w_shape, stddev=sigma, name='epsilon_w')
                epsilon_b = tf.random.truncated_normal(b_shape, stddev=sigma, name='epsilon_b')

            # Learned noise scales; noisy_b starts at sigma/sqrt(units).
            noisy_w = tf.get_variable('noisy_w', shape=w_shape, 
                                        initializer=kernel_initializer,
                                        regularizer=self.l2_regularizer)
            noisy_b = tf.get_variable('noisy_b', shape=b_shape, 
                                        initializer=tf.constant_initializer(sigma / np.sqrt(units)))
            
            # output of the noisy layer
            o = tf.matmul(x, noisy_w * epsilon_w) + noisy_b * epsilon_b
        if hasattr(self, 'log_tensorboard') and self.log_tensorboard:
            with tf.name_scope(f'{name}_log'):
                tf_utils.stats_summary('x', x, std=True, hist=True)
                tf_utils.stats_summary('w', noisy_w, std=True, hist=True)
                tf_utils.stats_summary('b', noisy_b, std=True, hist=True)
                tf_utils.stats_summary('o', o, std=True, hist=True)
                tf_utils.stats_summary('y', y, std=True, hist=True)

        if return_noise:
            return o, y
        else:
            return o + y
Exemplo n.º 4
0
 def _log_loss(self):
     """Add tensorboard summaries for actor/critic losses, Q values,
     rewards, and replay priorities. No-op unless logging is enabled."""
     if not self.log_tensorboard:
         return
     with tf.name_scope('info'):
         tf.compat.v1.summary.scalar('actor_loss_', self.actor_loss)
         tf.compat.v1.summary.scalar('critic_loss_', self.critic_loss)
         stats_summary('Q_with_actor', self.critic.Q_with_actor,
                       max=True, hist=True)
         stats_summary('reward', self.data['reward'],
                       min=True, hist=True)
         stats_summary('priority', self.priority, hist=True, max=True)
Exemplo n.º 5
0
 def _log_loss(self):
     """Register tensorboard summaries: reward statistics, actor
     diagnostics (actions, log-probs, entropy, loss), critic diagnostics
     (twin Q values, losses, replay priorities), and — when the
     temperature is learned — alpha and its loss."""
     if not self.log_tensorboard:
         return
     with tf.name_scope('info'):
         stats_summary('reward', self.data['reward'],
                       min=True, max=True, hist=True)
         with tf.name_scope('actor'):
             dist = self.actor.action_distribution
             stats_summary('orig_action', self.actor.orig_action)
             stats_summary('entropy', dist.entropy())
             stats_summary('action_std', dist.std)
             stats_summary('orig_logpi', self.actor.orig_logpi)
             tf.compat.v1.summary.scalar('orig_logpi_0', self.actor.orig_logpi[0][0])
             stats_summary('action', self.actor.action)
             stats_summary('logpi', self.actor.logpi)
             tf.compat.v1.summary.scalar('actor_loss_', self.actor_loss)
         with tf.name_scope('critic'):
             stats_summary('Q1_with_actor', self.critic.Q1_with_actor,
                           min=True, max=True)
             stats_summary('Q2_with_actor', self.critic.Q2_with_actor,
                           min=True, max=True)
             # Priorities only exist for proportional prioritized replay.
             if self.buffer_type == 'proportional':
                 stats_summary('priority', self.priority,
                               std=True, max=True, hist=True)
             tf.compat.v1.summary.scalar('Q1_loss_', self.Q1_loss)
             tf.compat.v1.summary.scalar('Q2_loss_', self.Q2_loss)
             tf.compat.v1.summary.scalar('critic_loss_', self.critic_loss)
         # 'auto' indicates the entropy temperature is a learned variable.
         if self.raw_temperature == 'auto':
             with tf.name_scope('alpha'):
                 stats_summary('alpha', self.alpha, std=True)
                 tf.compat.v1.summary.scalar('alpha_loss', self.alpha_loss)
Exemplo n.º 6
0
    def _log_info(self):
        if self.log_tensorboard:
            with tf.name_scope('info'):
                with tf.name_scope('value'):
                    stats_summary('V', self.ac.V)
                    stats_summary('advantage', self.env_phs['advantage'])
                    stats_summary('return', self.env_phs['return'])

                    stats_summary('V_mask',
                                  self.ac.V * self.env_phs['mask'],
                                  hist=True)
                    stats_summary(
                        'advantage_mask',
                        self.env_phs['advantage'] * self.env_phs['mask'])
                    stats_summary(
                        'return_mask',
                        self.env_phs['return'] * self.env_phs['mask'])

                    stats_summary('mask', self.env_phs['mask'])

                with tf.name_scope('policy'):
                    stats_summary('mean_', self.ac.action_distribution.mean)
                    stats_summary('std_', self.ac.action_distribution.std)