Пример #1
0
    def _build_net_critic(self, net_name):
        """Build the critic network and return its unnormalized value tensor.

        The network input is the normalized state, followed by the normalized
        goal when ``has_goal()`` is true, followed by the output of the
        ``MyCNN`` network when the current mode is ``CWT_CNN_v1`` or
        ``CWT_CNN_v2``.  A single-unit dense layer with no activation sits on
        top of the network selected by ``net_name``.

        Args:
            net_name: Name passed to ``NetBuilder.build_net`` to select the
                network to build.

        Returns:
            The result of ``self.val_norm.unnormalize_tf`` applied to the
            critic output after it has been reshaped to ``[-1]``.
        """
        net_inputs = [self.s_norm.normalize_tf(self.s_tf)]
        if self.has_goal():
            net_inputs.append(self.g_norm.normalize_tf(self.g_tf))

        # The CNN branch over self.wt_tf is only added in the CWT_CNN modes.
        if Settings.mode() in (Mode.CWT_CNN_v1, Mode.CWT_CNN_v2):
            cnn_out = NetBuilder.build_net(
                MyCNN.NAME, self.wt_tf, self.my_memory_buffer.channel_count)
            net_inputs.append(cnn_out)

        hidden = NetBuilder.build_net(net_name, net_inputs)
        value_head = tf.layers.dense(
            inputs=hidden, units=1, activation=None,
            kernel_initializer=TFUtil.xavier_initializer)

        # Flatten the (..., 1) head output before undoing the normalization.
        flat_value = tf.reshape(value_head, [-1])
        return self.val_norm.unnormalize_tf(flat_value)
Пример #2
0
    def _build_net_actor(self, net_name, init_output_scale):
        """Build the actor network and return its unnormalized action tensor.

        The network input is the normalized state, followed by the normalized
        goal when ``has_goal()`` is true, followed by the output of the
        ``MyCNN`` network when the current mode is ``CWT_CNN_v1`` or
        ``CWT_CNN_v2``.  The output layer is a dense layer with no activation
        and ``get_action_size()`` units.

        Args:
            net_name: Name passed to ``NetBuilder.build_net`` to select the
                network to build.
            init_output_scale: Bound of the uniform initializer for the
                output layer's kernel; weights are drawn from
                ``[-init_output_scale, init_output_scale]``.

        Returns:
            The result of ``self.a_norm.unnormalize_tf`` applied to the
            output layer.
        """
        net_inputs = [self.s_norm.normalize_tf(self.s_tf)]
        if self.has_goal():
            net_inputs.append(self.g_norm.normalize_tf(self.g_tf))

        # The CNN branch over self.wt_tf is only added in the CWT_CNN modes.
        if Settings.mode() in (Mode.CWT_CNN_v1, Mode.CWT_CNN_v2):
            cnn_out = NetBuilder.build_net(
                MyCNN.NAME, self.wt_tf, self.my_memory_buffer.channel_count)
            net_inputs.append(cnn_out)

        hidden = NetBuilder.build_net(net_name, net_inputs)
        output_init = tf.random_uniform_initializer(
            minval=-init_output_scale, maxval=init_output_scale)
        action_head = tf.layers.dense(
            inputs=hidden,
            units=self.get_action_size(),
            activation=None,
            kernel_initializer=output_init)

        return self.a_norm.unnormalize_tf(action_head)
Пример #3
0
    def _build_net_critic(self, net_name, input_tfs, reuse=False):
        """Build the critic head inside the ``'critic'`` variable scope.

        Args:
            net_name: Name passed to ``NetBuilder.build_net`` to select the
                network to build.
            input_tfs: Inputs forwarded unchanged to ``NetBuilder.build_net``.
            reuse: Forwarded to the variable scope, to
                ``NetBuilder.build_net`` and to the dense output layer.

        Returns:
            The single-unit dense output with its last axis squeezed away.
        """
        with tf.variable_scope('critic', reuse=reuse):
            hidden = NetBuilder.build_net(net_name, input_tfs, reuse)
            value_out = tf.layers.dense(
                inputs=hidden,
                units=1,
                activation=None,
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                reuse=reuse)
            # Drop the trailing size-1 axis produced by the single output unit.
            return tf.squeeze(value_out, axis=-1)
Пример #4
0
 def _build_net_actor(self, net_name, init_output_scale):
     """Build the actor network and return its unnormalized action tensor.

     The network input is the normalized state, followed by the normalized
     goal when ``has_goal()`` is true.  The output layer is a dense layer
     with no activation and ``get_action_size()`` units.

     Args:
         net_name: Name passed to ``NetBuilder.build_net`` to select the
             network to build.
         init_output_scale: Bound of the uniform initializer for the output
             layer's kernel; weights are drawn from
             ``[-init_output_scale, init_output_scale]``.

     Returns:
         The result of ``self.a_norm.unnormalize_tf`` applied to the output
         layer.
     """
     net_inputs = [self.s_norm.normalize_tf(self.s_tf)]
     if self.has_goal():
         net_inputs.append(self.g_norm.normalize_tf(self.g_tf))

     hidden = NetBuilder.build_net(net_name, net_inputs)
     output_init = tf.random_uniform_initializer(
         minval=-init_output_scale, maxval=init_output_scale)
     action_head = tf.layers.dense(
         inputs=hidden,
         units=self.get_action_size(),
         activation=None,
         kernel_initializer=output_init)

     return self.a_norm.unnormalize_tf(action_head)
Пример #5
0
 def _build_net_actor(self, net_name, init_output_scale):
     """Construct the actor graph and return the unnormalized action output.

     Inputs to the network are the normalized state and, when ``has_goal()``
     is true, the normalized goal.  A linear dense layer with
     ``get_action_size()`` units produces the normalized action.

     Args:
         net_name: Network name handed to ``NetBuilder.build_net``.
         init_output_scale: Half-width of the uniform range used to
             initialize the output layer's kernel.

     Returns:
         The output layer passed through ``self.a_norm.unnormalize_tf``.
     """
     actor_inputs = [self.s_norm.normalize_tf(self.s_tf)]
     if self.has_goal():
         actor_inputs.append(self.g_norm.normalize_tf(self.g_tf))

     features = NetBuilder.build_net(net_name, actor_inputs)
     kernel_init = tf.random_uniform_initializer(
         minval=-init_output_scale, maxval=init_output_scale)
     normalized_action = tf.layers.dense(
         inputs=features,
         units=self.get_action_size(),
         activation=None,
         kernel_initializer=kernel_init)

     return self.a_norm.unnormalize_tf(normalized_action)
Пример #6
0
    def _build_net_critic(self, net_name):
        """Build the critic network and return its unnormalized value tensor.

        The network input is the normalized state, followed by the normalized
        goal when ``has_goal()`` is true.  A single-unit dense layer with no
        activation sits on top of the network selected by ``net_name``.

        Args:
            net_name: Name passed to ``NetBuilder.build_net`` to select the
                network to build.

        Returns:
            The result of ``self.val_norm.unnormalize_tf`` applied to the
            critic output after it has been reshaped to ``[-1]``.
        """
        net_inputs = [self.s_norm.normalize_tf(self.s_tf)]
        if self.has_goal():
            net_inputs.append(self.g_norm.normalize_tf(self.g_tf))

        hidden = NetBuilder.build_net(net_name, net_inputs)
        value_head = tf.layers.dense(
            inputs=hidden,
            units=1,
            activation=None,
            kernel_initializer=TFUtil.xavier_initializer)

        # Flatten the (..., 1) head output before undoing the normalization.
        flat_value = tf.reshape(value_head, [-1])
        return self.val_norm.unnormalize_tf(flat_value)
Пример #7
0
    def _build_net_critic(self, net_name):
        """Construct the critic graph and return the unnormalized value output.

        Inputs to the network are the normalized state and, when
        ``has_goal()`` is true, the normalized goal.  A linear dense layer
        with one unit produces the normalized value.

        Args:
            net_name: Network name handed to ``NetBuilder.build_net``.

        Returns:
            The one-unit output, reshaped to ``[-1]`` and passed through
            ``self.val_norm.unnormalize_tf``.
        """
        critic_inputs = [self.s_norm.normalize_tf(self.s_tf)]
        if self.has_goal():
            critic_inputs.append(self.g_norm.normalize_tf(self.g_tf))

        features = NetBuilder.build_net(net_name, critic_inputs)
        normalized_value = tf.layers.dense(
            inputs=features,
            units=1,
            activation=None,
            kernel_initializer=TFUtil.xavier_initializer)

        # Reshape to a 1-D tensor, then map back to the unnormalized scale.
        normalized_value = tf.reshape(normalized_value, [-1])
        return self.val_norm.unnormalize_tf(normalized_value)
Пример #8
0
 def _build_disc_net(self, net_name, input_tfs, init_output_scale, reuse=False):
     """Build the discriminator network and return its logit tensor.

     Args:
         net_name: Name passed to ``net_builder.build_net`` to select the
             network to build.
         input_tfs: Inputs forwarded unchanged to ``net_builder.build_net``.
         init_output_scale: Bound of the uniform initializer for the logit
             layer's kernel; weights are drawn from
             ``[-init_output_scale, init_output_scale]``.
         reuse: Forwarded to ``net_builder.build_net`` and to the logit
             layer.

     Returns:
         The output of a single-unit dense layer with no activation, named
         ``self.DISC_LOGIT_NAME``.
     """
     hidden = net_builder.build_net(net_name, input_tfs, reuse)
     logit_init = tf.random_uniform_initializer(
         minval=-init_output_scale, maxval=init_output_scale)
     return tf.layers.dense(
         inputs=hidden,
         units=1,
         activation=None,
         kernel_initializer=logit_init,
         name=self.DISC_LOGIT_NAME,
         reuse=reuse)
Пример #9
0
    def _build_net_critic(self, net_name):
        """Build the critic network under ``tf.device('cpu:0')``.

        The network input is the normalized state, followed by the normalized
        goal when ``has_goal()`` is true.  A single-unit dense layer with no
        activation and a ``'glorot_uniform'`` kernel initializer sits on top
        of the network selected by ``net_name``.

        Args:
            net_name: Name passed to ``NetBuilder.build_net`` to select the
                network to build.

        Returns:
            The result of ``self.val_norm.unnormalize_tf`` applied to the
            critic output after it has been reshaped to ``[-1]``.
        """
        # Every op of the critic is created inside this device scope.
        with tf.device('cpu:0'):
            net_inputs = [self.s_norm.normalize_tf(self.s_tf)]
            if self.has_goal():
                net_inputs.append(self.g_norm.normalize_tf(self.g_tf))

            hidden = NetBuilder.build_net(net_name, net_inputs)
            value_head = tf.layers.dense(
                inputs=hidden, units=1, activation=None,
                kernel_initializer='glorot_uniform')

            flat_value = tf.reshape(value_head, [-1])
            return self.val_norm.unnormalize_tf(flat_value)
Пример #10
0
    def _build_net_actor(self, net_name, input_tfs, init_output_scale, reuse=False):
        """Build the actor as a ``TFDistributionGaussianDiag`` over actions.

        Everything is created inside the ``'actor'`` variable scope.  The
        mean and log-std kernels both use a uniform initializer on
        ``[-init_output_scale, init_output_scale]``; the mean bias starts at
        zero and the log-std bias starts at ``log(self.exp_params_curr.noise)``
        in each of the ``get_action_size()`` dimensions.

        Args:
            net_name: Name passed to ``NetBuilder.build_net`` to select the
                network to build.
            input_tfs: Inputs forwarded unchanged to ``NetBuilder.build_net``.
            init_output_scale: Bound of the uniform kernel initializers.
            reuse: Forwarded to the variable scope, to
                ``NetBuilder.build_net`` and to the distribution.

        Returns:
            The constructed ``TFDistributionGaussianDiag`` (presumably over
            normalized actions, going by the original local name
            ``norm_a_pd_tf`` -- confirm against the caller).
        """
        def _uniform_init():
            # Fresh initializer object per kernel, as in the original code.
            return tf.random_uniform_initializer(
                minval=-init_output_scale, maxval=init_output_scale)

        with tf.variable_scope('actor', reuse=reuse):
            hidden = NetBuilder.build_net(net_name, input_tfs, reuse)
            action_dim = self.get_action_size()

            # Computed in NumPy's default precision, cast to float32 below.
            noise_logstd = np.log(self.exp_params_curr.noise) * np.ones(action_dim)

            return TFDistributionGaussianDiag(
                input=hidden,
                dim=action_dim,
                std_type=TFDistributionGaussianDiag.StdType.Default,
                mean_kernel_init=_uniform_init(),
                mean_bias_init=tf.zeros_initializer(),
                logstd_kernel_init=_uniform_init(),
                logstd_bias_init=noise_logstd.astype(np.float32),
                reuse=reuse)