def __call__(self, obs, action, reuse=False):

        with tf.variable_scope(self.name) as scope:
            if reuse:
                scope.reuse_variables()

            x = obs
            y = action
            count = 0
            for config in self.net_config:
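                # first layer: linear (no activation) embeddings of obs and action
                # are built separately, then concatenated, so the action enters
                # the critic after the first layer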
                if count == 0:
                    x = tf.layers.dense(
                        x,
                        config['N_UNITS'],
                        kernel_initializer=contrib_W_init(),
                        bias_initializer=tf.constant_initializer(
                            value=config['B_INIT_VALUE']))
                    y = tf.layers.dense(
                        y,
                        config['N_UNITS'],
                        kernel_initializer=contrib_W_init(),
                        bias_initializer=tf.constant_initializer(
                            value=config['B_INIT_VALUE']))
                    x = tf.concat([x, y], axis=1)
                    count += 1
                else:
                    if config['TYPE'] == 'DENSE':
                        x = tf.layers.dense(
                            x,
                            config['N_UNITS'],
                            kernel_initializer=contrib_W_init(),
                            bias_initializer=tf.constant_initializer(
                                value=config['B_INIT_VALUE']))
                    else:
                        raise NotImplementedError(
                            "Unsupported layer type: %s" % config['TYPE'])
                    if self.layer_norm == 1 and config['NAME'] != 'OUTPUT':
                        x = tc.layers.layer_norm(x, center=True, scale=True)
                    if act_dict.get(config['ACT']) is None:
                        raise NotImplementedError(
                            "Unsupported activation function: %s" %
                            config['ACT'])
                    x = act_dict[config['ACT']](x)
                    count += 1
        return x
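# Hedged sketch of the layer-config schema consumed above, inferred from the
# keys the loop reads ('TYPE', 'N_UNITS', 'ACT', 'NAME', 'B_INIT_VALUE');
# the unit counts and the 'RELU' activation are illustrative assumptions,
# not taken from the source.
EXAMPLE_CRITIC_NET_CONFIG = [
    {'TYPE': 'DENSE', 'N_UNITS': 64, 'ACT': 'RELU', 'NAME': 'L1', 'B_INIT_VALUE': 0.0},
    {'TYPE': 'DENSE', 'N_UNITS': 64, 'ACT': 'RELU', 'NAME': 'L2', 'B_INIT_VALUE': 0.0},
    {'TYPE': 'DENSE', 'N_UNITS': 1, 'ACT': 'IDENTITY', 'NAME': 'OUTPUT', 'B_INIT_VALUE': 0.0},
]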
    def create_network(input,
                       network_config,
                       net_name='',  # concatenated into layer names below, so it must be a string
                       input_norm=None,
                       output_norm=None,
                       output_low=None,
                       output_high=None,
                       reuse=False):
        # network_config should be a list of dicts, one per layer
        # input_norm and output_norm are each a list of two TensorFlow placeholders
        # (input_norm: mean and std; output_norm: scale and shift)

        net = tl.layers.InputLayer(inputs=input, name=net_name + '_INPUT')

        if input_norm:
            net = tl.layers.LambdaLayer(prev_layer=net,
                                        fn=lambda x:
                                        (x - input_norm[0]) / input_norm[1])
        last_layer_act = None
        for layer_config in network_config:
            if layer_config['TYPE'] == 'DENSE':
                if layer_config['B_INIT_VALUE'] == 'None':
                    b_init = None
                else:
                    b_init = tf.constant_initializer(
                        value=layer_config['B_INIT_VALUE'])

                net = tl.layers.DenseLayer(
                    prev_layer=net,
                    n_units=layer_config['N_UNITS'],
                    act=NetworkCreator.act_dict[layer_config['ACT']],
                    name=net_name + '_' + layer_config['NAME'],
                    W_init=contrib_W_init(),
                    b_init=b_init)
                last_layer_act = layer_config['ACT']
        if output_norm:
            net = tl.layers.LambdaLayer(prev_layer=net,
                                        fn=lambda x:
                                        (x * output_norm[0]) + output_norm[1],
                                        name=net_name + '_NORM')
        if output_high is not None and output_low is not None:
            if last_layer_act != "IDENTITY":
                raise ValueError(
                    'Set the last layer activation to IDENTITY to use output '
                    'scaling; a TANH is appended below by default')
            net = tl.layers.LambdaLayer(prev_layer=net,
                                        fn=lambda x: tf.nn.tanh(x),
                                        name=net_name + '_TANH')
            net = tl.layers.LambdaLayer(
                prev_layer=net,
                fn=lambda x: (x + 1.0) / 2.0 *
                (output_high - output_low) + output_low,
                name=net_name + '_NORM_AFTER_TANH')

            # TODO ADD MORE SUPPORT FOR DIFFERENT LAYER
        return net, net.outputs, net.all_params
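# Hedged usage sketch for create_network; `NetworkCreator` is assumed to be the
# enclosing class (its name appears in the method body above), and the shapes,
# config values, and placeholder names are illustrative, not from the source.
import tensorflow as tf

obs_dim = 3  # illustrative
obs_ph = tf.placeholder(tf.float32, shape=[None, obs_dim], name='OBS')
obs_mean_ph = tf.placeholder(tf.float32, shape=[obs_dim], name='OBS_MEAN')
obs_std_ph = tf.placeholder(tf.float32, shape=[obs_dim], name='OBS_STD')
net, output_tensor, params = NetworkCreator.create_network(
    input=obs_ph,
    network_config=[
        {'TYPE': 'DENSE', 'N_UNITS': 32, 'ACT': 'TANH', 'NAME': 'L1', 'B_INIT_VALUE': 0.0},
        {'TYPE': 'DENSE', 'N_UNITS': 1, 'ACT': 'IDENTITY', 'NAME': 'OUTPUT', 'B_INIT_VALUE': 0.0},
    ],
    net_name='VALUE',
    input_norm=[obs_mean_ph, obs_std_ph])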
    def __call__(self, obs, reuse=False):
        with tf.variable_scope(self.name) as scope:
            if reuse:
                scope.reuse_variables()
            x = obs
            last_act = None
            for config in self.net_config:
                # a bias is only used when B_INIT_VALUE is not the string 'None'
                use_bias = config['B_INIT_VALUE'] != 'None'
                if config['TYPE'] == 'DENSE':
                    x = tf.layers.dense(
                        x,
                        config['N_UNITS'],
                        kernel_initializer=contrib_W_init(),
                        bias_initializer=tf.constant_initializer(
                            value=config['B_INIT_VALUE']) if use_bias else None,
                        use_bias=use_bias)
                else:
                    raise NotImplementedError(
                        "Unsupported layer type: %s" % config['TYPE'])
                if self.layer_norm == 1 and config['NAME'] != 'OUTPUT':
                    x = tc.layers.layer_norm(x, center=True, scale=True)
                if act_dict.get(config['ACT']) is None:
                    raise NotImplementedError(
                        "Unsupported activation function: %s" % config['ACT'])
                x = act_dict[config['ACT']](x)
                last_act = config['ACT']
            # rescale the squashed output into [action_low, action_high]
            if last_act == "TANH":
                x = (x + 1.0) / 2.0 * (self.action_high -
                                       self.action_low) + self.action_low
            elif last_act == 'SIGMOID':
                x = x * (self.action_high - self.action_low) + self.action_low
            else:
                raise ValueError(
                    'The last layer activation must be TANH or SIGMOID '
                    'so the output can be rescaled to the action bounds')
        return x
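# Quick sanity check (hypothetical helper, not from the source) of the TANH
# rescaling above: (x + 1) / 2 maps [-1, 1] onto [0, 1], which is then
# stretched and shifted to [action_low, action_high].
def _scale_from_tanh(x, low, high):
    # same affine map as the TANH branch in __call__
    return (x + 1.0) / 2.0 * (high - low) + low

assert _scale_from_tanh(-1.0, -2.0, 2.0) == -2.0  # lower bound maps to action_low
assert _scale_from_tanh(1.0, -2.0, 2.0) == 2.0    # upper bound maps to action_high
assert _scale_from_tanh(0.0, -2.0, 2.0) == 0.0    # midpoint maps to the centre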
    def create_network_with_tf_layers(input: tf.Tensor,
                                      network_config: list,
                                      tf_var_scope: str,
                                      net_name='',
                                      input_norm=None,
                                      output_norm=None,
                                      reuse=False,
                                      output_low=None,
                                      output_high=None):
        """
        Create a MLP network with a input tensor
        warning: this will create a input net which will cut the gradients from the input tensor and its
        previous op
        :param input:
        :param network_config:
        :param net_name:
        :param tf_var_scope:
        :param input_norm:
        :param output_norm:
        :param output_low:
        :param output_high:
        :return:
        """
        pre_var_scope_name = tf.get_variable_scope().name
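        # enter the variable scope manually (rather than via a with-block) so
        # the scope name can be asserted here and exited explicitly below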
        tf_var_scope_context = tf.variable_scope(tf_var_scope)
        tf_var_scope_context.__enter__()
        if pre_var_scope_name != '':
            assert tf.get_variable_scope().name == "{}/{}".format(
                pre_var_scope_name, tf_var_scope)
        else:
            assert tf.get_variable_scope().name == "{}".format(tf_var_scope)

        if reuse:
            tf.get_variable_scope().reuse_variables()
        net = input
        if input_norm:
            net = (net - input_norm[0]) / input_norm[1]
        last_layer_act = None
        for layer_config in network_config:
            if layer_config['TYPE'] == 'DENSE':
                if layer_config['B_INIT_VALUE'] is None:
                    b_init = None
                else:
                    b_init = tf.constant_initializer(
                        value=layer_config['B_INIT_VALUE'])
                l1_norm = layer_config.get('L1_NORM', 0.0)
                l2_norm = layer_config.get('L2_NORM', 0.0)
                net = tf.layers.dense(
                    inputs=net,
                    units=layer_config['N_UNITS'],
                    activation=MLPCreator.act_dict[layer_config['ACT']],
                    use_bias=b_init is not None,
                    kernel_initializer=contrib_W_init(),
                    kernel_regularizer=tf_contrib.layers.l1_l2_regularizer(
                        l1_norm, l2_norm),
                    bias_regularizer=tf_contrib.layers.l1_l2_regularizer(
                        l1_norm, l2_norm),
                    bias_initializer=b_init,
                    name=net_name + '_' + layer_config['NAME'],
                    reuse=reuse)
                last_layer_act = layer_config['ACT']
        if output_norm:
            net = (net * output_norm[0]) + output_norm[1]
        if output_high is not None and output_low is not None:
            if last_layer_act not in ('IDENTITY', 'LINEAR'):
                raise ValueError(
                    'Set the last layer activation to IDENTITY/LINEAR to use '
                    'output scaling; a TANH is appended below by default')
            net = tf.tanh(net)
            net = (net + 1.0) / 2.0 * (output_high - output_low) + output_low
        # TODO: the collection may contain extra variables that were created
        # elsewhere but share the same name scope
        net_all_params = get_tf_collection_var_list(
            key=tf.GraphKeys.GLOBAL_VARIABLES,
            scope=tf.get_variable_scope().name)
        if tf_var_scope_context is not None:
            tf_var_scope_context.__exit__(type_arg=None,
                                          value_arg=None,
                                          traceback_arg=None)
        assert tf.get_variable_scope().name == pre_var_scope_name
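        # net is returned twice so the (net, outputs, params) shape matches the
        # tensorlayer-based create_network; with raw tensors they are the same object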
        return net, net, net_all_params
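# Hedged usage sketch for create_network_with_tf_layers; `MLPCreator` is assumed
# to be the enclosing class (its name appears in the method body above), and the
# scope name, shapes, and config values are illustrative, not from the source.
import tensorflow as tf

obs_ph = tf.placeholder(tf.float32, shape=[None, 4], name='OBS')
out, _, var_list = MLPCreator.create_network_with_tf_layers(
    input=obs_ph,
    network_config=[
        {'TYPE': 'DENSE', 'N_UNITS': 64, 'ACT': 'TANH', 'NAME': 'L1', 'B_INIT_VALUE': 0.0},
        {'TYPE': 'DENSE', 'N_UNITS': 2, 'ACT': 'LINEAR', 'NAME': 'OUTPUT', 'B_INIT_VALUE': None},
    ],
    tf_var_scope='ACTOR',
    net_name='ACTOR',
    output_low=-1.0,   # with bounds given, a TANH is appended and rescaled
    output_high=1.0)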