# Imports assumed by the functions in this module (TF 1.x era). contrib_W_init,
# act_dict, NetworkCreator, MLPCreator and get_tf_collection_var_list are
# defined elsewhere in this repo; contrib_W_init is commonly an alias for
# tf.contrib.layers.xavier_initializer.
import tensorflow as tf
import tensorflow.contrib as tc
import tensorflow.contrib as tf_contrib
import tensorlayer as tl


def __call__(self, obs, action, reuse=False):
    with tf.variable_scope(self.name) as scope:
        if reuse:
            scope.reuse_variables()
        x = obs
        y = action
        for i, config in enumerate(self.net_config):
            if i == 0:
                # First layer: embed the observation and the action
                # separately, then concatenate the two streams.
                x = tf.layers.dense(
                    x, config['N_UNITS'],
                    kernel_initializer=contrib_W_init(),
                    bias_initializer=tf.constant_initializer(
                        value=config['B_INIT_VALUE']))
                y = tf.layers.dense(
                    y, config['N_UNITS'],
                    kernel_initializer=contrib_W_init(),
                    bias_initializer=tf.constant_initializer(
                        value=config['B_INIT_VALUE']))
                x = tf.concat([x, y], axis=1)
            else:
                if config['TYPE'] == 'DENSE':
                    x = tf.layers.dense(
                        x, config['N_UNITS'],
                        kernel_initializer=contrib_W_init(),
                        bias_initializer=tf.constant_initializer(
                            value=config['B_INIT_VALUE']))
                else:
                    raise NotImplementedError(
                        "Unsupported layer type: %s" % config['TYPE'])
                # Layer normalization on every hidden layer but the output.
                if self.layer_norm == 1 and config['NAME'] != 'OUTPUT':
                    x = tc.layers.layer_norm(x, center=True, scale=True)
                if act_dict[config['ACT']] is None:
                    raise NotImplementedError(
                        "Unsupported activation function: %s" % config['ACT'])
                x = act_dict[config['ACT']](x)
        return x
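
# --- Usage sketch (hypothetical): assuming `critic` is an instance of the
# enclosing class above, with `name`, `net_config`, and `layer_norm` already
# set; the placeholder shapes are illustrative. The second call with
# reuse=True shares the variables created by the first.
obs_ph = tf.placeholder(tf.float32, shape=(None, 17), name='obs_ph')
act_ph = tf.placeholder(tf.float32, shape=(None, 6), name='act_ph')
q_main = critic(obs_ph, act_ph)                 # first call creates variables
q_shared = critic(obs_ph, act_ph, reuse=True)   # second call reuses them
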
def create_network(input, network_config, net_name=None, input_norm=None,
                   output_norm=None, output_low=None, output_high=None,
                   reuse=False):
    # network_config is a list of dicts, one per layer.
    # input_norm and output_norm are each a list of two TensorFlow
    # placeholders: (shift, scale) for the input, (scale, shift) for the output.
    net = tl.layers.InputLayer(inputs=input, name=net_name + '_INPUT')
    if input_norm:
        net = tl.layers.LambdaLayer(
            prev_layer=net,
            fn=lambda x: (x - input_norm[0]) / input_norm[1])
    last_layer_act = None
    for layer_config in network_config:
        if layer_config['TYPE'] == 'DENSE':
            # The literal string 'None' in the config disables the bias term.
            if layer_config['B_INIT_VALUE'] == 'None':
                b_init = None
            else:
                b_init = tf.constant_initializer(
                    value=layer_config['B_INIT_VALUE'])
            net = tl.layers.DenseLayer(
                prev_layer=net,
                n_units=layer_config['N_UNITS'],
                act=NetworkCreator.act_dict[layer_config['ACT']],
                name=net_name + '_' + layer_config['NAME'],
                W_init=contrib_W_init(),
                b_init=b_init)
            last_layer_act = layer_config['ACT']
    if output_norm:
        net = tl.layers.LambdaLayer(
            prev_layer=net,
            fn=lambda x: (x * output_norm[0]) + output_norm[1],
            name=net_name + '_NORM')
    if output_high is not None and output_low is not None:
        if last_layer_act != "IDENTITY":
            raise ValueError(
                'Please set the last layer activation to identity in order '
                'to use output scaling')
        # Squash to (-1, 1) with tanh, then rescale into
        # [output_low, output_high].
        net = tl.layers.LambdaLayer(prev_layer=net,
                                    fn=lambda x: tf.nn.tanh(x),
                                    name=net_name + '_TANH')
        net = tl.layers.LambdaLayer(
            prev_layer=net,
            fn=lambda x: (x + 1.0) / 2.0 * (output_high - output_low)
            + output_low,
            name=net_name + '_NORM_AFTER_TANH')
    # TODO: add support for more layer types.
    return net, net.outputs, net.all_params
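
# --- Usage sketch (hypothetical config; assumes NetworkCreator.act_dict maps
# the 'TANH' and 'IDENTITY' keys referenced in this module): build a two-layer
# value head with TensorLayer and collect its parameters.
state_ph = tf.placeholder(tf.float32, shape=(None, 4), name='state_ph')
value_net_config = [
    {'TYPE': 'DENSE', 'NAME': 'L1', 'N_UNITS': 64,
     'ACT': 'TANH', 'B_INIT_VALUE': 0.0},
    {'TYPE': 'DENSE', 'NAME': 'OUTPUT', 'N_UNITS': 1,
     'ACT': 'IDENTITY', 'B_INIT_VALUE': 0.0},
]
v_net, v_out, v_params = create_network(input=state_ph,
                                        network_config=value_net_config,
                                        net_name='VALUE')
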
def __call__(self, obs, reuse=False):
    with tf.variable_scope(self.name) as scope:
        if reuse:
            scope.reuse_variables()
        x = obs
        last_act = None
        for config in self.net_config:
            # The literal string 'None' in the config disables the bias term;
            # skip the constant initializer in that case.
            use_bias = config['B_INIT_VALUE'] != 'None'
            if config['TYPE'] == 'DENSE':
                x = tf.layers.dense(
                    x, config['N_UNITS'],
                    kernel_initializer=contrib_W_init(),
                    bias_initializer=tf.constant_initializer(
                        value=config['B_INIT_VALUE']) if use_bias else None,
                    use_bias=use_bias)
            else:
                raise NotImplementedError(
                    "Unsupported layer type: %s" % config['TYPE'])
            if self.layer_norm == 1 and config['NAME'] != 'OUTPUT':
                x = tc.layers.layer_norm(x, center=True, scale=True)
            if act_dict[config['ACT']] is None:
                raise NotImplementedError(
                    "Unsupported activation function: %s" % config['ACT'])
            x = act_dict[config['ACT']](x)
            last_act = config['ACT']
        # Rescale the squashed output into [action_low, action_high].
        if last_act == "TANH":
            x = ((x + 1.0) / 2.0 * (self.action_high - self.action_low)
                 + self.action_low)
        elif last_act == 'SIGMOID':
            x = x * (self.action_high - self.action_low) + self.action_low
        else:
            raise ValueError(
                'The last layer activation must be TANH or SIGMOID')
        return x
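
# --- Usage sketch (hypothetical): assuming `actor` is an instance of the
# enclosing class with `action_low`/`action_high` set, a TANH output layer is
# squashed to (-1, 1) and then rescaled into the action bounds.
actor_obs_ph = tf.placeholder(tf.float32, shape=(None, 17), name='actor_obs')
pi_main = actor(actor_obs_ph)                 # builds the policy head
pi_target = actor(actor_obs_ph, reuse=True)   # shares the same weights
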
def create_network_with_tf_layers(input: tf.Tensor, network_config: list,
                                  tf_var_scope: str, net_name='',
                                  input_norm=None, output_norm=None,
                                  reuse=False, output_low=None,
                                  output_high=None):
    """
    Create an MLP network from an input tensor.

    Warning: this will create an input net that cuts the gradients off from
    the input tensor and its previous ops.

    :param input:
    :param network_config:
    :param net_name:
    :param tf_var_scope:
    :param input_norm:
    :param output_norm:
    :param reuse:
    :param output_low:
    :param output_high:
    :return:
    """
    # Enter the variable scope manually so its name can be asserted and the
    # scope can be exited explicitly at the end.
    pre_var_scope_name = tf.get_variable_scope().name
    tf_var_scope_context = tf.variable_scope(tf_var_scope)
    tf_var_scope_context.__enter__()
    if pre_var_scope_name != '':
        assert tf.get_variable_scope().name == "{}/{}".format(
            pre_var_scope_name, tf_var_scope)
    else:
        assert tf.get_variable_scope().name == "{}".format(tf_var_scope)
    if reuse:
        tf.get_variable_scope().reuse_variables()
    net = input
    if input_norm:
        net = (net - input_norm[0]) / input_norm[1]
    last_layer_act = None
    for layer_config in network_config:
        if layer_config['TYPE'] == 'DENSE':
            if layer_config['B_INIT_VALUE'] is None:
                b_init = None
            else:
                b_init = tf.constant_initializer(
                    value=layer_config['B_INIT_VALUE'])
            l1_norm = layer_config.get('L1_NORM', 0.0)
            l2_norm = layer_config.get('L2_NORM', 0.0)
            net = tf.layers.dense(
                inputs=net,
                units=layer_config['N_UNITS'],
                activation=MLPCreator.act_dict[layer_config['ACT']],
                use_bias=b_init is not None,
                kernel_initializer=contrib_W_init(),
                kernel_regularizer=tf_contrib.layers.l1_l2_regularizer(
                    l1_norm, l2_norm),
                bias_regularizer=tf_contrib.layers.l1_l2_regularizer(
                    l1_norm, l2_norm),
                bias_initializer=b_init,
                name=net_name + '_' + layer_config['NAME'],
                reuse=reuse)
            last_layer_act = layer_config['ACT']
    if output_norm:
        net = (net * output_norm[0]) + output_norm[1]
    if output_high is not None and output_low is not None:
        if last_layer_act not in ("IDENTITY", 'LINEAR'):
            raise ValueError(
                'Please set the last layer activation to IDENTITY/LINEAR in '
                'order to use output scaling; a TANH will be appended by '
                'default')
        net = tf.tanh(net)
        net = (net + 1.0) / 2.0 * (output_high - output_low) + output_low
    # TODO: the collection may contain extra variables that were instantiated
    # by others but share the same name scope.
    net_all_params = get_tf_collection_var_list(
        key=tf.GraphKeys.GLOBAL_VARIABLES,
        scope=tf.get_variable_scope().name)
    if tf_var_scope_context is not None:
        tf_var_scope_context.__exit__(type_arg=None, value_arg=None,
                                      traceback_arg=None)
    assert tf.get_variable_scope().name == pre_var_scope_name
    return net, net, net_all_params
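
# --- Usage sketch (hypothetical scope and config names; TF 1.x): an
# IDENTITY-activated output layer lets the function append a tanh and rescale
# the result into [output_low, output_high]. The function returns the output
# tensor twice (net, net), so the second value can be discarded.
policy_obs_ph = tf.placeholder(tf.float32, shape=(None, 4), name='policy_obs')
policy_config = [
    {'TYPE': 'DENSE', 'NAME': 'L1', 'N_UNITS': 64,
     'ACT': 'TANH', 'B_INIT_VALUE': 0.0},
    {'TYPE': 'DENSE', 'NAME': 'OUTPUT', 'N_UNITS': 2,
     'ACT': 'IDENTITY', 'B_INIT_VALUE': 0.0},
]
policy_out, _, policy_params = create_network_with_tf_layers(
    input=policy_obs_ph,
    network_config=policy_config,
    tf_var_scope='POLICY',
    net_name='POLICY',
    output_low=-1.0,
    output_high=1.0)
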