def DiagonalwiseRefactorization(x, in_channels, stride=1, groups=4, is_training=True,
                                scope='depthwise', kernel_size=3):
    with tf.variable_scope(scope):
        channels = int(in_channels / groups)
        # Channel-diagonal mask: zeroes every cross-channel tap so the dense
        # grouped convolution computes the same result as a depthwise one.
        mask = tf.constant(get_mask(channels, kernel_size).tolist(), dtype=tf.float32,
                           shape=(kernel_size, kernel_size, channels, channels))
        splitw = [tf.get_variable('weights_%d' % i,
                                  (kernel_size, kernel_size, channels, channels),
                                  initializer=ortho_init(init_scale))
                  for i in range(groups)]
        splitw = [tf.multiply(w, mask) for w in splitw]
        # Run one masked dense convolution per channel group, then re-assemble.
        splitx = tf.split(x, groups, 1)
        splitx = [tf.nn.conv2d(xi, wi, (1, 1, stride, stride), 'SAME', data_format='NCHW')
                  for xi, wi in zip(splitx, splitw)]
        x = tf.concat(splitx, 1)
        x = tf.nn.relu(x)
        return x
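# `get_mask` is referenced above but not defined in these snippets. A minimal
# sketch of what it presumably computes, assuming the standard diagonalwise
# construction: a (K, K, C, C) array that is 1 only where the input and output
# channel indices coincide, so the masked kernel acts per channel.
import numpy as np

def get_mask(channels, kernel_size):
    mask = np.zeros((kernel_size, kernel_size, channels, channels), dtype=np.float32)
    for c in range(channels):
        mask[:, :, c, c] = 1.0  # keep only the channel-diagonal taps
    return mask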
def Pointwise(x, in_channels, out_channels, is_training=True, scope='pointwise'):
    with tf.variable_scope(scope):
        w = tf.get_variable('weights', (1, 1, in_channels, out_channels),
                            initializer=ortho_init(init_scale))
        x = tf.nn.conv2d(x, w, (1, 1, 1, 1), 'SAME', data_format='NCHW')
        x = tf.nn.relu(x)
        return x
def linear(input_tensor, scope, n_hidden, *, init_scale=1.0, init_bias=0.0):
    """
    Creates a fully connected layer for TensorFlow

    :param input_tensor: (TensorFlow Tensor) The input tensor for the fully connected layer
    :param scope: (str) The TensorFlow variable scope
    :param n_hidden: (int) The number of hidden neurons
    :param init_scale: (float) The initialization scale
    :param init_bias: (float) The initialization offset bias
    :return: (TensorFlow Tensor) fully connected layer
    """
    with tf.variable_scope(scope):
        n_input = input_tensor.get_shape()[1].value
        weight = tf.get_variable("w", [n_input, n_hidden], initializer=ortho_init(init_scale))
        bias = tf.get_variable("b", [n_hidden], initializer=tf.constant_initializer(init_bias))
        return tf.matmul(input_tensor, weight) + bias
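# Hypothetical usage sketch (not part of the original snippet): a two-layer
# tanh trunk built with `linear`, assuming TF 1.x graph mode.
import numpy as np

def _demo_linear():
    obs_ph = tf.placeholder(tf.float32, [None, 8], name='obs')
    h1 = tf.tanh(linear(obs_ph, 'fc1', n_hidden=64, init_scale=np.sqrt(2)))
    h2 = tf.tanh(linear(h1, 'fc2', n_hidden=64, init_scale=np.sqrt(2)))
    return h2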
def Depthwise(x, in_channels, stride=1, is_training=True, scope='depthwise', kernel_size=3):
    with tf.variable_scope(scope):
        w = tf.get_variable('weights', (kernel_size, kernel_size, in_channels, 1),
                            initializer=ortho_init(init_scale))
        x = tf.nn.depthwise_conv2d(x, w, (1, 1, stride, stride), 'SAME', data_format='NCHW')
        x = tf.nn.relu(x)
        return x
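# Hypothetical composition sketch (not in the original snippets): a
# MobileNet-style depthwise-separable block built from the `Depthwise` and
# `Pointwise` helpers above. Assumes NCHW inputs and the module-level
# `init_scale` those helpers rely on.
def separable_block(x, in_channels, out_channels, stride=1, scope='sep_block'):
    with tf.variable_scope(scope):
        x = Depthwise(x, in_channels, stride=stride, scope='dw')  # per-channel spatial filtering
        x = Pointwise(x, in_channels, out_channels, scope='pw')   # 1x1 cross-channel mixing
        return x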
def build_linear_layer(self, input_tensor, scope, n_hidden, *, init_scale=1.0, init_bias=0.0):
    """
    Creates a fully connected layer for TensorFlow

    :param input_tensor: (TensorFlow Tensor) The input tensor for the fully connected layer
    :param scope: (str) The TensorFlow variable scope
    :param n_hidden: (int) The number of hidden neurons
    :param init_scale: (float) The initialization scale
    :param init_bias: (float) The initialization offset bias
    :return: (TensorFlow Tensor) fully connected layer
    """
    with tf.variable_scope(scope):
        n_input = input_tensor.get_shape()[1].value
        weight = tf.get_variable("w", [n_input, n_hidden],
                                 initializer=ortho_init(init_scale),
                                 regularizer=(tf.keras.regularizers.l2(cfg.l2_coef)
                                              if cfg.is_mod(cfg.MOD_L2_REG) else None))
        bias = tf.get_variable("b", [n_hidden], initializer=tf.constant_initializer(init_bias))
        return tf.matmul(input_tensor, weight) + bias
def Conv3x3(x, in_channels, out_channels, stride=1, is_training=True, scope='convolution',
            kernel_size=3):
    with tf.variable_scope(scope):
        w = tf.get_variable('weight',
                            shape=(kernel_size, kernel_size, in_channels, out_channels),
                            initializer=ortho_init(init_scale))
        x = tf.nn.conv2d(x, w, (1, 1, stride, stride), 'SAME', data_format='NCHW')
        x = tf.nn.relu(x)
        return x
enemies = [(train_idx + 1) % 4, (train_idx + 3) % 4]
enemies.sort()
random_explore = True
update_eps = 0.2
pgn = False
n_actions = 122

# replay_buffer
hindsight = False
her_size = 30
her_K = 4

# dfp_policy
import tensorflow as tf
from stable_baselines.a2c.utils import ortho_init
init_scale = 1
conv_init = ortho_init(init_scale)
# conv_init = tf.glorot_normal_initializer()
linear_init = tf.glorot_normal_initializer()

# dfp
buffer_size = 50000
learning_starts = 10000
exploration_final_eps = 0.2
exploration_fraction = 0.05
gamma = 0.99
batch_size = 64
lr_decay_step = 5e5
decay_rate = 0.3
def init_weights(scale, shape):
    init_function = ortho_init(scale)
    return init_function(shape)
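# Hypothetical sanity check (not in the original snippet), assuming the
# stable_baselines `ortho_init`, which builds the matrix in NumPy: for a square
# shape the initializer returns a scaled orthogonal matrix, so W.T @ W should be
# close to scale**2 * I.
import numpy as np

def _check_orthogonality(scale=1.0, size=64):
    w = init_weights(scale, (size, size))  # NumPy float32 array from the initializer
    gram = np.matmul(w.T, w)
    np.testing.assert_allclose(gram, (scale ** 2) * np.eye(size), atol=1e-4)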
def __init__(self, num_processing_steps=None, latent_size=None, n_layers=None,
             edge_output_size=None, node_output_size=None, global_output_size=None,
             reducer=None, out_init_scale=5.0, name="AggregationNet"):
    super(AggregationDiffNet, self).__init__(name=name)
    if num_processing_steps is None:
        self._proc_hops = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    else:
        self._proc_hops = num_processing_steps

    if reducer is None or reducer == 'max':
        reducer = unsorted_segment_max_or_zero
    elif reducer == 'logsumexp':
        reducer = segment_logsumexp
    elif reducer == 'softmax':
        reducer = segment_transformer
    elif reducer == 'sum':
        reducer = tf.math.unsorted_segment_sum
    else:
        raise ValueError('Unknown reducer!')

    if latent_size is None:
        latent_size = 16
    if n_layers is None:
        n_layers = 2

    self._num_processing_steps = len(self._proc_hops)
    self._n_stacked = latent_size * self._num_processing_steps

    def make_mlp():
        return snt.nets.MLP([latent_size] * n_layers, activate_final=True)

    # def make_linear():
    #     return snt.nets.MLP([latent_size], activate_final=False)

    self._core = modules.GraphNetwork(edge_model_fn=make_mlp,
                                      node_model_fn=make_mlp,
                                      global_model_fn=make_mlp,
                                      edge_block_opt={'use_globals': False},
                                      node_block_opt={'use_globals': False,
                                                      'use_sent_edges': False},
                                      name="graph_net",
                                      reducer=reducer)
    self._encoder = modules.GraphIndependent(make_mlp, make_mlp, make_mlp, name="encoder")
    self._decoder = modules.GraphIndependent(make_mlp, make_mlp, make_mlp, name="decoder")
    inits = {'w': ortho_init(out_init_scale), 'b': tf.constant_initializer(0.0)}

    # Transforms the outputs into the appropriate shapes.
    edge_fn = None if edge_output_size is None else lambda: snt.Linear(
        edge_output_size, initializers=inits, name="edge_output")
    node_fn = None if node_output_size is None else lambda: snt.Linear(
        node_output_size, initializers=inits, name="node_output")
    global_fn = None if global_output_size is None else lambda: snt.Linear(
        global_output_size, initializers=inits, name="global_output")
    with self._enter_variable_scope():
        self._output_transform = modules.GraphIndependent(edge_fn, node_fn, global_fn,
                                                          name="output")
def __init__(self, num_processing_steps=None, latent_size=None, n_layers=None,
             edge_output_size=None, node_output_size=None, global_output_size=None,
             reducer=None, out_init_scale=5.0, name="AggregationNet"):
    super(AggregationNet, self).__init__(name=name)
    if num_processing_steps is None:
        self._num_processing_steps = 5
    else:
        self._num_processing_steps = num_processing_steps

    if reducer is None or reducer == 'max':
        reducer = unsorted_segment_max_or_zero
    elif reducer == 'mean':
        reducer = tf.math.unsorted_segment_mean
    elif reducer == 'sum':
        reducer = tf.math.unsorted_segment_sum
    else:
        raise ValueError('Unknown reducer!')

    if latent_size is None:
        latent_size = 16
    if n_layers is None:
        n_layers = 2

    def make_mlp():
        return snt.nets.MLP([latent_size] * n_layers, activate_final=True)

    if self._num_processing_steps > 0:
        # Edge block copies the node features onto the edges.
        core_a = blocks.EdgeBlock(edge_model_fn=lambda: Identity(),
                                  use_edges=False,
                                  use_receiver_nodes=False,
                                  use_sender_nodes=True,
                                  use_globals=False,
                                  name='LinearNodeAggGCN_core_a')
        # Then, edge data is aggregated onto the node by the reducer function.
        core_b = blocks.NodeBlock(node_model_fn=lambda: Identity(),
                                  use_received_edges=True,
                                  use_sent_edges=False,
                                  use_nodes=False,
                                  use_globals=False,
                                  received_edges_reducer=reducer,
                                  name='LinearNodeAggGCN_core_b')
        self._cores = [core_a, core_b]

    self._encoder = modules.GraphIndependent(make_mlp, make_mlp, make_mlp, name="encoder")
    self._decoder = modules.GraphIndependent(make_mlp, make_mlp, make_mlp, name="decoder")
    inits = {'w': ortho_init(out_init_scale), 'b': tf.constant_initializer(0.0)}

    # Transforms the outputs into the appropriate shapes.
    edge_fn = None if edge_output_size is None else lambda: snt.Linear(
        edge_output_size, initializers=inits, name="edge_output")
    node_fn = None if node_output_size is None else lambda: snt.Linear(
        node_output_size, initializers=inits, name="node_output")
    global_fn = None if global_output_size is None else lambda: snt.Linear(
        global_output_size, initializers=inits, name="global_output")
    with self._enter_variable_scope():
        self._output_transform = modules.GraphIndependent(edge_fn, node_fn, global_fn,
                                                          name="output")
def __init__(self, num_processing_steps=None, latent_size=None, n_layers=None,
             edge_output_size=None, node_output_size=None, global_output_size=None,
             reducer=None, out_init_scale=5.0, name="AggregationNet"):
    super(NonLinearGraphNet, self).__init__(name=name)
    if num_processing_steps is None:
        self._num_processing_steps = 5
    else:
        self._num_processing_steps = num_processing_steps

    if reducer is None or reducer == 'max':
        reducer = unsorted_segment_max_or_zero
    elif reducer == 'mean':
        reducer = tf.math.unsorted_segment_mean
    elif reducer == 'sum':
        reducer = tf.math.unsorted_segment_sum
    else:
        raise ValueError('Unknown reducer!')

    if latent_size is None:
        latent_size = 16
    if n_layers is None:
        n_layers = 2

    def make_mlp():
        return snt.nets.MLP([latent_size] * n_layers, activate_final=False)

    if self._num_processing_steps > 0:
        # Edge model f^e(v_sender, v_receiver, e); in the linear model, f^e = v_sender.
        # The received edge features are averaged by the reducer to give e'.
        # Node model f^v(v, e'); in the linear model it was just f^v = e'.
        self._core = modules.GraphNetwork(edge_model_fn=make_mlp,
                                          node_model_fn=make_mlp,
                                          global_model_fn=make_mlp,
                                          edge_block_opt={'use_globals': False},
                                          node_block_opt={'use_globals': False,
                                                          'use_sent_edges': False},
                                          name="graph_net",
                                          reducer=reducer)

    self._encoder = modules.GraphIndependent(make_mlp, make_mlp, make_mlp, name="encoder")
    self._decoder = modules.GraphIndependent(make_mlp, make_mlp, make_mlp, name="decoder")
    inits = {'w': ortho_init(out_init_scale), 'b': tf.constant_initializer(0.0)}

    # Transforms the outputs into the appropriate shapes.
    edge_fn = None if edge_output_size is None else lambda: snt.Linear(
        edge_output_size, initializers=inits, name="edge_output")
    node_fn = None if node_output_size is None else lambda: snt.Linear(
        node_output_size, initializers=inits, name="node_output")
    global_fn = None if global_output_size is None else lambda: snt.Linear(
        global_output_size, initializers=inits, name="global_output")
    with self._enter_variable_scope():
        self._output_transform = modules.GraphIndependent(edge_fn, node_fn, global_fn,
                                                          name="output")
from itertools import zip_longest

import numpy as np
import tensorflow as tf

def mlp_extractor(flat_observations, net_arch, act_fun, layer_norm=False):
    """
    Constructs an MLP that receives observations as an input and outputs a latent
    representation for the policy and a value network. The ``net_arch`` parameter
    specifies the number and sizes of the hidden layers and how many of them are
    shared between the policy network and the value network. It is assumed to be
    a list with the following structure:

    1. An arbitrary number (zero allowed) of integers, each specifying the number
       of units in a shared layer. If the number of ints is zero, there will be
       no shared layers.
    2. An optional dict, to specify the following non-shared layers for the value
       network and the policy network. It is formatted like
       ``dict(vf=[<value layer sizes>], pi=[<policy layer sizes>])``. If it is
       missing any of the keys (pi or vf), zero non-shared layers (empty list)
       is assumed.

    For example, to construct a network with one shared layer of size 55 followed
    by two non-shared layers of size 255 for the value network and a single
    non-shared layer of size 128 for the policy network, the following
    ``net_arch`` would be used: ``[55, dict(vf=[255, 255], pi=[128])]``. A simple
    shared network topology with two layers of size 128 would be specified as
    ``[128, 128]``.

    :param flat_observations: (tf.Tensor) The observations to base policy and value function on.
    :param net_arch: ([int or dict]) The specification of the policy and value networks.
        See above for details on its formatting.
    :param act_fun: (tf function) The activation function to use for the networks.
    :param layer_norm: (bool) Whether to apply layer normalization to the policy branch.
    :return: (tf.Tensor, tf.Tensor) latent_policy, latent_value of the specified network.
        If all layers are shared, then ``latent_policy == latent_value``
    """
    latent = flat_observations
    policy_only_layers = []  # Layer sizes of the network that only belongs to the policy network
    value_only_layers = []  # Layer sizes of the network that only belongs to the value network

    # Iterate through the shared layers and build the shared parts of the network
    for idx, layer in enumerate(net_arch):
        if isinstance(layer, int):  # Check that this is a shared layer
            layer_size = layer
            latent = act_fun(linear(latent, "shared_fc{}".format(idx), layer_size,
                                    init_scale=np.sqrt(2)))
        else:
            assert isinstance(layer, dict), "Error: the net_arch list can only contain ints and dicts"
            if 'pi' in layer:
                assert isinstance(layer['pi'], list), "Error: net_arch[-1]['pi'] must contain a list of integers."
                policy_only_layers = layer['pi']
            if 'vf' in layer:
                assert isinstance(layer['vf'], list), "Error: net_arch[-1]['vf'] must contain a list of integers."
                value_only_layers = layer['vf']
            break  # From here on the network splits up in policy and value network

    # Build the non-shared part of the network
    latent_policy = latent
    latent_value = latent
    for idx, (pi_layer_size, vf_layer_size) in enumerate(zip_longest(policy_only_layers,
                                                                     value_only_layers)):
        if pi_layer_size is not None:
            assert isinstance(pi_layer_size, int), "Error: net_arch[-1]['pi'] must only contain integers."
            if layer_norm:
                with tf.variable_scope("pi_fc{}".format(idx)):
                    n_input = latent_policy.get_shape()[1].value
                    weight = tf.get_variable("w", [n_input, pi_layer_size],
                                             initializer=ortho_init(np.sqrt(2)))
                    bias = tf.get_variable("b", [pi_layer_size],
                                           initializer=tf.constant_initializer(np.sqrt(2)))
                    pi_h = tf.matmul(latent_policy, weight) + bias
                    pi_h = tf.contrib.layers.layer_norm(pi_h, center=True, scale=True)
                latent_policy = act_fun(pi_h)
            else:
                latent_policy = act_fun(linear(latent_policy, "pi_fc{}".format(idx),
                                               pi_layer_size, init_scale=np.sqrt(2)))

        if vf_layer_size is not None:
            assert isinstance(vf_layer_size, int), "Error: net_arch[-1]['vf'] must only contain integers."
            latent_value = act_fun(linear(latent_value, "vf_fc{}".format(idx),
                                          vf_layer_size, init_scale=np.sqrt(2)))

    return latent_policy, latent_value
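# Hypothetical usage sketch for `mlp_extractor` (not in the original snippet):
# one shared layer of 55 units, then a 255-255 value branch and a 128-unit
# policy branch, matching the docstring example. Assumes TF 1.x graph mode.
def _demo_mlp_extractor():
    obs_ph = tf.placeholder(tf.float32, [None, 10], name='obs')
    latent_pi, latent_vf = mlp_extractor(obs_ph,
                                         net_arch=[55, dict(vf=[255, 255], pi=[128])],
                                         act_fun=tf.nn.relu)
    return latent_pi, latent_vf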
def separable_conv(input_tensor, scope, *, n_filters, filter_size, stride, pad='VALID',
                   channel_multiplier=1, init_scale=1.0, data_format='NHWC',
                   one_dim_bias=False):
    """
    Creates a 2d depthwise-separable convolutional layer for TensorFlow

    :param input_tensor: (TensorFlow Tensor) The input tensor for the convolution
    :param scope: (str) The TensorFlow variable scope
    :param n_filters: (int) The number of filters
    :param filter_size: (Union[int, [int], tuple<int, int>]) The filter size for the squared
        kernel matrix, or the height and width of the kernel filter if the input is a list or tuple
    :param stride: (int) The stride of the convolution
    :param pad: (str) The padding type ('VALID' or 'SAME')
    :param channel_multiplier: (int) The number of depthwise outputs per input channel
    :param init_scale: (float) The initialization scale
    :param data_format: (str) The data format for the convolution weights
    :param one_dim_bias: (bool) If the bias should be one dimensional or not
    :return: (TensorFlow Tensor) 2d separable convolutional layer
    """
    if isinstance(filter_size, (list, tuple)):
        assert len(filter_size) == 2, \
            "Filter size must have 2 elements (height, width), {} were given".format(len(filter_size))
        filter_height = filter_size[0]
        filter_width = filter_size[1]
    else:
        filter_height = filter_size
        filter_width = filter_size
    if data_format == 'NHWC':
        channel_ax = 3
        strides = [1, stride, stride, 1]
        bshape = [1, 1, 1, n_filters]
    elif data_format == 'NCHW':
        channel_ax = 1
        strides = [1, 1, stride, stride]
        bshape = [1, n_filters, 1, 1]
    else:
        raise NotImplementedError
    bias_var_shape = [n_filters] if one_dim_bias else [1, n_filters, 1, 1]
    n_input = input_tensor.get_shape()[channel_ax].value
    with tf.variable_scope(scope):
        depthwise_filter = tf.get_variable(
            shape=(filter_height, filter_width, n_input, channel_multiplier),
            name="depthwise_filter", initializer=ortho_init(init_scale))
        pointwise_filter = tf.get_variable(
            shape=[1, 1, channel_multiplier * n_input, n_filters],
            name="pointwise_filter", initializer=ortho_init(init_scale))
        bias = tf.get_variable("b", bias_var_shape, initializer=tf.constant_initializer(0.0))
        if not one_dim_bias and data_format == 'NHWC':
            bias = tf.reshape(bias, bshape)
        output = tf.nn.separable_conv2d(input_tensor, depthwise_filter, pointwise_filter,
                                        strides=strides, padding=pad, data_format=data_format)
        return bias + output
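# Hypothetical usage sketch for `separable_conv` (not in the original snippet):
# an 8x8, stride-4 separable stem over an NHWC image batch, in the style of a
# typical Atari CNN. Assumes TF 1.x graph mode.
def _demo_separable_conv():
    images = tf.placeholder(tf.float32, [None, 84, 84, 4], name='images')
    out = tf.nn.relu(separable_conv(images, 'sep1', n_filters=32, filter_size=8,
                                    stride=4, pad='SAME', init_scale=np.sqrt(2)))
    return out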
def proba_distribution_from_latent(self, pi_latent_vector, vf_latent_vector,
                                   init_scale=1.0, init_bias=0.0):
    master_W, master_b = get_aggregation_var(pi_latent_vector, 'master',
                                             self.sources_actions.get_shape()[1],
                                             self.n_cat,
                                             no_bias=self.no_bias, SDW=self.SDW,
                                             bias_layer_initializer=ortho_init(init_scale),
                                             summary=self.summary)
    pdparam = affine_transformation(self.sources_actions, master_W, master_b,
                                    summary=self.summary)
    q_values = linear(vf_latent_vector, 'q', self.n_cat,
                      init_scale=init_scale, init_bias=init_bias)
    return self.proba_distribution_from_flat(pdparam), pdparam, q_values
def proba_distribution_from_latent(self, pi_latent_vector, vf_latent_vector,
                                   init_scale=1.0, init_bias=0.0):
    master_W, master_b = get_aggregation_var(pi_latent_vector, 'master',
                                             self.sources_actions.get_shape()[1],
                                             self.sources_actions.get_shape()[2],
                                             no_bias=self.no_bias, SDW=self.SDW,
                                             bias_layer_initializer=ortho_init(init_scale),
                                             summary=self.summary)
    mean = affine_transformation(self.sources_actions, master_W, master_b,
                                 summary=self.summary)
    logstd = tf.get_variable(name='pi/logstd', shape=[1, self.size],
                             initializer=tf.zeros_initializer())
    pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
    q_values = linear(vf_latent_vector, 'q', self.size,
                      init_scale=init_scale, init_bias=init_bias)
    return self.proba_distribution_from_flat(pdparam), mean, q_values