# NOTE: this excerpt assumes the enclosing module imports numpy as np and
# tensorflow as tf (1.x API), and that tf_utils provides an
# orthogonal_initializer(gain) helper.
def _concat_layer(self, input, concat, scope, name="", share_trainables=True):
    layer_type = 'Concat'
    with tf.variable_scope("{}/{}{}".format(scope, layer_type, name), reuse=tf.AUTO_REUSE) as variable_scope:
        print(" [{}]Building or reusing scope: {}".format(self.id, variable_scope.name))
        input = tf.layers.flatten(input)
        if concat.get_shape()[-1] > 0:
            concat = tf.layers.flatten(concat)
            input = tf.concat([input, concat], -1)  # shape: (batch, concat_size+units)
        input = tf.layers.dense(
            name='Concat_Dense1', inputs=input, units=256, activation=tf.nn.relu,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        input = tf.layers.dense(
            name='Concat_Dense2', inputs=input, units=448, activation=tf.nn.relu,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        # Update keys
        self._update_keys(variable_scope.name, share_trainables)
        # Return result
        return input
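# Shape sketch (hypothetical shapes, not fixed by this code): given features of
# shape (batch, H, W, C) and an auxiliary 'concat' tensor of shape (batch, k),
# both are flattened and joined, so Concat_Dense1 receives a tensor of width
# H*W*C + k; when 'concat' is empty (last dimension 0) the extra input is skipped.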
def _policy_layer(self, input, scope, name="", share_trainables=True):
    layer_type = 'Policy'
    with tf.variable_scope("{}/{}{}".format(scope, layer_type, name), reuse=tf.AUTO_REUSE) as variable_scope:
        print(" [{}]Building or reusing scope: {}".format(self.id, variable_scope.name))
        input = input + tf.layers.dense(
            name='Policy_Dense1', inputs=input, units=input.get_shape().as_list()[-1],
            activation=tf.nn.relu,
            kernel_initializer=tf_utils.orthogonal_initializer(0.1))
        output_list = []
        for h, policy_head in enumerate(self.policy_heads):
            policy_depth = policy_head['depth']
            policy_size = policy_head['size']
            if is_continuous_control(policy_depth):
                # build mean
                mu = tf.layers.dense(
                    name='Policy_Mu_Dense{}'.format(h), inputs=input, units=policy_size,
                    activation=None,
                    kernel_initializer=tf_utils.orthogonal_initializer(0.01))  # in (-inf,inf)
                # build standard deviation
                sigma = tf.layers.dense(
                    name='Policy_Sigma_Dense{}'.format(h), inputs=input, units=policy_size,
                    activation=None,
                    kernel_initializer=tf_utils.orthogonal_initializer(0.01))  # in (-inf,inf)
                # clip mu and sigma to avoid numerical instabilities
                clipped_mu = tf.clip_by_value(mu, -1, 1)  # in [-1,1]
                clipped_sigma = tf.clip_by_value(tf.abs(sigma), 1e-4, 1)  # in [1e-4,1]; sigma must be greater than 0
                # build policy batch
                policy_batch = tf.stack([clipped_mu, clipped_sigma])
                policy_batch = tf.transpose(policy_batch, [1, 0, 2])
            else:  # discrete control
                policy_batch = tf.layers.dense(
                    name='Policy_Logits_Dense{}'.format(h), inputs=input,
                    units=policy_size * policy_depth, activation=None,
                    kernel_initializer=tf_utils.orthogonal_initializer(0.01))
                if policy_size > 1:
                    policy_batch = tf.reshape(policy_batch, [-1, policy_size, policy_depth])
            output_list.append(policy_batch)
        # Update keys
        self._update_keys(variable_scope.name, share_trainables)
        # Return result
        return output_list
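# Sketch (an assumption about downstream use, not code from this excerpt): a
# continuous head returns a (batch, 2, policy_size) tensor stacking the clipped
# mean and standard deviation, which downstream code could unpack into a
# distribution, e.g.:
#   mu, sigma = policy_batch[:, 0], policy_batch[:, 1]
#   distribution = tf.distributions.Normal(loc=mu, scale=sigma)
# A discrete head instead returns logits of shape (batch, policy_size, policy_depth)
# (or (batch, policy_depth) when policy_size == 1), suitable for
# tf.distributions.Categorical(logits=...).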
def _value_layer(self, input, scope, name="", share_trainables=True):
    layer_type = 'Value'
    with tf.variable_scope("{}/{}{}".format(scope, layer_type, name), reuse=tf.AUTO_REUSE) as variable_scope:
        print(" [{}]Building or reusing scope: {}".format(self.id, variable_scope.name))
        input = input + tf.layers.dense(
            name='Value_Dense1', inputs=input, units=input.get_shape().as_list()[-1],
            activation=tf.nn.relu,
            kernel_initializer=tf_utils.orthogonal_initializer(0.1))
        if self.qvalue_estimation:
            policy_depth = sum(h['depth'] for h in self.policy_heads)
            policy_size = sum(h['size'] for h in self.policy_heads)
            units = policy_size * max(1, policy_depth)
            output = [
                tf.layers.dense(
                    name='Value_Q{}_Dense1'.format(i), inputs=input, units=units,
                    activation=None,
                    kernel_initializer=tf_utils.orthogonal_initializer(0.01))
                for i in range(self.value_count)
            ]
            output = tf.stack(output)
            output = tf.transpose(output, [1, 0, 2])
            if policy_size > 1 and policy_depth > 1:
                output = tf.reshape(output, [-1, self.value_count, policy_size, policy_depth])
        else:
            output = [  # Keep value heads separated
                tf.layers.dense(
                    name='Value_V{}_Dense1'.format(i), inputs=input, units=1,
                    activation=None,
                    kernel_initializer=tf_utils.orthogonal_initializer(0.01))
                for i in range(self.value_count)
            ]
            output = tf.stack(output)
            output = tf.transpose(output, [1, 0, 2])
            output = tf.layers.flatten(output)
        # Update keys
        self._update_keys(variable_scope.name, share_trainables)
        # Return result
        return output
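# Output shapes (derived from the code above): with qvalue_estimation the
# result is (batch, value_count, policy_size * max(1, policy_depth)) Q-values,
# reshaped to (batch, value_count, policy_size, policy_depth) when both size
# and depth exceed 1; otherwise each head yields one scalar state-value, and
# the flattened result has shape (batch, value_count).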
def _cnn_layer(self, input, scope, name="", share_trainables=True):
    layer_type = 'CNN'
    with tf.variable_scope("{}/{}{}".format(scope, layer_type, name), reuse=tf.AUTO_REUSE) as variable_scope:
        print(" [{}]Building or reusing scope: {}".format(self.id, variable_scope.name))
        # input = tf.contrib.model_pruning.masked_conv2d(inputs=input, num_outputs=16, kernel_size=(3,3), padding='SAME', activation_fn=tf.nn.leaky_relu)  # xavier initializer
        # input = tf.contrib.model_pruning.masked_conv2d(inputs=input, num_outputs=32, kernel_size=(3,3), padding='SAME', activation_fn=tf.nn.leaky_relu)  # xavier initializer
        input = tf.layers.conv2d(
            name='CNN_Conv1', inputs=input, filters=32, kernel_size=8, strides=4,
            padding='SAME', activation=tf.nn.relu,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        input = tf.layers.conv2d(
            name='CNN_Conv2', inputs=input, filters=64, kernel_size=4, strides=2,
            padding='SAME', activation=tf.nn.relu,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        input = tf.layers.conv2d(
            name='CNN_Conv3', inputs=input, filters=64, kernel_size=4, strides=1,
            padding='SAME', activation=tf.nn.relu,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        # Update keys
        self._update_keys(variable_scope.name, share_trainables)
        # Return result
        return input
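# This stack closely resembles the Nature-DQN convolutional trunk
# (32 filters 8x8/4, 64 filters 4x4/2, 64 filters 3x3/1), except the third
# layer here uses a 4x4 kernel; the sqrt(2) orthogonal gain is the standard
# choice for ReLU activations.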
# NOTE: this is a second variant of _cnn_layer; if both definitions live in the
# same class, this one shadows the previous definition.
def _cnn_layer(self, input, scope, name="", share_trainables=True):
    layer_type = 'CNN'
    _, input_height, input_width, input_channel = input.get_shape().as_list()
    with tf.variable_scope("{}/{}{}".format(scope, layer_type, name), reuse=tf.AUTO_REUSE) as variable_scope:
        print(" [{}]Building or reusing scope: {}".format(self.id, variable_scope.name))
        input = tf.layers.conv2d(
            name='CNN_Conv1', inputs=input, filters=16,
            kernel_size=(input_height, 1), dilation_rate=(1, 3),
            padding='SAME', activation=tf.nn.relu,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        input = tf.layers.conv2d(
            name='CNN_Conv2', inputs=input, filters=16,
            kernel_size=(1, input_width), dilation_rate=(3, 1),
            padding='SAME', activation=tf.nn.relu,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        input = tf.layers.conv2d(
            name='CNN_Conv3', inputs=input, filters=32, kernel_size=3,
            padding='SAME', activation=tf.nn.relu,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        # Update keys
        self._update_keys(variable_scope.name, share_trainables)
        # Return result
        return input
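# Worked example (assuming an 84x84 input, a common Atari-style resolution not
# fixed by this code): with 'SAME' padding the first _cnn_layer variant yields
# ceil(84/4) = 21, then ceil(21/2) = 11, then 11 spatial positions, i.e. a
# (batch, 11, 11, 64) feature map. The second variant preserves the full
# spatial resolution, since every convolution uses stride 1; its dilated
# full-height and full-width kernels instead aggregate context along entire
# columns and rows.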
def _weights_layer(self, input, weights, scope, name="", share_trainables=True):
    layer_type = 'Weights'
    with tf.variable_scope("{}/{}{}".format(scope, layer_type, name), reuse=tf.AUTO_REUSE) as variable_scope:
        print(" [{}]Building or reusing scope: {}".format(self.id, variable_scope.name))
        # Embed the (gradient-stopped) kernel into a fixed-size vector
        kernel = tf.stop_gradient(weights['kernel'])
        kernel = tf.transpose(kernel, [1, 0])
        kernel = tf.layers.dense(
            name='TS_Dense0', inputs=kernel, units=1, activation=tf.nn.relu,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        kernel = tf.reshape(kernel, [1, -1])
        bias = tf.stop_gradient(weights['bias'])
        bias = tf.reshape(bias, [1, -1])
        weight_state = tf.concat((kernel, bias), -1)
        weight_state = tf.layers.dense(
            name='TS_Dense1', inputs=weight_state, units=64, activation=tf.nn.relu,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        weight_state = tf.reshape(weight_state, [-1])
        # Append the weight embedding to every element of the input batch
        input = tf.layers.flatten(input)
        input = tf.map_fn(fn=lambda b: tf.concat((b, weight_state), -1), elems=input)
        # Update keys
        self._update_keys(variable_scope.name, share_trainables)
        # Return result
        return input
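# The 'weights' dict consumed here ({'kernel': ..., 'bias': ...}) matches the
# 'prediction_weights' dict returned by _intrinsic_reward_layer below, which
# suggests (an inference from the matching keys, not documented in this
# excerpt) that the RND predictor head's weights can be embedded into the
# agent's state representation.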
def _intrinsic_reward_layer(self, input, scope, name="", share_trainables=True):
    layer_type = 'RandomNetworkDistillation'
    with tf.variable_scope("{}/{}{}".format(scope, layer_type, name), reuse=tf.AUTO_REUSE) as variable_scope:
        print(" [{}]Building or reusing scope: {}".format(self.id, variable_scope.name))
        # Here we use leaky_relu instead of relu as activation function
        # Target network
        target = tf.layers.conv2d(
            name='RND_Target_Conv1', inputs=input, filters=32, kernel_size=8, strides=4,
            padding='SAME', activation=tf.nn.leaky_relu,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        target = tf.layers.conv2d(
            name='RND_Target_Conv2', inputs=target, filters=64, kernel_size=4, strides=2,
            padding='SAME', activation=tf.nn.leaky_relu,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        target = tf.layers.conv2d(
            name='RND_Target_Conv3', inputs=target, filters=64, kernel_size=3, strides=1,
            padding='SAME', activation=tf.nn.leaky_relu,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        target = tf.layers.flatten(target)
        target = tf.layers.dense(
            name='RND_Target_Dense1', inputs=target, units=512, activation=None,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        # Predictor network
        prediction = tf.layers.conv2d(
            name='RND_Prediction_Conv1', inputs=input, filters=32, kernel_size=8, strides=4,
            padding='SAME', activation=tf.nn.leaky_relu,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        prediction = tf.layers.conv2d(
            name='RND_Prediction_Conv2', inputs=prediction, filters=64, kernel_size=4, strides=2,
            padding='SAME', activation=tf.nn.leaky_relu,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        prediction = tf.layers.conv2d(
            name='RND_Prediction_Conv3', inputs=prediction, filters=64, kernel_size=3, strides=1,
            padding='SAME', activation=tf.nn.leaky_relu,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        prediction = tf.layers.flatten(prediction)
        prediction = tf.layers.dense(
            name='RND_Prediction_Dense1', inputs=prediction, units=512, activation=tf.nn.relu,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        prediction = tf.layers.dense(
            name='RND_Prediction_Dense2', inputs=prediction, units=512, activation=tf.nn.relu,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        prediction = tf.layers.dense(
            name='RND_Prediction_Dense3', inputs=prediction, units=512, activation=None,
            kernel_initializer=tf_utils.orthogonal_initializer(np.sqrt(2)))
        with tf.variable_scope('RND_Prediction_Dense3', reuse=True):
            prediction_weights = {
                'kernel': tf.get_variable("kernel"),
                'bias': tf.get_variable("bias"),
            }
        # Update keys
        self._update_keys(variable_scope.name, share_trainables)
        # Return result
        return target, prediction, prediction_weights
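# Sketch of the standard Random Network Distillation recipe (Burda et al.,
# 2018); the names below are placeholders and this repo's actual loss may
# differ. The randomly initialized target is kept fixed while the predictor is
# trained to match it; the per-state prediction error serves as the intrinsic
# reward, which is large for rarely visited states:
#   target, prediction, _ = self._intrinsic_reward_layer(observation, scope='Agent')
#   intrinsic_reward = tf.reduce_mean(tf.square(tf.stop_gradient(target) - prediction), axis=-1)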