def build_net(self, state, action, c_names, units_1, units_2, w_i, b_i, reg=None):
    with tf.variable_scope('conv1'):
        conv1 = conv(state, [5, 5, 3, 6], [6], [1, 2, 2, 1], w_i, b_i)
    with tf.variable_scope('conv2'):
        conv2 = conv(conv1, [3, 3, 6, 12], [12], [1, 2, 2, 1], w_i, b_i)
    with tf.variable_scope('flatten'):
        flatten = tf.contrib.layers.flatten(conv2)
        # Two equivalent ways to write the reshape:
        # flatten = tf.reshape(conv2, [-1, np.prod(conv2.get_shape().as_list()[1:])])
        # flatten = tf.reshape(conv2, [-1, np.prod(conv2.shape.as_list()[1:])])
        # print(flatten.get_shape())
    with tf.variable_scope('dense1'):
        dense1 = dense(flatten, units_1, [units_1], w_i, b_i)
    with tf.variable_scope('dense2'):
        dense2 = dense(dense1, units_2, [units_2], w_i, b_i)
    with tf.variable_scope('concat'):
        # Append the (cast) action vector to the state features before the output layer.
        concatenated = tf.concat([dense2, tf.cast(action, tf.float32)], 1)
    with tf.variable_scope('dense3'):
        dense3 = dense(concatenated, self.atoms, [self.atoms], w_i, b_i)
    return dense3
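# The conv/dense helpers called throughout this file are defined elsewhere.
# Below is a minimal sketch of what their call sites imply, assuming SAME
# padding and a bias add; the real helpers may differ. The names
# _conv_sketch/_dense_sketch are hypothetical, not the originals.
def _conv_sketch(x, kernel_shape, bias_shape, strides, w_i, b_i, activation=tf.nn.relu):
    # kernel_shape = [h, w, in_channels, out_channels], strides = [1, sh, sw, 1]
    w = tf.get_variable('w', kernel_shape, initializer=w_i)
    b = tf.get_variable('b', bias_shape, initializer=b_i)
    return activation(tf.nn.conv2d(x, w, strides=strides, padding='SAME') + b)


def _dense_sketch(x, units, bias_shape, w_i, b_i=None, activation=None):
    # bias_shape=None means no bias; a missing b_i presumably falls back to zeros,
    # which would explain the call sites that pass a bias shape without b_i.
    w = tf.get_variable('w', [x.get_shape().as_list()[-1], units], initializer=w_i)
    out = tf.matmul(x, w)
    if bias_shape is not None:
        b = tf.get_variable('b', bias_shape,
                            initializer=b_i if b_i is not None else tf.zeros_initializer())
        out = out + b
    return out if activation is None else activation(out)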
def get_mu_sigma(self):
    with tf.variable_scope('actor'):
        w_i = tf.random_uniform_initializer(0., 0.1)
        dense1 = dense(self.state_input, 200, None, w_i, None, activation=tf.nn.relu6)
        with tf.variable_scope('mu'):
            mu = dense(dense1, self.action_dim, None, w_i, None, activation=tf.nn.tanh)
        with tf.variable_scope('sigma'):
            sigma = dense(dense1, self.action_dim, None, w_i, None, activation=tf.nn.softplus)
        # return mu * self.config.ACTION_BOUND[1], sigma + 1e-4
        return mu, sigma + 1e-4
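# Typical downstream use of get_mu_sigma in a continuous-action head (sketch,
# not from the original file): wrap mu/sigma in a Normal distribution, sample
# a bounded action, and keep the log-probability for the policy-gradient loss.
#
#     mu, sigma = self.get_mu_sigma()
#     normal_dist = tf.distributions.Normal(mu, sigma)
#     action = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0),
#                               self.config.ACTION_BOUND[0],
#                               self.config.ACTION_BOUND[1])
#     log_prob = normal_dist.log_prob(action)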
def v(self):
    with tf.variable_scope('critic'):
        w_i = tf.random_uniform_initializer(0., 0.1)
        b_i = tf.zeros_initializer()
        with tf.variable_scope('dense1'):
            dense1 = dense(self.state_input, 100, [100], w_i, activation=tf.nn.relu6)
        with tf.variable_scope('dense2'):
            dense2 = dense(dense1, 1, [1], w_i, b_i, activation=None)
        return dense2
def general_actor(self, name, num_inputs, num_outputs, ext=1, reuse=False):
    # ext = -1 for a flexion net.
    # Extension network: the rectified drive is positive when state < state_target.
    # This is the first network that tries to implement some notion of:
    #   - symmetry: the same network is used for the left and right leg; only the state input changes.
    #   - phase-based control: two different network groups are used for swing and stance.
    #   - general PD control.
    # Graph shared with the value net.
    with tf.variable_scope('actor/{}'.format(name)):
        state_dim = num_inputs
        cst_input = tf.constant(np.float32(np.zeros([1, 1])))
        # cst_input = tf.constant(np.float32(np.random.randn(1, state_dim)))
        state_input = self.state_input
        # Learned set-point: a bias-only layer driven by a constant input.
        state_target = tf.contrib.layers.fully_connected(
            inputs=cst_input,
            num_outputs=state_dim,
            activation_fn=None,
            biases_initializer=tf.random_uniform_initializer(-1.0, 1.0),
            scope="stateTarget")
        state = tf.contrib.layers.flatten(state_input)
        actionNet = tf.nn.relu(ext * (state_target - state))
        # assert actionNet.shape == [1, state_dim]
        # w_i = tf.random_normal_initializer(0., 0.1)
        w_i = tf.random_uniform_initializer(0., 0.1)
        with tf.variable_scope('mu'):
            mu = dense(actionNet, num_outputs, None, w_i, None, activation=tf.abs)
            # variable_summaries_history(mu, self.is_local_net)
        with tf.variable_scope('sigma'):
            sigma = dense(actionNet, num_outputs, None, w_i, None, activation=tf.nn.sigmoid)
            # variable_summaries_history(sigma, self.is_local_net)
        return mu, sigma / 10.0
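# Sanity check of the rectified drive in general_actor (standalone numpy
# sketch): with ext = +1 a unit fires only where state < state_target, with
# ext = -1 only where state > state_target, i.e. one-sided proportional
# control around a learned set-point.
#
#     target = np.array([[0.5, 0.5]], dtype=np.float32)
#     state = np.array([[0.2, 0.8]], dtype=np.float32)
#     np.maximum(0.0, 1.0 * (target - state))   # [[0.3, 0.0]] -> extension drive
#     np.maximum(0.0, -1.0 * (target - state))  # [[0.0, 0.3]] -> flexion drive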
def value_get_current(self):
    with tf.variable_scope('critic'):
        w_i = tf.random_uniform_initializer(0., 0.1)
        b_i = tf.zeros_initializer()  # was referenced below but never defined
        net_type = self.config.CRITIC_NETWORK_TYPE  # avoid shadowing builtin `type`
        if net_type == 1:
            with tf.variable_scope('dense1'):
                dense1 = dense(self.state_input, 256, [256], w_i, activation=tf.nn.relu)
                if self.name == "W_0":
                    variable_summaries_layer(dense1, self.is_local_net)
            with tf.variable_scope('dense2'):
                dense2 = dense(dense1, 1, [1], w_i, activation=None)
                if self.name == "W_0":
                    variable_summaries_layer(dense2, self.is_local_net)
            return dense2
        elif net_type == 2:
            with tf.variable_scope('dense1'):
                dense1 = dense(self.state_input, 512, [512], w_i, activation=tf.nn.relu)
                variable_summaries_layer(dense1, self.is_local_net)
            with tf.variable_scope('dense2'):
                dense2 = dense(dense1, 256, [256], w_i, activation=tf.nn.relu)
                variable_summaries_layer(dense2, self.is_local_net)
            with tf.variable_scope('dense3'):
                dense3 = dense(dense2, 1, [1], w_i, b_i, activation=None)
                variable_summaries_layer(dense3, self.is_local_net)
            return dense3
        else:
            # Two parallel SELU branches over the same state input, concatenated
            # before the scalar value output.
            with tf.variable_scope('dense1'):
                dense1 = dense(self.state_input, 128, [128], w_i, activation=tf.nn.selu)
                variable_summaries_layer(dense1, self.is_local_net)
            with tf.variable_scope('dense2'):
                dense2 = dense(self.state_input, 128, [128], w_i, activation=tf.nn.selu)
                variable_summaries_layer(dense2, self.is_local_net)
            with tf.variable_scope('dense3'):
                dense3 = dense(tf.concat([dense1, dense2], axis=1), 1, [1], w_i, b_i, activation=None)
                variable_summaries_layer(dense3, self.is_local_net)
            return dense3
def a_prob(self):
    with tf.variable_scope('actor'):
        w_i = tf.random_uniform_initializer(0., 0.1)
        b_i = tf.zeros_initializer()
        with tf.variable_scope('dense1'):
            dense1 = dense(self.state_input, 200, None, w_i, b_i, activation=tf.nn.relu6)
        with tf.variable_scope('dense2'):
            dense2 = dense(dense1, self.action_dim, None, w_i, b_i, activation=tf.nn.softmax)
        return dense2
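# Typical use of the softmax policy from a_prob (sketch, not from the original
# file): draw a discrete action by treating log-probabilities as logits.
#
#     probs = self.a_prob()
#     action = tf.squeeze(tf.multinomial(tf.log(probs), 1), axis=1)
#     log_prob = tf.log(tf.reduce_sum(probs * tf.one_hot(action, self.action_dim), axis=1))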
def get_mu_sigma(self, reuse=False):
    # Graph shared with the value net.
    with tf.variable_scope('actor'):
        w_i = tf.random_normal_initializer(0., 1.0)
        b_i = tf.zeros_initializer()
        if self.config.ACTOR_NETWORK_TYPE == 1:
            dense1 = dense(self.state_input, 256, None, w_i, b_i, activation=None)
            # dense1 = tf.contrib.layers.fully_connected(
            #     inputs=tf.contrib.layers.flatten(self.state_input),
            #     num_outputs=256,
            #     scope="dense1")
            variable_summaries_layer(dense1, self.is_local_net)
            if not reuse:
                tf.contrib.layers.summarize_activation(dense1)
            with tf.variable_scope('mu'):
                mu = dense(dense1, self.action_dim, None, w_i, None, activation=tf.sin)
                variable_summaries_history(mu, self.is_local_net)
            with tf.variable_scope('sigma'):
                sigma = dense(dense1, self.action_dim, None, w_i, None, activation=tf.nn.sigmoid)
                variable_summaries_history(sigma, self.is_local_net)
        elif self.config.ACTOR_NETWORK_TYPE == 2:
            conv1 = tf.contrib.layers.conv2d(self.state_input, 16, 8,
                                             self.config.TEMPORAL_WINDOW,
                                             activation_fn=tf.nn.relu,
                                             scope="conv1")
            conv2 = tf.contrib.layers.conv2d(conv1, 32, self.config.TEMPORAL_WINDOW, 2,
                                             activation_fn=tf.nn.relu,
                                             scope="conv2")
            # Fully connected layer followed by dropout (keep_prob = 0.9).
            dense2 = tf.contrib.layers.fully_connected(
                inputs=tf.contrib.layers.flatten(conv2),
                num_outputs=256,
                scope="dense2")
            dense1 = tf.nn.dropout(dense2, 0.9)
            # dense1 = dense(fc1, 200, None, w_i, None, activation=None)
            if not reuse:
                tf.contrib.layers.summarize_activation(conv1)
                tf.contrib.layers.summarize_activation(conv2)
                tf.contrib.layers.summarize_activation(dense1)
            with tf.variable_scope('mu'):
                mu = dense(dense1, self.action_dim, None, w_i, None, activation=tf.nn.relu)
                variable_summaries_history(mu, self.is_local_net)
            with tf.variable_scope('sigma'):
                sigma = dense(dense1, self.action_dim, None, w_i, None, activation=tf.nn.sigmoid)
                variable_summaries_history(sigma, self.is_local_net)
        elif self.config.ACTOR_NETWORK_TYPE == 4:
            self.dense1 = dense(self.state_input, 200, None, w_i, None, activation=None)
            variable_summaries_layer(self.dense1, self.is_local_net)
            action_dim_half = int(self.action_dim / 2)
            # TODO: change the replay buffer to our new implementation with terminal-based recall.
            if not reuse:
                tf.contrib.layers.summarize_activation(self.dense1)
            # Separate mu/sigma heads for the stance and swing halves of the action vector.
            with tf.variable_scope('muStance'):
                self.muStance = dense(self.dense1, action_dim_half, None, w_i, None, activation=tf.nn.selu)
            with tf.variable_scope('sigmaStance'):
                self.sigmaStance = dense(self.dense1, action_dim_half, None, w_i, None, activation=tf.nn.sigmoid)
            with tf.variable_scope('muSwing'):
                self.muSwing = dense(self.dense1, action_dim_half, None, w_i, None, activation=tf.nn.selu)
            with tf.variable_scope('sigmaSwing'):
                self.sigmaSwing = dense(self.dense1, action_dim_half, None, w_i, None, activation=tf.nn.sigmoid)
            with tf.variable_scope('mu'):
                mu = tf.concat([self.muStance, self.muSwing], axis=1)
                variable_summaries_history(mu, self.is_local_net)
            with tf.variable_scope('sigma'):
                sigma = tf.concat([self.sigmaStance, self.sigmaSwing], axis=1)
                variable_summaries_history(sigma, self.is_local_net)
        elif self.config.ACTOR_NETWORK_TYPE == 5:
            # This is the first network that tries to implement some notion of:
            #   - symmetry: the same network is used for the left and right leg; only the state input changes.
            #   - phase-based control: two different network groups are used for swing and stance.
            #   - general PD control.
            isExtensor = np.array(
                # HF GLU HAB HAD VAS HAM GAS SOL TA (one row per leg)
                [0, 1, 1, 0, 1, 0, 1, 1, 0,
                 0, 1, 1, 0, 1, 0, 1, 1, 0])

            def concat_with_rule(arr1, arr2, rule):
                # Interleave columns of arr1 (where rule == 1) and arr2
                # (where rule == 0) back into the original muscle ordering.
                i1 = 0
                i2 = 0
                arr = []
                for r in rule:
                    if r:
                        arr.append(arr1[0, i1])
                        i1 += 1
                    else:
                        arr.append(arr2[0, i2])
                        i2 += 1
                return tf.expand_dims(tf.stack(arr, axis=-1), 0)

            # Four sub-actors: extensors/flexors crossed with stance/swing phases.
            selector = 1
            multiplier = 1
            mu_ext_st, sigma_ext_st = self.general_actor(
                "extensorSt", self.state_dim, np.sum(isExtensor == selector),
                ext=multiplier, reuse=True)
            multiplier = 0.3
            mu_ext_sw, sigma_ext_sw = self.general_actor(
                "extensorSw", self.state_dim, np.sum(isExtensor == selector),
                ext=multiplier, reuse=True)
            selector = 0
            multiplier = -0.3
            mu_flex_st, sigma_flex_st = self.general_actor(
                "flexorSt", self.state_dim, np.sum(isExtensor == selector),
                ext=multiplier, reuse=True)
            multiplier = -1
            mu_flex_sw, sigma_flex_sw = self.general_actor(
                "flexorSw", self.state_dim, np.sum(isExtensor == selector),
                ext=multiplier, reuse=True)
            mu_st = concat_with_rule(mu_ext_st, mu_flex_st, isExtensor)
            sigma_st = concat_with_rule(sigma_ext_st, sigma_flex_st, isExtensor)
            mu_sw = concat_with_rule(mu_ext_sw, mu_flex_sw, isExtensor)
            sigma_sw = concat_with_rule(sigma_ext_sw, sigma_flex_sw, isExtensor)
            mu = tf.concat([mu_st, mu_sw], axis=1)
            sigma = tf.concat([sigma_st, sigma_sw], axis=1)
            return mu, sigma
        else:
            raise ValueError(
                'Network type "{}" not implemented, should be an integer'.format(
                    self.config.ACTOR_NETWORK_TYPE))
        # return mu * self.config.ACTION_BOUND[1], sigma + 1e-4
        return mu, sigma + 1e-4
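# How concat_with_rule reassembles the full muscle vector (illustrative, with
# plain lists standing in for the [1, n] tensors): extensor outputs fill the
# slots where the rule is 1, flexor outputs fill the rest, restoring the
# original muscle ordering.
#
#     ext = [10, 11]      # values for rule == 1 slots
#     flex = [20, 21]     # values for rule == 0 slots
#     rule = [0, 1, 1, 0]
#     # concat_with_rule -> [[20, 10, 11, 21]]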
def action_get_current(self, reuse=False):
    # Graph shared with the value net.
    with tf.variable_scope('actor'):
        w_i = tf.initializers.glorot_normal()
        b_i = tf.zeros_initializer()
        if self.config.ACTOR_NETWORK_TYPE == 1:
            with tf.variable_scope('act_dense1'):
                dense1 = dense(self.state_input, 512, [512], w_i, activation=tf.nn.relu)
                if not reuse and self.name == "W_0":
                    variable_summaries_layer(dense1, self.is_local_net)
            with tf.variable_scope('act_dense2'):
                dense2 = dense(dense1, 256, [256], w_i, activation=tf.nn.relu)
                if not reuse and self.name == "W_0":
                    variable_summaries_layer(dense2, self.is_local_net)
            if not reuse and self.name == "W_0":
                tf.contrib.layers.summarize_activation(dense1)
                tf.contrib.layers.summarize_activation(dense2)
            with tf.variable_scope('mu'):
                mu = dense(dense2, self.action_dim, None, w_i, activation=None)
                if not reuse and self.name == "W_0":
                    variable_summaries_history(mu, self.is_local_net)
        elif self.config.ACTOR_NETWORK_TYPE == 11:
            with tf.variable_scope('act_dense1'):
                dense1 = dense(self.state_input, 512, [512], w_i, activation=tf.nn.relu)
                if not reuse and self.name == "W_0":
                    variable_summaries_layer(dense1, self.is_local_net)
            with tf.variable_scope('act_dense2'):
                dense2 = dense(dense1, 256, [256], w_i, activation=tf.nn.relu)
                if not reuse and self.name == "W_0":
                    variable_summaries_layer(dense2, self.is_local_net)
            with tf.variable_scope('act_dense3'):
                dense3 = dense(dense2, 256, [256], w_i, b_i, activation=tf.nn.relu)
                if not reuse and self.name == "W_0":
                    variable_summaries_layer(dense3, self.is_local_net)
            if not reuse and self.name == "W_0":
                tf.contrib.layers.summarize_activation(dense1)
                tf.contrib.layers.summarize_activation(dense2)
                tf.contrib.layers.summarize_activation(dense3)
            with tf.variable_scope('mu'):
                mu = dense(dense3, self.action_dim, None, w_i, activation=None)
                if not reuse and self.name == "W_0":
                    variable_summaries_history(mu, self.is_local_net)
        else:
            raise ValueError(
                'Network type "{}" not implemented, should be an integer'.format(
                    self.config.ACTOR_NETWORK_TYPE))
        return mu