def _build_network_weights(self):
    ''' @brief: build the network
    '''
    # step -1: build the baseline network mlp if needed
    if self._shared_network:
        MLP_baseline_shape = self._network_shape + [1] + \
            [self._hidden_dim]  # (l_1, l_2, ..., l_o, l_i)
        MLP_baseline_act_func = ['tanh'] * (len(MLP_baseline_shape) - 1)
        MLP_baseline_act_func[-1] = None
        with tf.variable_scope('baseline'):
            self._MLP_baseline_out = nn.MLP(
                MLP_baseline_shape,
                init_method=self._init_method,
                act_func=MLP_baseline_act_func,
                add_bias=True,
                scope='vpred')

    # step 1: build the weight parameters (mlp, gru)
    with tf.variable_scope(self._name_scope):
        # step 1_1: build the embedding matrix (mlp)
        # tensor shape (None, para_size) --> (None, input_dim - ob_size)
        assert self._input_feat_dim % 2 == 0
        if 'noninput' not in self._gnn_embedding_option:
            self._MLP_embedding = {
                node_type: nn.MLP(
                    [self._input_feat_dim // 2,
                     self._node_info['para_size_dict'][node_type]],
                    init_method=self._init_method,
                    act_func=['tanh'] * 1,  # one layer at most
                    add_bias=True,
                    scope='MLP_embedding_node_type_{}'.format(node_type))
                for node_type in self._node_info['node_type_dict']
                if self._node_info['ob_size_dict'][node_type] > 0
            }
            self._MLP_embedding.update({
                node_type: nn.MLP(
                    [self._input_feat_dim,
                     self._node_info['para_size_dict'][node_type]],
                    init_method=self._init_method,
                    act_func=['tanh'] * 1,  # one layer at most
                    add_bias=True,
                    scope='MLP_embedding_node_type_{}'.format(node_type))
                for node_type in self._node_info['node_type_dict']
                if self._node_info['ob_size_dict'][node_type] == 0
            })
        else:
            embedding_vec_size = max(
                np.reshape(
                    [max(self._node_info['node_parameters'][i_key])
                     for i_key in self._node_info['node_parameters']],
                    [-1])) + 1
            embedding_vec_size = int(embedding_vec_size)
            self._embedding_variable = {}

            out = self._npr.randn(
                embedding_vec_size,
                self._input_feat_dim // 2).astype(np.float32)
            out *= 1.0 / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
            self._embedding_variable[False] = tf.Variable(
                out, name='embedding_HALF', trainable=self._trainable)

            if np.any([node_size == 0 for _, node_size
                       in self._node_info['ob_size_dict'].items()]):
                out = self._npr.randn(
                    embedding_vec_size,
                    self._input_feat_dim).astype(np.float32)
                out *= 1.0 / np.sqrt(
                    np.square(out).sum(axis=0, keepdims=True))
                self._embedding_variable[True] = tf.Variable(
                    out, name='embedding_FULL', trainable=self._trainable)

        # step 1_2: build the ob mapping matrix (mlp)
        # tensor shape (None, ob_size) --> (None, input_feat_dim / 2)
        self._MLP_ob_mapping = {
            node_type: nn.MLP(
                [self._input_feat_dim // 2,
                 self._node_info['ob_size_dict'][node_type]],
                init_method=self._init_method,
                act_func=['tanh'] * 1,  # one layer at most
                add_bias=True,
                scope='MLP_embedding_node_type_{}'.format(node_type))
            for node_type in self._node_info['node_type_dict']
            if self._node_info['ob_size_dict'][node_type] > 0
        }

    with tf.variable_scope(self._name_scope):
        # step 1_4: build the mlp for the propagation between nodes
        MLP_prop_shape = self._network_shape + \
            [self._hidden_dim] + [self._hidden_dim]
        self._MLP_prop = {
            i_edge: nn.MLP(
                MLP_prop_shape,
                init_method=self._init_method,
                act_func=['tanh'] * (len(MLP_prop_shape) - 1),
                add_bias=True,
                scope='MLP_prop_edge_{}'.format(i_edge))
            for i_edge in self._node_info['edge_type_list']
        }
        logger.info('building prop mlp for edge type {}'.format(
            self._node_info['edge_type_list']))

        # step 1_5: build the node update function for each node type
        if self._node_update_method == 'GRU':
            self._Node_update = {
                i_node_type: nn.GRU(
                    self._hidden_dim,
                    self._hidden_dim,
                    init_method=self._init_method,
                    scope='GRU_node_{}'.format(i_node_type))
                for i_node_type in self._node_info['node_type_dict']
            }
        else:
            assert self._node_update_method == 'MLP'
            hidden_MLP_update_shape = self._network_shape
            self._Node_update = {
                i_node_type: nn.MLPU(
                    message_dim=self._hidden_dim,
                    embedding_dim=self._hidden_dim,
                    hidden_shape=hidden_MLP_update_shape,
                    init_method=self._init_method,
                    act_func_type='tanh',
                    add_bias=True,
                    scope='MLPU_node_{}'.format(i_node_type))
                for i_node_type in self._node_info['node_type_dict']
            }
        logger.info(
            'building node update function for node type {}'.format(
                self._node_info['node_type_dict']))

        # step 1_6: the mlp for the mu of the actions
        MLP_out_shape = self._network_shape + [1] + \
            [self._hidden_dim]  # (l_1, l_2, ..., l_o, l_i)
        MLP_out_act_func = ['tanh'] * (len(MLP_out_shape) - 1)
        MLP_out_act_func[-1] = None

        if not self._is_baseline:
            self._MLP_Out = {
                output_type: nn.MLP(
                    MLP_out_shape,
                    init_method=self._init_method,
                    act_func=MLP_out_act_func,
                    add_bias=True,
                    scope='MLP_out')
                for output_type in self._node_info['output_type_dict']
            }
        # step 1_7 (optional): a single output mlp when this network is
        # used as the baseline (value prediction)
        else:
            self._MLP_Out = nn.MLP(
                MLP_out_shape,
                init_method=self._init_method,
                act_func=MLP_out_act_func,
                add_bias=True,
                scope='MLP_out')

    # step 1_8: build the log std for the actions
    with tf.variable_scope(self._name_scope):
        # size: [1, num_action]
        self._action_dist_logstd = tf.Variable(
            (0.0 * self._npr.randn(1, self._output_size)).astype(
                np.float32),
            name="policy_logstd",
            trainable=self._trainable)
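# The method above only creates the weights; the forward pass that uses them is
# built elsewhere in the repo.  The snippet below is an illustrative NumPy
# stand-in (not the repo's nn module) for the GGNN-style scheme these weights
# support: a per-edge-type MLP turns a sender's hidden state into a message,
# messages are sum-aggregated at each receiver, and a per-node-type update
# function (the GRU / MLPU built above) folds the aggregate into the node
# state.  All names here (propagation_step, prop_mlps, update_fns, ...) are
# hypothetical.

import numpy as np


def propagation_step(h, edges, prop_mlps, update_fns, node_types):
    """One message-passing sweep: per-edge-type message, sum-aggregate,
    per-node-type state update."""
    messages = np.zeros_like(h)
    for sender, receiver, edge_type in edges:
        # the per-edge-type propagation MLP maps the sender's hidden state
        # to a message, accumulated at the receiver
        messages[receiver] += prop_mlps[edge_type](h[sender])
    # the per-node-type update function folds the aggregated message into
    # each node's hidden state
    return np.stack([update_fns[node_types[i]](messages[i], h[i])
                     for i in range(h.shape[0])])


# toy usage with stand-in callables
hidden_dim = 4
h0 = np.random.randn(3, hidden_dim)
edges = [(0, 1, 'joint'), (1, 2, 'joint'), (2, 0, 'root')]
prop_mlps = {'joint': np.tanh, 'root': np.tanh}
update_fns = {'body': lambda m, h: 0.5 * h + 0.5 * np.tanh(m)}
h1 = propagation_step(h0, edges, prop_mlps, update_fns, ['body'] * 3)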
def _build_network_weights(self):
    ''' @brief: build the network
        @weights:
            _MLP_embedding (1 layer)
            _MLP_ob_mapping (1 layer)
            _MLP_prop (2 layer)
            _MLP_output (2 layer)
    '''
    # step 1: build the weight parameters (mlp, gru)
    with tf.variable_scope(self._name_scope):
        # step 1_1: build the embedding matrix (mlp)
        # tensor shape (None, para_size) --> (None, input_dim - ob_size)
        assert self._input_feat_dim % 2 == 0
        if 'noninput' not in self._gnn_embedding_option:
            self._MLP_embedding = {
                node_type: nn.MLP(
                    [self._input_feat_dim // 2,
                     self._node_info['para_size_dict'][node_type]],
                    init_method=self._init_method,
                    act_func=['tanh'] * 1,  # one layer at most
                    add_bias=True,
                    scope='MLP_embedding_node_type_{}'.format(node_type))
                for node_type in self._node_info['node_type_dict']
                if self._node_info['ob_size_dict'][node_type] > 0
            }
            self._MLP_embedding.update({
                node_type: nn.MLP(
                    [self._input_feat_dim,
                     self._node_info['para_size_dict'][node_type]],
                    init_method=self._init_method,
                    act_func=['tanh'] * 1,  # one layer at most
                    add_bias=True,
                    scope='MLP_embedding_node_type_{}'.format(node_type))
                for node_type in self._node_info['node_type_dict']
                if self._node_info['ob_size_dict'][node_type] == 0
            })
        else:
            embedding_vec_size = max(
                np.reshape(
                    [max(self._node_info['node_parameters'][i_key])
                     for i_key in self._node_info['node_parameters']],
                    [-1])) + 1
            embedding_vec_size = int(embedding_vec_size)
            self._embedding_variable = {}

            out = self._npr.randn(
                embedding_vec_size,
                self._input_feat_dim // 2).astype(np.float32)
            out *= 1.0 / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
            self._embedding_variable[False] = tf.Variable(
                out, name='embedding_HALF', trainable=self._trainable)

            if np.any([node_size == 0 for _, node_size
                       in self._node_info['ob_size_dict'].items()]):
                out = self._npr.randn(
                    embedding_vec_size,
                    self._input_feat_dim).astype(np.float32)
                out *= 1.0 / np.sqrt(
                    np.square(out).sum(axis=0, keepdims=True))
                self._embedding_variable[True] = tf.Variable(
                    out, name='embedding_FULL', trainable=self._trainable)

        # step 1_2: build the ob mapping matrix (mlp)
        # tensor shape (None, ob_size) --> (None, input_feat_dim / 2)
        self._MLP_ob_mapping = {
            node_type: nn.MLP(
                [self._input_feat_dim // 2,
                 self._node_info['ob_size_dict'][node_type]],
                init_method=self._init_method,
                act_func=['tanh'] * 1,  # one layer at most
                add_bias=True,
                scope='MLP_embedding_node_type_{}'.format(node_type))
            for node_type in self._node_info['node_type_dict']
            if self._node_info['ob_size_dict'][node_type] > 0
        }

        # step 1_4: build the mlp for the propagation between nodes
        '''
        MLP_prop_shape = self._network_shape + \
            [self._hidden_dim] + [self._hidden_dim]
        self._MLP_prop = {
            i_edge: nn.MLP(
                MLP_prop_shape,
                init_method=self._init_method,
                act_func=['tanh'] * (len(MLP_prop_shape) - 1),
                add_bias=True,
                scope='MLP_prop_edge_{}'.format(i_edge)
            )
            for i_edge in self._node_info['edge_type_list']
        }
        '''

        # step 1_5: build the node update function for each node type
        if self._node_update_method == 'GRU':
            self._Node_update = {
                i_node_type: nn.GRU(
                    self._hidden_dim,  # for both the message and ob
                    self._hidden_dim,
                    init_method=self._init_method,
                    scope='GRU_node_{}'.format(i_node_type))
                for i_node_type in self._node_info['node_type_dict']
            }
        else:
            assert False

        # step 1_6: the mlp for the mu of the actions
        # (l_1, l_2, ..., l_o, l_i)
        MLP_out_shape = self._network_shape + \
            [self.args.gnn_output_per_node] + [self._hidden_dim]
        MLP_out_act_func = ['tanh'] * (len(MLP_out_shape) - 1)
        MLP_out_act_func[-1] = None

        self._MLP_Out = {
            output_type: nn.MLP(
                MLP_out_shape,
                init_method=self._init_method,
                act_func=MLP_out_act_func,
                add_bias=True,
                scope='MLP_out')
            for output_type in self._node_info['output_type_dict']
        }

        # step 1_8: build the log std for the actions
        self._action_dist_logstd = tf.Variable(
            (0.0 * self._npr.randn(1, self._output_size)).astype(
                np.float32),
            name="policy_logstd",
            trainable=self._trainable)
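# In the 'noninput' embedding option above, node features come from a learned
# lookup table rather than an MLP: the table has one row per node-parameter id,
# and its columns are rescaled to unit L2 norm at initialisation.  The NumPy
# sketch below mirrors that initialisation and shows the kind of row lookup the
# shapes suggest (in the TF graph this would typically be a
# tf.nn.embedding_lookup on embedding_HALF / embedding_FULL); the ids and
# sizes here are made up for illustration.

import numpy as np

npr = np.random.RandomState(0)
embedding_vec_size = 10      # assumed: max node-parameter id + 1
input_feat_dim = 64          # assumed value; must be even here

# same recipe as embedding_HALF above: random normal entries, then every
# column rescaled to unit L2 norm
table = npr.randn(embedding_vec_size, input_feat_dim // 2).astype(np.float32)
table *= 1.0 / np.sqrt(np.square(table).sum(axis=0, keepdims=True))

# each node carries integer parameter ids; indexing the table by id gives the
# node's input feature vector
node_parameter_ids = np.array([0, 3, 7])
node_features = table[node_parameter_ids]   # shape (3, input_feat_dim // 2)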
def _build_network_weights(self):
    ''' @brief: build the network
    '''
    # step 1: build the weight parameters (mlp, gru)
    with tf.variable_scope(self._name_scope):
        # step 1_1: build the embedding matrix (mlp)
        # tensor shape (None, para_size) --> (None, input_dim - ob_size)
        self._MLP_embedding = {
            node_type: nn.MLP(
                [self._input_feat_dim,
                 self._node_info['para_size_dict'][node_type]],
                init_method=self._init_method,
                act_func=['tanh'] * 1,  # one layer at most
                add_bias=True,
                scope='MLP_embedding_node_type_{}'.format(node_type))
            for node_type in self._node_info['node_type_dict']
        }

        # step 1_4: build the mlp for the propagation between nodes
        MLP_prop_shape = self._network_shape + \
            [self._hidden_dim] + [self._hidden_dim]
        self._MLP_prop = {
            i_edge: nn.MLP(
                MLP_prop_shape,
                init_method=self._init_method,
                act_func=['tanh'] * (len(MLP_prop_shape) - 1),
                add_bias=True,
                scope='MLP_prop_edge_{}'.format(i_edge))
            for i_edge in self._node_info['edge_type_list']
        }

        # step 1_5: build the node update function for each node type
        if self._node_update_method == 'GRU':
            self._Node_update = {
                i_node_type: nn.GRU(
                    self._hidden_dim,
                    self._hidden_dim,
                    init_method=self._init_method,
                    scope='GRU_node_{}'.format(i_node_type))
                for i_node_type in self._node_info['node_type_dict']
            }
        else:
            assert self._node_update_method == 'MLP'
            hidden_MLP_update_shape = self._network_shape
            self._Node_update = {
                i_node_type: nn.MLPU(
                    message_dim=self._hidden_dim,
                    embedding_dim=self._hidden_dim,
                    hidden_shape=hidden_MLP_update_shape,
                    init_method=self._init_method,
                    act_func_type='tanh',
                    add_bias=True,
                    scope='MLPU_node_{}'.format(i_node_type))
                for i_node_type in self._node_info['node_type_dict']
            }

        # step 1_6: the mlp for the mu of the actions
        MLP_out_shape = self._network_shape + [1] + \
            [self._hidden_dim]  # (l_1, l_2, ..., l_o, l_i)
        MLP_out_act_func = ['tanh'] * (len(MLP_out_shape) - 1)

        if self.bayesian_op:
            self._MLP_Out = nn.MLP(
                MLP_out_shape,
                init_method=self._init_method,
                act_func=MLP_out_act_func,
                add_bias=True,
                scope='MLP_out',
                use_dropout=True)
            self.test_dropout_mask = []
            for feat_shape in [self._hidden_dim] + self._network_shape:
                # [64, 64, 1, 64]
                self.test_dropout_mask.append(
                    tf.placeholder(tf.float32, shape=[1, feat_shape]))
            self.dropout_mask_shape = [self._hidden_dim] + \
                self._network_shape
        else:
            self.test_dropout_mask = []
            self.dropout_mask_shape = None
            self._MLP_Out = nn.MLP(
                MLP_out_shape,
                init_method=self._init_method,
                act_func=MLP_out_act_func,
                add_bias=True,
                scope='MLP_out')
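# When bayesian_op is set, the output MLP is built with use_dropout=True and
# one placeholder mask of shape [1, feat_shape] per layer input, so a dropout
# mask can be sampled outside the graph and held fixed while the policy runs
# (MC-dropout style).  The sketch below only shows what feeding those
# placeholders might look like, under the assumption of inverted dropout with
# a hypothetical keep_prob; the actual feed into test_dropout_mask happens in
# code not shown in this section.

import numpy as np

npr = np.random.RandomState(0)
keep_prob = 0.9                     # hypothetical keep probability
dropout_mask_shape = [64, 64, 64]   # stands in for [hidden_dim] + network_shape


def sample_dropout_masks():
    # one inverted-dropout mask per layer input, shape (1, feat_shape),
    # matching the test_dropout_mask placeholders built above
    return [(npr.binomial(1, keep_prob, size=(1, s)) / keep_prob
             ).astype(np.float32)
            for s in dropout_mask_shape]


# sampling several mask sets and running the policy once per set gives an
# MC-dropout style spread over the action mean; feeding one fixed set keeps
# the output deterministic within a rollout
mask_sets = [sample_dropout_masks() for _ in range(5)]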