def build_critic(self):
    """Build the critic: embed and encode the input with the same trunk as
    the actor, pool it with a glimpse, and predict a scalar baseline value
    through a two-layer feed-forward head (stored in ``self.predictions``).
    """
    # Embedding / encoding trunk.
    emb = embed_seq(input_seq=self.input_, from_=self.dimension,
                    to_=self.input_embed, is_training=self.is_training,
                    BN=True, initializer=self.initializer)
    enc = encode_seq(input_seq=emb, input_dim=self.input_embed,
                     num_stacks=self.num_stacks, num_heads=self.num_heads,
                     num_neurons=self.num_neurons,
                     is_training=self.is_training)
    # Glimpse pools the encoding into a single frame [batch_size, input_embed].
    frame = full_glimpse(ref=enc, from_=self.input_embed, to_=self.num_units,
                         initializer=tf.contrib.layers.xavier_initializer())
    with tf.variable_scope("ffn"):
        # Two dense layers for the value prediction.
        hidden = tf.layers.dense(frame, self.num_neurons_critic,
                                 activation=tf.nn.relu,
                                 kernel_initializer=self.initializer)
        out_w = tf.get_variable("w1", [self.num_neurons_critic, 1],
                                initializer=self.initializer)
        out_b = tf.Variable(self.init_B, name="b1")
        self.predictions = tf.squeeze(tf.matmul(hidden, out_w) + out_b)
        tf.summary.scalar('predictions_mean',
                          tf.reduce_mean(self.predictions))
def forward(X, reuse=None):
    """Run a transformer-style encoder over token ids ``X`` and return
    per-position logits over the vocabulary.

    NOTE(review): ``self`` is referenced but is not a parameter — this is
    presumably defined inside a method so ``self`` is captured from the
    enclosing scope; confirm against the surrounding class.
    """
    # Token embedding plus learned positional encoding, then dropout.
    with tf.variable_scope('embed_seq', reuse=reuse):
        out = embed_seq(X, self.vocab_size, self.hidden_units,
                        zero_pad=True, scale=True)
    with tf.variable_scope('pos_enc', reuse=reuse):
        out += learned_positional_encoding(X, self.hidden_units,
                                           zero_pad=False, scale=False)
    out = tf.layers.dropout(out, self.dropout_rate,
                            training=self.is_training)
    # Stack of self-attention + position-wise feed-forward layers.
    for layer in range(self.n_layers):
        with tf.variable_scope('attn%d' % layer, reuse=reuse):
            out = self_multihead_attn(queries=out, keys=out,
                                      num_units=self.hidden_units,
                                      num_heads=self.num_heads,
                                      dropout_rate=self.dropout_rate,
                                      is_training=self.is_training)
        with tf.variable_scope('feedforward%d' % layer, reuse=reuse):
            out = pointwise_feedforward(
                out,
                num_units=[4 * self.hidden_units, self.hidden_units],
                activation=tf.nn.elu)
    # Final projection to vocabulary logits.
    return tf.layers.dense(out, self.vocab_size)
def add_forward_path(self):
    """Build the encoder forward pass over ``self.X`` and store the output
    projection in ``self.logits`` (shape: [..., n_out])."""
    # Embedding + learned positional encoding + dropout.
    with tf.variable_scope('encoder_embedding'):
        hidden = embed_seq(self.X, self.vocab_size, self.hidden_units,
                           zero_pad=False, scale=True)
    with tf.variable_scope('encoder_positional_encoding'):
        hidden += learned_positional_encoding(self.X, self.hidden_units,
                                              zero_pad=False, scale=False)
    with tf.variable_scope('encoder_dropout'):
        hidden = tf.layers.dropout(hidden, self.dropout_rate,
                                   training=self.is_training)
    # Encoder blocks: multi-head attention followed by feed-forward.
    for block in range(self.num_blocks):
        with tf.variable_scope('encoder_attn_%d' % block):
            hidden = multihead_attn(queries=hidden, keys=hidden,
                                    num_units=self.hidden_units,
                                    num_heads=self.num_heads,
                                    dropout_rate=self.dropout_rate,
                                    is_training=self.is_training)
        with tf.variable_scope('encoder_feedforward_%d' % block):
            hidden = pointwise_feedforward(
                hidden,
                num_units=[self.hidden_units, self.hidden_units],
                activation=tf.nn.elu)
    self.logits = tf.layers.dense(hidden, self.n_out)
def encode_decode(self):
    """Actor: embed + encode the input, then autoregressively decode a tour
    with a pointer mechanism.

    Side effects: sets ``self.tour`` (sampled permutation, closed back to
    its start), ``self.log_prob`` (summed log-probability of the tour) and
    ``self.entropies`` (summed per-step entropy), and adds two scalar
    summaries.
    """
    actor_embedding = embed_seq(input_seq=self.input_, from_=self.dimension, to_=self.input_embed, is_training=self.is_training, BN=True, initializer=self.initializer)
    actor_encoding = encode_seq(input_seq=actor_embedding, input_dim=self.input_embed, num_stacks=self.num_stacks, num_heads=self.num_heads, num_neurons=self.num_neurons, is_training=self.is_training)
    # At inference, tile the encoding across the batch so several tours can
    # be sampled from one encoded instance.
    # NOTE(review): assumes self.is_training is a Python bool here — if it
    # were a tf.Tensor this `==` comparison would not do what it looks like;
    # confirm against the constructor.
    if self.is_training == False:
        actor_encoding = tf.tile(actor_encoding, [self.batch_size, 1, 1])
    idx_list, log_probs, entropies = [], [], []  # tour indices, log-probs, entropies
    mask = tf.zeros((self.batch_size, self.max_length))  # mask of already-chosen actions
    n_hidden = actor_encoding.get_shape().as_list()[2]  # input_embed
    # Pointer-network parameters.
    W_ref = tf.get_variable("W_ref", [1, n_hidden, self.num_units], initializer=self.initializer)
    W_q = tf.get_variable("W_q", [self.query_dim, self.num_units], initializer=self.initializer)
    v = tf.get_variable("v", [self.num_units], initializer=self.initializer)
    # Precompute the reference projection once; 1x1 conv over the sequence.
    encoded_ref = tf.nn.conv1d(actor_encoding, W_ref, 1, "VALID")  # actor_encoding is the ref for actions [batch_size, seq_length, n_hidden]
    query1 = tf.zeros((self.batch_size, n_hidden))  # initial state
    query2 = tf.zeros((self.batch_size, n_hidden))  # previous state
    query3 = tf.zeros((self.batch_size, n_hidden))  # previous previous state
    W_1 = tf.get_variable("W_1", [n_hidden, self.query_dim], initializer=self.initializer)  # update trajectory (state)
    W_2 = tf.get_variable("W_2", [n_hidden, self.query_dim], initializer=self.initializer)
    W_3 = tf.get_variable("W_3", [n_hidden, self.query_dim], initializer=self.initializer)
    for step in range(self.max_length):  # sample next city from the pointer
        # Query mixes the encodings of the last three visited positions.
        query = tf.nn.relu(tf.matmul(query1, W_1) + tf.matmul(query2, W_2) + tf.matmul(query3, W_3))
        logits = pointer(encoded_ref=encoded_ref, query=query, mask=mask, W_ref=W_ref, W_q=W_q, v=v, C=config.C, temperature=config.temperature)
        prob = distr.Categorical(logits)  # logits = masked scores
        idx = prob.sample()
        idx_list.append(idx)  # tour index
        log_probs.append(prob.log_prob(idx))  # log prob of the sampled index
        entropies.append(prob.entropy())  # per-step entropy
        mask = mask + tf.one_hot(idx, self.max_length)  # forbid revisiting idx
        idx_ = tf.stack([tf.range(self.batch_size, dtype=tf.int32), idx], 1)  # idx paired with batch index for gather_nd
        # Rotate the trajectory state: newest visited encoding becomes query1.
        query3 = query2
        query2 = query1
        query1 = tf.gather_nd(actor_encoding, idx_)  # update trajectory (state)
    idx_list.append(idx_list[0])  # return to start
    self.tour = tf.stack(idx_list, axis=1)  # permutations
    self.log_prob = tf.add_n(log_probs)  # corresponding log-probability for backprop
    self.entropies = tf.add_n(entropies)
    tf.summary.scalar('log_prob_mean', tf.reduce_mean(self.log_prob))
    tf.summary.scalar('entropies_mean', tf.reduce_mean(self.entropies))