Example #1
 def _build_output(self, output_dict):
     '''
     Take RNN outputs and produce logits over the vocab.
     '''
     outputs = output_dict['outputs']
     outputs = transpose_first_two_dims(outputs)  # (batch_size, seq_len, output_size)
     logits = batch_linear(outputs, self.num_symbols, True)
     #logits = BasicDecoder.penalize_repetition(logits)
     return logits
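
Every example on this page calls a project-local batch_linear helper that is not shown here. Below is a minimal sketch of what such a helper might look like, assuming it concatenates its inputs along the last dimension and applies a shared affine projection; the variable names and the TF 0.x-style calls are assumptions, not the project's actual implementation.

import tensorflow as tf

def batch_linear(args, output_size, bias):
    '''Hypothetical sketch of a batch_linear helper (TF 0.x-style API).

    args: a (batch_size, seq_len, dim) tensor or a list of such tensors,
        concatenated along the last dimension before projection.
    output_size: size of the projected last dimension.
    bias: whether to add a bias term.
    '''
    if not isinstance(args, (list, tuple)):
        args = [args]
    inputs = tf.concat(2, args) if len(args) > 1 else args[0]
    input_size = inputs.get_shape().as_list()[-1]
    shape = tf.shape(inputs)
    w = tf.get_variable('weights', [input_size, output_size])
    out = tf.matmul(tf.reshape(inputs, [-1, input_size]), w)
    if bias:
        b = tf.get_variable('bias', [output_size],
                            initializer=tf.constant_initializer(0.0))
        out = out + b
    # Restore the (batch_size, seq_len, output_size) shape.
    return tf.reshape(out, tf.pack([shape[0], shape[1], output_size]))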
Example #2
 def _score_context_linear(self, h, context, checklist):
     '''
     Concatenate state h and context, combine them to a vector, then project to a scalar.
     h: (batch_size, context_len, rnn_size)
     context: (batch_size, context_len, context_size)
     checklist: (batch_size, context_len, 1)
     Return context_scores (batch_size, context_len)
     '''
     attn_size = self.rnn_size
     with tf.variable_scope('ScoreContextLinear'):
         with tf.variable_scope('Combine'):
             if self.checklist:
                 feature = [h, context, checklist]
             else:
                 feature = [h, context]
             attns = activation(batch_linear(feature, attn_size, False))  # (batch_size, context_len, attn_size)
         with tf.variable_scope('Project'):
             attns = tf.squeeze(batch_linear(attns, 1, False), [2])  # (batch_size, context_len)
     return attns
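
A quick shape walk-through of the scorer above in plain NumPy with toy sizes; all sizes and weight names here are made up for illustration, and tanh stands in for the project's activation.

import numpy as np

# Hypothetical toy sizes.
batch_size, context_len, rnn_size, context_size = 2, 5, 8, 6
attn_size = rnn_size
h = np.random.randn(batch_size, context_len, rnn_size)
context = np.random.randn(batch_size, context_len, context_size)

W1 = np.random.randn(rnn_size + context_size, attn_size)  # 'Combine' projection
W2 = np.random.randn(attn_size, 1)                        # 'Project' to a scalar

feature = np.concatenate([h, context], axis=2)  # (batch_size, context_len, rnn_size + context_size)
attns = np.tanh(feature @ W1)                   # (batch_size, context_len, attn_size)
scores = (attns @ W2).squeeze(2)                # (batch_size, context_len)
assert scores.shape == (batch_size, context_len)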
Example #3
 def select(self, init_output, context):
     '''
     Score each context entity against the initial output state and
     pool the context with sigmoid selection weights.
     init_output: (batch_size, rnn_size)
     context: (batch_size, context_len, context_size)
     Return selected_context (batch_size, context_size) and
     selection_scores (batch_size, context_len).
     '''
     context_len = tf.shape(context)[1]
     init_state = tf.tile(tf.expand_dims(init_output, 1), [1, context_len, 1])  # (batch_size, context_len, rnn_size)
     with tf.variable_scope('SelectEntity'):
         selection = batch_linear(tf.concat(2, [init_state, context]), 1, True)  # (batch_size, context_len, 1)
         selection_scores = tf.squeeze(selection, [2])
         selection = tf.sigmoid(selection)
         selected_context = tf.reduce_sum(tf.mul(selection, context), 1)  # (batch_size, context_size)
         # Normalize
         selected_context = tf.div(selected_context, (tf.reduce_sum(selection, 1) + EPS))
     return selected_context, selection_scores
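
The pooling step above amounts to a sigmoid-weighted mean over context entities. A tiny NumPy check with made-up sizes; EPS and all shapes below are assumptions for illustration only.

import numpy as np

EPS = 1e-12  # assumed small constant; the project defines its own EPS
batch_size, context_len, context_size = 2, 4, 3
context = np.random.randn(batch_size, context_len, context_size)
scores = np.random.randn(batch_size, context_len, 1)  # pre-sigmoid selection scores

selection = 1.0 / (1.0 + np.exp(-scores))            # (batch_size, context_len, 1)
selected = (selection * context).sum(axis=1)         # (batch_size, context_size)
selected = selected / (selection.sum(axis=1) + EPS)  # weighted mean over entities
assert selected.shape == (batch_size, context_size)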
Example #4
 def _score_context_bilinear(self, h, context):
     '''
     Project h to context_size then do dot-product with context.
     h: (batch_size, context_len, rnn_size)
     context: (batch_size, context_len, context_size)
     Return context_scores (batch_size, context_len)
     '''
     context_size = context.get_shape().as_list()[-1]
     with tf.variable_scope('ScoreContextBilinear'):
         h = batch_linear(h, context_size, False)  # (batch_size, context_len, context_size)
         attns = tf.reduce_sum(tf.mul(h, context), 2)  # (batch_size, context_len)
     return attns
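
The bilinear score for each entity is the dot product of the projected state with its context vector, i.e. score[b, i] = (h[b, i] @ W) . context[b, i]. A NumPy shape check under made-up sizes; W stands in for the batch_linear projection.

import numpy as np

batch_size, context_len, rnn_size, context_size = 2, 5, 8, 6
h = np.random.randn(batch_size, context_len, rnn_size)
context = np.random.randn(batch_size, context_len, context_size)
W = np.random.randn(rnn_size, context_size)  # stands in for the batch_linear projection

projected = h @ W                       # (batch_size, context_len, context_size)
scores = (projected * context).sum(-1)  # (batch_size, context_len)
assert scores.shape == (batch_size, context_len)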
Example #5
 def embed_path(self, node_embedding, edge_embedding, paths):
     '''
     Compute embedding of a path (edge_label, node_id).
     node_embedding: (batch_size, num_nodes, node_embed_size)
     edge_embedding: (num_edge_label, edge_embed_size)
     paths: (batch_size, num_paths, 3); each path is a tuple of
     (node_id, edge_label, node_id).
     '''
     edge_embeds = tf.nn.embedding_lookup(edge_embedding, paths[:, :, 1])
     node_embeds = batch_embedding_lookup(node_embedding, paths[:, :, 2])
     path_embed_size = self.config.node_embed_size
     path_embeds = activation(batch_linear([edge_embeds, node_embeds], path_embed_size, True))
     return path_embeds
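
A NumPy sketch of the lookups and shapes involved, with toy sizes; the per-batch gather below mimics what batch_embedding_lookup presumably does, and all sizes are hypothetical (bias term omitted for brevity).

import numpy as np

batch_size, num_nodes, node_embed_size = 2, 10, 4
num_edge_label, edge_embed_size = 5, 3
num_paths, path_embed_size = 6, 4

node_embedding = np.random.randn(batch_size, num_nodes, node_embed_size)
edge_embedding = np.random.randn(num_edge_label, edge_embed_size)
# Each path is (from_node_id, edge_label, to_node_id).
paths = np.random.randint(0, num_edge_label, size=(batch_size, num_paths, 3))

edge_embeds = edge_embedding[paths[:, :, 1]]            # (batch_size, num_paths, edge_embed_size)
node_embeds = node_embedding[np.arange(batch_size)[:, None],
                             paths[:, :, 2]]            # (batch_size, num_paths, node_embed_size)
combined = np.concatenate([edge_embeds, node_embeds], axis=2)
W = np.random.randn(edge_embed_size + node_embed_size, path_embed_size)
path_embeds = np.tanh(combined @ W)                     # (batch_size, num_paths, path_embed_size)
assert path_embeds.shape == (batch_size, num_paths, path_embed_size)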
Example #6
    def _update_utterance(self, entity_indices, utterance, curr_utterances):
        '''
        We first transform utterance into a dense matrix of the same size as curr_utterances,
        then return their sum.
        entity_indices: entity ids corresponding to rows to be updated in curr_utterances
        (batch_size, entity_cache_size)
        utterance: hidden states from the RNN
        (batch_size, utterance_size)
        NOTE: each curr_utterances matrix should have a row (e.g. the last one) reserved as the padded utterance.
        Padded entities in entity_indices correspond to the padded utterance. This is handled
        by GraphBatch during construction of the input data.
        '''
        entity_inds_shape = tf.shape(entity_indices)
        B = entity_inds_shape[0]  # batch_size is a variable
        E = entity_inds_shape[1]  # number of entities to be updated
        U = self.config.utterance_size
        # Construct indices corresponding to each entry to be updated in self.utterances
        # self.utterance has shape (batch_size, num_nodes, utterance_size)
        # Therefore each row in the indices matrix specifies (batch_id, node_id, utterance_dim)
        batch_inds = tf.reshape(
            tf.tile(tf.reshape(tf.range(B), [-1, 1]), [1, E * U]), [-1, 1])
        node_inds = tf.reshape(
            tf.tile(tf.reshape(entity_indices, [-1, 1]), [1, U]), [-1, 1])
        utterance_inds = tf.reshape(tf.tile(tf.range(U), [E * B]), [-1, 1])
        inds = tf.concat(1, [batch_inds, node_inds, utterance_inds])

        # Repeat utterance for each entity
        utterance = tf.reshape(tf.tile(utterance, [1, E]), [-1])
        new_utterance = tf.sparse_to_dense(inds,
                                           tf.shape(curr_utterances),
                                           utterance,
                                           validate_indices=False)

        if self.config.learned_decay:
            with tf.variable_scope('UpdateUtterance',
                                   reuse=self.update_initialized):
                weight = tf.sigmoid(
                    batch_linear(
                        tf.concat(2, [curr_utterances, new_utterance]), 1,
                        True))  # (batch_size, num_nodes, 1)
                if not self.update_initialized:
                    self.update_initialized = True

        if self.config.learned_decay:
            return tf.mul(1 - weight, curr_utterances) + tf.mul(
                weight, new_utterance)
        else:
            return curr_utterances * self.config.decay + new_utterance
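
The index construction in this example is the trickiest part: for every (batch, entity) pair it emits U rows of (batch_id, node_id, utterance_dim). A self-contained NumPy illustration with toy sizes (all numbers below are made up) that reproduces the same interleaving:

import numpy as np

# Toy sizes: batch B=2, E=1 entity to update per example, utterance dim U=3.
B, E, U = 2, 1, 3
entity_indices = np.array([[4], [7]])  # node ids to update for each batch item

batch_inds = np.tile(np.arange(B).reshape(-1, 1), [1, E * U]).reshape(-1, 1)
node_inds = np.tile(entity_indices.reshape(-1, 1), [1, U]).reshape(-1, 1)
utterance_inds = np.tile(np.arange(U), [E * B]).reshape(-1, 1)
inds = np.concatenate([batch_inds, node_inds, utterance_inds], axis=1)
print(inds)
# [[0 4 0]
#  [0 4 1]
#  [0 4 2]
#  [1 7 0]
#  [1 7 1]
#  [1 7 2]]
# Each row names one scalar slot of utterances[batch_id, node_id, :]
# that receives a value from the (tiled) new utterance.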