示例#1
0
 def testFunctionalFC(self):
   """Functional `fully_connected` with ReLU yields a [5, 2] output named 'fc/Relu'."""
   random_input = tf.random_uniform((5, 3), seed=1)
   fc_out = core_layers.fully_connected(
       random_input, 2, name='fc', activation=tf.nn.relu)

   # The test expects exactly two trainable variables to exist in the graph
   # after the layer has been built.
   trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
   self.assertEqual(len(trainable), 2)

   self.assertEqual(fc_out.op.name, 'fc/Relu')
   self.assertEqual(fc_out.get_shape().as_list(), [5, 2])
def rr_rnn(config, wx, wx_num, cx, cx_num, sx, sx_num, n_unit, is_training):
    """Combine word, char and syllable features and run a stacked GRU over them.

    ``cx`` and ``sx`` are each encoded with ``char_rnn_encoder`` (under their
    own variable scopes), concatenated with ``wx`` on axis 2, projected to
    ``n_unit`` features, passed through dropout and finally fed to
    ``bidirectional_rnn``.

    Args:
        config: object providing ``keep_prob`` and ``cell_stack_count``.
        wx: word-level input; its static dimension 1 is used as the word count.
        wx_num: sequence lengths handed to ``bidirectional_rnn``.
        cx, cx_num: char-level input and lengths for ``char_rnn_encoder``.
        sx, sx_num: syllable-level input and lengths for ``char_rnn_encoder``.
        n_unit: width of the projection and of every GRU cell.
        is_training: forwarded to ``layers.dropout``.

    Returns:
        ``(state, None)`` where ``state`` is element 0 of the second value
        returned by ``bidirectional_rnn``.
    """
    dropout_keep = config.keep_prob
    n_words = wx.get_shape()[1].value

    # NOTE: the scope name 'char_neocde' (sic) is part of the variable names
    # and is kept verbatim.
    with tf.variable_scope('char_neocde'):
        char_features = char_rnn_encoder(config, cx, cx_num, n_words)
    with tf.variable_scope('syll_encode'):
        syll_features = char_rnn_encoder(config, sx, sx_num, n_words)

    projected = fully_connected(
        tf.concat([wx, char_features, syll_features], axis=2), n_unit)

    with tf.variable_scope('dropout'):
        projected = layers.dropout(
            projected, keep_prob=dropout_keep, is_training=is_training)

    # NOTE(review): the same GRUCell object is repeated for every layer and
    # the same stacked cell is passed for both directions. Depending on the
    # TF version this may share weights or be rejected -- confirm intent.
    stacked_cell = MultiRNNCell([GRUCell(n_unit)] * config.cell_stack_count)
    _outputs, final_state = bidirectional_rnn(
        stacked_cell, stacked_cell, projected, wx_num)

    return final_state[0], None
示例#3
0
 def testFunctionalFCInScope(self):
     """Check the weight-variable name produced under three scoping styles."""
     # Case 1: explicit layer name inside an enclosing variable scope.
     with tf.variable_scope('test'):
         first_input = tf.random_uniform((5, 3), seed=1)
         core_layers.fully_connected(first_input, 2, name='fc')
         first_weights = tf.trainable_variables()[0]
         self.assertEqual(first_weights.name, 'test/fc/weights:0')

     # Case 2: the scope object itself is passed as the layer name.
     # Index 2 is used for the weights of the second layer built in the graph.
     with tf.variable_scope('test1') as scope:
         second_input = tf.random_uniform((5, 3), seed=1)
         core_layers.fully_connected(second_input, 2, name=scope)
         second_weights = tf.trainable_variables()[2]
         self.assertEqual(second_weights.name, 'test1/weights:0')

     # Case 3: no name given, so the default layer name is expected.
     with tf.variable_scope('test2'):
         third_input = tf.random_uniform((5, 3), seed=1)
         core_layers.fully_connected(third_input, 2)
         third_weights = tf.trainable_variables()[4]
         self.assertEqual(third_weights.name,
                          'test2/fully_connected/weights:0')
示例#4
0
def merge_weight_predict(is_train,
                         context_rep,
                         question_rep,
                         context_mask,
                         merger,
                         post_merger,
                         max_pool,
                         predictor,
                         answer,
                         multiply_probs=None):
    """Merge context with question, score each sentence, pool and predict.

    Args:
        is_train: forwarded to every ``apply`` call.
        context_rep: context representation passed to ``merger`` as ``tensor``.
        question_rep: question representation passed as ``fixed_tensor``.
        context_mask: mask forwarded to merger, post-merger and pooling.
        merger: object whose ``apply`` merges context and question.
        post_merger: optional extra layer applied to the merged tensor.
        max_pool: object whose ``apply`` pools the per-sentence logits.
        predictor: object whose ``apply`` builds the prediction.
        answer: forwarded unchanged to ``predictor.apply``.
        multiply_probs: optional probability ``m``. When given, the pooled
            logit ``x`` is replaced by ``log(m) - log(1 + exp(-x) - m)``,
            i.e. the logit of ``m * sigmoid(x)`` (with ``EPSILON`` added
            inside both logs).

    Returns:
        Tuple ``(merged_rep, sentences_logits, prediction)``.
    """
    with tf.variable_scope("merger"):
        merged = merger.apply(is_train,
                              tensor=context_rep,
                              fixed_tensor=question_rep,
                              mask=context_mask)

    if post_merger is not None:
        with tf.variable_scope("post_merger"):
            merged = post_merger.apply(is_train, merged, mask=context_mask)

    with tf.variable_scope("sentence_level_predictions"):
        glorot = get_keras_initialization('glorot_uniform')
        sentences_logits = fully_connected(merged,
                                           1,
                                           use_bias=True,
                                           activation=None,
                                           kernel_initializer=glorot)
        pooled_logits = max_pool.apply(is_train, sentences_logits,
                                       context_mask)

        if multiply_probs is not None:
            # Work in log space: log-odds of (multiply_probs * sigmoid(x)).
            log_num = tf.log(multiply_probs + EPSILON)
            log_den = tf.log(1. + tf.exp(-pooled_logits) - multiply_probs +
                             EPSILON)
            pooled_logits = log_num - log_den

    with tf.variable_scope("predictor"):
        prediction = predictor.apply(is_train, pooled_logits, answer)

    return merged, sentences_logits, prediction
示例#5
0
    def apply(self, is_train, tensor1, tensor2):
        """Merge two tensors, weight the result along axis 1 and encode it.

        The merged tensor is multiplied by softmax weights computed from
        ``keys`` (the merged tensor when ``self.weight_context`` is truthy,
        otherwise ``tensor1``) and then reduced according to ``self.encode``.

        Args:
            is_train: forwarded to ``self.merge.apply``.
            tensor1: first input; also the weighting keys when
                ``self.weight_context`` is falsy.
            tensor2: second input to ``self.merge.apply``.

        Returns:
            ``tf.reduce_sum(weighted, axis=1)`` for ``encode == 'sum'`` or
            ``tf.layers.flatten(weighted)`` for ``encode == 'concat'``.

        Raises:
            NotImplementedError: if ``self.encode`` or ``self.weight_mode``
                is not one of the supported values.
        """
        # Fail fast: previously an unknown `encode` silently returned None
        # after the whole sub-graph had already been built.
        if self.encode not in ('sum', 'concat'):
            raise NotImplementedError(
                'Unsupported encode mode: %r' % (self.encode,))

        init = get_keras_initialization(self.init)
        with tf.variable_scope('merge'):
            merged = self.merge.apply(is_train, tensor1, tensor2)

        keys = merged if self.weight_context else tensor1
        keys_shape = keys.shape.as_list()

        with tf.variable_scope('weighting'):
            if self.weight_mode == 'per_encoding':
                # One weight vector and one bias per position on axis 1; the
                # einsum requires `keys` to be rank 3 with static dims 1 and 2.
                weights = tf.get_variable('weights',
                                          shape=[keys_shape[1], keys_shape[2]],
                                          initializer=init)
                biases = tf.get_variable('biases',
                                         shape=[keys_shape[1]],
                                         initializer=tf.zeros_initializer())
                unnormalized_alphas = tf.einsum('btd,td->bt', keys,
                                                weights) + biases
            elif self.weight_mode == 'fully_connected':
                # A single dense layer over the flattened keys emits one
                # score per position on axis 1.
                flattened = tf.layers.flatten(keys)
                unnormalized_alphas = fully_connected(flattened,
                                                      units=keys_shape[1],
                                                      kernel_initializer=init)
            else:
                raise NotImplementedError(
                    'Unsupported weight mode: %r' % (self.weight_mode,))

            normalized_alphas = tf.nn.softmax(unnormalized_alphas, axis=-1)
            weighted_rep = tf.expand_dims(normalized_alphas, axis=-1) * merged

        if self.encode == 'sum':
            return tf.reduce_sum(weighted_rep, axis=1)
        # Only 'concat' can remain after the validation at the top.
        return tf.layers.flatten(weighted_rep)
def rr_swide(config, wx, wx_num, cx, cx_num, sx, sx_num, n_unit, is_training):
    """Combine word, char and syllable features and apply a shallow wide CNN.

    ``cx`` and ``sx`` are each encoded with ``char_rnn_encoder``, concatenated
    with ``wx`` on axis 2, projected to ``config.rnn_dim`` features, passed
    through dropout and fed to ``shallow_wide_cnn``.

    Args:
        config: object providing ``keep_prob`` and ``rnn_dim``.
        wx: word-level input; its static dimension 1 is used as the word count.
        wx_num: not used by this function.
        cx, cx_num: char-level input and lengths for ``char_rnn_encoder``.
        sx, sx_num: syllable-level input and lengths for ``char_rnn_encoder``.
        n_unit: not used by this function.
        is_training: forwarded to ``layers.dropout``.

    Returns:
        ``(cnn_output, None)``.
    """
    # Unlike a plain int, this is the Dimension object from the static shape.
    n_words = wx.get_shape()[1]
    dropout_keep = config.keep_prob

    with tf.variable_scope('char_encode'):
        char_features = char_rnn_encoder(config, cx, cx_num, n_words)
    with tf.variable_scope('syll_encode'):
        syll_features = char_rnn_encoder(config, sx, sx_num, n_words)

    features = tf.concat([wx, char_features, syll_features], axis=2)
    features = fully_connected(features, config.rnn_dim)

    with tf.variable_scope('dropout'):
        features = layers.dropout(features,
                                  keep_prob=dropout_keep,
                                  is_training=is_training)

    with tf.variable_scope('shallow_cnn'):
        # Arguments (3, 4, 5) and 100 are passed through to shallow_wide_cnn
        # exactly as in the original call.
        cnn_output = shallow_wide_cnn(features, (3, 4, 5), 100)

    return cnn_output, None
示例#7
0
 def apply(self, is_train, x, mask=None):
     """Multiply `x` element-wise by a sigmoid gate computed from `x` itself.

     `is_train` and `mask` are accepted but not used.
     """
     n_features = x.shape.as_list()[-1]

     # NOTE(review): a falsy `self.bias` (None or 0) hands `None` to
     # `bias_initializer` rather than an explicit zeros initializer --
     # confirm that is the intended default.
     if self.bias:
         gate_bias_init = tf.constant_initializer(self.bias)
     else:
         gate_bias_init = None

     gate = fully_connected(
         x,
         n_features,
         activation=tf.nn.sigmoid,
         bias_initializer=gate_bias_init,
         kernel_initializer=get_keras_initialization(self.init),
         name="compute-gate")
     return gate * x
示例#8
0
 def apply(self, is_train, x, mask=None):
     """Apply a dense layer whose output width equals the last dim of `x`.

     `is_train` and `mask` are accepted but not used.
     """
     width = x.shape.as_list()[-1]
     activation_fn = activations.get(self.activation)
     kernel_init = _wrap_init(initializers.get(self.w_init))
     return fully_connected(x,
                            width,
                            use_bias=self.bias,
                            activation=activation_fn,
                            kernel_initializer=kernel_init)
示例#9
0
 def apply(self, is_train, x, mask=None):
     """Apply a dense layer with `self.n_out` output units to `x`.

     `is_train` and `mask` are accepted but not used.
     """
     # Backwards compatibility: an unset (None) bias flag means "use a bias".
     use_bias = True if self.bias is None else self.bias
     activation_fn = get_keras_activation(self.activation)
     kernel_init = _wrap_init(initializers.get(self.w_init))
     return fully_connected(x,
                            self.n_out,
                            use_bias=use_bias,
                            activation=activation_fn,
                            kernel_initializer=kernel_init)
示例#10
0
    def _get_predictions_for(self, is_train, question_embed, question_mask,
                             context_embed, context_mask, answer, question_lm,
                             context_lm, sentence_segments, sentence_mask):
        """Build the two-step relevance prediction over a pair of contexts.

        The context inputs are split into two contexts along axis 1. The
        first context is scored against the encoded question; the question is
        then reformulated using the first context and used to score the
        second context.

        Args:
            is_train: forwarded to every layer's ``apply``.
            question_embed: question embedding; starting question
                representation.
            question_mask: mask forwarded alongside the question.
            context_embed: tensor holding both contexts, unstacked on axis 1.
            context_mask: masks for both contexts, unstacked on axis 1.
            answer: indexable; ``answer[0]`` and ``answer[1]`` are passed
                (each wrapped in a list) to the first and second prediction.
            question_lm: extra question input, used only if ``self.use_elmo``.
            context_lm: extra context input (unstacked on axis 1), used only
                if ``self.use_elmo``.
            sentence_segments: per-context sentence segments, unstacked on
                axis 1.
            sentence_mask: per-context sentence masks, unstacked on axis 1.

        Returns:
            ``MultipleBinaryPredictions`` wrapping the predictions for the
            first and second context, in that order.
        """
        question_rep, context_rep = question_embed, context_embed
        # Split every context-level input into its two contexts (axis 1).
        context1_rep, context2_rep = tf.unstack(context_rep, axis=1, num=2)
        context1_mask, context2_mask = tf.unstack(context_mask, axis=1, num=2)
        context1_sentence_segments, context2_sentence_segments = tf.unstack(
            sentence_segments, axis=1, num=2)
        context1_sentence_mask, context2_sentence_mask = tf.unstack(
            sentence_mask, axis=1, num=2)
        # Optional extra positional arguments for the embed mapper; they stay
        # empty unless ELMo inputs are enabled.
        q_lm_in, c1_lm_in, c2_lm_in = [], [], []
        if self.use_elmo:
            context1_lm, context2_lm = tf.unstack(context_lm, axis=1, num=2)
            q_lm_in = [question_lm]
            c1_lm_in = [context1_lm]
            c2_lm_in = [context2_lm]
        if self.embed_mapper is not None:
            # The first call creates the mapper's variables; the second
            # context and the question reuse them via reuse=True.
            with tf.variable_scope("map_embed"):
                context1_rep = self.embed_mapper.apply(is_train, context1_rep,
                                                       context1_mask,
                                                       *c1_lm_in)
            with tf.variable_scope("map_embed", reuse=True):
                context2_rep = self.embed_mapper.apply(is_train, context2_rep,
                                                       context2_mask,
                                                       *c2_lm_in)
                question_rep = self.embed_mapper.apply(is_train, question_rep,
                                                       question_mask, *q_lm_in)

        with tf.variable_scope("seq_enc"):
            question_enc = self.sequence_encoder.apply(is_train, question_rep,
                                                       question_mask)
            # Name the tensor and register it in the intermediate-layer
            # collection.
            question_enc = tf.identity(question_enc, name='encode_question')
            tf.add_to_collection(INTERMEDIATE_LAYER_COLLECTION, question_enc)

        # Local helper: encode a context into sentence representations.
        # Note that its `sentence_mask` parameter shadows the method argument
        # of the same name.
        def encode_sentences(context, sentence_segs, sentence_mask, rep_name):
            context = self.sentences_encoder.apply(context, sentence_segs,
                                                   sentence_mask)
            if self.sentence_mapper is not None:
                with tf.variable_scope('sentence_mapper'):
                    context = self.sentence_mapper.apply(is_train,
                                                         context,
                                                         mask=sentence_mask)
            context = tf.identity(context, name=rep_name)
            tf.add_to_collection(INTERMEDIATE_LAYER_COLLECTION, context)
            return context

        # Both contexts go through the same 'sentences_enc' scope; the second
        # call reuses the variables created by the first.
        with tf.variable_scope('sentences_enc'):
            context1_sent_rep = encode_sentences(context1_rep,
                                                 context1_sentence_segments,
                                                 context1_sentence_mask,
                                                 'encode_context1')
        with tf.variable_scope('sentences_enc', reuse=True):
            context2_sent_rep = encode_sentences(context2_rep,
                                                 context2_sentence_segments,
                                                 context2_sentence_mask,
                                                 'encode_context2')

        # First Iteration (same as in the single context model)
        with tf.variable_scope("context1_relevance"):
            c1_q_merged_rep, context1_sentences_logits, context1_pred = \
                merge_weight_predict(is_train=is_train, context_rep=context1_sent_rep, question_rep=question_enc,
                                     context_mask=context1_sentence_mask, merger=self.merger,
                                     post_merger=self.post_merger, max_pool=self.max_pool,
                                     predictor=self.predictor, answer=[answer[0]])

        # Question Reformulation
        with tf.variable_scope("reformulation"):
            with tf.variable_scope('c2q'):
                # Attend from the question over the first context, then
                # re-encode the updated question.
                question_rep = self.context_to_question_attention.apply(
                    is_train,
                    x=question_rep,
                    keys=context1_rep,
                    memories=context1_rep,
                    x_mask=question_mask,
                    memory_mask=context1_mask)
                reread_q_enc = self.sequence_encoder.apply(
                    is_train, question_rep, question_mask)
            with tf.variable_scope('q2c'):
                # Attend from the first context over the question; note that
                # `question_rep` here is already the output of the c2q step.
                context1_rep = self.question_to_context_attention.apply(
                    is_train,
                    x=context1_rep,
                    keys=question_rep,
                    memories=question_rep,
                    x_mask=context1_mask,
                    memory_mask=question_mask)
                reread_c1_enc = self.sequence_encoder.apply(
                    is_train, context1_rep, context1_mask)
            with tf.variable_scope('reread_merge'):
                reformulated_q = self.reread_merger.apply(
                    is_train, reread_q_enc, reread_c1_enc)
                # Project to the last dimension of the first iteration's
                # merged representation.
                reformulated_q = fully_connected(
                    reformulated_q,
                    c1_q_merged_rep.shape.as_list()[-1],
                    use_bias=True,
                    activation=get_keras_activation('relu'),
                    kernel_initializer=get_keras_initialization(
                        'glorot_uniform'))
            reformulated_q = tf.identity(reformulated_q,
                                         name='reformulated_question')
            tf.add_to_collection(INTERMEDIATE_LAYER_COLLECTION, reformulated_q)

        # Second Iteration
        with tf.variable_scope("context2_relevance"):
            # Optionally feed the first iteration's probabilities into the
            # second prediction (see `multiply_probs` in merge_weight_predict).
            first_iter_probs = None
            if self.multiply_iteration_probs:
                first_iter_probs = tf.expand_dims(context1_pred.get_probs(),
                                                  axis=1)
            c2_q_merged_rep, context2_sentences_logits, context2_pred = \
                merge_weight_predict(is_train=is_train, context_rep=context2_sent_rep, question_rep=reformulated_q,
                                     context_mask=context2_sentence_mask, merger=self.merger,
                                     post_merger=self.post_merger, max_pool=self.max_pool,
                                     predictor=self.predictor, answer=[answer[1]], multiply_probs=first_iter_probs)

        # Only the two prediction objects are returned; the merged
        # representations and sentence logits are not used further here.
        return MultipleBinaryPredictions([context1_pred, context2_pred])
                                    (character_size, char_dim))
    char_embed = tf.nn.embedding_lookup(char_embedder, chars)

    syll_embedder = tf.get_variable('syll_embedder', (syll_size, syll_dim))
    syll_embed = tf.nn.embedding_lookup(syll_embedder, sylls)

    from core_layer import han1_syll_cnn_char_rnn, han1_syll_cnn_char_cnn
    core_layer_output = han1_syll_cnn_char_cnn(config, word_embed, sent_len,
                                               char_embed, word_len,
                                               syll_embed, None, fc_dim,
                                               is_training)

    with tf.variable_scope("output"):
        output = fully_connected(
            core_layer_output,
            fc_dim,
            use_bias=True,
            activation=activations.get("relu"),
            kernel_initializer=initializers.get("glorot_uniform"))
        output = layers.dropout(output,
                                keep_prob=config.keep_prob,
                                is_training=is_training)
        output = fully_connected(
            output,
            1,
            use_bias=True,
            activation=None,
            kernel_initializer=initializers.get("glorot_uniform"))

    y_logits = tf.sigmoid(output) * 9 + 1
    predictions = y_logits
    acc = tf.reduce_mean(
示例#12
0
    def _get_predictions_for(self, is_train, question_embed, question_mask,
                             context_embed, context_mask, answer, question_lm,
                             context_lm, sentence_segments, sentence_mask):
        """Build the single-context prediction graph.

        Produces yes/no choice logits from the question, yes/no answer logits
        and per-sentence supporting-fact logits from the attended context, and
        hands all of them to ``self.predictor``.

        Args:
            is_train: forwarded to every layer's ``apply``.
            question_embed: question embedding; starting question
                representation.
            question_mask: mask forwarded alongside the question.
            context_embed: context tensor with a singleton axis 1 that is
                removed via ``tf.unstack(..., num=1)``.
            context_mask: context mask, unstacked the same way.
            answer: forwarded unchanged to ``self.predictor.apply``.
            question_lm: extra question input, used only if ``self.use_elmo``.
            context_lm: extra context input, used only if ``self.use_elmo``.
            sentence_segments: sentence segments, unstacked on axis 1.
            sentence_mask: sentence mask, unstacked on axis 1.

        Returns:
            Whatever ``self.predictor.apply`` returns.
        """
        question_rep, context_rep = question_embed, context_embed
        # Each context-level input carries exactly one context on axis 1;
        # the one-element tuple unpacking strips that axis.
        context_rep, = tf.unstack(context_rep, axis=1, num=1)
        context_mask, = tf.unstack(context_mask, axis=1, num=1)
        context_sentence_segments, = tf.unstack(sentence_segments,
                                                axis=1,
                                                num=1)
        context_sentence_mask, = tf.unstack(sentence_mask, axis=1, num=1)
        # Optional extra positional arguments for the embed mapper; they stay
        # empty unless ELMo inputs are enabled.
        q_lm_in, c_lm_in = [], []
        if self.use_elmo:
            context_lm, = tf.unstack(context_lm, axis=1, num=1)
            q_lm_in = [question_lm]
            c_lm_in = [context_lm]
        if self.embed_mapper is not None:
            # The context call creates the mapper's variables; the question
            # call reuses them via reuse=True.
            with tf.variable_scope("map_embed"):
                context_rep = self.embed_mapper.apply(is_train, context_rep,
                                                      context_mask, *c_lm_in)
            with tf.variable_scope("map_embed", reuse=True):
                question_rep = self.embed_mapper.apply(is_train, question_rep,
                                                       question_mask, *q_lm_in)

        # Two logits computed from the question alone.
        with tf.variable_scope('yes_no_question_prediction'):
            yes_no_q_enc = self.yes_no_question_encoder.apply(
                is_train, question_rep, question_mask)
            yes_no_choice_logits = fully_connected(
                yes_no_q_enc,
                2,
                use_bias=True,
                activation=None,
                kernel_initializer=get_keras_initialization('glorot_uniform'),
                name='yes_no_choice')

        if self.question_mapper is not None:
            with tf.variable_scope("map_question"):
                question_rep = self.question_mapper.apply(
                    is_train, question_rep, question_mask)

        if self.context_mapper is not None:
            with tf.variable_scope("map_context"):
                context_rep = self.context_mapper.apply(
                    is_train, context_rep, context_mask)

        # NOTE: the scope name "buid_memories" (sic) is part of the variable
        # names and must not be changed without migrating checkpoints.
        with tf.variable_scope("buid_memories"):
            keys, memories = self.memory_builder.apply(is_train, question_rep,
                                                       question_mask)

        # Attend from the context over the question-derived keys/memories.
        with tf.variable_scope("apply_attention"):
            context_rep = self.attention.apply(is_train, context_rep, keys,
                                               memories, context_mask,
                                               question_mask)

        if self.match_encoder is not None:
            with tf.variable_scope("process_attention"):
                context_rep = self.match_encoder.apply(is_train, context_rep,
                                                       context_mask)

        # Two logits computed from the attended context.
        with tf.variable_scope('yes_no_answer_prediction'):
            yes_no_c_enc = self.yes_no_context_encoder.apply(
                is_train, context_rep, context_mask)
            yes_no_answer_logits = fully_connected(
                yes_no_c_enc,
                2,
                use_bias=True,
                activation=None,
                kernel_initializer=get_keras_initialization('glorot_uniform'),
                name='yes_no_answer')

        # One logit per sentence for supporting-fact prediction.
        with tf.variable_scope('supporting_fact_prediction'):
            pre_context_sents = context_rep
            if self.pre_sp_mapper is not None:
                with tf.variable_scope('pre_sp_mapper'):
                    pre_context_sents = self.pre_sp_mapper.apply(
                        is_train, pre_context_sents, context_mask)
            context_sents = self.sentences_encoder.apply(
                pre_context_sents, context_sentence_segments,
                context_sentence_mask)
            # Named identity op so the tensor can be looked up as 'debug'.
            context_sents = tf.identity(context_sents, name='debug')
            if self.sentence_mapper is not None:
                with tf.variable_scope('sentence_mapper'):
                    context_sents = self.sentence_mapper.apply(
                        is_train, context_sents, mask=context_sentence_mask)
            sentences_logits = fully_connected(
                context_sents,
                1,
                use_bias=True,
                activation=None,
                kernel_initializer=get_keras_initialization('glorot_uniform'),
                name='supporting_fact_fc')

        with tf.variable_scope("predict"):
            # The trailing size-1 axis (axis 2) of the sentence logits is
            # squeezed away before being passed on.
            return self.predictor.apply(
                is_train,
                context_rep,
                answer,
                context_mask,
                yes_no_choice_logits=yes_no_choice_logits,
                yes_no_answer_logits=yes_no_answer_logits,
                sentence_logits=tf.squeeze(sentences_logits, axis=[2]),
                sentence_mask=context_sentence_mask)
示例#13
0
    cx_ = tf.placeholder(tf.int32, (None, max_word_num, max_char_num),
                         name='cx_')
    sx_ = tf.placeholder(tf.int32, (None, max_word_num, max_syll_num),
                         name='sx_')
    y_ = tf.placeholder(tf.int32, (None), name='y_')

    c_embed = tf.get_variable('c_embed', (character_size, char_dim))
    s_embed = tf.get_variable('s_embed', (syllable_size, syll_dim))

    cx = tf.nn.embedding_lookup(c_embed, cx_)
    sx = tf.nn.embedding_lookup(s_embed, sx_)

    core_output = cnn_char_syll(config, wx, cx, sx, is_training)
    preds = fully_connected(
        core_output,
        10,
        activation=activations.get('relu'),
        kernel_initializer=initializers.get('glorot_uniform'))
    pred = tf.argmax(preds, axis=1, output_type=tf.int32) + 1

    y_arr = tf.one_hot(y_, 10)

    acc = tf.reduce_mean(tf.to_float(tf.equal(pred, y_)))
    loss = tf.losses.mean_squared_error(y_arr, preds)
    mse = tf.losses.mean_squared_error(y_, pred)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    ##############################################################################################################

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()
def rr_han(config, word_embed, sent_len, char_embed, word_len, syll_embed,
           syll_len, n_unit, is_training):
    '''
    Single-level HAN encoder with syllable- and char-level RNN features.

    Syllable and char embeddings are each run through a 2-layer bidirectional
    GRU, concatenated with the word embeddings, projected to ``rnn_dim`` with
    a ReLU dense layer, encoded by a 3-layer bidirectional GRU and finally
    reduced with ``task_specific_attention``. Dropout follows the projection
    and the attention.

    @ Input spec (as documented by the original author)

    word_embed [batch_size, max_sent_len, word_dim]
    sent_len [batch_size]
    char_embed [batch_size, max_sent_len, max_word_len, char_dim]
    word_len [batch_size, max_sent_len]
    syll_embed [batch_size, max_sent_len, max_syll_len, syll_dim]
    syll_len [batch_size, max_sent_len]

    config must provide: char_dim, syll_dim, max_sentence_length,
    max_word_length, max_syll_num, keep_prob, rnn_dim.

    @ Output spec (as documented by the original author)
    return [batch, n_unit]
    '''

    char_dim = config.char_dim
    syll_dim = config.syll_dim
    max_sent_len = config.max_sentence_length
    max_word_len = config.max_word_length
    max_syll_num = config.max_syll_num
    keep_prob = config.keep_prob
    rnn_dim = config.rnn_dim

    # Both sub-word encoders use the same stack depth.
    stack_depth = 2

    def _subword_rnn(scope_name, embed, lengths, max_units, dim):
        # Flatten (batch, sentence position) into one axis, run the stacked
        # bidirectional GRU and fold the second return value back per word.
        # NOTE(review): the same GRUCell object is repeated per layer and the
        # same stacked cell serves both directions -- confirm intent.
        with tf.variable_scope(scope_name) as scope:
            stacked = MultiRNNCell([GRUCell(dim)] * stack_depth)
            flat_embed = tf.cast(
                tf.reshape(embed, [-1, max_units, dim]), tf.float32)
            flat_len = tf.reshape(lengths, [-1])
            _, rnn_state = bidirectional_rnn(stacked,
                                             stacked,
                                             flat_embed,
                                             flat_len,
                                             scope=scope)
            return tf.reshape(rnn_state,
                              [-1, max_sent_len, dim * 2 * stack_depth])

    # Syllables first, then chars: graph construction order is preserved.
    syll_features = _subword_rnn('syll_rnn', syll_embed, syll_len,
                                 max_syll_num, syll_dim)
    char_features = _subword_rnn('char_rnn', char_embed, word_len,
                                 max_word_len, char_dim)

    combined = tf.concat([word_embed, char_features, syll_features], axis=2)

    with tf.variable_scope('embedding'):
        projected = fully_connected(
            combined,
            rnn_dim,
            use_bias=True,
            activation=activations.get("relu"),
            kernel_initializer=initializers.get("glorot_uniform"))

        with tf.variable_scope('dropout'):
            projected = layers.dropout(projected,
                                       keep_prob=keep_prob,
                                       is_training=is_training)

    with tf.variable_scope('encoder') as enc_scope:
        sentence_cell = MultiRNNCell([GRUCell(rnn_dim)] * 3)
        encoded, _ = bidirectional_rnn(sentence_cell,
                                       sentence_cell,
                                       projected,
                                       sent_len,
                                       scope=enc_scope)

        with tf.variable_scope('attention') as attn_scope:
            attended = task_specific_attention(encoded,
                                               n_unit,
                                               scope=attn_scope)

        with tf.variable_scope('dropout'):
            attended = layers.dropout(attended,
                                      keep_prob=keep_prob,
                                      is_training=is_training)

    return attended