Пример #1
0
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):
            if config.use_char_emb:  # 计算字符emb
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat",
                                                   shape=[VC, dc],
                                                   dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    filter_sizes = list(
                        map(int,
                            config.out_channel_dims.split(',')))  # TODO What?
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            with tf.variable_scope(tf.get_variable_scope(),
                                                   reuse=True):
                                qq = multi_conv1d(Acq,
                                                  filter_sizes,
                                                  heights,
                                                  "VALID",
                                                  self.is_train,
                                                  config.keep_prob,
                                                  scope="xx")
                        else:
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=get_initializer(
                                config.emb_mat))  # emb_mat is glove
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(
                            [word_emb_mat, self.new_emb_mat], 0)

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat,
                                                self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:
                    xx = tf.concat([xx, Ax], 3)  # [N, M, JX, di]
                    qq = tf.concat([qq, Aq], 2)  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                    qq = highway_network(qq,
                                         config.highway_num_layers,
                                         True,
                                         wd=config.wd,
                                         is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq

        cell = BasicLSTMCell(d, state_is_tuple=True)
        d_cell = SwitchableDropoutWrapper(
            cell, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             d_cell,
                                             d_cell,
                                             qq,
                                             q_len,
                                             dtype='float',
                                             scope='u1')  # [N, J, d], [N, d]
            u = tf.concat([fw_u, bw_u], 2)
            if config.share_lstm_weights:
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                    (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                        cell, cell, xx, x_len, dtype='float',
                        scope='u1')  # [N, M, JX, 2d] TODO JX == x_len?
                    h = tf.concat([fw_h, bw_h], 3)  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='h1')  # [N, M, JX, 2d]
                h = tf.concat([fw_h, bw_h], 3)  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]),
                               [N * M, JQ, 2 * d])
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell = AttentionCell(
                    cell,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                p0 = attention_layer(config,
                                     self.is_train,
                                     h,
                                     u,
                                     h_mask=self.x_mask,
                                     u_mask=self.q_mask,
                                     scope="p0",
                                     tensor_dict=self.tensor_dict)
                first_cell = d_cell

            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                first_cell, first_cell, p0, x_len, dtype='float',
                scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat([fw_g0, bw_g0], 3)
            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(
                first_cell, first_cell, g0, x_len, dtype='float',
                scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat([fw_g1, bw_g1], 3)

            # Output Layer
            logits = get_logits([g1, p0],
                                d,
                                True,
                                wd=config.wd,
                                input_keep_prob=config.input_keep_prob,
                                mask=self.x_mask,
                                is_train=self.is_train,
                                func=config.answer_func,
                                scope='logits1')
            a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]),
                          tf.reshape(logits, [N, M * JX]))
            a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1),
                          [1, M, JX, 1])

            (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(
                d_cell,
                d_cell,
                tf.concat([p0, g1, a1i, g1 * a1i], 3),
                x_len,
                dtype='float',
                scope='g2')  # [N, M, JX, 2d]
            g2 = tf.concat([fw_g2, bw_g2], 3)
            logits2 = get_logits([g2, p0],
                                 d,
                                 True,
                                 wd=config.wd,
                                 input_keep_prob=config.input_keep_prob,
                                 mask=self.x_mask,
                                 is_train=self.is_train,
                                 func=config.answer_func,
                                 scope='logits2')

            flat_logits = tf.reshape(logits, [-1, M * JX])
            flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
            yp = tf.reshape(flat_yp, [-1, M, JX])
            flat_logits2 = tf.reshape(logits2, [-1, M * JX])
            flat_yp2 = tf.nn.softmax(flat_logits2)
            yp2 = tf.reshape(flat_yp2, [-1, M, JX])

            self.tensor_dict['g1'] = g1
            self.tensor_dict['g2'] = g2

            self.logits = flat_logits
            self.logits2 = flat_logits2
            self.yp = yp
            self.yp2 = yp2
Пример #2
0
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        print("VW:", VW, "N:", N, "M:", M, "JX:", JX, "JQ:", JQ)
        JA = config.max_answer_length
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        print("VW:", VW, "N:", N, "M:", M, "JX:", JX, "JQ:", JQ)
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):
            # Char-CNN Embedding
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat",
                                                   shape=[VC, dc],
                                                   dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    filter_sizes = list(
                        map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="xx")
                        else:
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            # Word Embedding
            if config.use_word_emb:
                with tf.variable_scope("emb_var") as scope, tf.device(
                        "/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    tf.get_variable_scope().reuse_variables()
                    self.word_emb_scope = scope
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(
                            [word_emb_mat, self.new_emb_mat], 0)

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat,
                                                self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                # Concat Char-CNN Embedding and Word Embedding
                if config.use_char_emb:
                    xx = tf.concat([xx, Ax], 3)  # [N, M, JX, di]
                    qq = tf.concat([qq, Aq], 2)  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

            # exact match
            if config.use_exact_match:
                emx = tf.expand_dims(tf.cast(self.emx, tf.float32), -1)
                xx = tf.concat([xx, emx], 3)  # [N, M, JX, di+1]
                emq = tf.expand_dims(tf.cast(self.emq, tf.float32), -1)
                qq = tf.concat([qq, emq], 2)  # [N, JQ, di+1]

        # 2 layer highway network on Concat Embedding
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq

        # Bidirection-LSTM (3rd layer on paper)
        cell = GRUCell(d) if config.GRU else BasicLSTMCell(d,
                                                           state_is_tuple=True)
        d_cell = SwitchableDropoutWrapper(
            cell, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("prepro"):
            (fw_u, bw_u), _ = bidirectional_dynamic_rnn(
                d_cell, d_cell, qq, q_len, dtype='float',
                scope='u1')  # [N, J, d], [N, d]
            u = tf.concat([fw_u, bw_u], 2)
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='u1')  # [N, M, JX, 2d]
                h = tf.concat([fw_h, bw_h], 3)  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='h1')  # [N, M, JX, 2d]
                h = tf.concat([fw_h, bw_h], 3)  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        # Attention Flow Layer (4th layer on paper)
        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]),
                               [N * M, JQ, 2 * d])
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell = AttentionCell(
                    cell,
                    u,
                    size=d,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                p0 = attention_layer(config,
                                     self.is_train,
                                     h,
                                     u,
                                     h_mask=self.x_mask,
                                     u_mask=self.q_mask,
                                     scope="p0",
                                     tensor_dict=self.tensor_dict)
                first_cell = d_cell

        # Modeling layer (5th layer on paper)
            tp0 = p0
            for layer_idx in range(config.LSTM_num_layers - 1):
                (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                    first_cell,
                    first_cell,
                    p0,
                    x_len,
                    dtype='float',
                    scope="g_{}".format(layer_idx))  # [N, M, JX, 2d]
                p0 = tf.concat([fw_g0, bw_g0], 3)
            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(
                first_cell, first_cell, p0, x_len, dtype='float',
                scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat([fw_g1, bw_g1], 3)  # [N, M, JX, 2d]

        # Self match layer
        with tf.variable_scope("SelfMatch"):
            s0 = tf.reshape(g1, [N * M, JX, 2 * d])  # [N * M, JX, 2d]
            x_mask = tf.reshape(self.x_mask, [N * M, JX])
            first_cell = AttentionCell(cell,
                                       s0,
                                       size=d,
                                       mask=x_mask,
                                       is_train=self.is_train)
            (fw_s, bw_s), (fw_s_f, bw_s_f) = bidirectional_dynamic_rnn(
                first_cell, first_cell, s0, x_len, dtype='float',
                scope='s')  # [N, M, JX, 2d]
            s1 = tf.concat([fw_s, bw_s], 2)  # [N * M, JX, 2d], M == 1

            # prepare for PtrNet
            encoder_output = tf.expand_dims(s1, 1)  # [N, M, JX, 2d]
            encoder_output = tf.expand_dims(
                tf.cast(self.x_mask,
                        tf.float32), -1) * encoder_output  # [N, M, JX, 2d]

            if config.GRU:
                encoder_state_final = tf.concat((fw_s_f, bw_s_f),
                                                1,
                                                name='encoder_concat')
            else:
                if isinstance(fw_s_f, LSTMStateTuple):
                    encoder_state_c = tf.concat((fw_s_f.c, bw_s_f.c),
                                                1,
                                                name='encoder_concat_c')
                    encoder_state_h = tf.concat((fw_s_f.h, bw_s_f.h),
                                                1,
                                                name='encoder_concat_h')
                    encoder_state_final = LSTMStateTuple(c=encoder_state_c,
                                                         h=encoder_state_h)
                elif isinstance(fw_s_f, tf.Tensor):
                    encoder_state_final = tf.concat((fw_s_f, bw_s_f),
                                                    1,
                                                    name='encoder_concat')
                else:
                    encoder_state_final = None
                    tf.logging.error("encoder_state_final not set")

            print("encoder_state_final:", encoder_state_final)

        with tf.variable_scope("output"):
            # eos_symbol = config.eos_symbol
            # next_symbol = config.next_symbol

            tf.assert_equal(
                M,
                1)  # currently dynamic M is not supported, thus we assume M==1
            answer_string = tf.placeholder(
                shape=(N, 1, JA + 1), dtype=tf.int32,
                name='answer_string')  # [N, M, JA + 1]
            answer_string_mask = tf.placeholder(
                shape=(N, 1, JA + 1), dtype=tf.bool,
                name='answer_string_mask')  # [N, M, JA + 1]
            answer_string_length = tf.placeholder(
                shape=(N, 1),
                dtype=tf.int32,
                name='answer_string_length',
            )  # [N, M]
            self.tensor_dict['answer_string'] = answer_string
            self.tensor_dict['answer_string_mask'] = answer_string_mask
            self.tensor_dict['answer_string_length'] = answer_string_length
            self.answer_string = answer_string
            self.answer_string_mask = answer_string_mask
            self.answer_string_length = answer_string_length

            answer_string_flattened = tf.reshape(answer_string,
                                                 [N * M, JA + 1])
            self.answer_string_flattened = answer_string_flattened  # [N * M, JA+1]
            print("answer_string_flattened:", answer_string_flattened)

            answer_string_length_flattened = tf.reshape(
                answer_string_length, [N * M])
            self.answer_string_length_flattened = answer_string_length_flattened  # [N * M]
            print("answer_string_length_flattened:",
                  answer_string_length_flattened)

            decoder_cell = GRUCell(2 * d) if config.GRU else BasicLSTMCell(
                2 * d, state_is_tuple=True)

            with tf.variable_scope("Decoder"):
                decoder_train_logits = ptr_decoder(
                    decoder_cell,
                    tf.reshape(tp0, [N * M, JX, 2 * d]),  # [N * M, JX, 2d]
                    tf.reshape(encoder_output,
                               [N * M, JX, 2 * d]),  # [N * M, JX, 2d]
                    encoder_final_state=encoder_state_final,
                    max_encoder_length=config.sent_size_th,
                    decoder_output_length=
                    answer_string_length_flattened,  # [N * M]
                    batch_size=N,  # N * M (M=1)
                    attention_proj_dim=self.config.decoder_proj_dim,
                    scope='ptr_decoder'
                )  # [batch_size, dec_len*, enc_seq_len + 1]

                self.decoder_train_logits = decoder_train_logits
                print("decoder_train_logits:", decoder_train_logits)
                self.decoder_train_softmax = tf.nn.softmax(
                    self.decoder_train_logits)
                self.decoder_inference = tf.argmax(
                    decoder_train_logits, axis=2,
                    name='decoder_inference')  # [N, JA + 1]

            self.yp = tf.ones([N, M, JX], dtype=tf.int32) * -1
            self.yp2 = tf.ones([N, M, JX], dtype=tf.int32) * -1
Пример #3
0
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(
                            0, [word_emb_mat, self.new_emb_mat])
                    print(word_emb_mat.get_shape().as_list())

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat,
                                                self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq

                xx = Ax
                qq = Aq

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        cell = BasicLSTMCell(d, state_is_tuple=True)
        d_cell = SwitchableDropoutWrapper(
            cell, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             d_cell,
                                             d_cell,
                                             qq,
                                             q_len,
                                             dtype='float',
                                             scope='u1')  # [N, J, d], [N, d]
            u = tf.concat(2, [fw_u, bw_u])
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='u1')  # [N, M, JX, 2d]
                h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='h1')  # [N, M, JX, 2d]
                h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]),
                               [N * M, JQ, 2 * d])
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell = AttentionCell(
                    cell,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                p0, p1 = attention_layer(config,
                                         self.is_train,
                                         h,
                                         u,
                                         h_mask=self.x_mask,
                                         u_mask=self.q_mask,
                                         scope="p0",
                                         tensor_dict=self.tensor_dict)
                first_cell = d_cell
        # with tf.variable_scope("activate"):
        #     p0 =  tf.nn.relu(_linear(tf.reshape(p0,[-1,1200]),300,bias=0.01,bias_start=0.0,scope='relu'))
        #     if config.share_lstm_weights:
        #         tf.get_variable_scope().reuse_variables()
        #         p1 =  tf.nn.relu(_linear(tf.reshape(p1,[-1,1200]),300,bias=0.01,bias_start=0.0,scope='relu'))
        with tf.variable_scope('two_lstm'):
            p0 = tf.reshape(p0, [N, 1, -1, 300])
            p1 = tf.reshape(p1, [N, 1, -1, 300])
            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                first_cell, first_cell, p0, x_len, dtype='float',
                scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat(3, [fw_g0, bw_g0])
            q_len_new = tf.tile(tf.expand_dims(q_len, 1), [1, M])
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(
                    first_cell,
                    first_cell,
                    p1,
                    q_len_new,
                    dtype='float',
                    scope='g0')  # [N, M, JX, 2d]
                g1 = tf.concat(3, [fw_g1, bw_g1])
        # with tf.variable_scope('two_lstm_1'):
        #     (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(first_cell, first_cell, g0, x_len, dtype='float', scope='g0')  # [N, M, JX, 2d]
        #     g2 = tf.concat(3, [fw_g2, bw_g2])
        #     q_len_new = tf.tile(tf.expand_dims(q_len,1),[1,M])
        #     if config.share_lstm_weights:
        #         tf.get_variable_scope().reuse_variables()
        #         (fw_g3, bw_g3), _ = bidirectional_dynamic_rnn(first_cell, first_cell, g1, q_len_new, dtype='float', scope='g0')  # [N, M, JX, 2d]
        #         g3 = tf.concat(3, [fw_g3, bw_g3])

            g0 = tf.reduce_sum(tf.reduce_max(g0, 2), 1)
            g1 = tf.reduce_sum(tf.reduce_max(g1, 2), 1)

        logits = _linear([g0, g1, tf.abs(tf.subtract(g0, g1)), g0 * g1],
                         2,
                         bias=0.01,
                         bias_start=0.0,
                         scope='logits1')

        flat_logits2 = tf.reshape(logits, [N, 2])

        yp = tf.nn.softmax(flat_logits2)  # [-1, M*JX]

        self.tensor_dict['g0'] = g0
        self.tensor_dict['g1'] = g1

        self.logits = flat_logits2

        self.yp = yp
Пример #4
0
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        beam_width = config.beam_width
        GO_TOKEN = 0
        EOS_TOKEN = 1

        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat",
                                                   shape=[VC, dc],
                                                   dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    filter_sizes = list(
                        map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="xx")
                        else:
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=get_initializer(config.emb_mat),
                            trainable=True)
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(
                            axis=0, values=[word_emb_mat, self.new_emb_mat])
                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat,
                                                self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:
                    xx = tf.concat(axis=3, values=[xx, Ax])  # [N, M, JX, di]
                    qq = tf.concat(axis=2, values=[qq, Aq])  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq

        cell_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell_fw = SwitchableDropoutWrapper(
            cell_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell_bw = SwitchableDropoutWrapper(
            cell_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell2_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell2_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell2_fw = SwitchableDropoutWrapper(
            cell2_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell2_bw = SwitchableDropoutWrapper(
            cell2_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell3_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell3_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell3_fw = SwitchableDropoutWrapper(
            cell3_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell3_bw = SwitchableDropoutWrapper(
            cell3_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell4_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell4_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell4_fw = SwitchableDropoutWrapper(
            cell4_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell4_bw = SwitchableDropoutWrapper(
            cell4_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             d_cell_fw,
                                             d_cell_bw,
                                             qq,
                                             q_len,
                                             dtype='float',
                                             scope='u1')  # [N, J, d], [N, d]
            u = tf.concat(axis=2, values=[fw_u, bw_u])
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), ((_, fw_h_f),
                               (_, bw_h_f)) = bidirectional_dynamic_rnn(
                                   cell_fw,
                                   cell_bw,
                                   xx,
                                   x_len,
                                   dtype='float',
                                   scope='u1')  # [N, M, JX, 2d]
                h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), ((_, fw_h_f),
                               (_, bw_h_f)) = bidirectional_dynamic_rnn(
                                   cell_fw,
                                   cell_bw,
                                   xx,
                                   x_len,
                                   dtype='float',
                                   scope='h1')  # [N, M, JX, 2d]
                h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]),
                               [N * M, JQ, 2 * d])
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell_fw = AttentionCell(
                    cell2_fw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                first_cell_bw = AttentionCell(
                    cell2_bw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                second_cell_fw = AttentionCell(
                    cell3_fw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                second_cell_bw = AttentionCell(
                    cell3_bw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                p0 = attention_layer(config,
                                     self.is_train,
                                     h,
                                     u,
                                     h_mask=self.x_mask,
                                     u_mask=self.q_mask,
                                     scope="p0",
                                     tensor_dict=self.tensor_dict)
                first_cell_fw = d_cell2_fw
                second_cell_fw = d_cell3_fw
                first_cell_bw = d_cell2_bw
                second_cell_bw = d_cell3_bw

            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                first_cell_fw,
                first_cell_bw,
                p0,
                x_len,
                dtype='float',
                scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat(axis=3, values=[fw_g0, bw_g0])
            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(
                second_cell_fw,
                second_cell_bw,
                g0,
                x_len,
                dtype='float',
                scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat(axis=3, values=[fw_g1, bw_g1])

            logits = get_logits([g1, p0],
                                d,
                                True,
                                wd=config.wd,
                                input_keep_prob=config.input_keep_prob,
                                mask=self.x_mask,
                                is_train=self.is_train,
                                func=config.answer_func,
                                scope='logits1')
            a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]),
                          tf.reshape(logits, [N, M * JX]))
            a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1),
                          [1, M, JX, 1])

            (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(
                d_cell4_fw,
                d_cell4_bw,
                tf.concat(axis=3, values=[p0, g1, a1i, g1 * a1i]),
                x_len,
                dtype='float',
                scope='g2')  # [N, M, JX, 2d]
            g2 = tf.concat(axis=3, values=[fw_g2, bw_g2])
            logits2 = get_logits([g2, p0],
                                 d,
                                 True,
                                 wd=config.wd,
                                 input_keep_prob=config.input_keep_prob,
                                 mask=self.x_mask,
                                 is_train=self.is_train,
                                 func=config.answer_func,
                                 scope='logits2')

            flat_logits = tf.reshape(logits, [-1, M * JX])
            flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
            flat_logits2 = tf.reshape(logits2, [-1, M * JX])
            flat_yp2 = tf.nn.softmax(flat_logits2)

            if config.na:
                na_bias = tf.get_variable("na_bias", shape=[], dtype='float')
                na_bias_tiled = tf.tile(tf.reshape(na_bias, [1, 1]),
                                        [N, 1])  # [N, 1]
                concat_flat_logits = tf.concat(
                    axis=1, values=[na_bias_tiled, flat_logits])
                concat_flat_yp = tf.nn.softmax(concat_flat_logits)
                na_prob = tf.squeeze(tf.slice(concat_flat_yp, [0, 0], [-1, 1]),
                                     [1])
                flat_yp = tf.slice(concat_flat_yp, [0, 1], [-1, -1])

                concat_flat_logits2 = tf.concat(
                    axis=1, values=[na_bias_tiled, flat_logits2])
                concat_flat_yp2 = tf.nn.softmax(concat_flat_logits2)
                na_prob2 = tf.squeeze(
                    tf.slice(concat_flat_yp2, [0, 0], [-1, 1]), [1])  # [N]
                flat_yp2 = tf.slice(concat_flat_yp2, [0, 1], [-1, -1])

                self.concat_logits = concat_flat_logits
                self.concat_logits2 = concat_flat_logits2
                self.na_prob = na_prob * na_prob2

            yp = tf.reshape(flat_yp, [-1, M, JX])
            yp2 = tf.reshape(flat_yp2, [-1, M, JX])
            wyp = tf.nn.sigmoid(logits2)

            self.tensor_dict['g1'] = g1
            self.tensor_dict['g2'] = g2

            self.logits = flat_logits
            self.logits2 = flat_logits2
            self.yp = yp
            self.yp2 = yp2
            self.wyp = wyp

        with tf.variable_scope("q_gen"):
            # Question Generation Using (Paragraph & Predicted Ans Pos)
            NM = config.max_num_sents * config.batch_size

            # Separated encoder
            #ss = tf.reshape(xx, (-1, JX, dw+dco))

            q_worthy = tf.reduce_sum(
                tf.to_int32(self.y), axis=2
            )  # so we get probability distribution of answer-likely. (N, M)
            q_worthy = tf.expand_dims(tf.to_int32(tf.argmax(q_worthy, axis=1)),
                                      axis=1)  # (N) -> (N, 1)
            q_worthy = tf.concat([
                tf.expand_dims(tf.range(0, N, dtype=tf.int32), axis=1),
                q_worthy
            ],
                                 axis=1)
            # example : [0, 9], [1, 11], [2, 8], [3, 5], [4, 0], [5, 1] ...

            ss = tf.gather_nd(xx, q_worthy)
            syp = tf.expand_dims(tf.gather_nd(yp, q_worthy), axis=-1)
            syp2 = tf.expand_dims(tf.gather_nd(yp2, q_worthy), axis=-1)
            ss_with_ans = tf.concat([ss, syp, syp2], axis=2)

            qg_dim = 600
            cell_fw, cell_bw = rnn.DropoutWrapper(rnn.GRUCell(qg_dim), input_keep_prob=config.input_keep_prob), \
                               rnn.DropoutWrapper(rnn.GRUCell(qg_dim), input_keep_prob=config.input_keep_prob)
            s_outputs, s_states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, ss_with_ans, dtype=tf.float32)
            s_outputs = tf.concat(s_outputs, axis=2)
            s_states = tf.concat(s_states, axis=1)

            start_tokens = tf.zeros([N], dtype=tf.int32)
            self.inp_q_with_GO = tf.concat(
                [tf.expand_dims(start_tokens, axis=1), self.q], axis=1)
            # supervise if mode is train
            if config.mode == "train":
                emb_q = tf.nn.embedding_lookup(params=word_emb_mat,
                                               ids=self.inp_q_with_GO)
                #emb_q = tf.reshape(tf.tile(tf.expand_dims(emb_q, axis=1), [1, M, 1, 1]), (NM, JQ+1, dw))
                train_helper = seq2seq.TrainingHelper(emb_q, [JQ] * N)
            else:
                s_outputs = seq2seq.tile_batch(s_outputs,
                                               multiplier=beam_width)
                s_states = seq2seq.tile_batch(s_states, multiplier=beam_width)

            cell = rnn.DropoutWrapper(rnn.GRUCell(num_units=qg_dim * 2),
                                      input_keep_prob=config.input_keep_prob)
            attention_mechanism = seq2seq.BahdanauAttention(num_units=qg_dim *
                                                            2,
                                                            memory=s_outputs)
            attn_cell = seq2seq.AttentionWrapper(cell,
                                                 attention_mechanism,
                                                 attention_layer_size=qg_dim *
                                                 2,
                                                 output_attention=True,
                                                 alignment_history=False)
            total_glove_vocab_size = 78878  #72686
            out_cell = rnn.OutputProjectionWrapper(attn_cell,
                                                   VW + total_glove_vocab_size)
            if config.mode == "train":
                decoder_initial_states = out_cell.zero_state(
                    batch_size=N, dtype=tf.float32).clone(cell_state=s_states)
                decoder = seq2seq.BasicDecoder(
                    cell=out_cell,
                    helper=train_helper,
                    initial_state=decoder_initial_states)
            else:
                decoder_initial_states = out_cell.zero_state(
                    batch_size=N * beam_width,
                    dtype=tf.float32).clone(cell_state=s_states)
                decoder = seq2seq.BeamSearchDecoder(
                    cell=out_cell,
                    embedding=word_emb_mat,
                    start_tokens=start_tokens,
                    end_token=EOS_TOKEN,
                    initial_state=decoder_initial_states,
                    beam_width=beam_width,
                    length_penalty_weight=0.0)
            outputs = seq2seq.dynamic_decode(decoder=decoder,
                                             maximum_iterations=JQ)
            if config.mode == "train":
                gen_q = outputs[0].sample_id
                gen_q_prob = outputs[0].rnn_output
                gen_q_states = outputs[1]
            else:
                gen_q = outputs[0].predicted_ids[:, :, 0]
                gen_q_prob = tf.nn.embedding_lookup(
                    params=word_emb_mat, ids=outputs[0].predicted_ids[:, :, 0])
                gen_q_states = outputs[1]

            self.gen_q = gen_q
            self.gen_q_prob = gen_q_prob
            self.gen_q_states = gen_q_states
Пример #5
0
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat",
                                                   shape=[VC, dc],
                                                   dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    filter_sizes = list(
                        map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="xx")
                        else:
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=tf.random_normal_initializer)
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(
                            axis=0, values=[word_emb_mat, self.new_emb_mat])

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat,
                                                self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:
                    xx = tf.concat(axis=3, values=[xx, Ax])  # [N, M, JX, di]
                    qq = tf.concat(axis=2, values=[qq, Aq])  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq

        cell_fw = LSTMCell(d, state_is_tuple=True, name="basic_lstm_cell")
        cell_bw = LSTMCell(d, state_is_tuple=True, name="basic_lstm_cell")
        d_cell_fw = SwitchableDropoutWrapper(
            cell_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell_bw = SwitchableDropoutWrapper(
            cell_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell2_fw = LSTMCell(d, state_is_tuple=True, name="basic_lstm_cell")
        cell2_bw = LSTMCell(d, state_is_tuple=True, name="basic_lstm_cell")
        d_cell2_fw = SwitchableDropoutWrapper(
            cell2_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell2_bw = SwitchableDropoutWrapper(
            cell2_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell3_fw = LSTMCell(d, state_is_tuple=True, name="basic_lstm_cell")
        cell3_bw = LSTMCell(d, state_is_tuple=True, name="basic_lstm_cell")
        d_cell3_fw = SwitchableDropoutWrapper(
            cell3_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell3_bw = SwitchableDropoutWrapper(
            cell3_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell4_fw = LSTMCell(d, state_is_tuple=True, name="basic_lstm_cell")
        cell4_bw = LSTMCell(d, state_is_tuple=True, name="basic_lstm_cell")
        d_cell4_fw = SwitchableDropoutWrapper(
            cell4_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell4_bw = SwitchableDropoutWrapper(
            cell4_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             d_cell_fw,
                                             d_cell_bw,
                                             qq,
                                             q_len,
                                             dtype='float',
                                             scope='u1')  # [N, J, d], [N, d]
            u = tf.concat(axis=2, values=[fw_u, bw_u])
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, xx, x_len, dtype='float',
                    scope='u1')  # [N, M, JX, 2d]
                h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, xx, x_len, dtype='float',
                    scope='h1')  # [N, M, JX, 2d]
                h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]),
                               [N * M, JQ, 2 * d])
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell_fw = AttentionCell(
                    cell2_fw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                first_cell_bw = AttentionCell(
                    cell2_bw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                second_cell_fw = AttentionCell(
                    cell3_fw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                second_cell_bw = AttentionCell(
                    cell3_bw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                p0 = attention_layer(config,
                                     self.is_train,
                                     h,
                                     u,
                                     h_mask=self.x_mask,
                                     u_mask=self.q_mask,
                                     scope="p0",
                                     tensor_dict=self.tensor_dict)
                first_cell_fw = d_cell2_fw
                second_cell_fw = d_cell3_fw
                first_cell_bw = d_cell2_bw
                second_cell_bw = d_cell3_bw

            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                first_cell_fw,
                first_cell_bw,
                p0,
                x_len,
                dtype='float',
                scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat(axis=3, values=[fw_g0, bw_g0])
            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(
                second_cell_fw,
                second_cell_bw,
                g0,
                x_len,
                dtype='float',
                scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat(axis=3, values=[fw_g1, bw_g1])

            logits = get_logits([g1, p0],
                                d,
                                True,
                                wd=config.wd,
                                input_keep_prob=config.input_keep_prob,
                                mask=self.x_mask,
                                is_train=self.is_train,
                                func=config.answer_func,
                                scope='logits1')
            a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]),
                          tf.reshape(logits, [N, M * JX]))
            a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1),
                          [1, M, JX, 1])

            (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(
                d_cell4_fw,
                d_cell4_bw,
                tf.concat(axis=3, values=[p0, g1, a1i, g1 * a1i]),
                x_len,
                dtype='float',
                scope='g2')  # [N, M, JX, 2d]
            g2 = tf.concat(axis=3, values=[fw_g2, bw_g2])
            logits2 = get_logits([g2, p0],
                                 d,
                                 True,
                                 wd=config.wd,
                                 input_keep_prob=config.input_keep_prob,
                                 mask=self.x_mask,
                                 is_train=self.is_train,
                                 func=config.answer_func,
                                 scope='logits2')

            flat_logits = tf.reshape(logits, [-1, M * JX])
            flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
            flat_logits2 = tf.reshape(logits2, [-1, M * JX])
            flat_yp2 = tf.nn.softmax(flat_logits2)

            if config.na:
                na_bias = tf.get_variable("na_bias", shape=[], dtype='float')
                na_bias_tiled = tf.tile(tf.reshape(na_bias, [1, 1]),
                                        [N, 1])  # [N, 1]
                concat_flat_logits = tf.concat(
                    axis=1, values=[na_bias_tiled, flat_logits])
                concat_flat_yp = tf.nn.softmax(concat_flat_logits)
                na_prob = tf.squeeze(tf.slice(concat_flat_yp, [0, 0], [-1, 1]),
                                     [1])
                flat_yp = tf.slice(concat_flat_yp, [0, 1], [-1, -1])

                concat_flat_logits2 = tf.concat(
                    axis=1, values=[na_bias_tiled, flat_logits2])
                concat_flat_yp2 = tf.nn.softmax(concat_flat_logits2)
                na_prob2 = tf.squeeze(
                    tf.slice(concat_flat_yp2, [0, 0], [-1, 1]), [1])  # [N]
                flat_yp2 = tf.slice(concat_flat_yp2, [0, 1], [-1, -1])

                self.concat_logits = concat_flat_logits
                self.concat_logits2 = concat_flat_logits2
                self.na_prob = na_prob * na_prob2

            yp = tf.reshape(flat_yp, [-1, M, JX], name="yp")
            yp2 = tf.reshape(flat_yp2, [-1, M, JX], name="yp2")
            wyp = tf.nn.sigmoid(logits2, name="wyp")

            self.tensor_dict['g1'] = g1
            self.tensor_dict['g2'] = g2

            self.logits = flat_logits
            self.logits2 = flat_logits2
            self.yp = yp
            self.yp2 = yp2
            self.wyp = wyp
Пример #6
0
  def apply(self, context, context_st, query, query_st, q_sub_st, q_len, c_mask, cdoc_mask, q_mask, candidates, cand_mask, greedy_read=False, reuse=False):
    batch_size = self.batch_size
    hidden_dim = self.hidden_dim
    query_dim = self.query_dim
    self.docs_len = tf.reduce_sum(tf.cast(c_mask, 'int32'), 2)
    candidates = tf.squeeze(candidates, axis=1)
    c_state = tf.zeros((batch_size, query_dim))
    m_state = tf.zeros((batch_size, hidden_dim))
    a_state = tf.zeros((batch_size, hidden_dim))

    with tf.variable_scope('MACRnn'):
      
      with tf.variable_scope('q_sub_proj'):
        W = tf.get_variable('W', [query_dim, hidden_dim])
        b = tf.get_variable('b', [hidden_dim])
        m_state = tf.matmul(q_sub_st, W) + b

      self.c_history = []
      self.m_history = []
      self.a_history = []
      self.doc_attn_logits_lst = []
      self.word_attn_logits_lst = []
      self.doc_attn_weights_lst = []

      cell = tf.contrib.rnn.GRUCell(hidden_dim)
      self.cell = cell

      for i in range(self.num_hops):
        scope_str = 'MACRnn-layer-%d' % 0

        if self.read_strategy == 'one_doc_per_it_and_repeat_2nd_step' and i > 1:
          m_state = self.m_history[0]
          a_state = self.a_history[0]

        c_state, m_state, a_state, doc_attn_logits, doc_attn_weights, word_attn_logits = self.HierarchicalAttnMACCell(i, cell, query, query_st, q_sub_st, context, context_st, c_mask, cdoc_mask, \
          q_mask, self.c_history, self.m_history, c_state, m_state, a_state, scope_str, reuse=(reuse or i!=0), greedy_read=greedy_read)

        self.doc_attn_logits_lst.append(doc_attn_logits)
        self.word_attn_logits_lst.append(word_attn_logits)
        self.doc_attn_weights_lst.append(doc_attn_weights)
        self.c_history.append(c_state)
        self.m_history.append(m_state)
       

        if (self.reasoning_unit == 'concat_first_sent' or self.reasoning_unit == 'concat_full_doc') and i == self.num_hops - 1:
          with tf.variable_scope("concat_read_lstm", reuse=False):
            max_len = tf.reduce_max(self.concat_selected_doc_len)
            self.concat_selected_doc_mask = []
            for k in range(self.batch_size):
              concat_selected_doc_mask_k = tf.concat(values=[tf.ones([self.concat_selected_doc_len[k]]), tf.zeros([max_len-self.concat_selected_doc_len[k]])], axis=0)
              self.concat_selected_doc_mask.append(concat_selected_doc_mask_k)
            
            self.concat_selected_doc = tf.stack(self.concat_selected_doc, axis=0)
            self.concat_selected_doc_mask = tf.cast(tf.stack(self.concat_selected_doc_mask, axis=0), 'bool')
            p0 = biattention_layer(self.is_train, self.concat_selected_doc, query, h_mask=self.concat_selected_doc_mask, u_mask=q_mask)
            p0 = tf.squeeze(p0, axis=1)
            cell_fw = BasicLSTMCell(40, state_is_tuple=True)
            cell_bw = BasicLSTMCell(40, state_is_tuple=True)
            cell_fw = SwitchableDropoutWrapper(cell_fw, self.is_train, input_keep_prob=self.input_keep_prob)
            cell_bw = SwitchableDropoutWrapper(cell_bw, self.is_train, input_keep_prob=self.input_keep_prob)
            
            (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, p0, self.concat_selected_doc_len, dtype='float') 
            x = tf.concat(axis=2, values=[fw_h, bw_h])
            logits = linear_logits([x], True, input_keep_prob=self.input_keep_prob, mask=self.concat_selected_doc_mask, is_train=self.is_train, scope='logits1')
            probs = tf.nn.softmax(logits)
            doc_rep = tf.einsum('ijk,ij->ik', self.concat_selected_doc, probs)
            a_state = doc_rep

        self.a_history.append(a_state)

      if self.oracle == 'extra':
        scope_str = 'MACRnn-layer-%d' % 0
        if self.read_strategy == 'one_doc_per_it_and_repeat_2nd_step' and i > 1:
          m_state = self.m_history[0]
          a_state = self.a_history[0]
        _, _, a_state, _, _, _ = self.HierarchicalAttnMACCell(self.num_hops, cell, query, query_st, q_sub_st, context, context_st, c_mask, cdoc_mask, \
          q_mask, self.c_history, self.m_history, c_state, m_state, a_state, scope_str, reuse=True, greedy_read=greedy_read)
        
      if self.prediction == 'candidates':        
        if self.output_unit_type == 'triplet-mlp':
          g1 = self.MACOutputUnit(a_state, context, candidates, query=query)
          if (self.reasoning_unit != 'concat_first_sent' and self.reasoning_unit != 'concat_full_doc') and (self.reasoning_unit != 'attention-lstm' or self.read_strategy != 'one_doc_per_it'):
            for i in range(self.num_hops):
              gi = self.MACOutputUnit(self.a_history[i], context, candidates, query=query)
              self.answer_list.append(tf.expand_dims(gi, axis=1))
        else:
          g1 = self.MACOutputUnit(a_state, context, candidates)
          if (self.reasoning_unit != 'concat_first_sent' and self.reasoning_unit != 'concat_full_doc') and (self.reasoning_unit != 'attention-lstm' or self.read_strategy != 'one_doc_per_it'):
            for i in range(self.num_hops):
              gi = self.MACOutputUnit(self.a_history[i], context, candidates, reuse=True)
              self.answer_list.append(tf.expand_dims(gi, axis=1))

        return tf.expand_dims(g1, axis=1)
      else:
        raise NotImplementedError
Пример #7
0
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        print("VW:", VW, "N:", N, "M:", M, "JX:", JX, "JQ:", JQ)
        JA = config.max_answer_length
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        print("VW:", VW, "N:", N, "M:", M, "JX:", JX, "JQ:", JQ)
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):
            # Char-CNN Embedding
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat, self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    filter_sizes = list(map(int, config.out_channel_dims.split(','))) # [100]
                    heights = list(map(int, config.filter_heights.split(','))) # [5]
                    assert sum(filter_sizes) == dco, (filter_sizes, dco) # Make sure filter channels = char_cnn_out size
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx, filter_sizes, heights, "VALID",  self.is_train, config.keep_prob, scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="xx")
                        else:
                            qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            # Word Embedding
            if config.use_word_emb:
                with tf.variable_scope("emb_var") as scope, tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw], initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, dw], dtype='float')
                    tf.get_variable_scope().reuse_variables()
                    self.word_emb_scope = scope
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat([word_emb_mat, self.new_emb_mat], 0)

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat, self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat, self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                # Concat Char-CNN Embedding and Word Embedding
                if config.use_char_emb:
                    xx = tf.concat([xx, Ax], 3)  # [N, M, JX, di]
                    qq = tf.concat([qq, Aq], 2)  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

            # exact match
            if config.use_exact_match: # TODO: What does it mean?
                emx = tf.expand_dims(tf.cast(self.emx, tf.float32), -1)
                xx = tf.concat([xx, emx], 3)  # [N, M, JX, di+1]
                emq = tf.expand_dims(tf.cast(self.emq, tf.float32), -1)
                qq = tf.concat([qq, emq], 2)  # [N, JQ, di+1]


        # 2 layer highway network on Concat Embedding
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq

        # Bidirection-LSTM (3rd layer on paper)
        cell = GRUCell(d) if config.GRU else BasicLSTMCell(d, state_is_tuple=True)
        d_cell = SwitchableDropoutWrapper(cell, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]
        flat_x_len = flatten(x_len, 0)  # [N * M]

        with tf.variable_scope("prepro"):
            if config.use_fused_lstm: #yes
                with tf.variable_scope("u1"):
                    fw_inputs = tf.transpose(qq, [1, 0, 2]) #[time_len, batch_size, input_size]
                    bw_inputs = tf.reverse_sequence(fw_inputs, q_len, batch_dim=1, seq_dim=0)
                    fw_inputs = tf.nn.dropout(fw_inputs, config.input_keep_prob)
                    bw_inputs = tf.nn.dropout(bw_inputs, config.input_keep_prob)
                    prep_fw_cell = LSTMBlockFusedCell(d, cell_clip=0)
                    prep_bw_cell = LSTMBlockFusedCell(d, cell_clip=0)
                    fw_outputs, fw_final = prep_fw_cell(fw_inputs, dtype=tf.float32, sequence_length=q_len, scope="fw")
                    bw_outputs, bw_final = prep_bw_cell(bw_inputs, dtype=tf.float32, sequence_length=q_len, scope="bw")
                    bw_outputs = tf.reverse_sequence(bw_outputs, q_len, batch_dim=1, seq_dim = 0)
                    current_inputs = tf.concat((fw_outputs, bw_outputs), 2)
                    output = tf.transpose(current_inputs, [1, 0, 2])
                    u = output
                flat_xx = flatten(xx, 2)  # [N * M, JX, d]
                if config.share_lstm_weights: # Yes
                    tf.get_variable_scope().reuse_variables()
                    with tf.variable_scope("u1"):
                        fw_inputs = tf.transpose(flat_xx, [1, 0, 2]) #[time_len, batch_size, input_size]
                        bw_inputs = tf.reverse_sequence(fw_inputs, flat_x_len, batch_dim=1, seq_dim=0)
                        # fw_inputs = tf.nn.dropout(fw_inputs, config.input_keep_prob)
                        # bw_inputs = tf.nn.dropout(bw_inputs, config.input_keep_prob)
                        fw_outputs, fw_final = prep_fw_cell(fw_inputs, dtype=tf.float32, sequence_length=flat_x_len, scope="fw")
                        bw_outputs, bw_final = prep_bw_cell(bw_inputs, dtype=tf.float32, sequence_length=flat_x_len, scope="bw")
                        bw_outputs = tf.reverse_sequence(bw_outputs, flat_x_len, batch_dim=1, seq_dim=0)
                        current_inputs = tf.concat((fw_outputs, bw_outputs), 2)
                        output = tf.transpose(current_inputs, [1, 0, 2])
                else: # No
                    with tf.variable_scope("h1"):
                        fw_inputs = tf.transpose(flat_xx, [1, 0, 2]) #[time_len, batch_size, input_size]
                        bw_inputs = tf.reverse_sequence(fw_inputs, flat_x_len, batch_dim=1, seq_dim=0)
                        # fw_inputs = tf.nn.dropout(fw_inputs, config.input_keep_prob)
                        # bw_inputs = tf.nn.dropout(bw_inputs, config.input_keep_prob)
                        prep_fw_cell = LSTMBlockFusedCell(d, cell_clip=0)
                        prep_bw_cell = LSTMBlockFusedCell(d, cell_clip=0)
                        fw_outputs, fw_final = prep_fw_cell(fw_inputs, dtype=tf.float32, sequence_length=flat_x_len, scope="fw")
                        bw_outputs, bw_final = prep_bw_cell(bw_inputs, dtype=tf.float32, sequence_length=flat_x_len, scope="bw")
                        bw_outputs = tf.reverse_sequence(bw_outputs, flat_x_len, batch_dim=1, seq_dim=0)
                        current_inputs = tf.concat((fw_outputs, bw_outputs), 2)
                        output = tf.transpose(current_inputs, [1, 0, 2])
                h = tf.expand_dims(output, 1) # [N, M, JX, 2d]
            else:
                (fw_u, bw_u), _ = bidirectional_dynamic_rnn(d_cell, d_cell, qq, q_len, dtype='float', scope='u1')  # [N, J, d], [N, d]
                u = tf.concat([fw_u, bw_u], 2)
                if config.share_lstm_weights:
                    tf.get_variable_scope().reuse_variables()
                    (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell, cell, xx, x_len, dtype='float', scope='u1')  # [N, M, JX, 2d]
                    h = tf.concat([fw_h, bw_h], 3)  # [N, M, JX, 2d]
                else:
                    (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell, cell, xx, x_len, dtype='float', scope='h1')  # [N, M, JX, 2d]
                    h = tf.concat([fw_h, bw_h], 3)  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u # hidden state of Q = u
            self.tensor_dict['h'] = h # hidden state of C = h

        # Attention Flow Layer (4th layer on paper)
        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]), [N * M, JQ, 2 * d])
                q_mask = tf.reshape(tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]), [N * M, JQ])
                first_cell = AttentionCell(cell, u, size=d, mask=q_mask, mapper='sim',
                                           input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
            else:
                p0 = attention_layer(config, self.is_train, h, u, h_mask=self.x_mask, u_mask=self.q_mask, scope="p0", tensor_dict=self.tensor_dict)
                first_cell = d_cell # a GRU cell with dropout wrapper
            tp0 = p0 # Output of Attention layer

        # Modeling layer (5th layer on paper)
        with tf.variable_scope('modeling_layer'):
            if config.use_fused_lstm:
                g1, encoder_state_final = build_fused_bidirectional_rnn(inputs=p0,
                                                                        num_units=config.hidden_size,
                                                                        num_layers=config.num_modeling_layers,
                                                                        inputs_length=flat_x_len,
                                                                        input_keep_prob=config.input_keep_prob,
                                                                        scope='modeling_layer_g')

            else:
                for layer_idx in range(config.num_modeling_layers-1):
                    (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(first_cell, first_cell, p0, x_len,
                                                                  dtype='float', scope="g_{}".format(layer_idx))  # [N, M, JX, 2d]
                    p0 = tf.concat([fw_g0, bw_g0], 3)
                (fw_g1, bw_g1), (fw_s_f, bw_s_f) = bidirectional_dynamic_rnn(first_cell, first_cell, p0, x_len,
                                                                             dtype='float', scope='g1')  # [N, M, JX, 2d]
                g1 = tf.concat([fw_g1, bw_g1], 3)  # [N, M, JX, 2d]

        # Self match layer
        if config.use_self_match:
            s0 = tf.reshape(g1, [N * M, JX, 2 * d])                     # [N * M, JX, 2d]
            x_mask = tf.reshape(self.x_mask, [N * M, JX])               # [N * M, JX]
            if config.use_static_self_match:
                with tf.variable_scope("StaticSelfMatch"):              # implemented follow r-net section 3.3
                    W_x_Vj = tf.contrib.layers.fully_connected(         # [N * M, JX, d]
                        s0, int(d / 2), scope='row_first',
                        activation_fn=None, biases_initializer=None
                    )
                    W_x_Vt = tf.contrib.layers.fully_connected(         # [N * M, JX, d]
                        s0, int(d / 2), scope='col_first',
                        activation_fn=None, biases_initializer=None
                    )
                    sum_rc = tf.add(                                    # [N * M, JX, JX, d]
                        tf.expand_dims(W_x_Vj, 1),
                        tf.expand_dims(W_x_Vt, 2)
                    )
                    v = tf.get_variable('second', shape=[1, 1, 1, int(d / 2)], dtype=tf.float32)
                    Sj = tf.reduce_sum(tf.multiply(v, tf.tanh(sum_rc)), -1)     # [N * M, JX, JX]
                    Ai = softmax(Sj, mask = tf.expand_dims(x_mask, 1))          # [N * M, JX, JX]
                    Ai = tf.expand_dims(Ai, -1)                                 # [N * M, JX, JX, 1]
                    Vi = tf.expand_dims(s0, 1)                                  # [N * M, 1, JX, 2d]
                    Ct = tf.reduce_sum(                                         # [N * M, JX, 2d]
                        tf.multiply(Ai, Vi),
                        axis = 2
                    )
                    inputs_Vt_Ct = tf.concat([s0, Ct], 2)                       # [N * M, JX, 4d]
                    if config.use_fused_lstm:
                        fw_inputs = tf.transpose(inputs_Vt_Ct, [1, 0, 2])  # [time_len, batch_size, input_size]
                        bw_inputs = tf.reverse_sequence(fw_inputs, flat_x_len, batch_dim=1, seq_dim=0)
                        fw_inputs = tf.nn.dropout(fw_inputs, config.input_keep_prob)
                        bw_inputs = tf.nn.dropout(bw_inputs, config.input_keep_prob)
                        prep_fw_cell = LSTMBlockFusedCell(d, cell_clip=0)
                        prep_bw_cell = LSTMBlockFusedCell(d, cell_clip=0)
                        fw_outputs, fw_s_f = prep_fw_cell(fw_inputs, dtype=tf.float32, sequence_length=flat_x_len,
                                                            scope="fw")
                        bw_outputs, bw_s_f = prep_bw_cell(bw_inputs, dtype=tf.float32, sequence_length=flat_x_len,
                                                            scope="bw")
                        fw_s_f = LSTMStateTuple(c=fw_s_f[0], h=fw_s_f[1])
                        bw_s_f = LSTMStateTuple(c=bw_s_f[0], h=bw_s_f[1])
                        bw_outputs = tf.reverse_sequence(bw_outputs, flat_x_len, batch_dim=1, seq_dim=0)
                        current_inputs = tf.concat((fw_outputs, bw_outputs), 2)
                        s1 = tf.transpose(current_inputs, [1, 0, 2])
                    else:
                        (fw_s, bw_s), (fw_s_f, bw_s_f) = bidirectional_dynamic_rnn(first_cell, first_cell, inputs_Vt_Ct,
                                                                                   flat_x_len, dtype='float',
                                                                                   scope='s')  # [N, M, JX, 2d]
                        s1 = tf.concat([fw_s, bw_s], 2)  # [N * M, JX, 2d], M == 1
            else:
                with tf.variable_scope("DynamicSelfMatch"):
                    first_cell = AttentionCell(cell, s0, size=d, mask=x_mask, is_train=self.is_train)
                    (fw_s, bw_s), (fw_s_f, bw_s_f) = bidirectional_dynamic_rnn(first_cell, first_cell, s0, x_len,
                                                                               dtype='float', scope='s')  # [N, M, JX, 2d]
                    s1 = tf.concat([fw_s, bw_s], 2)  # [N * M, JX, 2d], M == 1
            g1 = tf.expand_dims(s1, 1) # [N, M, JX, 2d]

        # prepare for PtrNet
        encoder_output = g1  # [N, M, JX, 2d]
        encoder_output = tf.expand_dims(tf.cast(self.x_mask, tf.float32), -1) * encoder_output  # [N, M, JX, 2d]

        if config.use_self_match or not config.use_fused_lstm:
            if config.GRU:
                encoder_state_final = tf.concat((fw_s_f, bw_s_f), 1, name='encoder_concat')
            else:
                if isinstance(fw_s_f, LSTMStateTuple):
                    encoder_state_c = tf.concat(
                        (fw_s_f.c, bw_s_f.c), 1, name='encoder_concat_c')
                    encoder_state_h = tf.concat(
                        (fw_s_f.h, bw_s_f.h), 1, name='encoder_concat_h')
                    encoder_state_final = LSTMStateTuple(c=encoder_state_c, h=encoder_state_h)
                elif isinstance(fw_s_f, tf.Tensor):
                    encoder_state_final = tf.concat((fw_s_f, bw_s_f), 1, name='encoder_concat')
                else:
                    encoder_state_final = None
                    tf.logging.error("encoder_state_final not set")

        print("encoder_state_final:", encoder_state_final)

        with tf.variable_scope("output"):
            # eos_symbol = config.eos_symbol
            # next_symbol = config.next_symbol

            tf.assert_equal(M, 1)  # currently dynamic M is not supported, thus we assume M==1
            answer_string = tf.placeholder(
                shape=(N, 1, JA + 1),
                dtype=tf.int32,
                name='answer_string'
            )  # [N, M, JA + 1]
            answer_string_mask = tf.placeholder(
                shape=(N, 1, JA + 1),
                dtype=tf.bool,
                name='answer_string_mask'
            )  # [N, M, JA + 1]
            answer_string_length = tf.placeholder(
                shape=(N, 1),
                dtype=tf.int32,
                name='answer_string_length',
            ) # [N, M]
            self.tensor_dict['answer_string'] = answer_string
            self.tensor_dict['answer_string_mask'] = answer_string_mask
            self.tensor_dict['answer_string_length'] = answer_string_length
            self.answer_string = answer_string
            self.answer_string_mask = answer_string_mask
            self.answer_string_length = answer_string_length

            answer_string_flattened = tf.reshape(answer_string, [N * M, JA + 1])
            self.answer_string_flattened = answer_string_flattened  # [N * M, JA+1]
            print("answer_string_flattened:", answer_string_flattened)

            answer_string_length_flattened = tf.reshape(answer_string_length, [N * M])
            self.answer_string_length_flattened = answer_string_length_flattened  # [N * M]
            print("answer_string_length_flattened:", answer_string_length_flattened)

            decoder_cell = GRUCell(2 * d) if config.GRU else BasicLSTMCell(2 * d, state_is_tuple=True)

            with tf.variable_scope("Decoder"):
                decoder_train_logits = ptr_decoder(decoder_cell,
                                                   tf.reshape(tp0, [N * M, JX, 2 * d]),  # [N * M, JX, 2d]
                                                   tf.reshape(encoder_output, [N * M, JX, 2 * d]),  # [N * M, JX, 2d]
                                                   flat_x_len,
                                                   encoder_final_state=encoder_state_final,
                                                   max_encoder_length=config.sent_size_th,
                                                   decoder_output_length=answer_string_length_flattened,  # [N * M]
                                                   batch_size=N,  # N * M (M=1)
                                                   attention_proj_dim=self.config.decoder_proj_dim,
                                                   scope='ptr_decoder')  # [batch_size, dec_len*, enc_seq_len + 1]

                self.decoder_train_logits = decoder_train_logits
                print("decoder_train_logits:", decoder_train_logits)
                self.decoder_train_softmax = tf.nn.softmax(self.decoder_train_logits)
                self.decoder_inference = tf.argmax(decoder_train_logits, axis=2,
                                                   name='decoder_inference')  # [N, JA + 1]

            self.yp = tf.ones([N, M, JX], dtype=tf.int32) * -1
            self.yp2 = tf.ones([N, M, JX], dtype=tf.int32) * -1
Пример #8
0
    def _build_forward(self):
        config = self.config

        N = config.batch_size
        M = config.max_num_sents
        JX = config.max_sent_size
        JQ = config.max_ques_size
        VW = config.word_vocab_size
        VC = config.char_vocab_size
        W = config.max_word_size
        d = config.hidden_size

        JX = tf.shape(self.x)[2]  # JX max sentence size, length,
        JQ = tf.shape(self.q)[1]  # JQ max questions size, length, is the
        M = tf.shape(self.x)[1]  # m is the max number of sentences
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size
        # dc = 8, each char will be map to 8-number vector,  "char-level word embedding size [100]"
        with tf.variable_scope("emb"):
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat",
                                                   shape=[VC, dc],
                                                   dtype='float')
                    # 330,8 a matrix for each char to its 8-number vector

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx)
                    # [N, M, JX, W, dc] 60,None,None,16,8, batch-size,
                    # N is the number of batch_size
                    # M the max number of sentences
                    # JX is the max sentence length
                    # W is  the max length of a word
                    # dc is the vector for each char
                    # map each char to a vector

                    Acq = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cq)  # [N, JQ, W, dc]
                    # JQ the max length of question
                    # W the max length of words
                    # mao each char in questiosn to vectors

                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])
                    # max questions size, length, max_word_size(16), char_emb_size(8)

                    filter_sizes = list(
                        map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    # so here, there are 100 filters and the size of each filter is 5
                    # different heights and there are different number of these filter, but here just 100 5-long filters

                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="xx")
                        else:
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(
                            qq, [-1, JQ, dco
                                 ])  # here, xx and qq are the output of cnn,

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    if config.use_glove_for_unk:  # create a new word embedding or use the glove?
                        word_emb_mat = tf.concat(
                            [word_emb_mat, self.new_emb_mat], 0)

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat,
                                                self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:
                    xx = tf.concat([xx, Ax], 3)  # [N, M, JX, di]
                    qq = tf.concat([qq, Aq], 2)  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq  # here we used cnn and word embedding represented each word with a 200-unit vector
        # so for, xx, (batch_size, sentence#, word#, embedding), qq (batch_size, word#, embedding)
        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq
        # same shape with line 173
        cell = BasicLSTMCell(
            d, state_is_tuple=True)  # d = 100, hidden state number
        d_cell = SwitchableDropoutWrapper(
            cell, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'),
                              2)  # [N, M], [60,?]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N] [60]
        # masks are true and false, here, he sums up those truths,
        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             d_cell,
                                             d_cell,
                                             qq,
                                             q_len,
                                             dtype='float',
                                             scope='u1')  # [N, J, d], [N, d]
            u = tf.concat(
                [fw_u, bw_u],
                2)  # (60, ?, 200) |  200 becahse combined 2 100 hidden states
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='u1')  # [N, M, JX, 2d]
                h = tf.concat([fw_h, bw_h], 3)  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='h1')  # [N, M, JX, 2d]
                h = tf.concat([fw_h, bw_h], 3)  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u  # [60, ?, 200] for question
            self.tensor_dict['h'] = h  # [60, ?, ?, 200] for article

        with tf.variable_scope("main"):
            if config.dynamic_att:  # todo what is this dynamic attention.
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]),
                               [N * M, JQ, 2 * d])
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell = AttentionCell(
                    cell,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                p0 = attention_layer(config,
                                     self.is_train,
                                     h,
                                     u,
                                     h_mask=self.x_mask,
                                     u_mask=self.q_mask,
                                     scope="p0",
                                     tensor_dict=self.tensor_dict)
                cell2 = BasicLSTMCell(
                    d, state_is_tuple=True)  # d = 100, hidden state number
                first_cell = SwitchableDropoutWrapper(
                    cell2,
                    self.is_train,
                    input_keep_prob=config.input_keep_prob)

            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                first_cell,
                first_cell,
                inputs=p0,
                sequence_length=x_len,
                dtype='float',
                scope='g0')  # [N, M, JX, 2d]

            g0 = tf.concat([fw_g0, bw_g0], 3)
            cell3 = BasicLSTMCell(
                d, state_is_tuple=True)  # d = 100, hidden state number
            first_cell3 = SwitchableDropoutWrapper(
                cell3, self.is_train, input_keep_prob=config.input_keep_prob)

            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(
                first_cell3, first_cell3, g0, x_len, dtype='float',
                scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat([fw_g1, bw_g1], 3)

            logits = get_logits([g1, p0],
                                d,
                                True,
                                wd=config.wd,
                                input_keep_prob=config.input_keep_prob,
                                mask=self.x_mask,
                                is_train=self.is_train,
                                func=config.answer_func,
                                scope='logits1')
            a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]),
                          tf.reshape(logits, [N, M * JX]))
            a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1),
                          [1, M, JX, 1])
            cell4 = BasicLSTMCell(
                d, state_is_tuple=True)  # d = 100, hidden state number
            first_cell4 = SwitchableDropoutWrapper(
                cell4, self.is_train, input_keep_prob=config.input_keep_prob)

            (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(
                first_cell4,
                first_cell4,
                tf.concat([p0, g1, a1i, g1 * a1i], 3),
                x_len,
                dtype='float',
                scope='g2')  # [N, M, JX, 2d]
            g2 = tf.concat([fw_g2, bw_g2], 3)
            logits2 = get_logits([g2, p0],
                                 d,
                                 True,
                                 wd=config.wd,
                                 input_keep_prob=config.input_keep_prob,
                                 mask=self.x_mask,
                                 is_train=self.is_train,
                                 func=config.answer_func,
                                 scope='logits2')

            flat_logits = tf.reshape(logits, [-1, M * JX])
            flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
            yp = tf.reshape(flat_yp, [-1, M, JX])
            flat_logits2 = tf.reshape(logits2, [-1, M * JX])
            flat_yp2 = tf.nn.softmax(flat_logits2)
            yp2 = tf.reshape(flat_yp2, [-1, M, JX])

            self.tensor_dict['g1'] = g1
            self.tensor_dict['g2'] = g2

            self.logits = flat_logits
            self.logits2 = flat_logits2
            self.yp = yp
            self.yp2 = yp2
Пример #9
0
    def _build_forward(self):
        #config为预先配置好的参数等
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        #嵌入层
        with tf.variable_scope("emb"):
            #字符嵌入层
            if config.use_char_emb:  #若需要字符嵌入层
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat",
                                                   shape=[VC, dc],
                                                   dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    #CNN的滤波器参数
                    filter_sizes = list(
                        map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)

                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="xx")
                        else:
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            #词嵌入层
            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    if config.use_glove_for_unk:  #若调用已训练好的词嵌入文件
                        word_emb_mat = tf.concat(
                            0, [word_emb_mat, self.new_emb_mat])

                with tf.name_scope("word"):
                    #将文章主体context:x和问题query:q转换为词向量
                    #embedding_lookup(params, ids),根据ids寻找params中的第id行
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat,
                                                self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:  #若进行了字符嵌入,在指定维度上将字符嵌入和词嵌入进行拼接
                    xx = tf.concat(3, [xx, Ax])  # [N, M, JX, di]
                    qq = tf.concat(2, [qq, Aq])  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

        # 经过两层highway network得到context vector∈ R^(d*T)和query vectorQ∈R^(d∗J)
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq

        cell = BasicLSTMCell(d, state_is_tuple=True)
        #SwitchableDropoutWrapper为自定义的DropoutWrapper类
        d_cell = SwitchableDropoutWrapper(
            cell, self.is_train, input_keep_prob=config.input_keep_prob)
        #reduce_sum在指定的维度上求和(得到x和q的非空值总数),cast将输入的tensor映射到指定类型(此处为x_mask到int32)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        #Contextual Embedding Layer:对上一层得到的X和Q分别使用BiLSTM进行处理,分别捕捉X和Q中各自单词间的局部关系
        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             d_cell,
                                             d_cell,
                                             qq,
                                             q_len,
                                             dtype='float',
                                             scope='u1')  # [N, J, d], [N, d]
            #fw_u和bw_u分别为双向lstm的output
            u = tf.concat(2, [fw_u, bw_u])  #[N, J, 2d]
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='u1')  # [N, M, JX, 2d]
                h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='h1')  # [N, M, JX, 2d]
                h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        #核心层Attention Flow Layer
        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                #expand_dims()在矩阵指定位置增加维度
                #tile()对矩阵的指定维度进行复制
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]), [
                    N * M, JQ, 2 * d
                ])  #先在索引1的位置添加一个维度,然后复制M(context中最多的sentence数量)次,使u和h能具有相同的维度
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell = AttentionCell(
                    cell,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                p0 = attention_layer(config,
                                     self.is_train,
                                     h,
                                     u,
                                     h_mask=self.x_mask,
                                     u_mask=self.q_mask,
                                     scope="p0",
                                     tensor_dict=self.tensor_dict)
                first_cell = d_cell

            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                first_cell, first_cell, p0, x_len, dtype='float',
                scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat(3, [fw_g0, bw_g0])
            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(
                first_cell, first_cell, g0, x_len, dtype='float',
                scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat(3, [fw_g1, bw_g1])

            logits = get_logits([g1, p0],
                                d,
                                True,
                                wd=config.wd,
                                input_keep_prob=config.input_keep_prob,
                                mask=self.x_mask,
                                is_train=self.is_train,
                                func=config.answer_func,
                                scope='logits1')
            a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]),
                          tf.reshape(logits, [N, M * JX]))
            a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1),
                          [1, M, JX, 1])

            (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(
                d_cell,
                d_cell,
                tf.concat(3, [p0, g1, a1i, g1 * a1i]),
                x_len,
                dtype='float',
                scope='g2')  # [N, M, JX, 2d]
            g2 = tf.concat(3, [fw_g2, bw_g2])
            logits2 = get_logits([g2, p0],
                                 d,
                                 True,
                                 wd=config.wd,
                                 input_keep_prob=config.input_keep_prob,
                                 mask=self.x_mask,
                                 is_train=self.is_train,
                                 func=config.answer_func,
                                 scope='logits2')

            flat_logits = tf.reshape(logits, [-1, M * JX])
            flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
            yp = tf.reshape(flat_yp, [-1, M, JX])
            flat_logits2 = tf.reshape(logits2, [-1, M * JX])
            flat_yp2 = tf.nn.softmax(flat_logits2)
            yp2 = tf.reshape(flat_yp2, [-1, M, JX])

            self.tensor_dict['g1'] = g1
            self.tensor_dict['g2'] = g2

            self.logits = flat_logits
            self.logits2 = flat_logits2
            self.yp = yp
            self.yp2 = yp2
Пример #10
0
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size
        with tf.variable_scope("emb"):
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat, self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    filter_sizes = list(map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx, filter_sizes, heights, "VALID", self.is_train, config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob,
                                              scope="xx")
                        else:
                            qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob,
                                              scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw],
                                                       initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, dw], dtype='float')
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat([word_emb_mat, self.new_emb_mat], 0)

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat, self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat, self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:
                    xx = tf.concat([xx, Ax], 3)  # [N, M, JX, di]
                    qq = tf.concat([qq, Aq], 2)  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq

        cell_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell_fw = SwitchableDropoutWrapper(cell_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell_bw = SwitchableDropoutWrapper(cell_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell2_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell2_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell2_fw = SwitchableDropoutWrapper(cell2_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell2_bw = SwitchableDropoutWrapper(cell2_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell3_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell3_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell3_fw = SwitchableDropoutWrapper(cell3_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell3_bw = SwitchableDropoutWrapper(cell3_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell4_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell4_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell4_fw = SwitchableDropoutWrapper(cell4_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell4_bw = SwitchableDropoutWrapper(cell4_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_, bw_u_f)) = bidirectional_dynamic_rnn(d_cell_fw, d_cell_bw, qq, q_len,
                                                                                 dtype='float',
                                                                                 scope='u1')  # [N, J, d], [N, d]
            u = tf.concat(axis=2, values=[fw_u, bw_u])
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, xx, x_len, dtype='float',
                                                            scope='u1')  # [N, M, JX, 2d]
                h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, xx, x_len, dtype='float',
                                                            scope='h1')  # [N, M, JX, 2d]
                h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]), [N * M, JQ, 2 * d])
                q_mask = tf.reshape(tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]), [N * M, JQ])
                first_cell_fw = AttentionCell(cell2_fw, u, mask=q_mask, mapper='sim',
                                              input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
                first_cell_bw = AttentionCell(cell2_bw, u, mask=q_mask, mapper='sim',
                                              input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
                second_cell_fw = AttentionCell(cell3_fw, u, mask=q_mask, mapper='sim',
                                               input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
                second_cell_bw = AttentionCell(cell3_bw, u, mask=q_mask, mapper='sim',
                                               input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
            else:
                p0 = attention_layer(config, self.is_train, h, u, h_mask=self.x_mask, u_mask=self.q_mask, scope="p0",
                                     tensor_dict=self.tensor_dict)
                first_cell_fw = d_cell2_fw
                second_cell_fw = d_cell3_fw
                first_cell_bw = d_cell2_bw
                second_cell_bw = d_cell3_bw

            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(first_cell_fw, first_cell_bw, p0, x_len, dtype='float',
                                                          scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat(axis=3, values=[fw_g0, bw_g0])
            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(second_cell_fw, second_cell_bw, g0, x_len, dtype='float',
                                                          scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat(axis=3, values=[fw_g1, bw_g1])

        with tf.variable_scope("output"):
            if config.model_name == "basic":
                logits = get_logits([g1, p0], d, True, wd=config.wd,
                                    input_keep_prob=config.input_keep_prob,
                                    mask=self.x_mask, is_train=self.is_train,
                                    func=config.answer_func, scope='logits1')
                a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]),
                              tf.reshape(logits, [N, M * JX]))
                a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1),
                              [1, M, JX, 1])
                (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(d_cell4_fw, d_cell4_bw,
                                                              tf.concat([p0, g1, a1i, g1 * a1i], 3),
                                                              x_len, dtype='float', scope='g2')  # [N, M, JX, 2d]
                g2 = tf.concat([fw_g2, bw_g2], 3)
                logits2 = get_logits([g2, p0], d, True, wd=config.wd,
                                     input_keep_prob=config.input_keep_prob, mask=self.x_mask,
                                     is_train=self.is_train, func=config.answer_func,
                                     scope='logits2')
                flat_logits = tf.reshape(logits, [-1, M * JX])
                flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
                yp = tf.reshape(flat_yp, [-1, M, JX])
                flat_logits2 = tf.reshape(logits2, [-1, M * JX])
                flat_yp2 = tf.nn.softmax(flat_logits2)
                yp2 = tf.reshape(flat_yp2, [-1, M, JX])

                self.tensor_dict['g1'] = g1
                self.tensor_dict['g2'] = g2

                self.logits = flat_logits
                self.logits2 = flat_logits2
                self.yp = yp
                self.yp2 = yp2

            elif config.model_name == "basic-class":
                C = 3 if config.data_dir.startswith('data/snli') else 2
                (fw_g2, bw_g2) = (fw_g1, bw_g1)

                if config.classifier == 'maxpool':
                    g2 = tf.concat([fw_g2, bw_g2], 3)  # [N, M, JX, 2d]
                    g2 = tf.reduce_max(g2, 2)  # [N, M, 2d]
                    g2_dim = 2 * d
                elif config.classifier == 'sumpool':
                    g2 = tf.concat([fw_g2, bw_g2], 3)
                    g2 = tf.reduce_sum(g2, 2)
                    g2_dim = 2 * d
                else:
                    fw_g2_ = tf.gather(tf.transpose(fw_g2, [2, 0, 1, 3]), JX - 1)
                    bw_g2_ = tf.gather(tf.transpose(bw_g2, [2, 0, 1, 3]), 0)
                    g2 = tf.concat([fw_g2_, bw_g2_], 2)
                    g2_dim = 2 * d

                g2_ = tf.reshape(g2, [N, g2_dim])

                logits0 = linear(g2_, C, True, wd=config.wd, input_keep_prob=config.input_keep_prob,
                                 is_train=self.is_train, scope='classifier')
                flat_yp0 = tf.nn.softmax(logits0)
                yp0 = tf.reshape(flat_yp0, [N, M, C])
                self.tensor_dict['g1'] = g1
                self.logits0 = logits0
                self.yp0 = yp0
                self.logits = logits0
                self.yp = yp0
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, d, dc, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.hidden_size, \
            config.char_emb_size, config.max_word_size
        H = config.max_tree_height

        x_mask = self.x > 0
        q_mask = self.q > 0
        tx_mask = self.tx > 0  # [N, M, H, JX]

        with tf.variable_scope("char_emb"):
            char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')
            Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx)  # [N, M, JX, W, dc]
            Acq = tf.nn.embedding_lookup(char_emb_mat, self.cq)  # [N, JQ, W, dc]

            filter = tf.get_variable("filter", shape=[1, config.char_filter_height, dc, d], dtype='float')
            bias = tf.get_variable("bias", shape=[d], dtype='float')
            strides = [1, 1, 1, 1]
            Acx = tf.reshape(Acx, [-1, JX, W, dc])
            Acq = tf.reshape(Acq, [-1, JQ, W, dc])
            xxc = tf.nn.conv2d(Acx, filter, strides, "VALID") + bias  # [N*M, JX, W/filter_stride, d]
            qqc = tf.nn.conv2d(Acq, filter, strides, "VALID") + bias  # [N, JQ, W/filter_stride, d]
            xxc = tf.reshape(tf.reduce_max(tf.nn.relu(xxc), 2), [-1, M, JX, d])
            qqc = tf.reshape(tf.reduce_max(tf.nn.relu(qqc), 2), [-1, JQ, d])

        with tf.variable_scope("word_emb"):
            if config.mode == 'train':
                word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, config.word_emb_size], initializer=get_initializer(config.emb_mat))
            else:
                word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, config.word_emb_size], dtype='float')
            Ax = tf.nn.embedding_lookup(word_emb_mat, self.x)  # [N, M, JX, d]
            Aq = tf.nn.embedding_lookup(word_emb_mat, self.q)  # [N, JQ, d]
            # Ax = linear([Ax], d, False, scope='Ax_reshape')
            # Aq = linear([Aq], d, False, scope='Aq_reshape')

        xx = tf.concat(3, [xxc, Ax])  # [N, M, JX, 2d]
        qq = tf.concat(2, [qqc, Aq])  # [N, JQ, 2d]
        D = d + config.word_emb_size

        with tf.variable_scope("pos_emb"):
            pos_emb_mat = tf.get_variable("pos_emb_mat", shape=[config.pos_vocab_size, d], dtype='float')
            Atx = tf.nn.embedding_lookup(pos_emb_mat, self.tx)  # [N, M, H, JX, d]

        cell = BasicLSTMCell(D, state_is_tuple=True)
        cell = SwitchableDropoutWrapper(cell, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("rnn"):
            (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell, cell, xx, x_len, dtype='float', scope='start')  # [N, M, JX, 2d]
            tf.get_variable_scope().reuse_variables()
            (fw_us, bw_us), (_, (fw_u, bw_u)) = bidirectional_dynamic_rnn(cell, cell, qq, q_len, dtype='float', scope='start')  # [N, J, d], [N, d]
            u = (fw_u + bw_u) / 2.0
            h = (fw_h + bw_h) / 2.0

        with tf.variable_scope("h"):
            no_op_cell = NoOpCell(D)
            tree_rnn_cell = TreeRNNCell(no_op_cell, d, tf.reduce_max)
            initial_state = tf.reshape(h, [N*M*JX, D])  # [N*M*JX, D]
            inputs = tf.concat(4, [Atx, tf.cast(self.tx_edge_mask, 'float')])  # [N, M, H, JX, d+JX]
            inputs = tf.reshape(tf.transpose(inputs, [0, 1, 3, 2, 4]), [N*M*JX, H, d + JX])  # [N*M*JX, H, d+JX]
            length = tf.reshape(tf.reduce_sum(tf.cast(tx_mask, 'int32'), 2), [N*M*JX])
            # length = tf.reshape(tf.reduce_sum(tf.cast(tf.transpose(tx_mask, [0, 1, 3, 2]), 'float'), 3), [-1])
            h, _ = dynamic_rnn(tree_rnn_cell, inputs, length, initial_state=initial_state)  # [N*M*JX, H, D]
            h = tf.transpose(tf.reshape(h, [N, M, JX, H, D]), [0, 1, 3, 2, 4])  # [N, M, H, JX, D]

        u = tf.expand_dims(tf.expand_dims(tf.expand_dims(u, 1), 1), 1)  # [N, 1, 1, 1, 4d]
        dot = linear(h * u, 1, True, squeeze=True, scope='dot')  # [N, M, H, JX]
        # self.logits = tf.reshape(dot, [N, M * H * JX])
        self.logits = tf.reshape(exp_mask(dot, tx_mask), [N, M * H * JX])  # [N, M, H, JX]
        self.yp = tf.reshape(tf.nn.softmax(self.logits), [N, M, H, JX])
Пример #12
0
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        print("parameters", N, M, JX, JQ, VW, VC, d, W)
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat, self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    filter_sizes = list(map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx, filter_sizes, heights, "VALID",  self.is_train, config.keep_prob, scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="xx")
                        else:
                            qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw], initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, dw], dtype='float')
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(axis=0, values=[word_emb_mat, self.new_emb_mat])

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat, self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat, self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:
                    xx = tf.concat(axis=3, values=[xx, Ax])  # [N, M, JX, di]
                    qq = tf.concat(axis=2, values=[qq, Aq])  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq

        # prepro layer cell
        cell_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell_fw = SwitchableDropoutWrapper(cell_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell_bw = SwitchableDropoutWrapper(cell_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell_fwh = BasicLSTMCell(d, state_is_tuple=True)
        cell_bwh = BasicLSTMCell(d, state_is_tuple=True)
        d_cell_fwh = SwitchableDropoutWrapper(cell_fwh, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell_bwh = SwitchableDropoutWrapper(cell_bwh, self.is_train, input_keep_prob=config.input_keep_prob)

        # attention layer cell
        cell2_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell2_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell2_fw = SwitchableDropoutWrapper(cell2_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell2_bw = SwitchableDropoutWrapper(cell2_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell3_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell3_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell3_fw = SwitchableDropoutWrapper(cell3_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell3_bw = SwitchableDropoutWrapper(cell3_bw, self.is_train, input_keep_prob=config.input_keep_prob)

        # out layer cell_bw
        cell4_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell4_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell4_fw = SwitchableDropoutWrapper(cell4_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell4_bw = SwitchableDropoutWrapper(cell4_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("prepro"):
            (fw_h, bw_h), ((_, fw_h_f), (_, bw_h_f)) = bidirectional_dynamic_rnn(cell_fw, cell_bw, xx, x_len, dtype='float', scope='h1')
            h = tf.concat(axis=3, values=[fw_h, bw_h])

            (fw_u, bw_u), ((_, fw_h_f), (_, bw_h_f)) = bidirectional_dynamic_rnn(d_cell_fw, d_cell_bw, qq, q_len, dtype='float', scope='u1')  # [N, J, d], [N, d]
            u = tf.concat(axis=2, values=[fw_u, bw_u])

            
            # if config.share_lstm_weights:
            #     tf.get_variable_scope().reuse_variables()
            #     (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, xx, x_len, dtype='float', scope='u1')  # [N, M, JX, 2d]
            #     h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
            # else:
            #     (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, xx, x_len, dtype='float', scope='h1')  # [N, M, JX, 2d]
            #     h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]

            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h
            # h = tf.Print(h, [tf.reduce_max(h), tf.reduce_min(h), "h"], summarize=1000)

        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]), [N * M, JQ, 2 * d])
                q_mask = tf.reshape(tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]), [N * M, JQ])
                first_cell_fw = AttentionCell(cell2_fw, u, mask=q_mask, mapper='sim',
                                              input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
                first_cell_bw = AttentionCell(cell2_bw, u, mask=q_mask, mapper='sim',
                                              input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
                second_cell_fw = AttentionCell(cell3_fw, u, mask=q_mask, mapper='sim',
                                            input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
                second_cell_bw = AttentionCell(cell3_bw, u, mask=q_mask, mapper='sim',
                                               input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
            else:
                p0 = attention_layer(config, self.is_train, h, u, h_mask=self.x_mask, u_mask=self.q_mask, scope="p0", tensor_dict=self.tensor_dict)
                first_cell_fw = d_cell2_fw
                second_cell_fw = d_cell3_fw
                first_cell_bw = d_cell2_bw
                second_cell_bw = d_cell3_bw

            # p0 = tf.Print(p0, [p0, "lstm output-1:"],  summarize=200)

            # two layer LSTM
            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(first_cell_fw, first_cell_bw, p0, x_len, dtype='float', scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat(axis=3, values=[fw_g0, bw_g0])
            # g0 = tf.Print(g0, [g0, "lstm output0:"],  summarize=200)
            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(second_cell_fw, second_cell_bw, g0, x_len, dtype='float', scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat(axis=3, values=[fw_g1, bw_g1])
            # g1 = tf.Print(g1, [g1, "lstm output:"],  summarize=200)
            

            # output through a denselayer
        with tf.variable_scope("output"):
            #lstm_out = g1[:,0,-1,:]
            lstm_out = tf.reduce_sum(g1, axis = 2)
            lstm_out = tf.reshape(lstm_out, [-1, 2*d])
            lstm_out = tf.nn.dropout(lstm_out, 0.5)
            #logits = get_logits([g1, p0], d, True, wd=config.wd, input_keep_prob=config.input_keep_prob,
            #                    mask=self.x_mask, is_train=self.is_train, func='linear', scope='logits1')
            #print (logits)
            #lstm_out.set_shape([N, M*JX*2*d])
            dense1 = tf.layers.dense(inputs=lstm_out, units=64, activation = tf.nn.relu, kernel_regularizer=tf.contrib.layers.l2_regularizer(0.003))
            score = tf.layers.dense(inputs=dense1, units = 2, activation = None, kernel_regularizer=tf.contrib.layers.l2_regularizer(0.003))
            self.probs = tf.nn.softmax(score)

            # self.tensor_dict['g1'] = g1
            # self.tensor_dict['g2'] = g2
            self.score = score
Пример #13
0
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W ,EW, WOW= \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size,config.word_vocab_size-config.vw_wo_entity_size,config.vw_wo_entity_size
        JX = tf.shape(self.x)[2]  # words
        JQ = tf.shape(self.q)[1] # words
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        #print ("dhruv is here",N, self.x.get_shape(), JX, self.q.get_shape(), VW, VC, d, W,dc, dw, dco)
        with tf.variable_scope("emb"):
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat, self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    filter_sizes = list(map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx, filter_sizes, heights, "VALID",  self.is_train, config.keep_prob, scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="xx")
                        else:
                            qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        entity_emb_mat = tf.get_variable("entity_emb_mat", dtype='float', shape=[EW, EW], initializer=get_initializer(config.onehot_encoded))
                        entity_emb_out = _linear(entity_emb_mat, dw, True, bias_initializer=tf.constant_initializer(0.0))
                        word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[WOW, dw], initializer=get_initializer(config.emb_mat))
                        word_emb_mat = tf.concat(axis=0,values=[word_emb_mat, entity_emb_out])
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, dw], dtype='float')

                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(axis=0, values=[word_emb_mat, self.new_emb_mat])
                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat, self.x)  # [N, M, JX, d] i.e. [batch size, max sentences, max words, embedding size]
                    Aq = tf.nn.embedding_lookup(word_emb_mat, self.q)  # [N, JQ, d] i.e. [batch size, max words, embedding size]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:
                    xx = tf.concat(axis=3, values=[xx, Ax])  # [N, M, JX, di]
                    qq = tf.concat(axis=2, values=[qq, Aq])  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq
        #xx = tf.Print(xx,[tf.shape(xx),xx],message="DHRUV xx=",summarize=20)
        cell_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell_fw = SwitchableDropoutWrapper(cell_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell_bw = SwitchableDropoutWrapper(cell_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell2_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell2_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell2_fw = SwitchableDropoutWrapper(cell2_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell2_bw = SwitchableDropoutWrapper(cell2_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell3_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell3_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell3_fw = SwitchableDropoutWrapper(cell3_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell3_bw = SwitchableDropoutWrapper(cell3_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell4_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell4_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell4_fw = SwitchableDropoutWrapper(cell4_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell4_bw = SwitchableDropoutWrapper(cell4_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_, bw_u_f)) = bidirectional_dynamic_rnn(d_cell_fw, d_cell_bw, qq, q_len, dtype='float', scope='u1')  # [N, J, d], [N, d]
            u = tf.concat(axis=2, values=[fw_u, bw_u])
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, xx, x_len, dtype='float', scope='u1')  # [N, M, JX, 2d]
                h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, xx, x_len, dtype='float', scope='h1')  # [N, M, JX, 2d]
                h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        with tf.variable_scope("main"):
            if config.dynamic_att: # not true
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]), [N * M, JQ, 2 * d])
                q_mask = tf.reshape(tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]), [N * M, JQ])
                first_cell_fw = AttentionCell(cell2_fw, u, mask=q_mask, mapper='sim',
                                              input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
                first_cell_bw = AttentionCell(cell2_bw, u, mask=q_mask, mapper='sim',
                                              input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
                second_cell_fw = AttentionCell(cell3_fw, u, mask=q_mask, mapper='sim',
                                            input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
                second_cell_bw = AttentionCell(cell3_bw, u, mask=q_mask, mapper='sim',
                                               input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
            else:
                p0 = attention_layer(config, self.is_train, h, u, h_mask=self.x_mask, u_mask=self.q_mask, scope="p0", tensor_dict=self.tensor_dict) # p0 seems to be G in paper
                first_cell_fw = d_cell2_fw
                second_cell_fw = d_cell3_fw
                first_cell_bw = d_cell2_bw
                second_cell_bw = d_cell3_bw

            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(first_cell_fw, first_cell_bw, p0, x_len, dtype='float', scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat(axis=3, values=[fw_g0, bw_g0])
            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(second_cell_fw, second_cell_bw, g0, x_len, dtype='float', scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat(axis=3, values=[fw_g1, bw_g1]) # g1 seems to be M in paper

            g1= tf.Print(g1,[tf.shape(g1)],message="g1 shape",first_n=5,summarize=200)
            p0 = tf.Print(p0, [tf.shape(p0)], message="p0 shape", first_n=5, summarize=200)

            my_cell_fw = BasicLSTMCell(d, state_is_tuple=True)
            my_cell_fw_d = SwitchableDropoutWrapper(my_cell_fw, self.is_train, input_keep_prob=config.input_keep_prob)
            my_cell_bw = BasicLSTMCell(d, state_is_tuple=True)
            my_cell_bw_d = SwitchableDropoutWrapper(my_cell_bw, self.is_train, input_keep_prob=config.input_keep_prob)

            (fw_g11,bw_g11),(my_fw_final_state, my_bw_final_state),g11_len = my_bidirectional_dynamic_rnn(my_cell_fw_d, my_cell_bw_d, g1, x_len, dtype='float', scope='my_g2')  # [N, M, JX, 2d]
            g11 = tf.concat(axis=2, values=[fw_g11, bw_g11])

            my_encoder_final_state_c = tf.concat(values = (my_fw_final_state.c, my_bw_final_state.c), axis = 1, name = "my_encoder_final_state_c")
            my_encoder_final_state_h = tf.concat(values = (my_fw_final_state.h, my_bw_final_state.h), axis = 1, name = "my_encoder_final_state_h")
            my_encoder_final_state = tf.contrib.rnn.LSTMStateTuple(c = my_encoder_final_state_c, h = my_encoder_final_state_h)

            #compute indices for finding span as the second task in multi task learning
            logits = get_logits([g1, p0], d, True, wd=config.wd, input_keep_prob=config.input_keep_prob,
                                mask=self.x_mask, is_train=self.is_train, func=config.answer_func, scope='logits1')
            logits = tf.Print(logits, [tf.shape(logits)], message="logits shape", first_n=5, summarize=200)
            a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]), tf.reshape(logits, [N, M * JX]))
            a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1), [1, M, JX, 1])

            (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(d_cell4_fw, d_cell4_bw,
                                                          tf.concat(axis=3, values=[p0, g1, a1i, g1 * a1i]),
                                                          x_len, dtype='float', scope='g2')  # [N, M, JX, 2d]
            g2 = tf.concat(axis=3, values=[fw_g2, bw_g2])
            logits2 = get_logits([g2, p0], d, True, wd=config.wd, input_keep_prob=config.input_keep_prob,
                                 mask=self.x_mask,
                                 is_train=self.is_train, func=config.answer_func, scope='logits2')

            flat_logits = tf.reshape(logits, [-1, M * JX])
            flat_logits = tf.Print(flat_logits, [tf.shape(flat_logits),flat_logits], message="flat_logits shape and contents", first_n=5, summarize=200)
            flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
            flat_logits2 = tf.reshape(logits2, [-1, M * JX])
            flat_yp2 = tf.nn.softmax(flat_logits2)

            tgt_vocab_size = config.len_new_emb_mat # hparam # FIXME: Obtain embeddings differently?
            print("length is",config.len_new_emb_mat)
            tgt_embedding_size = dw # hparam

            # Look up embedding
            decoder_emb_inp = tf.nn.embedding_lookup(word_emb_mat, self.decoder_inputs) # [batch_size, max words, embedding_size]



            def decode_with_attention(helper, scope, reuse=None,maximum_iterations=None):
                with tf.variable_scope(scope, reuse=reuse):
                    attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(num_units=d, memory=g11)
                    cell = tf.contrib.rnn.GRUCell(num_units=d)
                    attn_cell = tf.contrib.seq2seq.AttentionWrapper(cell, attention_mechanism,attention_layer_size=d /2)
                    out_cell = tf.contrib.rnn.OutputProjectionWrapper(attn_cell, tgt_vocab_size, reuse=reuse)
                    decoder = tf.contrib.seq2seq.BasicDecoder(cell=out_cell, helper=helper,initial_state=out_cell.zero_state(
                                                                  dtype=tf.float32, batch_size=N))
                    # initial_state=encoder_final_state)
                    outputs = tf.contrib.seq2seq.dynamic_decode(decoder=decoder, output_time_major=False,
                                                                impute_finished=True, maximum_iterations=maximum_iterations)
                    return outputs[0]

            def decode(helper, scope, reuse=None, maximum_iterations=None):
                with tf.variable_scope(scope, reuse=reuse):
                    decoder_cell = BasicLSTMCell(2 * d, state_is_tuple=True) # hparam
                    projection_layer = layers_core.Dense(tgt_vocab_size, use_bias=False) # hparam
                    decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, helper, my_encoder_final_state,output_layer=projection_layer) # decoder
                    final_outputs, _ ,_= tf.contrib.seq2seq.dynamic_decode(decoder, output_time_major=False, impute_finished=True,
                                                                         maximum_iterations=maximum_iterations) # dynamic decoding
                    return final_outputs
            # Decoder
            if config.mode == 'train': #TODO:doesnt seem to be correct to use this variable for dev
                training_helper = tf.contrib.seq2seq.TrainingHelper(decoder_emb_inp, self.target_sequence_length,time_major=False)
                #final_outputs = decode(helper=training_helper, scope="HAHA", reuse=None)
                final_outputs = decode_with_attention(helper=training_helper, scope="HAHA", reuse=None)

            else:
                training_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(word_emb_mat, tf.fill([N], self.tgt_sos_id),self.tgt_eos_id)
                #final_outputs= decode(helper=training_helper, scope="HAHA", reuse=True,maximum_iterations=100)
                final_outputs= decode_with_attention(helper=training_helper, scope="HAHA", reuse=True,maximum_iterations=100)

            self.decoder_logits_train = final_outputs.rnn_output
            self.index_start = flat_logits
            self.index_end = flat_logits2
Пример #14
0
    def build_forward(self,
                      k,
                      c_mask,
                      query,
                      query_st,
                      q_mask,
                      cand_emb,
                      cand_mask,
                      drop_one_doc=False,
                      dropped_hops=None,
                      reuse=False):
        self.selected_sent_ids = self.model.selected_sent_ids
        cand_emb = tf.squeeze(cand_emb, axis=1)
        config = self.config
        model = self.model
        context_dim = self.context_dim
        x_acc_sents_len = model.x_sents_len_reconstruct
        self.x_acc_sents_len = x_acc_sents_len
        zero_prepend_x_acc_sents_len = tf.concat([
            tf.expand_dims(tf.zeros_like(x_acc_sents_len[:, :, 0]), axis=2),
            x_acc_sents_len
        ],
                                                 axis=2)
        with tf.variable_scope('assembler', reuse=reuse):

            if config.assembler_biattn_w_first_doc:
                assert config.assembler_merge_query_st, (
                    "If biattn with first doc, then has to merge query information in another way."
                )

                for hop in range(config.num_hops - 1):
                    doc_indices = model.mac_rnn_cell.doc_indices_history[hop +
                                                                         1]
                    doc2 = tf.gather_nd(k, doc_indices)
                    doc2_mask = tf.cast(
                        tf.gather_nd(tf.cast(c_mask, 'float32'), doc_indices),
                        'bool')
                    #doc2_first_sent_len = tf.gather_nd(x_sents_len[:, :, 0], doc_indices)
                    selected_sent_indices = tf.concat([
                        doc_indices,
                        tf.expand_dims(self.selected_sent_ids[:, hop + 1],
                                       axis=1)
                    ],
                                                      axis=-1)
                    self.selected_sent_indices = selected_sent_indices
                    doc2_selected_sent_end = tf.gather_nd(
                        x_acc_sents_len, selected_sent_indices)
                    doc2_selected_sent_start = tf.gather_nd(
                        zero_prepend_x_acc_sents_len, selected_sent_indices)
                    self.doc2_selected_sent_start = doc2_selected_sent_start
                    self.doc2_selected_sent_end = doc2_selected_sent_end
                    self.doc_lens = []
                    self.prev_doc_lens = []
                    self.new_doc_lens = []
                    self.padding_lens = []
                    if hop == 0:
                        self.concat_selected_doc = []
                        for i in range(config.batch_size):
                            new_doc = tf.slice(
                                doc2[i], [doc2_selected_sent_start[i], 0], [
                                    doc2_selected_sent_end[i] -
                                    doc2_selected_sent_start[i], -1
                                ])
                            self.concat_selected_doc.append(new_doc)
                        self.concat_selected_doc_len = doc2_selected_sent_end - doc2_selected_sent_start
                    else:
                        for i in range(config.batch_size):
                            prev_doc = tf.slice(
                                self.concat_selected_doc[i], [0, 0],
                                [self.concat_selected_doc_len[i], -1])
                            new_doc = tf.slice(
                                doc2[i], [doc2_selected_sent_start[i], 0], [
                                    doc2_selected_sent_end[i] -
                                    doc2_selected_sent_start[i], -1
                                ])
                            padding_len = tf.reduce_max(
                                self.concat_selected_doc_len +
                                doc2_selected_sent_end -
                                doc2_selected_sent_start
                            ) - self.concat_selected_doc_len[i] - (
                                doc2_selected_sent_end[i] -
                                doc2_selected_sent_start[i])
                            padding = tf.zeros([padding_len, context_dim])
                            self.concat_selected_doc[i] = tf.concat(
                                [prev_doc, new_doc, padding], axis=0)
                            self.doc_lens.append(
                                tf.shape(
                                    tf.concat([prev_doc, new_doc, padding],
                                              axis=0))[0])
                            self.prev_doc_lens.append(tf.shape(prev_doc)[0])
                            self.new_doc_lens.append(tf.shape(new_doc)[0])
                            self.padding_lens.append(padding_len)
                        self.concat_selected_doc_len += (
                            doc2_selected_sent_end - doc2_selected_sent_start)

                max_len = tf.reduce_max(self.concat_selected_doc_len)
                self.concat_selected_doc_mask = []
                for i in range(config.batch_size):
                    concat_selected_doc_mask_i = tf.concat(values=[
                        tf.ones([self.concat_selected_doc_len[i]]),
                        tf.zeros([max_len - self.concat_selected_doc_len[i]])
                    ],
                                                           axis=0)
                    self.concat_selected_doc_mask.append(
                        concat_selected_doc_mask_i)

                self.concat_selected_doc = tf.stack(self.concat_selected_doc,
                                                    axis=0)
                self.concat_selected_doc_mask = tf.cast(
                    tf.stack(self.concat_selected_doc_mask, axis=0), 'bool')

                first_doc_indices = model.mac_rnn_cell.doc_indices_history[0]
                doc1 = tf.gather_nd(k, first_doc_indices)
                doc1_mask = tf.cast(
                    tf.gather_nd(tf.cast(c_mask, 'float32'),
                                 first_doc_indices), 'bool')
                p0 = biattention_layer(self.is_train,
                                       self.concat_selected_doc,
                                       doc1,
                                       h_mask=self.concat_selected_doc_mask,
                                       u_mask=doc1_mask)
            else:
                if drop_one_doc:
                    num_docs_to_concat = config.num_hops - 1
                else:
                    num_docs_to_concat = (
                        config.num_hops + 2 if config.mac_read_strategy
                        == 'one_doc_per_it_and_repeat_for_3_hops' else
                        (config.num_hops - 1) * 2
                    ) if config.assembler_repeat_first_doc else config.num_hops
                for hop in range(num_docs_to_concat):
                    if config.assembler_repeat_first_doc:
                        if drop_one_doc:
                            raise NotImplementedError
                        if config.mac_read_strategy == 'one_doc_per_it_and_repeat_for_3_hops':
                            doc_indices = model.mac_rnn_cell.doc_indices_history[
                                0] if (
                                    hop % 4 == 1 or hop == 0
                                ) else model.mac_rnn_cell.doc_indices_history[
                                    hop - int(hop / 5)]
                        else:
                            doc_indices = model.mac_rnn_cell.doc_indices_history[
                                0] if hop % 2 == 0 else model.mac_rnn_cell.doc_indices_history[
                                    int(hop / 2) + 1]
                    else:
                        if drop_one_doc:
                            doc_indices = tf.squeeze(tf.slice(
                                tf.stack(
                                    model.mac_rnn_cell.doc_indices_history),
                                [dropped_hops[hop], 0, 0], [1, -1, -1]),
                                                     axis=0)
                        else:
                            doc_indices = model.mac_rnn_cell.doc_indices_history[
                                hop]
                    doc2 = tf.gather_nd(k, doc_indices)
                    doc2_mask = tf.cast(
                        tf.gather_nd(tf.cast(c_mask, 'float32'), doc_indices),
                        'bool')
                    #doc2_first_sent_len = tf.gather_nd(x_sents_len[:, :, 0], doc_indices)
                    if config.assembler_repeat_first_doc:
                        if config.mac_read_strategy == 'one_doc_per_it_and_repeat_for_3_hops':
                            if hop % 4 == 1 or hop == 0:
                                selected_sent_indices = tf.concat([
                                    doc_indices,
                                    tf.expand_dims(
                                        self.selected_sent_ids[:, 0], axis=1)
                                ],
                                                                  axis=-1)
                            else:
                                selected_sent_indices = tf.concat([
                                    doc_indices,
                                    tf.expand_dims(
                                        self.selected_sent_ids[:, hop -
                                                               int(hop / 5)],
                                        axis=1)
                                ],
                                                                  axis=-1)
                        else:
                            if hop % 2 == 0:
                                print("concat first doc")
                                selected_sent_indices = tf.concat([
                                    doc_indices,
                                    tf.expand_dims(
                                        self.selected_sent_ids[:, 0], axis=1)
                                ],
                                                                  axis=-1)
                            else:
                                print("concat second doc")
                                selected_sent_indices = tf.concat([
                                    doc_indices,
                                    tf.expand_dims(
                                        self.selected_sent_ids[:,
                                                               int(hop / 2) +
                                                               1],
                                        axis=1)
                                ],
                                                                  axis=-1)
                    else:
                        selected_sent_indices = tf.concat([
                            doc_indices,
                            tf.expand_dims(self.selected_sent_ids[:, hop],
                                           axis=1)
                        ],
                                                          axis=-1)
                    self.selected_sent_indices = selected_sent_indices
                    doc2_selected_sent_end = tf.gather_nd(
                        x_acc_sents_len, selected_sent_indices)
                    doc2_selected_sent_start = tf.gather_nd(
                        zero_prepend_x_acc_sents_len, selected_sent_indices)
                    self.doc2_selected_sent_start = doc2_selected_sent_start
                    self.doc2_selected_sent_end = doc2_selected_sent_end

                    if hop == 0:
                        self.doc_lens = []
                        self.prev_doc_lens = []
                        self.new_doc_lens = []
                        self.padding_lens = []
                        self.concat_selected_doc = []
                        for i in range(config.batch_size):
                            new_doc = tf.slice(
                                doc2[i], [doc2_selected_sent_start[i], 0], [
                                    doc2_selected_sent_end[i] -
                                    doc2_selected_sent_start[i], -1
                                ])
                            self.concat_selected_doc.append(new_doc)
                        self.concat_selected_doc_len = doc2_selected_sent_end - doc2_selected_sent_start
                    else:
                        for i in range(config.batch_size):
                            prev_doc = tf.slice(
                                self.concat_selected_doc[i], [0, 0],
                                [self.concat_selected_doc_len[i], -1])
                            new_doc = tf.slice(
                                doc2[i], [doc2_selected_sent_start[i], 0], [
                                    doc2_selected_sent_end[i] -
                                    doc2_selected_sent_start[i], -1
                                ])
                            padding_len = tf.reduce_max(
                                self.concat_selected_doc_len +
                                doc2_selected_sent_end -
                                doc2_selected_sent_start
                            ) - self.concat_selected_doc_len[i] - (
                                doc2_selected_sent_end[i] -
                                doc2_selected_sent_start[i])
                            padding = tf.zeros([padding_len, context_dim])
                            self.concat_selected_doc[i] = tf.concat(
                                [prev_doc, new_doc, padding], axis=0)
                            self.doc_lens.append(
                                tf.shape(
                                    tf.concat([prev_doc, new_doc, padding],
                                              axis=0))[0])
                            self.prev_doc_lens.append(tf.shape(prev_doc)[0])
                            self.new_doc_lens.append(tf.shape(new_doc)[0])
                            self.padding_lens.append(padding_len)
                        self.concat_selected_doc_len += (
                            doc2_selected_sent_end - doc2_selected_sent_start)

                max_len = tf.reduce_max(self.concat_selected_doc_len)
                self.concat_selected_doc_mask = []
                for i in range(config.batch_size):
                    concat_selected_doc_mask_i = tf.concat(values=[
                        tf.ones([self.concat_selected_doc_len[i]]),
                        tf.zeros([max_len - self.concat_selected_doc_len[i]])
                    ],
                                                           axis=0)
                    self.concat_selected_doc_mask.append(
                        concat_selected_doc_mask_i)

                self.concat_selected_doc = tf.stack(self.concat_selected_doc,
                                                    axis=0)
                self.concat_selected_doc_mask = tf.cast(
                    tf.stack(self.concat_selected_doc_mask, axis=0), 'bool')
                p0 = biattention_layer(self.is_train,
                                       self.concat_selected_doc,
                                       query,
                                       h_mask=self.concat_selected_doc_mask,
                                       u_mask=q_mask)

            p0 = tf.squeeze(p0, axis=1)

            if config.assembler_bidaf_layer > 1:
                with tf.variable_scope("layer_1"):
                    cell_fw = BasicLSTMCell(40, state_is_tuple=True)
                    cell_bw = BasicLSTMCell(40, state_is_tuple=True)
                    cell_fw = SwitchableDropoutWrapper(
                        cell_fw,
                        self.is_train,
                        input_keep_prob=config.input_keep_prob)
                    cell_bw = SwitchableDropoutWrapper(
                        cell_bw,
                        self.is_train,
                        input_keep_prob=config.input_keep_prob)

                    (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                        cell_fw,
                        cell_bw,
                        p0,
                        self.concat_selected_doc_len,
                        dtype='float')
                    x = tf.concat(axis=2, values=[fw_h, bw_h])

                with tf.variable_scope("layer_2"):
                    cell2_fw = BasicLSTMCell(40, state_is_tuple=True)
                    cell2_bw = BasicLSTMCell(40, state_is_tuple=True)
                    cell2_fw = SwitchableDropoutWrapper(
                        cell2_fw,
                        self.is_train,
                        input_keep_prob=config.input_keep_prob)
                    cell2_bw = SwitchableDropoutWrapper(
                        cell2_bw,
                        self.is_train,
                        input_keep_prob=config.input_keep_prob)

                    (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                        cell2_fw,
                        cell2_bw,
                        x,
                        self.concat_selected_doc_len,
                        dtype='float')
                    x = tf.concat(axis=2, values=[fw_h, bw_h])
            else:
                cell_fw = BasicLSTMCell(40, state_is_tuple=True)
                cell_bw = BasicLSTMCell(40, state_is_tuple=True)
                cell_fw = SwitchableDropoutWrapper(
                    cell_fw,
                    self.is_train,
                    input_keep_prob=config.input_keep_prob)
                cell_bw = SwitchableDropoutWrapper(
                    cell_bw,
                    self.is_train,
                    input_keep_prob=config.input_keep_prob)

                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell_fw,
                    cell_bw,
                    p0,
                    self.concat_selected_doc_len,
                    dtype='float')
                x = tf.concat(axis=2, values=[fw_h, bw_h])

            logits = linear_logits([x],
                                   True,
                                   input_keep_prob=config.input_keep_prob,
                                   mask=self.concat_selected_doc_mask,
                                   is_train=self.is_train,
                                   scope='a_state_logits')
            probs = tf.nn.softmax(logits)
            new_ans = tf.einsum('ijk,ij->ik', self.concat_selected_doc, probs)
            if config.assembler_merge_query_st:
                W_c = tf.get_variable('W_c', [40, 40])
                b_c = tf.get_variable('b_c', [40])
                c_proj = tf.matmul(query_st, W_c) + b_c

                W1 = tf.get_variable('W1', [3 * 40, 2 * 40])
                b1 = tf.get_variable('b1', [2 * 40])
                W2 = tf.get_variable('W2', [2 * 40, 40])
                b2 = tf.get_variable('b2', [40])
                concat_in = tf.concat(
                    axis=-1, values=[new_ans, c_proj, new_ans * c_proj])
                a_state = tf.matmul(tf.nn.relu(tf.matmul(concat_in, W1) + b1),
                                    W2) + b2
            else:
                a_state = new_ans
            g1 = tf.expand_dims(self.output_unit(cand_emb, a_state), axis=1)
            self.g1 = g1
            self.cand_mask = cand_mask
            self.cand_emb = cand_emb
            logits = linear_logits([g1],
                                   True,
                                   is_train=self.is_train,
                                   input_keep_prob=config.input_keep_prob,
                                   mask=cand_mask,
                                   scope='g1_logits')

            JX = tf.shape(g1)[2]
            self.g1_shape = tf.shape(g1)
            flat_logits = tf.reshape(logits, [config.batch_size, JX])
            flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
            yp = tf.reshape(flat_yp, [config.batch_size, 1, JX])
            self.logits = flat_logits
            self.yp = yp
Пример #15
0
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat",
                                                   shape=[VC, dc],
                                                   dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    filter_sizes = list(
                        map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="xx")
                        else:
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(
                            0, [word_emb_mat, self.new_emb_mat])

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat,
                                                self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:
                    xx = tf.concat(3, [xx, Ax])  # [N, M, JX, di]
                    qq = tf.concat(2, [qq, Aq])  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq

        cell = BasicLSTMCell(d, state_is_tuple=True)
        d_cell = SwitchableDropoutWrapper(
            cell, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             d_cell,
                                             d_cell,
                                             qq,
                                             q_len,
                                             dtype='float',
                                             scope='u1')  # [N, J, d], [N, d]
            u = tf.concat(2, [fw_u, bw_u])
            if config.two_prepro_layers:
                (fw_u, bw_u), ((_, fw_u_f),
                               (_, bw_u_f)) = bidirectional_dynamic_rnn(
                                   d_cell,
                                   d_cell,
                                   u,
                                   q_len,
                                   dtype='float',
                                   scope='u2')  # [N, J, d], [N, d]
                u = tf.concat(2, [fw_u, bw_u])
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='u1')  # [N, M, JX, 2d]
                h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]
                if config.two_prepro_layers:
                    (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                        cell, cell, h, x_len, dtype='float',
                        scope='u2')  # [N, M, JX, 2d]
                    h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]

            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell, cell, xx, x_len, dtype='float',
                    scope='h1')  # [N, M, JX, 2d]
                h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]
                if config.two_prepro_layers:
                    (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                        cell, cell, h, x_len, dtype='float',
                        scope='h2')  # [N, M, JX, 2d]
                    h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]),
                               [N * M, JQ, 2 * d])
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell = AttentionCell(
                    cell,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                p0 = attention_layer(config,
                                     self.is_train,
                                     h,
                                     u,
                                     h_mask=self.x_mask,
                                     u_mask=self.q_mask,
                                     scope="p0",
                                     tensor_dict=self.tensor_dict)
                first_cell = d_cell
            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                first_cell, first_cell, p0, x_len, dtype='float',
                scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat(3, [fw_g0, bw_g0])
            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(
                first_cell, first_cell, g0, x_len, dtype='float',
                scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat(3, [fw_g1, bw_g1])

            if config.late:
                (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(
                    d_cell,
                    d_cell,
                    tf.concat(3, [g1, p0]),
                    x_len,
                    dtype='float',
                    scope='g2')  # [N, M, JX, 2d]
                g2 = tf.concat(3, [fw_g2, bw_g2])
                # logits2 = u_logits(config, self.is_train, tf.concat(3, [g1, a1i]), u, h_mask=self.x_mask, u_mask=self.q_mask, scope="logits2")

                logits = get_logits([g1, g2, p0],
                                    d,
                                    True,
                                    wd=config.wd,
                                    input_keep_prob=config.input_keep_prob,
                                    mask=self.x_mask,
                                    is_train=self.is_train,
                                    func=config.answer_func,
                                    scope='logits1')

                if config.feed_gt:
                    logy = tf.log(tf.cast(self.y, 'float') + VERY_SMALL_NUMBER)
                    logits = tf.cond(self.is_train, lambda: logy,
                                     lambda: logits)
                if config.feed_hard:
                    hard_yp = tf.argmax(tf.reshape(logits, [N, M * JX]), 1)
                    hard_logits = tf.reshape(tf.one_hot(hard_yp, M * JX),
                                             [N, M, JX])  # [N, M, JX]
                    logits = tf.cond(self.is_train, lambda: logits,
                                     lambda: hard_logits)

                flat_logits = tf.reshape(logits, [-1, M * JX])
                flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
                yp = tf.reshape(flat_yp, [-1, M, JX])

                logits2 = get_logits([g1, g2, p0],
                                     d,
                                     True,
                                     wd=config.wd,
                                     input_keep_prob=config.input_keep_prob,
                                     mask=self.x_mask,
                                     is_train=self.is_train,
                                     func=config.answer_func,
                                     scope='logits2')

                flat_logits2 = tf.reshape(logits2, [-1, M * JX])
                flat_yp2 = tf.nn.softmax(flat_logits2)
                yp2 = tf.reshape(flat_yp2, [-1, M, JX])
            else:
                logits = get_logits([g1, p0],
                                    d,
                                    True,
                                    wd=config.wd,
                                    input_keep_prob=config.input_keep_prob,
                                    mask=self.x_mask,
                                    is_train=self.is_train,
                                    func=config.answer_func,
                                    scope='logits1')
                a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]),
                              tf.reshape(logits, [N, M * JX]))

                if config.feed_gt:
                    logy = tf.log(tf.cast(self.y, 'float') + VERY_SMALL_NUMBER)
                    logits = tf.cond(self.is_train, lambda: logy,
                                     lambda: logits)
                if config.feed_hard:
                    hard_yp = tf.argmax(tf.reshape(logits, [N, M * JX]), 1)
                    hard_logits = tf.reshape(tf.one_hot(hard_yp, M * JX),
                                             [N, M, JX])  # [N, M, JX]
                    logits = tf.cond(self.is_train, lambda: logits,
                                     lambda: hard_logits)

                flat_logits = tf.reshape(logits, [-1, M * JX])
                flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
                yp = tf.reshape(flat_yp, [-1, M, JX])

                a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1),
                              [1, M, JX, 1])
                yp_aug = tf.expand_dims(yp, -1)
                g1yp = g1 * yp_aug
                if config.prev_mode == 'a':
                    prev = a1i
                elif config.prev_mode == 'y':
                    prev = yp_aug
                elif config.prev_mode == 'gy':
                    prev = g1yp
                else:
                    raise Exception()
                (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(
                    d_cell,
                    d_cell,
                    tf.concat(3, [p0, g1, prev, g1 * prev]),
                    x_len,
                    dtype='float',
                    scope='g2')  # [N, M, JX, 2d]
                g2 = tf.concat(3, [fw_g2, bw_g2])
                # logits2 = u_logits(config, self.is_train, tf.concat(3, [g1, a1i]), u, h_mask=self.x_mask, u_mask=self.q_mask, scope="logits2")
                logits2 = get_logits([g2, p0],
                                     d,
                                     True,
                                     wd=config.wd,
                                     input_keep_prob=config.input_keep_prob,
                                     mask=self.x_mask,
                                     is_train=self.is_train,
                                     func=config.answer_func,
                                     scope='logits2')

                flat_logits2 = tf.reshape(logits2, [-1, M * JX])
                flat_yp2 = tf.nn.softmax(flat_logits2)
                yp2 = tf.reshape(flat_yp2, [-1, M, JX])

            self.tensor_dict['g1'] = g1
            self.tensor_dict['g2'] = g2

            self.logits = flat_logits
            self.logits2 = flat_logits2
            self.yp = yp
            self.yp2 = yp2
Пример #16
0
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.hidden_size, \
            config.max_word_size
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):
            print('word embedding')
            # if config.use_char_emb:
            #     with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
            #         char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float')

            #     with tf.variable_scope("char"):
            #         Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx)  # [N, M, JX, W, dc]
            #         Acq = tf.nn.embedding_lookup(char_emb_mat, self.cq)  # [N, JQ, W, dc]
            #         Acx = tf.reshape(Acx, [-1, JX, W, dc])
            #         Acq = tf.reshape(Acq, [-1, JQ, W, dc])

            #         filter_sizes = list(map(int, config.out_channel_dims.split(',')))
            #         heights = list(map(int, config.filter_heights.split(',')))
            #         assert sum(filter_sizes) == dco, (filter_sizes, dco)
            #         with tf.variable_scope("conv"):
            #             xx = multi_conv1d(Acx, filter_sizes, heights, "VALID",  self.is_train, config.keep_prob, scope="xx")
            #             if config.share_cnn_weights:
            #                 tf.get_variable_scope().reuse_variables()
            #                 qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="xx")
            #             else:
            #                 qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="qq")
            #             xx = tf.reshape(xx, [-1, M, JX, dco])
            #             qq = tf.reshape(qq, [-1, JQ, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/gpu:7"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw], initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, dw], dtype='float')
                    # if config.use_glove_for_unk:
                    #     word_emb_mat = tf.concat(0, [word_emb_mat])
                    print(word_emb_mat)

                with tf.name_scope("word"):
                    print('embedding lookup')
                    Ax = tf.nn.embedding_lookup(word_emb_mat, self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat, self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                    print('embedding lookup ready')
                # if config.use_char_emb:
                #     xx = tf.concat(3, [xx, Ax])  # [N, M, JX, di]
                #     qq = tf.concat(2, [qq, Aq])  # [N, JQ, di]
                # else:
                xx = Ax
                qq = Aq

        # highway network
        #if config.highway:
        #    with tf.variable_scope("highway"):
        #        xx = highway_network(xx, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)
        #        tf.get_variable_scope().reuse_variables()
        #        qq = highway_network(qq, config.highway_num_layers, True, wd=config.wd, is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq
        
        print('context emmbedding')
        cell = BasicLSTMCell(d, state_is_tuple=True)
        d_cell = SwitchableDropoutWrapper(cell, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]
        print('prepro')
        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_, bw_u_f)) = bidirectional_dynamic_rnn(d_cell, d_cell, qq, q_len, dtype='float32', scope='u1')  # [N, J, d], [N, d]
            u = tf.concat(2, [fw_u, bw_u])
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell, cell, xx, x_len, dtype='float', scope='u1')  # [N, M, JX, 2d]
                h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell, cell, xx, x_len, dtype='float', scope='h1')  # [N, M, JX, 2d]
                h = tf.concat(3, [fw_h, bw_h])  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h 
        print('main pro')
        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]), [N * M, JQ, 2 * d])
                q_mask = tf.reshape(tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]), [N * M, JQ])
                first_cell = AttentionCell(cell, u, mask=q_mask, mapper='sim',
                                           input_keep_prob=self.config.input_keep_prob, is_train=self.is_train)
            else:
                p0 = attention_layer(config, self.is_train, h, u, h_mask=self.x_mask, u_mask=self.q_mask, scope="p0", tensor_dict=self.tensor_dict)
                first_cell = d_cell

            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(first_cell, first_cell, p0, x_len, dtype='float', scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat(3, [fw_g0, bw_g0])
            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(first_cell, first_cell, g0, x_len, dtype='float', scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat(3, [fw_g1, bw_g1])

            logits = get_logits([g1, p0], [config.batch_size,config.max_num_sents] , True, wd=config.wd, input_keep_prob=config.input_keep_prob,
                                mask=self.x_mask, is_train=self.is_train, func='sigmoid', scope='logits1')
            # a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]), tf.reshape(logits, [N, M * JX]))
            # a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1), [1, M, JX, 1])

            # (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(d_cell, d_cell, tf.concat(3, [p0, g1, a1i, g1 * a1i]),
            #                                               x_len, dtype='float', scope='g2')  # [N, M, JX, 2d]
            # g2 = tf.concat(3, [fw_g2, bw_g2])
            # logits2 = get_logits([g2, p0], d, True, wd=config.wd, input_keep_prob=config.input_keep_prob,
            #                      mask=self.x_mask,
            #                      is_train=self.is_train, func=config.answer_func, scope='logits2')

            # flat_logits = tf.reshape(logits, [-1, M * JX])
            # flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
            # yp = tf.reshape(flat_yp, [-1, M, JX])
            yp = tf.greater(0.5, logits)
            # flat_logits2 = tf.reshape(logits2, [-1, M * JX])
            # flat_yp2 = tf.nn.softmax(flat_logits2)
            # yp2 = tf.reshape(flat_yp2, [-1, M, JX])

            self.tensor_dict['g1'] = g1
            # self.tensor_dict['g2'] = g2

            self.logits = logits
            # self.logits2 = flat_logits2
            self.yp = yp
Пример #17
0
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size,  config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        JQ = JX
        print('VC:{}  NEW_EMB:{}'.format(VW, self.new_emb_mat.get_shape()))
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat",
                                                   shape=[VC, dc],
                                                   dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    filter_sizes = list(
                        map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="xx")

                        else:
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(
                            axis=0, values=[word_emb_mat, self.new_emb_mat])

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat,
                                                self.q)  # [N, JQ, d]

                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:
                    xx = tf.concat(axis=3, values=[xx, Ax])  # [N, M, JX, di]
                    qq = tf.concat(axis=2, values=[qq, Aq])  # [N, JQ, di]

                else:
                    xx = Ax
                    qq = Aq
                    xx = tf.reshape(xx, [-1, M, JX, d])
                    qq = tf.reshape(qq, [-1, JQ, d])
            if config.use_pos_emb:
                with tf.variable_scope("pos_onehot"), tf.device("/cpu:0"):
                    pos_x = tf.one_hot(
                        self.x_pos, depth=config.pos_tag_num)  # [N,M,JX,depth]
                    pos_q = tf.one_hot(
                        self.q_pos, depth=config.pos_tag_num)  # [N,JQ,depth]
                    xx = tf.concat(axis=3, values=[xx,
                                                   pos_x])  # [N, M, JX, di]
                    qq = tf.concat(axis=2, values=[qq, pos_q])
            if config.use_sem_emb:
                with tf.variable_scope("sem_onehot"), tf.device("/cpu:0"):
                    sem_x = tf.one_hot(self.x_sem, depth=3)  # [N,M,JX,3]
                    sem_q = tf.one_hot(self.q_sem, depth=3)  # [N,JQ,3]
                    xx = tf.concat(axis=3, values=[xx, sem_x])
                    qq = tf.concat(axis=2, values=[qq, sem_q])
            if config.use_neg_emb:
                with tf.variable_scope("neg_onehot"), tf.device("/cpu:0"):
                    neg_x = tf.one_hot(self.x_neg, depth=2)  # [N,M,JX,2]
                    neg_q = tf.one_hot(self.q_neg, depth=2)  # [N,JQ,2]
                    xx = tf.concat(axis=3, values=[xx, neg_x])
                    qq = tf.concat(axis=2, values=[qq, neg_q])

        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq

        cell_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell_fw = SwitchableDropoutWrapper(
            cell_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell_bw = SwitchableDropoutWrapper(
            cell_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell_fw2 = BasicLSTMCell(d, state_is_tuple=True)
        cell_bw2 = BasicLSTMCell(d, state_is_tuple=True)
        d_cell_fw2 = SwitchableDropoutWrapper(
            cell_fw2, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell_bw2 = SwitchableDropoutWrapper(
            cell_bw2, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]
        if config.lstm:
            with tf.variable_scope("prepro"):
                (fw_u, bw_u), ((_, fw_u_f),
                               (_, bw_u_f)) = bidirectional_dynamic_rnn(
                                   d_cell_fw,
                                   d_cell_bw,
                                   qq,
                                   q_len,
                                   dtype='float',
                                   scope='u1')  # [N, J, d], [N, d]
                print('fw_u_f hsape:{}'.format(fw_u_f.get_shape()))
                u = tf.concat(axis=2, values=[fw_u, bw_u])  #[N,JQ,2d]
                if config.share_lstm_weights:
                    tf.get_variable_scope().reuse_variables()
                    (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                        cell_fw, cell_bw, xx, x_len, dtype='float',
                        scope='u1')  # [N, M, JX, 2d]
                    h = tf.concat(axis=3, values=[fw_h,
                                                  bw_h])  # [N, M, JX, 2d]
                    print('fw_u_f nn hsape:{}'.format(fw_u_f.get_shape()))
                else:
                    (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                        cell_fw, cell_bw, xx, x_len, dtype='float',
                        scope='h1')  # [N, M, JX, 2d]
                    h = tf.concat(axis=3, values=[fw_h,
                                                  bw_h])  # [N, M, JX, 2d]
                self.tensor_dict['u'] = u
                self.tensor_dict['h'] = h
        else:
            h = xx
            u = qq
        h1 = h[:, 0, :, :]
        h2 = h[:, 1, :, :]
        h3 = h[:, 2, :, :]
        h4 = h[:, 3, :, :]

        n_1 = tf.reshape(self.x_mask[:, 0, :], [N, JX])
        n_2 = tf.reshape(self.x_mask[:, 1, :], [N, JX])
        n_3 = tf.reshape(self.x_mask[:, 2, :], [N, JX])
        n_4 = tf.reshape(self.x_mask[:, 3, :], [N, JX])

        if config.self_attention:
            with tf.variable_scope("h_self_weight"):

                print(h.get_shape())
                for i in range(2):
                    with tf.variable_scope("self-attention"):

                        h1 = self_attention_layer(
                            config,
                            self.is_train,
                            h1,
                            p_mask=tf.expand_dims(n_1, -1),
                            scope="{}_layer_self_att_enc_e".format(
                                i))  # [N, len, dim]
                        tf.get_variable_scope().reuse_variables()
                        h2 = self_attention_layer(
                            config,
                            self.is_train,
                            h2,
                            p_mask=tf.expand_dims(n_2, -1),
                            scope="{}_layer_self_att_enc_e".format(i))
                        tf.get_variable_scope().reuse_variables()
                        h3 = self_attention_layer(
                            config,
                            self.is_train,
                            h3,
                            p_mask=tf.expand_dims(n_3, -1),
                            scope="{}_layer_self_att_enc_e".format(i))
                        tf.get_variable_scope().reuse_variables()
                        h4 = self_attention_layer(
                            config,
                            self.is_train,
                            h4,
                            p_mask=tf.expand_dims(n_4, -1),
                            scope="{}_layer_self_att_enc_e".format(i))
                    with tf.variable_scope("self-attention"):
                        u = self_attention_layer(
                            config,
                            self.is_train,
                            u,
                            p_mask=tf.expand_dims(self.q_mask, -1),
                            scope="{}_layer_self_att_enc_p".format(i))
        if config.plot_encoder == "concate":
            h = tf.concat([h1, h2, h3, h4], axis=1)
            print("h concate shape".format(h.get_shape()))
            n_n = tf.concat([n_1, n_2, n_3, n_4], axis=1)
        elif config.plot_encoder == "sum":
            h1 = tf.expand_dims(h1, axis=1)
            h2 = tf.expand_dims(h2, axis=1)
            h3 = tf.expand_dims(h3, axis=1)
            h4 = tf.expand_dims(h4, axis=1)
            h = tf.concat([h1, h2, h3, h4], axis=1)

            h = tf.reduce_sum(h, axis=1)
            print("h sum shape".format(h.get_shape()))
        elif config.plot_encoder == "lstm":
            # h1 = tf.reduce_sum(h1, axis=1)
            h1 = tf.expand_dims(tf.reduce_sum(h1, axis=-1), axis=1)
            h2 = tf.expand_dims(tf.reduce_sum(h2, axis=-1), axis=1)
            h3 = tf.expand_dims(tf.reduce_sum(h3, axis=-1), axis=1)
            h4 = tf.expand_dims(tf.reduce_sum(h4, axis=-1), axis=1)
            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             d_cell_fw2,
                                             d_cell_bw2,
                                             tf.concat([h1, h2, h3, h4],
                                                       axis=1),
                                             dtype='float',
                                             scope='1')  # [N, J, d], [N, d]
            print('fw_u_f hsape:{}'.format(fw_u_f.get_shape()))
            h = tf.concat(axis=2, values=[fw_u, bw_u])  # [N,JQ,2d]
            u = tf.expand_dims(tf.reduce_sum(u, axis=-1), axis=1)
            tf.get_variable_scope().reuse_variables()
            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             d_cell_fw2,
                                             d_cell_bw2,
                                             tf.concat([u], axis=1),
                                             dtype='float',
                                             scope='1')  # [N, J, d], [N, d]
            print('fw_u_f hsape:{}'.format(fw_u_f.get_shape()))
            u = tf.concat(axis=2, values=[fw_u, bw_u])  # [N,JQ,2d]

        if config.interact:
            with tf.variable_scope("interact"):

                def get_attention(h, u, m):
                    JX = tf.shape(h)[1]
                    JQ = tf.shape(u)[1]
                    h = tf.expand_dims(h, 2)
                    u = tf.expand_dims(u, 1)
                    h = tf.tile(h, [1, 1, JQ, 1])
                    u = tf.tile(u, [1, JX, 1, 1])
                    attention = h * u  # N,JX,JQ,2d

                    return attention

                if config.plot_encoder == "concate":
                    attention = get_attention(h, u, M)
                else:
                    attention = get_attention(h, u, 1)

            with tf.variable_scope('conv_dense'):
                if config.plot_encoder == "concate":
                    out_final = dense_net(config, attention, self.is_train)
                else:
                    out_final = tf.reshape(attention, shape=[N, -1])

        else:
            h = tf.reshape(h, [-1, M * 2 * d * JX])
            print("h shape {}".format(h.get_shape()))
            u = tf.reshape(u, [-1, 2 * d * JQ])
            print("U shape {}".format(u.get_shape()))
            attention = tf.concat([h, u], axis=-1)
            out_final = attention

            out_final = linear(tf.concat([attention], axis=-1),
                               1000,
                               True,
                               bias_start=0.0,
                               scope="logit8",
                               squeeze=False,
                               wd=config.wd,
                               input_keep_prob=config.output_keep_pro,
                               is_train=self.is_train)
            out_final = tf.nn.relu(out_final)
            out_final = linear(tf.concat([out_final], axis=-1),
                               400,
                               True,
                               bias_start=0.0,
                               scope="logit9",
                               squeeze=False,
                               wd=config.wd,
                               input_keep_prob=config.output_keep_pro,
                               is_train=self.is_train)
            out_final = tf.nn.relu(out_final)

            out_final = linear(out_final,
                               300,
                               True,
                               bias_start=0.0,
                               scope="logit3",
                               squeeze=False,
                               wd=config.wd,
                               input_keep_prob=config.output_keep_pro,
                               is_train=self.is_train)

            out_final = tf.nn.relu(out_final)

        with tf.variable_scope('conv_dense'):

            if config.hao:
                out_final = linear(tf.concat(
                    [out_final, self.haoruopeng_feature], axis=-1),
                                   200,
                                   True,
                                   bias_start=0.0,
                                   scope="logit",
                                   squeeze=False,
                                   wd=config.wd,
                                   input_keep_prob=config.output_keep_pro,
                                   is_train=self.is_train)
                out_final = tf.nn.relu(out_final)
                out_final = linear(out_final,
                                   100,
                                   True,
                                   bias_start=0.0,
                                   scope="logit3",
                                   squeeze=False,
                                   wd=config.wd,
                                   input_keep_prob=config.output_keep_pro,
                                   is_train=self.is_train)

                out_final = tf.nn.relu(out_final)
            else:
                out_final = linear(tf.concat([out_final], axis=-1),
                                   200,
                                   True,
                                   bias_start=0.0,
                                   scope="logit",
                                   squeeze=False,
                                   wd=config.wd,
                                   input_keep_prob=config.output_keep_pro,
                                   is_train=self.is_train)
                out_final = linear(out_final,
                                   100,
                                   True,
                                   bias_start=0.0,
                                   scope="logit3",
                                   squeeze=False,
                                   wd=config.wd,
                                   input_keep_prob=config.output_keep_pro,
                                   is_train=self.is_train)

                out_final = tf.nn.relu(out_final)

            self.tensor_dict['outfinal'] = out_final
            self.prediction = linear(tf.concat([out_final], axis=-1),
                                     1,
                                     True,
                                     bias_start=0.0,
                                     scope="logit2",
                                     squeeze=False,
                                     wd=config.wd,
                                     input_keep_prob=config.output_keep_pro,
                                     is_train=self.is_train)
Пример #18
0
 def get_drnncell(self):
     cell = BasicLSTMCell(self.config.hidden_size, state_is_tuple=True, reuse=False)
     d_cell = SwitchableDropoutWrapper(cell, self.is_train, input_keep_prob=self.config.input_keep_prob)
     return d_cell
Пример #19
0
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W ,EW, WOW= \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.len_new_emb_mat, config.char_vocab_size, config.hidden_size, \
            config.max_word_size,config.word_vocab_size-config.vw_wo_entity_size,config.vw_wo_entity_size
        JX = tf.shape(self.x)[2]  # words
        JQ = tf.shape(self.q)[1]  # words
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat",
                                                   shape=[VC, dc],
                                                   dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    filter_sizes = list(
                        map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="xx")
                        else:
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        init_word_emb = tf.random_normal_initializer(-0.5, 0.5)
                        #entity_emb_mat = tf.get_variable("entity_emb_mat", dtype='float', shape=[EW, EW], initializer=get_initializer(config.onehot_encoded))
                        #entity_emb_out = _linear(entity_emb_mat, dw, True, bias_initializer=tf.constant_initializer(0.0))
                        #word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw], initializer=get_initializer(config.emb_mat))
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=init_word_emb)
                        #word_emb_mat = tf.concat(axis=0,values=[word_emb_mat, entity_emb_out])
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')

                    #if config.use_glove_for_unk:
                    #    word_emb_mat = tf.concat(axis=0, values=[word_emb_mat, self.new_emb_mat])
                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(
                        word_emb_mat, self.x
                    )  # [N, M, JX, d] i.e. [batch size, max sentences, max words, embedding size]
                    Aq = tf.nn.embedding_lookup(
                        word_emb_mat, self.q
                    )  # [N, JQ, d] i.e. [batch size, max words, embedding size]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:
                    xx = tf.concat(axis=3, values=[xx, Ax])  # [N, M, JX, di]
                    qq = tf.concat(axis=2, values=[qq, Aq])  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq
        #xx = tf.Print(xx,[tf.shape(xx),xx],message="DHRUV xx=",summarize=20)
        cell_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell_fw = SwitchableDropoutWrapper(
            cell_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell_bw = SwitchableDropoutWrapper(
            cell_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell2_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell2_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell2_fw = SwitchableDropoutWrapper(
            cell2_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell2_bw = SwitchableDropoutWrapper(
            cell2_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell3_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell3_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell3_fw = SwitchableDropoutWrapper(
            cell3_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell3_bw = SwitchableDropoutWrapper(
            cell3_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell4_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell4_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell4_fw = SwitchableDropoutWrapper(
            cell4_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell4_bw = SwitchableDropoutWrapper(
            cell4_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N,M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             d_cell_fw,
                                             d_cell_bw,
                                             qq,
                                             q_len,
                                             dtype='float',
                                             scope='u1')  # [N, J, d], [N, d]
            u = tf.concat(axis=2, values=[fw_u, bw_u])
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), (fw_s, bw_s) = bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, xx, x_len, dtype='float',
                    scope='u1')  # [N, M, JX, 2d]
                h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]

            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, xx, x_len, dtype='float',
                    scope='h1')  # [N, M, JX, 2d]
                h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        with tf.variable_scope("main"):
            if config.dynamic_att:  # not true
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]),
                               [N * M, JQ, 2 * d])
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell_fw = AttentionCell(
                    cell2_fw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                first_cell_bw = AttentionCell(
                    cell2_bw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                second_cell_fw = AttentionCell(
                    cell3_fw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                second_cell_bw = AttentionCell(
                    cell3_bw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                p0 = attention_layer(
                    config,
                    self.is_train,
                    h,
                    u,
                    h_mask=self.x_mask,
                    u_mask=self.q_mask,
                    scope="p0",
                    tensor_dict=self.tensor_dict)  # p0 seems to be G in paper
                first_cell_fw = d_cell2_fw
                second_cell_fw = d_cell3_fw
                first_cell_bw = d_cell2_bw
                second_cell_bw = d_cell3_bw

            #p1 = tf.reshape(p0,[N , M*JX, 8*d])
            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                first_cell_fw,
                first_cell_bw,
                p0,
                x_len,
                dtype='float',
                scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat(axis=3, values=[fw_g0, bw_g0])
            (fw_g1, bw_g1), (my_fw_final_state,
                             my_bw_final_state) = bidirectional_dynamic_rnn(
                                 second_cell_fw,
                                 second_cell_bw,
                                 g0,
                                 x_len,
                                 dtype='float',
                                 scope='g1')  # [N, M, JX, 2d]

            g1 = tf.concat(axis=3, values=[fw_g1,
                                           bw_g1])  # g1 seems to be M in paper
            #g1= tf.reshape(g1,[N, M , JX, 2*d]) #reshaping here again, since g1 is used ahead

            g1 = tf.Print(g1, [tf.shape(g1)],
                          message="g1 shape",
                          first_n=5,
                          summarize=200)
            p0 = tf.Print(p0, [tf.shape(p0)],
                          message="p0 shape",
                          first_n=5,
                          summarize=200)

            g11 = tf.reshape(g1, [N, -1, 2 * d])
            my_encoder_final_state_c = tf.concat(
                values=(my_fw_final_state.c, my_bw_final_state.c),
                axis=1,
                name="my_encoder_final_state_c")
            my_encoder_final_state_h = tf.concat(
                values=(my_fw_final_state.h, my_bw_final_state.h),
                axis=1,
                name="my_encoder_final_state_h")
            my_encoder_final_state = tf.contrib.rnn.LSTMStateTuple(
                c=my_encoder_final_state_c, h=my_encoder_final_state_h)

            #compute indices for finding span as the second task in multi task learning
            logits = get_logits([g1, p0],
                                d,
                                True,
                                wd=config.wd,
                                input_keep_prob=config.input_keep_prob,
                                mask=self.x_mask,
                                is_train=self.is_train,
                                func=config.answer_func,
                                scope='logits1')
            logits = tf.Print(logits, [tf.shape(logits)],
                              message="logits shape",
                              first_n=5,
                              summarize=200)
            a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]),
                          tf.reshape(logits, [N, M * JX]))
            a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1),
                          [1, M, JX, 1])

            (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(
                d_cell4_fw,
                d_cell4_bw,
                tf.concat(axis=3, values=[p0, g1, a1i, g1 * a1i]),
                x_len,
                dtype='float',
                scope='g2')  # [N, M, JX, 2d]
            g2 = tf.concat(axis=3, values=[fw_g2, bw_g2])
            logits2 = get_logits([g2, p0],
                                 d,
                                 True,
                                 wd=config.wd,
                                 input_keep_prob=config.input_keep_prob,
                                 mask=self.x_mask,
                                 is_train=self.is_train,
                                 func=config.answer_func,
                                 scope='logits2')

            flat_logits = tf.reshape(logits, [-1, M * JX])
            flat_logits = tf.Print(flat_logits,
                                   [tf.shape(flat_logits), flat_logits],
                                   message="flat_logits shape and contents",
                                   first_n=5,
                                   summarize=200)
            self.flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
            flat_logits2 = tf.reshape(logits2, [-1, M * JX])
            self.flat_yp2 = tf.nn.softmax(flat_logits2)

            tgt_vocab_size = config.len_new_emb_mat  # hparam # FIXME: Obtain embeddings differently?
            print("length is", config.len_new_emb_mat)
            nodes = d

            # Look up embedding
            decoder_emb_inp = tf.nn.embedding_lookup(
                word_emb_mat,
                self.decoder_inputs)  # [batch_size, max words, embedding_size]

            with tf.variable_scope("rnn_decoder", reuse=tf.AUTO_REUSE):
                init = tf.random_normal_initializer(0.0, 0.5)
                W_dense = tf.get_variable(name="W_dense",
                                          shape=[2 * nodes, tgt_vocab_size],
                                          dtype=tf.float32,
                                          initializer=init)
                b_dense = tf.get_variable(name="b_dense",
                                          shape=[tgt_vocab_size],
                                          dtype=tf.float32,
                                          initializer=tf.zeros_initializer)

                W_att_dec = tf.get_variable(name="W_att_dec",
                                            shape=[2 * nodes, 2 * nodes],
                                            dtype=tf.float32,
                                            initializer=init)
                W_att_enc = tf.get_variable(name="W_att_enc1",
                                            shape=[1, 1, 2 * nodes, 2 * nodes],
                                            dtype=tf.float32,
                                            initializer=init)
                v_blend = tf.get_variable(name="v_blend",
                                          shape=[1, 2 * nodes],
                                          dtype=tf.float32,
                                          initializer=init)

                pad_time_slice = tf.fill([N], 0, name='PAD')
                pad_step_embedded = tf.nn.embedding_lookup(
                    word_emb_mat, pad_time_slice)

                decoder_cell = tf.contrib.rnn.BasicLSTMCell(
                    2 * nodes, state_is_tuple=True
                )  # doesnt work without the factor of 2??
                '''Loop transition function is a mapping (time, previous_cell_output, previous_cell_state, previous_loop_state) -> 
                (elements_finished, input, cell_state, output, loop_state).
                 It is called before RNNCell to prepare its inputs and state. Everything is a Tensor except for initial call at time=0 
                 when everything is None (except time).'''
                def execute_pointer_network(attn_dist):
                    #this is to find the word in the summary, which recieved highest probability and pass it to the next step in decoder
                    index_pos = tf.argmax(attn_dist, axis=1)
                    index_pos = tf.expand_dims(index_pos, 1)
                    index_pos = tf.concat([
                        tf.reshape(tf.range(start=0, limit=N, dtype=tf.int64),
                                   [N, 1]),
                        tf.zeros([N, 1], tf.int64), index_pos
                    ],
                                          axis=1)
                    index_pos = tf.cast(tf.gather_nd(params=self.x,
                                                     indices=index_pos),
                                        dtype=tf.int64)
                    return index_pos

                def execute_normal_decoder(previous_output, W_dense, b_dense):
                    output_logits = tf.add(tf.matmul(previous_output, W_dense),
                                           b_dense)
                    return tf.argmax(output_logits, axis=1)

                def loop_fn_initial():
                    initial_elements_finished = (
                        0 >= self.target_sequence_length
                    )  # all False at the initial step
                    #initial_input = tf.concat([decoder_emb_inp[:,0], my_encoder_final_state_h], 1)
                    initial_input = decoder_emb_inp[:, 0]
                    initial_cell_state = my_encoder_final_state
                    #setting the correct shapes , as it is used to determine the emit structure
                    initial_cell_output = tf.cond(
                        self.pointer_gen,
                        lambda: tf.zeros([M * JX], tf.float32),
                        lambda: tf.zeros([2 * nodes], tf.float32))
                    initial_loop_state = None  # we don't need to pass any additional information
                    return (initial_elements_finished, initial_input,
                            initial_cell_state, initial_cell_output,
                            initial_loop_state)

                encoder_output = tf.expand_dims(g11, axis=2)

                def loop_fn_transition(time, previous_output, previous_state,
                                       previous_loop_state):
                    def get_next_input():
                        # compute Badhanau style attention
                        #performing convolution or reshaping input to (-1,2*d) and then doing matmul, is essentially the same operation
                        #see matrix_mult.py...conv2d might be faster??
                        #https://stackoverflow.com/questions/38235555/tensorflow-matmul-of-input-matrix-with-batch-data
                        encoder_features = tf.nn.conv2d(
                            encoder_output, W_att_enc, [1, 1, 1, 1], "SAME"
                        )  # shape (batch_size,max_enc_steps,1,attention_vec_size)
                        dec_portion = tf.matmul(previous_state.h, W_att_dec)
                        decoder_features = tf.expand_dims(
                            tf.expand_dims(dec_portion, 1), 1
                        )  # reshape to (batch_size, 1, 1, attention_vec_size)
                        #python broadcasting will alllow the two features to get added
                        e_not_masked = tf.reduce_sum(
                            v_blend *
                            tf.nn.tanh(encoder_features + decoder_features),
                            [2, 3])  # calculate e, (batch_size, max_enc_steps)
                        #The shape of output of a softmax is the same as the input: it just normalizes the values.
                        attn_dist = tf.nn.softmax(
                            e_not_masked)  # (batch_size, max_enc_steps)
                        attn_dist = tf.Print(attn_dist, [tf.shape(attn_dist)],
                                             message="attn_dist",
                                             first_n=5,
                                             summarize=200)

                        #Multiplying all the 2d vectors with same attn_dist values,and finally keeping 1 2d vector for every batch example
                        context_vector = tf.reduce_sum(
                            tf.reshape(attn_dist, [N, -1, 1, 1]) *
                            encoder_output,
                            [1, 2])  # shape (batch_size, attn_size).
                        context_vector = tf.reshape(context_vector,
                                                    [-1, 2 * nodes])
                        #next_input = tf.cond(self.is_train, lambda: tf.concat(
                        #    [tf.reshape(decoder_emb_inp[:, time], (N, dw)), context_vector], 1),
                        #                     lambda: tf.concat([tf.nn.embedding_lookup(word_emb_mat, prediction), context_vector], 1))
                        #output_logits = tf.add(tf.matmul(previous_output, W_dense), b_dense)
                        prediction = tf.cond(
                            self.pointer_gen,
                            lambda: execute_pointer_network(attn_dist),
                            lambda: execute_normal_decoder(
                                previous_output, W_dense, b_dense))

                        with tf.variable_scope("modified_dec_inputs",
                                               reuse=tf.AUTO_REUSE):
                            next_input = tf.cond(
                                self.is_train,
                                lambda: _linear(args=[context_vector] + [
                                    tf.reshape(decoder_emb_inp[:, time],
                                               (N, dw))
                                ],
                                                output_size=dw,
                                                bias=True),
                                lambda: _linear([context_vector] + [
                                    tf.nn.embedding_lookup(
                                        word_emb_mat, prediction)
                                ], dw, True))

                        return next_input, attn_dist

                    elements_finished = (
                        time >= self.target_sequence_length
                    )  # this operation produces boolean tensor of [batch_size]
                    # defining if corresponding sequence has ended
                    finished = tf.reduce_all(
                        elements_finished)  # -> boolean scalar
                    #input = tf.cond(finished, lambda: tf.concat([pad_step_embedded, my_encoder_final_state_h], 1),get_next_input)
                    input, attn_distribution = tf.cond(
                        finished, lambda:
                        (pad_step_embedded, tf.zeros([N, M * JX], tf.float32)),
                        get_next_input)
                    attn_distribution = tf.Print(attn_distribution,
                                                 [tf.shape(attn_distribution)],
                                                 message="attn_distribution",
                                                 first_n=5,
                                                 summarize=200)
                    state = previous_state
                    output = tf.cond(self.pointer_gen,
                                     lambda: attn_distribution,
                                     lambda: previous_output)
                    output = tf.Print(output, [tf.shape(output)],
                                      message="OUTPUT",
                                      first_n=5,
                                      summarize=200)

                    loop_state = None

                    return (elements_finished, input, state, output,
                            loop_state)

                def loop_fn(time, previous_output, previous_state,
                            previous_loop_state):
                    if previous_state is None:  # time == 0
                        assert previous_output is None and previous_state is None
                        return loop_fn_initial()
                    else:
                        return loop_fn_transition(time, previous_output,
                                                  previous_state,
                                                  previous_loop_state)

                decoder_outputs_ta, decoder_final_state, _ = tf.nn.raw_rnn(
                    decoder_cell, loop_fn)
                decoder_outputs = decoder_outputs_ta.stack()
                decoder_outputs = tf.Print(decoder_outputs,
                                           [tf.shape(decoder_outputs)],
                                           message="decoder_outputs",
                                           first_n=5,
                                           summarize=200)

                # To do output projection, we have to temporarilly flatten decoder_outputs from [max_steps, batch_size, hidden_dim] to
                #  [max_steps*batch_size, hidden_dim], as tf.matmul needs rank-2 tensors at most.
                decoder_max_steps, decoder_batch_size, decoder_dim = tf.unstack(
                    tf.shape(decoder_outputs))
                decoder_outputs_flat = tf.reshape(decoder_outputs,
                                                  (-1, decoder_dim))
                #if pointer networks, no need to pass through dense layer
                decoder_logits_flat = tf.cond(
                    self.pointer_gen,
                    lambda: decoder_outputs_flat, lambda: tf.add(
                        tf.matmul(decoder_outputs_flat, W_dense), b_dense))
                decoder_logits = tf.cond(
                    self.pointer_gen, lambda: tf.reshape(
                        decoder_logits_flat,
                        (decoder_max_steps, decoder_batch_size, decoder_dim)),
                    lambda: tf.reshape(decoder_logits_flat,
                                       (decoder_max_steps, decoder_batch_size,
                                        tgt_vocab_size)))
                decoder_logits = _transpose_batch_time(decoder_logits)
                #decoder_prediction = tf.argmax(decoder_logits, -1)

            #self.decoder_logits_train = final_outputs.rnn_output
            self.decoder_logits_train = decoder_logits
            self.index_start = flat_logits
            self.index_end = flat_logits2
Пример #20
0
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat",
                                                   shape=[VC, dc],
                                                   dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    filter_sizes = list(
                        map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="xx")
                        else:
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=get_initializer(config.emb_mat))
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(
                            axis=0, values=[word_emb_mat, self.new_emb_mat])

                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat,
                                                self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:
                    xx = tf.concat(axis=3, values=[xx, Ax])  # [N, M, JX, di]
                    qq = tf.concat(axis=2, values=[qq, Aq])  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq

        cell_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell_fw = SwitchableDropoutWrapper(
            cell_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell_bw = SwitchableDropoutWrapper(
            cell_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell2_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell2_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell2_fw = SwitchableDropoutWrapper(
            cell2_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell2_bw = SwitchableDropoutWrapper(
            cell2_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell3_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell3_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell3_fw = SwitchableDropoutWrapper(
            cell3_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell3_bw = SwitchableDropoutWrapper(
            cell3_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell4_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell4_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell4_fw = SwitchableDropoutWrapper(
            cell4_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell4_bw = SwitchableDropoutWrapper(
            cell4_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             d_cell_fw,
                                             d_cell_bw,
                                             qq,
                                             q_len,
                                             dtype='float',
                                             scope='u1')  # [N, J, d], [N, d]
            u = tf.concat(axis=2, values=[fw_u, bw_u])
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, xx, x_len, dtype='float',
                    scope='u1')  # [N, M, JX, 2d]
                h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, xx, x_len, dtype='float',
                    scope='h1')  # [N, M, JX, 2d]
                h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]),
                               [N * M, JQ, 2 * d])
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell_fw = AttentionCell(
                    cell2_fw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                first_cell_bw = AttentionCell(
                    cell2_bw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                second_cell_fw = AttentionCell(
                    cell3_fw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                second_cell_bw = AttentionCell(
                    cell3_bw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                p0 = attention_layer(config,
                                     self.is_train,
                                     h,
                                     u,
                                     h_mask=self.x_mask,
                                     u_mask=self.q_mask,
                                     scope="p0",
                                     tensor_dict=self.tensor_dict)
                first_cell_fw = d_cell2_fw
                second_cell_fw = d_cell3_fw
                first_cell_bw = d_cell2_bw
                second_cell_bw = d_cell3_bw

            config.ruminating_layer = True

            if config.ruminating_layer:
                '''
                RUMINATING LAYER
                '''

                with tf.variable_scope('rum_layer'):
                    print('-' * 5 + "RUMINATING LAYER" + '-' * 5)
                    print("Context", xx)  #[N,M,JX,2d]
                    print("Question", qq)  #[N,JQ,2d]
                    print("p0", p0)  #[N,M,JX,8D]

                    sum_cell = BasicLSTMCell(d, state_is_tuple=True)
                    (s_f,
                     s_b), _ = bidirectional_dynamic_rnn(sum_cell,
                                                         sum_cell,
                                                         p0,
                                                         x_len,
                                                         dtype=tf.float32,
                                                         scope="sum_layer")

                    batch_lens = (tf.reshape(x_len, [N * M]))
                    s_f = tf.reshape(s_f, [N * M, JX, d])
                    s_b = tf.reshape(s_b, [N * M, JX, d])

                    s_fout = tf.reshape(extract_axis_1(s_f, batch_lens),
                                        [N, M, d])
                    s_bout = tf.reshape(extract_axis_1(s_b, batch_lens),
                                        [N, M, d])

                    s = tf.concat(axis=2, values=[s_fout, s_bout])  # [N,M,2d]

                    print("summarization layer", s)

                    print('-' * 5 + "QUESTION RUMINATE LAYER" + '-' * 5)

                    S_Q = tf.tile(tf.expand_dims(s, 2),
                                  [1, 1, JQ, 1])  # [N,M,JQ,2d]
                    S_cell_fw = BasicLSTMCell(d, state_is_tuple=True)
                    S_cell_bw = BasicLSTMCell(d, state_is_tuple=True)
                    (fw_hq,
                     bw_hq), _ = bidirectional_dynamic_rnn(S_cell_fw,
                                                           S_cell_bw,
                                                           S_Q,
                                                           q_len,
                                                           dtype=tf.float32,
                                                           scope="S_Q")
                    S_Q = tf.concat(axis=3, values=[fw_hq, bw_hq])
                    q_m = tf.reshape(tf.expand_dims(qq, 1), [N, M, JQ, 2 * d])

                    with tf.variable_scope("question_rum_layer"):
                        Q_hat = ruminating_layer(S_Q, q_m, N, M, JQ, d)

                    print("Q_hat", Q_hat)  #[N,M,JQ,2d]

                    print('-' * 5 + "CONTEXT RUMINATE LAYER" + '-' * 5)
                    S_C = tf.tile(tf.expand_dims(s, 2),
                                  [1, 1, JX, 1])  # [N,M,JX,2d]

                    C_cell_fw = BasicLSTMCell(d, state_is_tuple=True)
                    C_cell_bw = BasicLSTMCell(d, state_is_tuple=True)
                    (fw_h,
                     bw_h), _ = bidirectional_dynamic_rnn(C_cell_fw,
                                                          C_cell_bw,
                                                          S_C,
                                                          x_len,
                                                          dtype=tf.float32,
                                                          scope="S_C")
                    S_C = tf.concat(axis=3, values=[fw_h, bw_h])  #[N,M,JX,2d]
                    with tf.variable_scope("context_rum_layer"):
                        C_hat = ruminating_layer(S_C, xx, N, M, JX, d)

                    print("C_hat", C_hat)  #[N,M,JX,2d]

                    #Second Hop bi-Attention

                    print('-' * 5 + "SECOND HOP ATTENTION" + '-' * 5)
                    sh_aug = tf.tile(tf.expand_dims(C_hat, 3),
                                     [1, 1, 1, JQ, 1])  #[N,M,JX,2d]
                    su_aug = tf.tile(tf.expand_dims(Q_hat, 2),
                                     [1, 1, JX, 1, 1])  #[N,M,JQ,2d]

                    sh_mask_aug = tf.tile(tf.expand_dims(self.x_mask, -1),
                                          [1, 1, 1, JQ])
                    su_mask_aug = tf.tile(
                        tf.expand_dims(tf.expand_dims(self.q_mask, 1), 1),
                        [1, M, JX, 1])
                    shu_mask = sh_mask_aug & su_mask_aug
                    su_logits = get_logits([sh_aug, su_aug],
                                           None,
                                           True,
                                           wd=config.wd,
                                           mask=shu_mask,
                                           is_train=True,
                                           func=config.logit_func,
                                           scope='su_logits')
                    su_a = softsel(su_aug, su_logits)
                    sh_a = softsel(C_hat, tf.reduce_max(su_logits, 3))
                    sh_a = tf.tile(tf.expand_dims(sh_a, 2), [1, 1, JX, 1])
                    p00 = tf.concat(
                        axis=3,
                        values=[C_hat, su_a, C_hat * su_a, C_hat * sh_a])
                    print("p00", p00)  #[N,M,JX,8d]
                    p0 = p00
                    print('-' * 5 + "END RUMINATING LAYER" + '-' * 5)

            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                first_cell_fw,
                first_cell_bw,
                p0,
                x_len,
                dtype='float',
                scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat(axis=3, values=[fw_g0, bw_g0])
            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(
                second_cell_fw,
                second_cell_bw,
                g0,
                x_len,
                dtype='float',
                scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat(axis=3, values=[fw_g1, bw_g1])

            logits = get_logits([g1, p0],
                                d,
                                True,
                                wd=config.wd,
                                input_keep_prob=config.input_keep_prob,
                                mask=self.x_mask,
                                is_train=self.is_train,
                                func=config.answer_func,
                                scope='logits1')
            a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]),
                          tf.reshape(logits, [N, M * JX]))
            a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1),
                          [1, M, JX, 1])

            (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(
                d_cell4_fw,
                d_cell4_bw,
                tf.concat(axis=3, values=[p0, g1, a1i, g1 * a1i]),
                x_len,
                dtype='float',
                scope='g2')  # [N, M, JX, 2d]
            g2 = tf.concat(axis=3, values=[fw_g2, bw_g2])
            logits2 = get_logits([g2, p0],
                                 d,
                                 True,
                                 wd=config.wd,
                                 input_keep_prob=config.input_keep_prob,
                                 mask=self.x_mask,
                                 is_train=self.is_train,
                                 func=config.answer_func,
                                 scope='logits2')

            flat_logits = tf.reshape(logits, [-1, M * JX])
            flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
            flat_logits2 = tf.reshape(logits2, [-1, M * JX])
            flat_yp2 = tf.nn.softmax(flat_logits2)
            self.tensor_dict['g1'] = g1
            self.tensor_dict['g2'] = g2

            if config.na:
                na_bias = tf.get_variable("na_bias", shape=[], dtype='float')
                na_bias_tiled = tf.tile(tf.reshape(na_bias, [1, 1]),
                                        [N, 1])  # [N, 1]
                concat_flat_logits = tf.concat(
                    axis=1, values=[na_bias_tiled, flat_logits])
                concat_flat_yp = tf.nn.softmax(concat_flat_logits)
                na_prob = tf.squeeze(tf.slice(concat_flat_yp, [0, 0], [-1, 1]),
                                     [1])
                flat_yp = tf.slice(concat_flat_yp, [0, 1], [-1, -1])

                concat_flat_logits2 = tf.concat(
                    axis=1, values=[na_bias_tiled, flat_logits2])
                concat_flat_yp2 = tf.nn.softmax(concat_flat_logits2)
                na_prob2 = tf.squeeze(
                    tf.slice(concat_flat_yp2, [0, 0], [-1, 1]), [1])  # [N]
                flat_yp2 = tf.slice(concat_flat_yp2, [0, 1], [-1, -1])

                self.concat_logits = concat_flat_logits
                self.concat_logits2 = concat_flat_logits2
                self.na_prob = na_prob * na_prob2

            yp = tf.reshape(flat_yp, [-1, M, JX])
            yp2 = tf.reshape(flat_yp2, [-1, M, JX])
            wyp = tf.nn.sigmoid(logits2)

            self.logits = flat_logits
            self.logits2 = flat_logits2
            self.yp = yp
            self.yp2 = yp2
            self.wyp = wyp